/* * Copyright 2016 Google Inc. * * Use of this source code is governed by a BSD-style license that can be * found in the LICENSE file. */ #include <tuple> #include "Benchmark.h" #include "Resources.h" #include "SkCpu.h" #include "SkImage.h" #include "SkImage_Base.h" #include "SkNx.h" #include "SkOpts.h" #include "SkPM4fPriv.h" #include "SkString.h" #define INNER_LOOPS 10 static inline void brute_srcover_srgb_srgb_1(uint32_t* dst, uint32_t src) { auto d = Sk4f_fromS32(*dst), s = Sk4f_fromS32( src); *dst = Sk4f_toS32(s + d * (1.0f - s[3])); } static inline void srcover_srgb_srgb_1(uint32_t* dst, uint32_t src) { if (src >= 0xFF000000) { *dst = src; return; } brute_srcover_srgb_srgb_1(dst, src); } static void brute_force_srcover_srgb_srgb( uint32_t* dst, const uint32_t* const src, int ndst, const int nsrc) { while (ndst > 0) { int n = SkTMin(ndst, nsrc); for (int i = 0; i < n; i++) { brute_srcover_srgb_srgb_1(dst++, src[i]); } ndst -= n; } } static void trivial_srcover_srgb_srgb( uint32_t* dst, const uint32_t* const src, int ndst, const int nsrc) { while (ndst > 0) { int n = SkTMin(ndst, nsrc); for (int i = 0; i < n; i++) { srcover_srgb_srgb_1(dst++, src[i]); } ndst -= n; } } static void best_non_simd_srcover_srgb_srgb( uint32_t* dst, const uint32_t* const src, int ndst, const int nsrc) { uint64_t* ddst = reinterpret_cast<uint64_t*>(dst); auto srcover_srgb_srgb_2 = [](uint32_t* dst, const uint32_t* src) { srcover_srgb_srgb_1(dst++, *src++); srcover_srgb_srgb_1(dst, *src); }; while (ndst >0) { int count = SkTMin(ndst, nsrc); ndst -= count; const uint64_t* dsrc = reinterpret_cast<const uint64_t*>(src); const uint64_t* end = dsrc + (count >> 1); do { if ((~*dsrc & 0xFF000000FF000000) == 0) { do { *ddst++ = *dsrc++; } while (dsrc < end && (~*dsrc & 0xFF000000FF000000) == 0); } else if ((*dsrc & 0xFF000000FF000000) == 0) { do { dsrc++; ddst++; } while (dsrc < end && (*dsrc & 0xFF000000FF000000) == 0); } else { srcover_srgb_srgb_2(reinterpret_cast<uint32_t*>(ddst++), reinterpret_cast<const uint32_t*>(dsrc++)); } } while (dsrc < end); if ((count & 1) != 0) { uint32_t s1; memcpy(&s1, dsrc, 4); srcover_srgb_srgb_1(reinterpret_cast<uint32_t*>(ddst), s1); } } } class SrcOverVSkOptsBruteForce { public: static SkString Name() { return SkString{"VSkOptsBruteForce"}; } static void BlendN(uint32_t* dst, const uint32_t* src, int count) { brute_force_srcover_srgb_srgb(dst, src, count, count); } }; class SrcOverVSkOptsTrivial { public: static SkString Name() { return SkString{"VSkOptsTrivial"}; } static void BlendN(uint32_t* dst, const uint32_t* src, int count) { trivial_srcover_srgb_srgb(dst, src, count, count); } }; class SrcOverVSkOptsNonSimdCore { public: static SkString Name() { return SkString{"VSkOptsNonSimdCore"}; } static void BlendN(uint32_t* dst, const uint32_t* src, int count) { best_non_simd_srcover_srgb_srgb(dst, src, count, count); } }; class SrcOverVSkOptsDefault { public: static SkString Name() { return SkString{"VSkOptsDefault"}; } static void BlendN(uint32_t* dst, const uint32_t* src, int count) { SkOpts::srcover_srgb_srgb(dst, src, count, count); } }; /////////////////////////////////////////////////////////////////////////////////////////////////// template <typename Blender> class LinearSrcOverBench : public Benchmark { public: LinearSrcOverBench(const char* fileName) : fFileName(fileName) { fName = "LinearSrcOver_"; fName.append(fileName); fName.append(Blender::Name()); } protected: bool isSuitableFor(Backend backend) override { return backend == kNonRendering_Backend; } const char* onGetName() override { return fName.c_str(); } void onPreDraw(SkCanvas*) override { if (!fPixmap.addr()) { sk_sp<SkImage> image = GetResourceAsImage(fFileName.c_str()); SkBitmap bm; SkColorSpace* legacyColorSpace = nullptr; if (!as_IB(image)->getROPixels(&bm, legacyColorSpace)) { SkFAIL("Could not read resource"); } bm.peekPixels(&fPixmap); fCount = fPixmap.rowBytesAsPixels(); fDst.reset(fCount); sk_bzero(fDst.get(), fPixmap.rowBytes()); } } void onDraw(int loops, SkCanvas*) override { SkASSERT(fPixmap.colorType() == kN32_SkColorType); const int width = fPixmap.rowBytesAsPixels(); for (int i = 0; i < loops * INNER_LOOPS; ++i) { const uint32_t* src = fPixmap.addr32(); for (int y = 0; y < fPixmap.height(); y++) { Blender::BlendN(fDst.get(), src, width); src += width; } } } void onPostDraw(SkCanvas*) override { // Make sure the compiler does not optimize away the operation. volatile uint32_t v = 0; for (int i = 0; i < fCount; i++) { v ^= fDst[i]; } } private: int fCount; SkAutoTArray<uint32_t> fDst; SkString fFileName; SkString fName; SkPixmap fPixmap; typedef Benchmark INHERITED; }; #define BENCHES(fileName) \ DEF_BENCH( return new LinearSrcOverBench<SrcOverVSkOptsBruteForce>(fileName); ) \ DEF_BENCH( return new LinearSrcOverBench<SrcOverVSkOptsTrivial>(fileName); ) \ DEF_BENCH( return new LinearSrcOverBench<SrcOverVSkOptsNonSimdCore>(fileName); ) \ DEF_BENCH( return new LinearSrcOverBench<SrcOverVSkOptsDefault>(fileName); ) BENCHES("yellow_rose.png") BENCHES("baby_tux.png") BENCHES("plane.png") BENCHES("mandrill_512.png") BENCHES("iconstrip.png")