diff -up chromium-48.0.2564.103/third_party/skia/src/opts/SkXfermode_opts.h.gcc6 chromium-48.0.2564.103/third_party/skia/src/opts/SkXfermode_opts.h --- chromium-48.0.2564.103/third_party/skia/src/opts/SkXfermode_opts.h.gcc6 2016-02-16 15:01:13.200131996 -0500 +++ chromium-48.0.2564.103/third_party/skia/src/opts/SkXfermode_opts.h 2016-02-17 13:31:50.216198258 -0500 @@ -109,76 +109,71 @@ XFERMODE(Lighten) { } #undef XFERMODE -// Some xfermodes use math like divide or sqrt that's best done in floats. -// We write it generically, then call it 1 or 2 pixels at a time (T == Sk4f or Sk8f). -#define XFERMODE(Name) struct Name { template T operator()(const T&, const T&); }; \ - template T Name::operator()(const T& d, const T& s) +// Some xfermodes use math like divide or sqrt that's best done in floats 1 pixel at a time. +#define XFERMODE(Name) static Sk4f SK_VECTORCALL Name(Sk4f d, Sk4f s) -static_assert(SK_A32_SHIFT == 24, ""); static inline Sk4f a_rgb(const Sk4f& a, const Sk4f& rgb) { + static_assert(SK_A32_SHIFT == 24, ""); return a * Sk4f(0,0,0,1) + rgb * Sk4f(1,1,1,0); } -static inline Sk8f a_rgb(const Sk8f& a, const Sk8f& rgb) { - // TODO: SkNx_blend<0,0,0,1,0,0,0,1>(a, rgb) to let us use _mm256_blend_ps? - return a * Sk8f(0,0,0,1,0,0,0,1) + rgb * Sk8f(1,1,1,0,1,1,1,0); +static inline Sk4f alphas(const Sk4f& f) { + return SkNx_dup(f); } -static inline Sk4f alphas(const Sk4f& f) { return SkNx_shuffle<3,3,3,3> (f); } -static inline Sk8f alphas(const Sk8f& f) { return SkNx_shuffle<3,3,3,3,7,7,7,7>(f); } XFERMODE(ColorDodge) { auto sa = alphas(s), da = alphas(d), - isa = T(1)-sa, - ida = T(1)-da; + isa = Sk4f(1)-sa, + ida = Sk4f(1)-da; auto srcover = s + d*isa, dstover = d + s*ida, - otherwise = sa * T::Min(da, (d*sa)*(sa-s).approxInvert()) + s*ida + d*isa; + otherwise = sa * Sk4f::Min(da, (d*sa)*(sa-s).approxInvert()) + s*ida + d*isa; // Order matters here, preferring d==0 over s==sa. - auto colors = (d == 0).thenElse(dstover, - (s == sa).thenElse(srcover, - otherwise)); + auto colors = (d == Sk4f(0)).thenElse(dstover, + (s == sa).thenElse(srcover, + otherwise)); return a_rgb(srcover, colors); } XFERMODE(ColorBurn) { auto sa = alphas(s), da = alphas(d), - isa = T(1)-sa, - ida = T(1)-da; + isa = Sk4f(1)-sa, + ida = Sk4f(1)-da; auto srcover = s + d*isa, dstover = d + s*ida, - otherwise = sa*(da-T::Min(da, (da-d)*sa*s.approxInvert())) + s*ida + d*isa; + otherwise = sa*(da-Sk4f::Min(da, (da-d)*sa*s.approxInvert())) + s*ida + d*isa; // Order matters here, preferring d==da over s==0. - auto colors = (d == da).thenElse(dstover, - (s == 0).thenElse(srcover, - otherwise)); + auto colors = (d == da).thenElse(dstover, + (s == Sk4f(0)).thenElse(srcover, + otherwise)); return a_rgb(srcover, colors); } XFERMODE(SoftLight) { auto sa = alphas(s), da = alphas(d), - isa = T(1)-sa, - ida = T(1)-da; + isa = Sk4f(1)-sa, + ida = Sk4f(1)-da; // Some common terms. - auto m = (da > 0).thenElse(d / da, 0), - s2 = s*2, - m4 = m*4; + auto m = (da > Sk4f(0)).thenElse(d / da, Sk4f(0)), + s2 = Sk4f(2)*s, + m4 = Sk4f(4)*m; // The logic forks three ways: // 1. dark src? // 2. light src, dark dst? // 3. light src, light dst? - auto darkSrc = d*(sa + (s2 - sa)*(T(1) - m)), // Used in case 1. - darkDst = (m4*m4 + m4)*(m - 1) + m*7, // Used in case 2. - liteDst = m.sqrt() - m, // Used in case 3. - liteSrc = d*sa + da*(s2-sa)*(d*4 <= da).thenElse(darkDst, liteDst); // Case 2 or 3? + auto darkSrc = d*(sa + (s2 - sa)*(Sk4f(1) - m)), // Used in case 1. + darkDst = (m4*m4 + m4)*(m - Sk4f(1)) + Sk4f(7)*m, // Used in case 2. + liteDst = m.sqrt() - m, // Used in case 3. + liteSrc = d*sa + da*(s2-sa)*(Sk4f(4)*d <= da).thenElse(darkDst, liteDst); // Case 2 or 3? auto alpha = s + d*isa; - auto colors = s*ida + d*isa + (s2 <= sa).thenElse(darkSrc, liteSrc); // Case 1 or 2/3? + auto colors = s*ida + d*isa + (s2 <= sa).thenElse(darkSrc, liteSrc); // Case 1 or 2/3? return a_rgb(alpha, colors); } @@ -245,52 +240,53 @@ private: typedef SkProcCoeffXfermode INHERITED; }; -template -class FloatXfermode : public SkProcCoeffXfermode { +class Sk4fXfermode : public SkProcCoeffXfermode { public: - FloatXfermode(const ProcCoeff& rec, SkXfermode::Mode mode) - : INHERITED(rec, mode) {} + typedef Sk4f (SK_VECTORCALL *ProcF)(Sk4f, Sk4f); + Sk4fXfermode(const ProcCoeff& rec, SkXfermode::Mode mode, ProcF procf) + : INHERITED(rec, mode) + , fProcF(procf) {} void xfer32(SkPMColor dst[], const SkPMColor src[], int n, const SkAlpha aa[]) const override { - BlendFn blend; - while (n >= 2) { - auto d = Sk8f::FromBytes((const uint8_t*)dst) * (1.0f/255), - s = Sk8f::FromBytes((const uint8_t*)src) * (1.0f/255), - b = blend(d, s); - if (aa) { - auto a255 = Sk8f(aa[0],aa[0],aa[0],aa[0], aa[1],aa[1],aa[1],aa[1]); - (b*a255 + d*(Sk8f(255)-a255) + 0.5).toBytes((uint8_t*)dst); - aa += 2; - } else { - (b * 255 + 0.5).toBytes((uint8_t*)dst); - } - dst += 2; - src += 2; - n -= 2; - } - if (n) { - auto d = Sk4f::FromBytes((const uint8_t*)dst) * (1.0f/255), - s = Sk4f::FromBytes((const uint8_t*)src) * (1.0f/255), - b = blend(d, s); - if (aa) { - auto a255 = Sk4f(aa[0],aa[0],aa[0],aa[0]); - (b*a255 + d*(Sk4f(255)-a255) + 0.5).toBytes((uint8_t*)dst); - aa++; - } else { - (b * 255 + 0.5).toBytes((uint8_t*)dst); - } + for (int i = 0; i < n; i++) { + dst[i] = aa ? this->xfer32(dst[i], src[i], aa[i]) + : this->xfer32(dst[i], src[i]); } } void xfer16(uint16_t dst[], const SkPMColor src[], int n, const SkAlpha aa[]) const override { for (int i = 0; i < n; i++) { - SkPMColor dst32 = SkPixel16ToPixel32(dst[i]); // Convert dst up to 8888. - this->xfer32(&dst32, src+i, 1, aa ? aa+i : nullptr); // Blend 1 pixel. - dst[i] = SkPixel32ToPixel16(dst32); // Repack dst to 565 and store. + SkPMColor dst32 = SkPixel16ToPixel32(dst[i]); + dst32 = aa ? this->xfer32(dst32, src[i], aa[i]) + : this->xfer32(dst32, src[i]); + dst[i] = SkPixel32ToPixel16(dst32); } } private: + static Sk4f Load(SkPMColor c) { + return Sk4f::FromBytes((uint8_t*)&c) * Sk4f(1.0f/255); + } + static SkPMColor Round(const Sk4f& f) { + SkPMColor c; + (f * Sk4f(255) + Sk4f(0.5f)).toBytes((uint8_t*)&c); + return c; + } + inline SkPMColor xfer32(SkPMColor dst, SkPMColor src) const { + return Round(fProcF(Load(dst), Load(src))); + } + + inline SkPMColor xfer32(SkPMColor dst, SkPMColor src, SkAlpha aa) const { + Sk4f s(Load(src)), + d(Load(dst)), + b(fProcF(d,s)); + // We do aa in full float precision before going back down to bytes, because we can! + Sk4f a = Sk4f(aa) * Sk4f(1.0f/255); + b = b*a + d*(Sk4f(1)-a); + return Round(b); + } + + ProcF fProcF; typedef SkProcCoeffXfermode INHERITED; }; @@ -327,7 +323,7 @@ static SkXfermode* create_xfermode(const #undef CASE #define CASE(Mode) \ - case SkXfermode::k##Mode##_Mode: return new FloatXfermode(rec, mode) + case SkXfermode::k##Mode##_Mode: return new Sk4fXfermode(rec, mode, &Mode) CASE(ColorDodge); CASE(ColorBurn); CASE(SoftLight);