diff --git a/benchmarks/kis_composition_benchmark.h b/benchmarks/kis_composition_benchmark.h
--- a/benchmarks/kis_composition_benchmark.h
+++ b/benchmarks/kis_composition_benchmark.h
@@ -30,12 +30,18 @@
     void checkRoundingAlphaDarken_05_07();
     void checkRoundingAlphaDarken_05_10();
     void checkRoundingAlphaDarken_05_10_08();
+    void checkRoundingAlphaDarkenF32_05_03();
+    void checkRoundingAlphaDarkenF32_05_05();
+    void checkRoundingAlphaDarkenF32_05_07();
+    void checkRoundingAlphaDarkenF32_05_10();
+    void checkRoundingAlphaDarkenF32_05_10_08();
     void checkRoundingOver();
     void checkRoundingOverRgbaF32();
 
     void compareAlphaDarkenOps();
     void compareAlphaDarkenOpsNoMask();
+    void compareRgbF32AlphaDarkenOps();
 
     void compareOverOps();
     void compareOverOpsNoMask();
     void compareRgbF32OverOps();
@@ -46,6 +52,9 @@
     void testRgb8CompositeOverLegacy();
     void testRgb8CompositeOverOptimized();
 
+    void testRgbF32CompositeAlphaDarkenLegacy();
+    void testRgbF32CompositeAlphaDarkenOptimized();
+
     void testRgbF32CompositeOverLegacy();
     void testRgbF32CompositeOverOptimized();
 
diff --git a/benchmarks/kis_composition_benchmark.cpp b/benchmarks/kis_composition_benchmark.cpp
--- a/benchmarks/kis_composition_benchmark.cpp
+++ b/benchmarks/kis_composition_benchmark.cpp
@@ -355,7 +355,7 @@
         compareResult = compareTwoOpsPixels(tiles, 10);
     } else if (pixelSize == 16) {
-        compareResult = compareTwoOpsPixels(tiles, 0);
+        compareResult = compareTwoOpsPixels(tiles, 2e-7);
     } else {
         qFatal("Pixel size %i is not implemented", pixelSize);
@@ -589,6 +589,41 @@
 #endif
 }
 
+void KisCompositionBenchmark::checkRoundingAlphaDarkenF32_05_03()
+{
+#ifdef HAVE_VC
+    checkRounding<AlphaDarkenCompositor128<float, quint32> >(0.5, 0.3, -1, 16);
+#endif
+}
+
+void KisCompositionBenchmark::checkRoundingAlphaDarkenF32_05_05()
+{
+#ifdef HAVE_VC
+    checkRounding<AlphaDarkenCompositor128<float, quint32> >(0.5, 0.5, -1, 16);
+#endif
+}
+
+void KisCompositionBenchmark::checkRoundingAlphaDarkenF32_05_07()
+{
+#ifdef HAVE_VC
+    checkRounding<AlphaDarkenCompositor128<float, quint32> >(0.5, 0.7, -1, 16);
+#endif
+}
+
+void KisCompositionBenchmark::checkRoundingAlphaDarkenF32_05_10()
+{
+#ifdef HAVE_VC
+    checkRounding<AlphaDarkenCompositor128<float, quint32> >(0.5, 1.0, -1, 16);
+#endif
+}
+
+void KisCompositionBenchmark::checkRoundingAlphaDarkenF32_05_10_08()
+{
+#ifdef HAVE_VC
+    checkRounding<AlphaDarkenCompositor128<float, quint32> >(0.5, 1.0, 0.8, 16);
+#endif
+}
+
 void KisCompositionBenchmark::checkRoundingOver()
 {
 #ifdef HAVE_VC
@@ -615,6 +650,18 @@
     delete opAct;
 }
 
+void KisCompositionBenchmark::compareRgbF32AlphaDarkenOps()
+{
+    const KoColorSpace *cs = KoColorSpaceRegistry::instance()->colorSpace("RGBA", "F32", "");
+    KoCompositeOp *opAct = KoOptimizedCompositeOpFactory::createAlphaDarkenOp128(cs);
+    KoCompositeOp *opExp = new KoCompositeOpAlphaDarken<KoRgbF32Traits>(cs);
+
+    QVERIFY(compareTwoOps(true, opAct, opExp));
+
+    delete opExp;
+    delete opAct;
+}
+
 void KisCompositionBenchmark::compareAlphaDarkenOpsNoMask()
 {
     const KoColorSpace *cs = KoColorSpaceRegistry::instance()->rgb8();
@@ -695,6 +742,22 @@
     delete op;
 }
 
+void KisCompositionBenchmark::testRgbF32CompositeAlphaDarkenLegacy()
+{
+    const KoColorSpace *cs = KoColorSpaceRegistry::instance()->colorSpace("RGBA", "F32", "");
+    KoCompositeOp *op = new KoCompositeOpAlphaDarken<KoRgbF32Traits>(cs);
+    benchmarkCompositeOp(op, "Legacy");
+    delete op;
+}
+
+void KisCompositionBenchmark::testRgbF32CompositeAlphaDarkenOptimized()
+{
+    const KoColorSpace *cs = KoColorSpaceRegistry::instance()->colorSpace("RGBA", "F32", "");
+    KoCompositeOp *op = KoOptimizedCompositeOpFactory::createAlphaDarkenOp128(cs);
+    benchmarkCompositeOp(op, "Optimized");
+    delete op;
+}
+
 void KisCompositionBenchmark::testRgbF32CompositeOverLegacy()
 {
     const KoColorSpace *cs = KoColorSpaceRegistry::instance()->colorSpace("RGBA", "F32", "");
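Note on the tolerance change in the hunk above: for the 16-byte (RGBA F32) case the optimized and legacy ops are no longer required to match exactly, only to agree within 2e-7 per channel, since the vectorized and scalar paths may round float intermediates differently. A minimal sketch of that kind of per-channel check (the helper name channelsMatch is hypothetical and for illustration only; it is not the benchmark's actual compareTwoOpsPixels implementation):

    #include <cmath>

    // Illustration only: two float channels are treated as equal when they
    // differ by no more than the given absolute tolerance (2e-7 above).
    static bool channelsMatch(float a, float b, float tolerance)
    {
        return std::fabs(a - b) <= tolerance;
    }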
diff --git a/libs/pigment/compositeops/KoOptimizedCompositeOpAlphaDarken128.h b/libs/pigment/compositeops/KoOptimizedCompositeOpAlphaDarken128.h
new file mode 100644
--- /dev/null
+++ b/libs/pigment/compositeops/KoOptimizedCompositeOpAlphaDarken128.h
@@ -0,0 +1,221 @@
+/*
+ * Copyright (c) 2016 Thorsten Zachmann
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public License
+ * along with this library; see the file COPYING.LIB. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#ifndef KOOPTIMIZEDCOMPOSITEOPALPHADARKEN128_H
+#define KOOPTIMIZEDCOMPOSITEOPALPHADARKEN128_H
+
+#include "KoCompositeOpBase.h"
+#include "KoCompositeOpRegistry.h"
+#include "KoStreamedMath.h"
+
+template<typename channels_type, typename pixel_type>
+struct AlphaDarkenCompositor128 {
+    struct OptionalParams {
+        OptionalParams(const KoCompositeOp::ParameterInfo& params)
+            : flow(params.flow)
+            , averageOpacity(*params.lastOpacity * params.flow)
+            , premultipliedOpacity(params.opacity * params.flow)
+        {
+        }
+        float flow;
+        float averageOpacity;
+        float premultipliedOpacity;
+    };
+
+    struct Pixel {
+        channels_type red;
+        channels_type green;
+        channels_type blue;
+        channels_type alpha;
+    };
+
+    /**
+     * This is a vector equivalent of compositeOnePixelScalar(). It is considered
+     * to process Vc::float_v::Size pixels in a single pass.
+     *
+     * o the \p haveMask parameter points whether the real (non-null) mask
+     *   pointer is passed to the function.
+     * o the \p src pointer may be aligned to vector boundary or may be
+     *   not. In case not, it must be pointed with a special parameter
+     *   \p src_aligned.
+     * o the \p dst pointer must always(!) be aligned to the boundary
+     *   of a streaming vector. Unaligned writes are really expensive.
+     * o This function is *never* used if HAVE_VC is not present
+     */
+    template<bool haveMask, bool src_aligned, Vc::Implementation _impl>
+    static ALWAYS_INLINE void compositeVector(const quint8 *src, quint8 *dst, const quint8 *mask, float opacity, const OptionalParams &oparams)
+    {
+        const Pixel *sp = reinterpret_cast<const Pixel*>(src);
+        Pixel *dp = reinterpret_cast<Pixel*>(dst);
+
+        Vc::float_v src_c1;
+        Vc::float_v src_c2;
+        Vc::float_v src_c3;
+        Vc::float_v src_alpha;
+
+        const Vc::float_v::IndexType indexes(Vc::IndexesFromZero);
+        Vc::InterleavedMemoryWrapper<Pixel, Vc::float_v> data(const_cast<Pixel*>(sp));
+        (src_c1, src_c2, src_c3, src_alpha) = data[indexes];
+
+        Vc::float_v msk_norm_alpha;
+        if (haveMask) {
+            const Vc::float_v uint8Rec1((float)1.0 / 255.0);
+            Vc::float_v mask_vec = KoStreamedMath<_impl>::fetch_mask_8(mask);
+            msk_norm_alpha = mask_vec * uint8Rec1 * src_alpha;
+        }
+        else {
+            msk_norm_alpha = src_alpha;
+        }
+        Vc::float_v opacity_vec(oparams.premultipliedOpacity);
+
+        src_alpha = msk_norm_alpha * opacity_vec;
+
+        const Vc::float_v zeroValue(KoColorSpaceMathsTraits<channels_type>::zeroValue);
+
+        Vc::float_v dst_c1;
+        Vc::float_v dst_c2;
+        Vc::float_v dst_c3;
+        Vc::float_v dst_alpha;
+
+        Vc::InterleavedMemoryWrapper<Pixel, Vc::float_v> dataDest(dp);
+        (dst_c1, dst_c2, dst_c3, dst_alpha) = dataDest[indexes];
+
+        Vc::float_m empty_dst_pixels_mask = dst_alpha == zeroValue;
+
+        if (!empty_dst_pixels_mask.isFull()) {
+            if (empty_dst_pixels_mask.isEmpty()) {
+                dst_c1 = (src_c1 - dst_c1) * src_alpha + dst_c1;
+                dst_c2 = (src_c2 - dst_c2) * src_alpha + dst_c2;
+                dst_c3 = (src_c3 - dst_c3) * src_alpha + dst_c3;
+            }
+            else {
+                dst_c1(empty_dst_pixels_mask) = src_c1;
+                dst_c2(empty_dst_pixels_mask) = src_c2;
+                dst_c3(empty_dst_pixels_mask) = src_c3;
+                Vc::float_m not_empty_dst_pixels_mask = !empty_dst_pixels_mask;
+                dst_c1(not_empty_dst_pixels_mask) = (src_c1 - dst_c1) * src_alpha + dst_c1;
+                dst_c2(not_empty_dst_pixels_mask) = (src_c2 - dst_c2) * src_alpha + dst_c2;
+                dst_c3(not_empty_dst_pixels_mask) = (src_c3 - dst_c3) * src_alpha + dst_c3;
+            }
+        }
+        else {
+            dst_c1 = src_c1;
+            dst_c2 = src_c2;
+            dst_c3 = src_c3;
+        }
+
+        Vc::float_v fullFlowAlpha(dst_alpha);
+
+        if (oparams.averageOpacity > opacity) {
+            Vc::float_v average_opacity_vec(oparams.averageOpacity);
+            Vc::float_m fullFlowAlpha_mask = average_opacity_vec > dst_alpha;
+            fullFlowAlpha(fullFlowAlpha_mask) = (average_opacity_vec - src_alpha) * (dst_alpha / average_opacity_vec) + src_alpha;
+        }
+        else {
+            Vc::float_m fullFlowAlpha_mask = opacity_vec > dst_alpha;
+            fullFlowAlpha(fullFlowAlpha_mask) = (opacity_vec - dst_alpha) * msk_norm_alpha + dst_alpha;
+        }
+
+        if (oparams.flow == 1.0) {
+            dst_alpha = fullFlowAlpha;
+        }
+        else {
+            Vc::float_v zeroFlowAlpha = src_alpha + dst_alpha - src_alpha * dst_alpha;
+            Vc::float_v flow_norm_vec(oparams.flow);
+            dst_alpha = (fullFlowAlpha - zeroFlowAlpha) * flow_norm_vec + zeroFlowAlpha;
+        }
+        dataDest[indexes] = (dst_c1, dst_c2, dst_c3, dst_alpha);
+    }
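+
+    // The scalar fallback below mirrors the vector code above: colour
+    // channels are linearly interpolated towards the source by the effective
+    // source alpha (normalized mask * source alpha * premultiplied opacity),
+    // and the destination alpha is blended between the "zero flow" union
+    // alpha (a + b - a*b) and the "full flow" alpha, weighted by params.flow.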
+
+    /**
+     * Composes one pixel of the source into the destination
+     */
+    template<bool haveMask, Vc::Implementation _impl>
+    static ALWAYS_INLINE void compositeOnePixelScalar(const quint8 *s, quint8 *d, const quint8 *mask, float opacity, const OptionalParams &oparams)
+    {
+        using namespace Arithmetic;
+        const qint32 alpha_pos = 3;
+
+        const channels_type *src = reinterpret_cast<const channels_type*>(s);
+        channels_type *dst = reinterpret_cast<channels_type*>(d);
+
+        float dstAlphaNorm = dst[alpha_pos];
+
+        const float uint8Rec1 = 1.0 / 255.0;
+        float mskAlphaNorm = haveMask ? float(*mask) * uint8Rec1 * src[alpha_pos] : src[alpha_pos];
+
+        opacity = oparams.premultipliedOpacity;
+
+        float srcAlphaNorm = mskAlphaNorm * opacity;
+
+        if (dstAlphaNorm != 0) {
+            dst[0] = lerp(dst[0], src[0], srcAlphaNorm);
+            dst[1] = lerp(dst[1], src[1], srcAlphaNorm);
+            dst[2] = lerp(dst[2], src[2], srcAlphaNorm);
+        } else {
+            const pixel_type *s = reinterpret_cast<const pixel_type*>(src);
+            pixel_type *d = reinterpret_cast<pixel_type*>(dst);
+            *d = *s;
+        }
+
+        float flow = oparams.flow;
+        float averageOpacity = oparams.averageOpacity;
+
+        float fullFlowAlpha;
+
+        if (averageOpacity > opacity) {
+            fullFlowAlpha = averageOpacity > dstAlphaNorm ? lerp(srcAlphaNorm, averageOpacity, dstAlphaNorm / averageOpacity) : dstAlphaNorm;
+        } else {
+            fullFlowAlpha = opacity > dstAlphaNorm ? lerp(dstAlphaNorm, opacity, mskAlphaNorm) : dstAlphaNorm;
+        }
+
+        if (flow == 1.0) {
+            dst[alpha_pos] = fullFlowAlpha;
+        } else {
+            float zeroFlowAlpha = unionShapeOpacity(srcAlphaNorm, dstAlphaNorm);
+            dst[alpha_pos] = lerp(zeroFlowAlpha, fullFlowAlpha, flow);
+        }
+    }
+};
+
+/**
+ * An optimized version of a composite op for the use in 16 byte
+ * colorspaces with alpha channel placed at the last byte of
+ * the pixel: C1_C2_C3_A.
+ */
+template<Vc::Implementation _impl>
+class KoOptimizedCompositeOpAlphaDarken128 : public KoCompositeOp
+{
+public:
+    KoOptimizedCompositeOpAlphaDarken128(const KoColorSpace* cs)
+        : KoCompositeOp(cs, COMPOSITE_ALPHA_DARKEN, i18n("Alpha darken"), KoCompositeOp::categoryMix()) {}
+
+    using KoCompositeOp::composite;
+
+    virtual void composite(const KoCompositeOp::ParameterInfo& params) const
+    {
+        if(params.maskRowStart) {
+            KoStreamedMath<_impl>::template genericComposite128<true, true, AlphaDarkenCompositor128<float, quint32> >(params);
+        } else {
+            KoStreamedMath<_impl>::template genericComposite128<false, true, AlphaDarkenCompositor128<float, quint32> >(params);
+        }
+    }
+};
+
+#endif // KOOPTIMIZEDCOMPOSITEOPALPHADARKEN128_H
diff --git a/libs/pigment/compositeops/KoOptimizedCompositeOpFactory.h b/libs/pigment/compositeops/KoOptimizedCompositeOpFactory.h
--- a/libs/pigment/compositeops/KoOptimizedCompositeOpFactory.h
+++ b/libs/pigment/compositeops/KoOptimizedCompositeOpFactory.h
@@ -42,6 +42,7 @@
 public:
     static KoCompositeOp* createAlphaDarkenOp32(const KoColorSpace *cs);
     static KoCompositeOp* createOverOp32(const KoColorSpace *cs);
+    static KoCompositeOp* createAlphaDarkenOp128(const KoColorSpace *cs);
     static KoCompositeOp* createOverOp128(const KoColorSpace *cs);
 };
 
diff --git a/libs/pigment/compositeops/KoOptimizedCompositeOpFactory.cpp b/libs/pigment/compositeops/KoOptimizedCompositeOpFactory.cpp
--- a/libs/pigment/compositeops/KoOptimizedCompositeOpFactory.cpp
+++ b/libs/pigment/compositeops/KoOptimizedCompositeOpFactory.cpp
@@ -42,6 +42,11 @@
     return createOptimizedClass<KoOptimizedCompositeOpFactoryPerArch<KoOptimizedCompositeOpOver32> >(cs);
 }
 
+KoCompositeOp* KoOptimizedCompositeOpFactory::createAlphaDarkenOp128(const KoColorSpace *cs)
+{
+    return createOptimizedClass<KoOptimizedCompositeOpFactoryPerArch<KoOptimizedCompositeOpAlphaDarken128> >(cs);
+}
+
 KoCompositeOp* KoOptimizedCompositeOpFactory::createOverOp128(const KoColorSpace *cs)
 {
     return createOptimizedClass<KoOptimizedCompositeOpFactoryPerArch<KoOptimizedCompositeOpOver128> >(cs);
diff --git a/libs/pigment/compositeops/KoOptimizedCompositeOpFactoryPerArch.h b/libs/pigment/compositeops/KoOptimizedCompositeOpFactoryPerArch.h
--- a/libs/pigment/compositeops/KoOptimizedCompositeOpFactoryPerArch.h
+++ b/libs/pigment/compositeops/KoOptimizedCompositeOpFactoryPerArch.h
@@ -35,6 +35,9 @@
 class KoOptimizedCompositeOpOver32;
 
 template<Vc::Implementation _impl>
+class KoOptimizedCompositeOpAlphaDarken128;
+
+template<Vc::Implementation _impl>
 class KoOptimizedCompositeOpOver128;
 
 template<template<Vc::Implementation I> class CompositeOp>
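With the factory entry above in place, a caller obtains the vectorized alpha darken op for a 128-bit float colorspace the same way the benchmark does; a minimal usage sketch (ownership follows the benchmark code: the caller deletes the returned op):

    const KoColorSpace *cs = KoColorSpaceRegistry::instance()->colorSpace("RGBA", "F32", "");
    KoCompositeOp *op = KoOptimizedCompositeOpFactory::createAlphaDarkenOp128(cs);
    // ... call op->composite(...) as with any other KoCompositeOp ...
    delete op;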
diff --git a/libs/pigment/compositeops/KoOptimizedCompositeOpFactoryPerArch.cpp b/libs/pigment/compositeops/KoOptimizedCompositeOpFactoryPerArch.cpp
--- a/libs/pigment/compositeops/KoOptimizedCompositeOpFactoryPerArch.cpp
+++ b/libs/pigment/compositeops/KoOptimizedCompositeOpFactoryPerArch.cpp
@@ -23,6 +23,7 @@
 #include "KoOptimizedCompositeOpFactoryPerArch.h"
 
 #include "KoOptimizedCompositeOpAlphaDarken32.h"
+#include "KoOptimizedCompositeOpAlphaDarken128.h"
 #include "KoOptimizedCompositeOpOver32.h"
 #include "KoOptimizedCompositeOpOver128.h"
 
@@ -53,6 +54,14 @@
 
 template<>
 template<>
+KoOptimizedCompositeOpFactoryPerArch<KoOptimizedCompositeOpAlphaDarken128>::ReturnType
+KoOptimizedCompositeOpFactoryPerArch<KoOptimizedCompositeOpAlphaDarken128>::create<Vc::CurrentImplementation::current()>(ParamType param)
+{
+    return new KoOptimizedCompositeOpAlphaDarken128<Vc::CurrentImplementation::current()>(param);
+}
+
+template<>
+template<>
 KoOptimizedCompositeOpFactoryPerArch<KoOptimizedCompositeOpOver128>::ReturnType
 KoOptimizedCompositeOpFactoryPerArch<KoOptimizedCompositeOpOver128>::create<Vc::CurrentImplementation::current()>(ParamType param)
 {