Differential D7314 Diff 18183 libs/pigment/compositeops/KoOptimizedCompositeOpOver32.h

Changeset View

Standalone View

libs/pigment/compositeops/KoOptimizedCompositeOpOver32.h

Show All 20 Lines
21	#ifndef KOOPTIMIZEDCOMPOSITEOPOVER32_H_	21		#ifndef KOOPTIMIZEDCOMPOSITEOPOVER32_H_
22	#define KOOPTIMIZEDCOMPOSITEOPOVER32_H_	22		#define KOOPTIMIZEDCOMPOSITEOPOVER32_H_
23		23
24	#include "KoCompositeOpBase.h"	24		#include "KoCompositeOpBase.h"
25	#include "KoCompositeOpRegistry.h"	25		#include "KoCompositeOpRegistry.h"
26	#include "KoStreamedMath.h"	26		#include "KoStreamedMath.h"
27		27
28		28
		29		template<Vc::Implementation _impl>
		30		struct OptiDiv {
		31		static ALWAYS_INLINE float divScalar(const float& divident, const float& divisor) {
		32		#ifdef __SSE__
		33		float result;
		34
		35		__m128 x = _mm_set_ss(divisor);
		36		__m128 y = _mm_set_ss(divident);
		37		x = _mm_rcp_ss(x);
		38		x = _mm_mul_ss(x, y);
		39
		40
		41		_mm_store_ss(&result, x);
		42		return result;
		43		#else
		44		return divident / divisor;
		45		#endif
		46
		47		}
		48
		49		static ALWAYS_INLINE Vc::float_v divVector(Vc::float_v::AsArg divident, Vc::float_v::AsArg divisor) {
		50		#ifdef __SSE__
		51		return divident * Vc::reciprocal(divisor);
		52		#else
		53		return divident / divisor;
			dkazakov (Unsubmitted, Not Done): In case SSE is not available we should use normal division, like `divident / divisor`, not `1.0 / divisor`. It is just faster.
		54		#endif
		55
		56		}
		57
		58		};
		59
		60
29	template<typename channels_type, typename pixel_type, bool alphaLocked, bool allChannelsFlag>	61		template<typename channels_type, typename pixel_type, bool alphaLocked, bool allChannelsFlag>
30	struct OverCompositor32 {	62		struct OverCompositor32 {
31	struct OptionalParams {	63		struct OptionalParams {
32	OptionalParams(const KoCompositeOp::ParameterInfo& params)	64		OptionalParams(const KoCompositeOp::ParameterInfo& params)
33	: channelFlags(params.channelFlags)	65		: channelFlags(params.channelFlags)
34	{	66		{
35	}	67		}
36	const QBitArray &channelFlags;	68		const QBitArray &channelFlags;
▲ Show 20 Lines • Show All 55 Lines • ▼ Show 20 Line(s)		74	{
92	} else {	124		} else {
93	/**	125		/**
94	* The value of new_alpha can have some zero values,	126		* The value of new_alpha can have some zero values,
95	* which will result in NaN values during division. But	127		* which will result in NaN values during division. But
96	* when converted to integers these NaN values will	128		* when converted to integers these NaN values will
97	* be converted to zeroes, which is exactly what we need	129		* be converted to zeroes, which is exactly what we need
98	*/	130		*/
99	new_alpha = dst_alpha + (uint8Max - dst_alpha) * src_alpha * uint8MaxRec1;	131		new_alpha = dst_alpha + (uint8Max - dst_alpha) * src_alpha * uint8MaxRec1;
100	src_blend = src_alpha / new_alpha;	132
		133		// Optimized version of:
		134		// src_blend = src_alpha / new_alpha;
		135		src_blend = OptiDiv<_impl>::divVector(src_alpha, new_alpha);
		136
101	}	137		}
102		138
103	if (!(src_blend == oneValue).isFull()) {	139		if (!(src_blend == oneValue).isFull()) {
104	KoStreamedMath<_impl>::template fetch_colors_32<true>(dst, dst_c1, dst_c2, dst_c3);	140		KoStreamedMath<_impl>::template fetch_colors_32<true>(dst, dst_c1, dst_c2, dst_c3);
105		141
106	dst_c1 = src_blend * (src_c1 - dst_c1) + dst_c1;	142		dst_c1 = src_blend * (src_c1 - dst_c1) + dst_c1;
107	dst_c2 = src_blend * (src_c2 - dst_c2) + dst_c2;	143		dst_c2 = src_blend * (src_c2 - dst_c2) + dst_c2;
108	dst_c3 = src_blend * (src_c3 - dst_c3) + dst_c3;	144		dst_c3 = src_blend * (src_c3 - dst_c3) + dst_c3;
▲ Show 20 Lines • Show All 42 Lines • ▼ Show 20 Line(s)		185	} else if (dstAlpha == 0.0) {
151	srcBlendNorm = 1.0;	187		srcBlendNorm = 1.0;
152		188
153	if (!allChannelsFlag) {	189		if (!allChannelsFlag) {
154	pixel_type d = reinterpret_cast<pixel_type>(dst);	190		pixel_type d = reinterpret_cast<pixel_type>(dst);
155	*d = 0; // dstAlpha is already null	191		*d = 0; // dstAlpha is already null
156	}	192		}
157	} else {	193		} else {
158	dstAlpha += (uint8Max - dstAlpha) * srcAlpha * uint8Rec1;	194		dstAlpha += (uint8Max - dstAlpha) * srcAlpha * uint8Rec1;
159	srcBlendNorm = srcAlpha / dstAlpha;	195		// Optimized version of:
		196		// srcBlendNorm = srcAlpha / dstAlpha;
		197		srcBlendNorm = OptiDiv<_impl>::divScalar(srcAlpha, dstAlpha);
		198
160	}	199		}
161		200
162	if(allChannelsFlag) {	201		if(allChannelsFlag) {
163	if (srcBlendNorm == 1.0) {	202		if (srcBlendNorm == 1.0) {
164	if (!alphaLocked) {	203		if (!alphaLocked) {
165	const pixel_type s = reinterpret_cast<const pixel_type>(src);	204		const pixel_type s = reinterpret_cast<const pixel_type>(src);
166	pixel_type d = reinterpret_cast<pixel_type>(dst);	205		pixel_type d = reinterpret_cast<pixel_type>(dst);
167	d = s;	206		d = s;
▲ Show 20 Lines • Show All 81 Lines • Show Last 20 Lines