diff --git a/benchmarks/CMakeLists.txt b/benchmarks/CMakeLists.txt
--- a/benchmarks/CMakeLists.txt
+++ b/benchmarks/CMakeLists.txt
@@ -13,7 +13,7 @@
 set(LINK_VC_LIB)
 if(HAVE_VC)
     include_directories(${Vc_INCLUDE_DIR})
-#    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${Vc_DEFINITIONS}")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${Vc_DEFINITIONS}")
     set(LINK_VC_LIB ${Vc_LIBRARIES})
 endif()
 
@@ -38,7 +38,7 @@
 set(kis_low_memory_benchmark_SRCS kis_low_memory_benchmark.cpp)
 set(kis_filter_selections_benchmark_SRCS kis_filter_selections_benchmark.cpp)
 if (UNIX)
-    #set(kis_composition_benchmark_SRCS kis_composition_benchmark.cpp)
+    set(kis_composition_benchmark_SRCS kis_composition_benchmark.cpp)
 endif()
 set(kis_thumbnail_benchmark_SRCS kis_thumbnail_benchmark.cpp)
 
@@ -59,7 +59,7 @@
 krita_add_benchmark(KisLowMemoryBenchmark TESTNAME krita-benchmarks-KisLowMemory ${kis_low_memory_benchmark_SRCS})
 krita_add_benchmark(KisFilterSelectionsBenchmark TESTNAME krita-image-KisFilterSelectionsBenchmark ${kis_filter_selections_benchmark_SRCS})
 if(UNIX)
-    #krita_add_benchmark(KisCompositionBenchmark TESTNAME krita-benchmarks-KisComposition ${kis_composition_benchmark_SRCS})
+    krita_add_benchmark(KisCompositionBenchmark TESTNAME krita-benchmarks-KisComposition ${kis_composition_benchmark_SRCS})
 endif()
 krita_add_benchmark(KisThumbnailBenchmark TESTNAME krita-benchmarks-KisThumbnail ${kis_thumbnail_benchmark_SRCS})
 
@@ -80,10 +80,10 @@
 
 target_link_libraries(KisFilterSelectionsBenchmark kritaimage Qt5::Test)
 if(UNIX)
-    #target_link_libraries(KisCompositionBenchmark kritaimage Qt5::Test ${LINK_VC_LIB})
-    #if(HAVE_VC)
-    #    set_property(TARGET KisCompositionBenchmark APPEND PROPERTY COMPILE_OPTIONS "${Vc_ARCHITECTURE_FLAGS}")
-    #endif()
+    target_link_libraries(KisCompositionBenchmark kritaimage Qt5::Test ${LINK_VC_LIB})
+    if(HAVE_VC)
+        set_property(TARGET KisCompositionBenchmark APPEND PROPERTY COMPILE_OPTIONS "${Vc_ARCHITECTURE_FLAGS}")
+    endif()
 endif()
 target_link_libraries(KisMaskGeneratorBenchmark kritaimage Qt5::Test)
 target_link_libraries(KisThumbnailBenchmark kritaimage Qt5::Test)
diff --git a/libs/pigment/compositeops/KoOptimizedCompositeOpOver32.h b/libs/pigment/compositeops/KoOptimizedCompositeOpOver32.h
--- a/libs/pigment/compositeops/KoOptimizedCompositeOpOver32.h
+++ b/libs/pigment/compositeops/KoOptimizedCompositeOpOver32.h
@@ -26,6 +26,51 @@
 
 #include "KoStreamedMath.h"
 
+/**
+ * Fast division helpers used by the "over" composite op.
+ *
+ * When __SSE__ is defined, both helpers multiply the dividend by the
+ * reciprocal of the divisor instead of dividing; otherwise they perform
+ * a plain division.
+ *
+ * NOTE(review): _mm_rcp_ss only approximates 1/x, so x / x is not
+ * guaranteed to come out as exactly 1.0f -- confirm that callers which
+ * compare the result against 1 tolerate this.
+ */
+template<Vc::Implementation _impl>
+struct OptiDiv {
+    /// Scalar variant: returns divident / divisor (reciprocal-multiply on SSE).
+    static ALWAYS_INLINE float divScalar(const float& divident, const float& divisor) {
+#ifdef __SSE__
+        float result;
+
+        __m128 x = _mm_set_ss(divisor);
+        __m128 y = _mm_set_ss(divident);
+        x = _mm_rcp_ss(x);      // x = approximate 1 / divisor
+        x = _mm_mul_ss(x, y);   // x = divident * (1 / divisor)
+
+
+        _mm_store_ss(&result, x);
+        return result;
+#else
+        return divident / divisor;
+#endif
+
+    }
+
+    /// Vector variant: per-lane divident / divisor (via Vc::reciprocal on SSE).
+    static ALWAYS_INLINE Vc::float_v divVector(Vc::float_v::AsArg divident, Vc::float_v::AsArg divisor) {
+#ifdef __SSE__
+        return divident * Vc::reciprocal(divisor);
+#else
+        return divident / divisor;
+#endif
+
+    }
+
+};
+
+
 template<typename channels_type, typename pixel_type, bool alphaLocked, bool allChannelsFlag>
 struct OverCompositor32 {
     struct OptionalParams {
@@ -97,7 +142,11 @@
                  * be converted to zeroes, which is exactly what we need
                  */
                 new_alpha = dst_alpha + (uint8Max - dst_alpha) * src_alpha * uint8MaxRec1;
-                src_blend = src_alpha / new_alpha;
+
+                // Optimized version of:
+                // src_blend = src_alpha / new_alpha;
+                src_blend = OptiDiv<_impl>::divVector(src_alpha, new_alpha);
+
             }
 
             if (!(src_blend == oneValue).isFull()) {
@@ -156,7 +205,10 @@
                 }
             } else {
                 dstAlpha += (uint8Max - dstAlpha) * srcAlpha * uint8Rec1;
-                srcBlendNorm = srcAlpha / dstAlpha;
+                // Optimized version of:
+                // srcBlendNorm = srcAlpha / dstAlpha;
+                srcBlendNorm = OptiDiv<_impl>::divScalar(srcAlpha, dstAlpha);
+
             }
 
             if(allChannelsFlag) {