Paste P178

[GSoC] Krita AVX optimization for Gauss mask generator. dummy implementation.
Active · Public

Authored by vanyossi on Mar 17 2018, 5:11 AM.
diff --git a/libs/image/kis_brush_mask_applicator_factories.cpp b/libs/image/kis_brush_mask_applicator_factories.cpp
index 2c54a61ef8..4a19684cdf 100644
--- a/libs/image/kis_brush_mask_applicator_factories.cpp
+++ b/libs/image/kis_brush_mask_applicator_factories.cpp
@@ -20,6 +20,7 @@
#include "kis_circle_mask_generator.h"
#include "kis_circle_mask_generator_p.h"
+#include "kis_gauss_circle_mask_generator_p.h"
#include "kis_brush_mask_applicators.h"
#include "kis_brush_mask_applicator_base.h"
@@ -42,6 +43,14 @@ MaskApplicatorFactory<KisCircleMaskGenerator, KisBrushMaskVectorApplicator>::cre
return new KisBrushMaskVectorApplicator<KisCircleMaskGenerator,Vc::CurrentImplementation::current()>(maskGenerator);
}
+/**
+ * Factory specialization for the Gauss circle mask generator.
+ *
+ * Heap-allocates a KisBrushMaskVectorApplicator instantiated for the Vc
+ * implementation selected by Vc::CurrentImplementation::current() and hands
+ * it back to the caller, which becomes responsible for it.
+ */
+template<>
+template<>
+MaskApplicatorFactory<KisGaussCircleMaskGenerator, KisBrushMaskVectorApplicator>::ReturnType
+MaskApplicatorFactory<KisGaussCircleMaskGenerator, KisBrushMaskVectorApplicator>::create<Vc::CurrentImplementation::current()>(ParamType maskGenerator)
+{
+    using GaussVectorApplicator =
+        KisBrushMaskVectorApplicator<KisGaussCircleMaskGenerator, Vc::CurrentImplementation::current()>;
+
+    return new GaussVectorApplicator(maskGenerator);
+}
+
#if defined HAVE_VC
struct KisCircleMaskGenerator::FastRowProcessor
@@ -134,4 +143,84 @@ FastRowProcessor::process<Vc::CurrentImplementation::current()>(float* buffer, i
}
}
+
+/**
+ * Helper that computes one row of the Gauss circle mask at a time.
+ *
+ * It keeps a raw, non-owning pointer to the generator's private data, taken
+ * from the generator's d-pointer at construction time.
+ */
+struct KisGaussCircleMaskGenerator::FastRowProcessor
+{
+    /// Private data of the generator this processor was created for (not owned).
+    KisGaussCircleMaskGenerator::Private *d;
+
+    /// Binds the processor to @p maskGenerator by caching its private data pointer.
+    FastRowProcessor(KisGaussCircleMaskGenerator *maskGenerator)
+        : d(maskGenerator->d.data())
+    {
+    }
+
+    /// Computes one mask row into @p buffer; specialized per Vc implementation.
+    template<Vc::Implementation _impl>
+    void process(float* buffer, int width, float y,
+                 float cosa, float sina,
+                 float centerX, float centerY);
+};
+
+/**
+ * Fills one mask row with a normalized Gaussian falloff, producing
+ * Vc::float_v::size() values per loop iteration.
+ *
+ * This is the placeholder ("dummy") kernel of the patch: positions are taken
+ * relative to width/2 and scaled by width; rotation and the brush centre are
+ * not applied yet.
+ *
+ * @param buffer  destination row. Written with aligned stores in whole
+ *                vectors, so it is assumed to be suitably aligned and padded
+ *                up to a multiple of Vc::float_v::size() -- TODO confirm
+ *                against the caller.
+ * @param width   number of values in the row.
+ * @param y       row coordinate.
+ * @param cosa    unused by this placeholder kernel.
+ * @param sina    unused by this placeholder kernel.
+ * @param centerX unused by this placeholder kernel.
+ * @param centerY unused by this placeholder kernel.
+ */
+template<> void KisGaussCircleMaskGenerator::
+FastRowProcessor::process<Vc::CurrentImplementation::current()>(float* buffer, int width, float y, float cosa, float sina,
+                                                                 float centerX, float centerY)
+{
+    // Accepted for interface compatibility; the placeholder kernel does not read them.
+    Q_UNUSED(cosa);
+    Q_UNUSED(sina);
+    Q_UNUSED(centerX);
+    Q_UNUSED(centerY);
+
+    // Integer division: the half width is truncated for odd widths.
+    // NOTE(review): confirm whether width / 2.0f was intended.
+    const float widthHalf = width / 2;
+
+    // The row coordinate is constant for the whole call, so square it once.
+    const float y_ = (y - widthHalf) / width;
+    const float ySquared = pow2(y_);
+
+    // Gaussian parameters derived from the fade value.
+    const float gStd = .1515 * (d->fade + .30);
+    const float gVariance = pow2(gStd);
+    // gDen scales n, gMin and gMax alike, so it cancels out of the
+    // normalization below (up to rounding).
+    const float gDen = 1 / (2 * 3.1415 * gVariance);
+    const float gExpDen = 2 * gVariance;
+
+    // Reference values used to normalize the Gaussian.
+    const float gMax = gDen * std::exp(-0.02 / gExpDen);
+    const float gMin = gDen * std::exp(-0.5 / gExpDen);
+    const float gRange = gMax - gMin;
+
+    float* bufferPointer = buffer;
+
+    // Lane i starts at column i, shifted so that 0 sits at widthHalf.
+    Vc::float_v currentIndices = Vc::float_v::IndexesFromZero() - widthHalf;
+    const Vc::float_v increment((float)Vc::float_v::size());
+
+    const Vc::float_v vOne(Vc::One);
+    const Vc::float_v vZero(Vc::Zero);
+
+    for (int i = 0; i < width; i += Vc::float_v::size()) {
+        Vc::float_v x_ = currentIndices / width;
+
+        // Gaussian exponent -(x^2 + y^2) / (2 * sigma^2).
+        Vc::float_v xr_ = -(pow2(x_) + ySquared) / gExpDen;
+        // apply() calls std::exp once per lane.
+        // NOTE(review): a vector exp from Vc may be usable here -- confirm precision.
+        Vc::float_v n = gDen * xr_.apply([](float f) { return std::exp(f); });
+        Vc::float_v vNormFade = -(n - gMin) / gRange + 1.1f;
+
+        Vc::float_m outsideMask = vNormFade > vOne;
+
+        if (!outsideMask.isFull()) {
+            Vc::float_v vFade = vNormFade;
+
+            // Clamp negative values to zero.
+            Vc::float_m mask = vNormFade < vZero;
+            vFade.setZero(mask);
+
+            // Mask out the outer circle of the mask
+            vFade(outsideMask) = vOne;
+
+            vFade.store(bufferPointer, Vc::Aligned);
+        } else {
+            // Mask out everything outside the circle
+            vOne.store(bufferPointer, Vc::Aligned);
+        }
+
+        currentIndices = currentIndices + increment;
+        bufferPointer += Vc::float_v::size();
+    }
+}
+
#endif /* defined HAVE_VC */
diff --git a/libs/image/kis_gauss_circle_mask_generator.cpp b/libs/image/kis_gauss_circle_mask_generator.cpp
index 50977c95f6..155e41e47b 100644
--- a/libs/image/kis_gauss_circle_mask_generator.cpp
+++ b/libs/image/kis_gauss_circle_mask_generator.cpp
@@ -20,6 +20,25 @@
#include <compositeops/KoVcMultiArchBuildSupport.h> //MSVC requires that Vc come first
#include <cmath>
+#include <config-vc.h>
+#ifdef HAVE_VC
+#if defined(__clang__)
+#pragma GCC diagnostic ignored "-Wundef"
+#pragma GCC diagnostic ignored "-Wlocal-type-template-args"
+#endif
+#if defined _MSC_VER
+// Silence the "possible loss of data" (C4244) and "forcing value to bool 'true' or 'false'" (C4800) warnings while including Vc
+#pragma warning ( push )
+#pragma warning ( disable : 4244 )
+#pragma warning ( disable : 4800 )
+#endif
+#include <Vc/Vc>
+#include <Vc/IO>
+#if defined _MSC_VER
+#pragma warning ( pop )
+#endif
+#endif
+
#include <QDomDocument>
#include <QVector>
#include <QPointF>
@@ -29,8 +48,11 @@
#include "kis_fast_math.h"
#include "kis_base_mask_generator.h"
-#include "kis_gauss_circle_mask_generator.h"
#include "kis_antialiasing_fade_maker.h"
+#include "kis_brush_mask_applicator_factories.h"
+#include "kis_brush_mask_applicator_base.h"
+#include "kis_gauss_circle_mask_generator.h"
+#include "kis_gauss_circle_mask_generator_p.h"
#define M_SQRT_2 1.41421356237309504880
@@ -41,31 +63,6 @@
#endif
-struct Q_DECL_HIDDEN KisGaussCircleMaskGenerator::Private
-{
- Private(bool enableAntialiasing)
- : fadeMaker(*this, enableAntialiasing)
- {
- }
-
- Private(const Private &rhs)
- : ycoef(rhs.ycoef),
- fade(rhs.fade),
- center(rhs.center),
- distfactor(rhs.distfactor),
- alphafactor(rhs.alphafactor),
- fadeMaker(rhs.fadeMaker, *this)
- {
- }
-
- qreal ycoef;
- qreal fade;
- qreal center, distfactor, alphafactor;
- KisAntialiasingFadeMaker1D<Private> fadeMaker;
-
- inline quint8 value(qreal dist) const;
-};
-
KisGaussCircleMaskGenerator::KisGaussCircleMaskGenerator(qreal diameter, qreal ratio, qreal fh, qreal fv, int spikes, bool antialiasEdges)
: KisMaskGenerator(diameter, ratio, fh, fv, spikes, antialiasEdges, CIRCLE, GaussId),
d(new Private(antialiasEdges))
@@ -76,12 +73,16 @@ KisGaussCircleMaskGenerator::KisGaussCircleMaskGenerator(qreal diameter, qreal r
else if (d->fade == 1.0) d->fade = 1.0 - 1e-6; // would become undefined for fade == 0 or 1
d->center = (2.5 * (6761.0*d->fade-10000.0))/(M_SQRT_2*6761.0*d->fade);
d->alphafactor = 255.0 / (2.0 * erf(d->center));
+
+ d->applicator.reset(createOptimizedClass<MaskApplicatorFactory<KisGaussCircleMaskGenerator, KisBrushMaskVectorApplicator> >(this));
+
}
KisGaussCircleMaskGenerator::KisGaussCircleMaskGenerator(const KisGaussCircleMaskGenerator &rhs)
: KisMaskGenerator(rhs),
d(new Private(*rhs.d))
{
+ // Private's copy constructor leaves the applicator empty, so create a new one bound to this copy instead of sharing rhs's.
+ d->applicator.reset(createOptimizedClass<MaskApplicatorFactory<KisGaussCircleMaskGenerator, KisBrushMaskVectorApplicator> >(this));
}
KisMaskGenerator* KisGaussCircleMaskGenerator::clone() const
@@ -109,6 +110,21 @@ inline quint8 KisGaussCircleMaskGenerator::Private::value(qreal dist) const
return (quint8) 255 - ret;
}
+/// Reports whether supersampling is wanted: true as soon as either the
+/// effective source width or the effective source height is below 10.
+bool KisGaussCircleMaskGenerator::shouldSupersample() const
+{
+    if (effectiveSrcWidth() < 10) {
+        return true;
+    }
+    return effectiveSrcHeight() < 10;
+}
+
+/// Reports whether the vectorized path may be used: only when no
+/// supersampling is wanted and spikes() equals 2.
+bool KisGaussCircleMaskGenerator::shouldVectorize() const
+{
+    if (shouldSupersample()) {
+        return false;
+    }
+    return spikes() == 2;
+}
+
+/// Returns the applicator held in the private data. The pointer stays owned
+/// by the generator's scoped pointer; callers must not delete it.
+KisBrushMaskApplicatorBase* KisGaussCircleMaskGenerator::applicator()
+{
+    KisBrushMaskApplicatorBase *currentApplicator = d->applicator.data();
+    return currentApplicator;
+}
+
quint8 KisGaussCircleMaskGenerator::valueAt(qreal x, qreal y) const
{
if (isEmpty()) return 255;
diff --git a/libs/image/kis_gauss_circle_mask_generator.h b/libs/image/kis_gauss_circle_mask_generator.h
index ae6158c9ee..46632c47a1 100644
--- a/libs/image/kis_gauss_circle_mask_generator.h
+++ b/libs/image/kis_gauss_circle_mask_generator.h
@@ -20,16 +20,18 @@
#ifndef _KIS_GAUSS_MASK_GENERATOR_H_
#define _KIS_GAUSS_MASK_GENERATOR_H_
-#include <QScopedPointer>
#include "kritaimage_export.h"
+#include "kis_mask_generator.h"
+#include <QScopedPointer>
/**
* This mask generator uses a Gaussian-blurred circle
*/
class KRITAIMAGE_EXPORT KisGaussCircleMaskGenerator : public KisMaskGenerator
{
-
+public:
+ struct FastRowProcessor;
public:
KisGaussCircleMaskGenerator(qreal diameter, qreal ratio, qreal fh, qreal fv, int spikes, bool antialiasEdges);
@@ -41,6 +43,11 @@ public:
void setScale(qreal scaleX, qreal scaleY) override;
+ bool shouldSupersample() const override;
+
+ bool shouldVectorize() const override;
+ KisBrushMaskApplicatorBase* applicator() override;
+
private:
qreal norme(qreal a, qreal b) const {
diff --git a/libs/image/kis_gauss_circle_mask_generator_p.h b/libs/image/kis_gauss_circle_mask_generator_p.h
new file mode 100644
index 0000000000..56d16d07b1
--- /dev/null
+++ b/libs/image/kis_gauss_circle_mask_generator_p.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2008-2009 Cyrille Berger <cberger@cberger.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef _KIS_GAUSS_MASK_GENERATOR_P_H_
+#define _KIS_GAUSS_MASK_GENERATOR_P_H_
+
+#include "kis_antialiasing_fade_maker.h"
+
+/**
+ * Private data of KisGaussCircleMaskGenerator.
+ *
+ * NOTE(review): this header only includes kis_antialiasing_fade_maker.h, so
+ * it appears to rely on the including file to have declared
+ * KisGaussCircleMaskGenerator, KisBrushMaskApplicatorBase and QScopedPointer
+ * beforehand -- confirm, or include them here.
+ */
+struct Q_DECL_HIDDEN KisGaussCircleMaskGenerator::Private
+{
+    /// Creates the data with zeroed numeric fields; the fade maker is bound to this object.
+    explicit Private(bool enableAntialiasing)
+        : fadeMaker(*this, enableAntialiasing)
+    {
+    }
+
+    /// Copies the numeric fields and rebinds the fade maker to the new object.
+    /// The applicator is deliberately not copied and starts out empty.
+    Private(const Private &rhs)
+        : ycoef(rhs.ycoef),
+          fade(rhs.fade),
+          center(rhs.center),
+          distfactor(rhs.distfactor),
+          alphafactor(rhs.alphafactor),
+          fadeMaker(rhs.fadeMaker, *this)
+    {
+    }
+
+    // Default-initialized so that a freshly constructed or copied Private
+    // never carries indeterminate values.
+    qreal ycoef {0.0};
+    qreal fade {0.0};
+    qreal center {0.0};
+    qreal distfactor {0.0};
+    qreal alphafactor {0.0};
+    KisAntialiasingFadeMaker1D<Private> fadeMaker;
+
+    /// Applicator owned by this object; released when the Private is destroyed.
+    QScopedPointer<KisBrushMaskApplicatorBase> applicator;
+
+    inline quint8 value(qreal dist) const;
+};
+
+#endif /* _KIS_GAUSS_MASK_GENERATOR_P_H_ */
vanyossi created this paste. Mar 17 2018, 5:11 AM
vanyossi created this object in space S1 KDE Community.
vanyossi created this object with edit policy "Administrators".