diff --git a/libs/image/kis_convolution_worker_fft.h b/libs/image/kis_convolution_worker_fft.h --- a/libs/image/kis_convolution_worker_fft.h +++ b/libs/image/kis_convolution_worker_fft.h @@ -229,38 +229,48 @@ rect.x(), rect.y(), rect.width(), dataRect); - QVector<double*> channelPtr(info.numChannels()); - - for (int k = 0; k < channelPtr.size(); ++k) { - channelPtr[k] = (double*)m_channelFFT[k]; + const int channelCount = info.numChannels(); + QVector<double*> channelPtr(channelCount); + const auto channelPtrBegin = channelPtr.begin(); + const auto channelPtrEnd = channelPtr.end(); + + auto iFFt = m_channelFFT.constBegin(); + for (auto i = channelPtrBegin; i != channelPtrEnd; ++i, ++iFFt) { + *i = (double*)*iFFt; } + // prepare cache, reused in all loops + QVector<double*> cacheRowStart(channelCount); + const auto cacheRowStartBegin = cacheRowStart.begin(); + for (int y = 0; y < rect.height(); ++y) { - QVector<double*> cacheRowStart(channelPtr); + // cache current channelPtr in cacheRowStart + memcpy(cacheRowStart.data(), channelPtr.data(), channelCount * sizeof(double*)); for (int x = 0; x < rect.width(); ++x) { const quint8 *data = hitSrc->oldRawData(); // no alpha is a rare case, so just multiply by 1.0 in that case double alphaValue = info.alphaRealPos >= 0 ? 
info.toDoubleFuncPtr[info.alphaCachePos](data, info.alphaRealPos) : 1.0; - - for (int k = 0; k < channelPtr.size(); ++k) { + int k = 0; + for (auto i = channelPtrBegin; i != channelPtrEnd; ++i, ++k) { if (k != info.alphaCachePos) { const quint32 channelPos = info.convChannelList[k]->pos(); - *channelPtr[k] = info.toDoubleFuncPtr[k](data, channelPos) * alphaValue; + **i = info.toDoubleFuncPtr[k](data, channelPos) * alphaValue; } else { - *channelPtr[k] = alphaValue; + **i = alphaValue; } - channelPtr[k]++; + ++(*i); } hitSrc->nextPixel(); } - for (int k = 0; k < channelPtr.size(); ++k) { - channelPtr[k] = cacheRowStart[k] + cacheRowStride; + auto iRowStart = cacheRowStartBegin; + for (auto i = channelPtrBegin; i != channelPtrEnd; ++i, ++iRowStart) { + *i = *iRowStart + cacheRowStride; } hitSrc->nextRow(); @@ -280,21 +290,20 @@ template <bool additionalMultiplierActive> inline qreal writeOneChannelFromCache(quint8* dstPtr, const quint32 channel, - const int channelPos, const FFTInfo &info, - const QVector<double*> &channelPtr, + double* channelValuePtr, const qreal additionalMultiplier = 0.0) { qreal channelPixelValue; if (additionalMultiplierActive) { - channelPixelValue = (*(channelPtr[channel]) * info.fftScale + info.absoluteOffset[channel]) * additionalMultiplier; + channelPixelValue = (*channelValuePtr * info.fftScale + info.absoluteOffset[channel]) * additionalMultiplier; } else { - channelPixelValue = *(channelPtr[channel]) * info.fftScale + info.absoluteOffset[channel]; + channelPixelValue = *channelValuePtr * info.fftScale + info.absoluteOffset[channel]; } limitValue(&channelPixelValue, info.minClamp[channel], info.maxClamp[channel]); - info.fromDoubleFuncPtr[channel](dstPtr, channelPos, channelPixelValue); + info.fromDoubleFuncPtr[channel](dstPtr, info.convChannelList[channel]->pos(), channelPixelValue); return channelPixelValue; } @@ -313,66 +322,76 @@ int initialOffset = cacheRowStride * halfKernelHeight + halfKernelWidth; - QVector<double*> channelPtr(info.numChannels()); + const int channelCount = 
info.numChannels(); + QVector<double*> channelPtr(channelCount); + const auto channelPtrBegin = channelPtr.begin(); + const auto channelPtrEnd = channelPtr.end(); - for (int k = 0; k < channelPtr.size(); ++k) { - channelPtr[k] = (double*)m_channelFFT[k] + initialOffset; + auto iFFt = m_channelFFT.constBegin(); + for (auto i = channelPtrBegin; i != channelPtrEnd; ++i, ++iFFt) { + *i = (double*)*iFFt + initialOffset; } + // prepare cache, reused in all loops + QVector<double*> cacheRowStart(channelCount); + const auto cacheRowStartBegin = cacheRowStart.begin(); + for (int y = 0; y < rect.height(); ++y) { - QVector<double*> cacheRowStart(channelPtr); + // cache current channelPtr in cacheRowStart + memcpy(cacheRowStart.data(), channelPtr.data(), channelCount * sizeof(double*)); for (int x = 0; x < rect.width(); ++x) { quint8 *dstPtr = hitDst->rawData(); if (info.alphaCachePos >= 0) { qreal alphaValue = writeOneChannelFromCache<false>(dstPtr, info.alphaCachePos, - info.convChannelList[info.alphaCachePos]->pos(), info, - channelPtr); + channelPtr.at(info.alphaCachePos)); if (alphaValue > std::numeric_limits<qreal>::epsilon()) { qreal alphaValueInv = 1.0 / alphaValue; - for (int k = 0; k < channelPtr.size(); ++k) { + int k = 0; + for (auto i = channelPtrBegin; i != channelPtrEnd; ++i, ++k) { if (k != info.alphaCachePos) { writeOneChannelFromCache<true>(dstPtr, k, - info.convChannelList[k]->pos(), info, - channelPtr, + *i, alphaValueInv); } - ++channelPtr[k]; + ++(*i); } } else { - for (int k = 0; k < channelPtr.size(); ++k) { + int k = 0; + for (auto i = channelPtrBegin; i != channelPtrEnd; ++i, ++k) { if (k != info.alphaCachePos) { info.fromDoubleFuncPtr[k](dstPtr, info.convChannelList[k]->pos(), 0.0); } - ++channelPtr[k]; + ++(*i); } } } else { - for (int k = 0; k < channelPtr.size(); ++k) { + int k = 0; + for (auto i = channelPtrBegin; i != channelPtrEnd; ++i, ++k) { writeOneChannelFromCache<false>(dstPtr, k, - info.convChannelList[k]->pos(), info, - channelPtr); - ++channelPtr[k]; + *i); + ++(*i); } } 
hitDst->nextPixel(); } - for (int k = 0; k < channelPtr.size(); ++k) { - channelPtr[k] = cacheRowStart[k] + cacheRowStride; + auto iRowStart = cacheRowStartBegin; + for (auto i = channelPtrBegin; i != channelPtrEnd; ++i, ++iRowStart) { + *i = *iRowStart + cacheRowStride; } hitDst->nextRow();