aboutsummaryrefslogtreecommitdiff
path: root/editors
diff options
context:
space:
mode:
authorLi-Wen Hsu <lwhsu@FreeBSD.org>2020-03-18 18:35:00 +0000
committerLi-Wen Hsu <lwhsu@FreeBSD.org>2020-03-18 18:35:00 +0000
commitf6b3a4cfee885779267ff2a2fdfbf02d0d035e87 (patch)
treec1ffb728cc502ee03cf18182d694429b1b11ce93 /editors
parent9fc461018a631476c5640890f35d237117f4bea0 (diff)
downloadports-f6b3a4cfee885779267ff2a2fdfbf02d0d035e87.tar.gz
ports-f6b3a4cfee885779267ff2a2fdfbf02d0d035e87.zip
Fix build with clang10
PR: 244850 Reported by: cy Submitted by: Trond.Endrestol@ximalas.info Obtained from: https://github.com/LibreOffice/core (partially)
Notes
Notes: svn path=/head/; revision=528660
Diffstat (limited to 'editors')
-rw-r--r--editors/libreoffice/Makefile2
-rw-r--r--editors/libreoffice/files/patch-clang10931
2 files changed, 932 insertions, 1 deletions
diff --git a/editors/libreoffice/Makefile b/editors/libreoffice/Makefile
index e7d8fe68a278..139552eae889 100644
--- a/editors/libreoffice/Makefile
+++ b/editors/libreoffice/Makefile
@@ -1,6 +1,6 @@
# $FreeBSD$
-PORTREVISION= 3
+PORTREVISION= 4
.include "${.CURDIR}/Makefile.common"
diff --git a/editors/libreoffice/files/patch-clang10 b/editors/libreoffice/files/patch-clang10
new file mode 100644
index 000000000000..ae24b2160029
--- /dev/null
+++ b/editors/libreoffice/files/patch-clang10
@@ -0,0 +1,931 @@
+--- vcl/inc/unx/saltype.h 2019-12-05 20:59:23.000000000 +0100
++++ vcl/inc/unx/saltype.h 2020-03-17 18:23:05.585171000 +0100
+@@ -18,8 +18,8 @@
+ public:
+ explicit SalX11Screen(unsigned int nXScreen) : mnXScreen( nXScreen ) {}
+ unsigned int getXScreen() const { return mnXScreen; }
+- bool operator==(const SalX11Screen &rOther) { return rOther.mnXScreen == mnXScreen; }
+- bool operator!=(const SalX11Screen &rOther) { return rOther.mnXScreen != mnXScreen; }
++ bool operator==(const SalX11Screen &rOther) const { return rOther.mnXScreen == mnXScreen; }
++ bool operator!=(const SalX11Screen &rOther) const { return rOther.mnXScreen != mnXScreen; }
+ };
+
+ #endif // INCLUDED_VCL_INC_UNX_SALTYPE_H
+--- sd/source/ui/framework/factories/BasicPaneFactory.cxx 2019-12-05 20:59:23.000000000 +0100
++++ sd/source/ui/framework/factories/BasicPaneFactory.cxx 2020-03-17 20:51:22.331805000 +0100
+@@ -324,7 +324,7 @@
+ void SAL_CALL BasicPaneFactory::disposing (
+ const lang::EventObject& rEventObject)
+ {
+- if (mxConfigurationControllerWeak == rEventObject.Source)
++ if (mxConfigurationControllerWeak.get() == rEventObject.Source)
+ {
+ mxConfigurationControllerWeak.clear();
+ }
+--- sd/inc/OutlinerIterator.hxx 2019-12-05 20:59:23.000000000 +0100
++++ sd/inc/OutlinerIterator.hxx 2020-03-17 21:20:36.906085000 +0100
+@@ -122,7 +122,7 @@
+ @return
+ Returns <TRUE/> when both iterators point to the same object.
+ */
+- bool operator== (const Iterator& rIterator);
++ bool operator== (const Iterator& rIterator) const;
+ /** Test whether two iterators point to different objects. This is just
+ the negation of the result of the equality operator.
+ @param rIterator
+@@ -130,7 +130,7 @@
+ @return
+ Returns <TRUE/> when both iterators point to the different objects.
+ */
+- bool operator!= (const Iterator& rIterator);
++ bool operator!= (const Iterator& rIterator) const;
+ /** Reverse the direction of iteration. The position of the iterator is
+ not changed. Thus calling this method twice returns to the old state.
+ */
+--- sd/source/ui/view/OutlinerIterator.cxx.orig 2019-12-05 20:59:23.000000000 +0100
++++ sd/source/ui/view/OutlinerIterator.cxx 2020-03-17 21:24:11.082383000 +0100
+@@ -110,7 +110,7 @@
+ return *this;
+ }
+
+-bool Iterator::operator== (const Iterator& rIterator)
++bool Iterator::operator== (const Iterator& rIterator) const
+ {
+ if (!mxIterator || !rIterator.mxIterator)
+ return mxIterator.get() == rIterator.mxIterator.get();
+@@ -118,7 +118,7 @@
+ return *mxIterator == *rIterator.mxIterator;
+ }
+
+-bool Iterator::operator!= (const Iterator& rIterator)
++bool Iterator::operator!= (const Iterator& rIterator) const
+ {
+ return ! operator==(rIterator);
+ }
+--- compilerplugins/clang/simplifybool.cxx 2019-12-05 20:59:23.000000000 +0100
++++ compilerplugins/clang/simplifybool.cxx 2020-03-17 22:03:11.369300000 +0100
+@@ -241,7 +241,30 @@
+ << expr->getSourceRange();
+ return true;
+ }
+- if (auto binaryOp = dyn_cast<BinaryOperator>(expr->getSubExpr()->IgnoreParenImpCasts())) {
++ auto sub = expr->getSubExpr()->IgnoreParenImpCasts();
++ auto reversed = false;
++#if CLANG_VERSION >= 100000
++ if (auto const rewritten = dyn_cast<CXXRewrittenBinaryOperator>(sub)) {
++ if (rewritten->isReversed()) {
++ if (rewritten->getOperator() == BO_EQ) {
++ auto const sem = rewritten->getSemanticForm();
++ bool match;
++ if (auto const op1 = dyn_cast<BinaryOperator>(sem)) {
++ match = op1->getOpcode() == BO_EQ;
++ } else if (auto const op2 = dyn_cast<CXXOperatorCallExpr>(sem)) {
++ match = op2->getOperator() == OO_EqualEqual;
++ } else {
++ match = false;
++ }
++ if (match) {
++ sub = sem;
++ reversed = true;
++ }
++ }
++ }
++ }
++#endif
++ if (auto binaryOp = dyn_cast<BinaryOperator>(sub)) {
+ // Ignore macros, otherwise
+ // OSL_ENSURE(!b, ...);
+ // triggers.
+@@ -289,7 +312,7 @@
+ << binaryOp->getSourceRange();
+ }
+ }
+- if (auto binaryOp = dyn_cast<CXXOperatorCallExpr>(expr->getSubExpr()->IgnoreParenImpCasts())) {
++ if (auto binaryOp = dyn_cast<CXXOperatorCallExpr>(sub)) {
+ // Ignore macros, otherwise
+ // OSL_ENSURE(!b, ...);
+ // triggers.
+@@ -301,8 +324,8 @@
+ if (!(op == OO_EqualEqual || op == OO_ExclaimEqual))
+ return true;
+ BinaryOperator::Opcode negatedOpcode = BinaryOperator::negateComparisonOp(BinaryOperator::getOverloadedOpcode(op));
+- auto lhs = binaryOp->getArg(0)->IgnoreImpCasts()->getType()->getUnqualifiedDesugaredType();
+- auto rhs = binaryOp->getArg(1)->IgnoreImpCasts()->getType()->getUnqualifiedDesugaredType();
++ auto lhs = binaryOp->getArg(reversed ? 1 : 0)->IgnoreImpCasts()->getType()->getUnqualifiedDesugaredType();
++ auto rhs = binaryOp->getArg(reversed ? 0 : 1)->IgnoreImpCasts()->getType()->getUnqualifiedDesugaredType();
+ auto const negOp = findOperator(compiler, negatedOpcode, lhs, rhs);
+ if (!negOp)
+ return true;
+@@ -323,8 +346,10 @@
+ << expr->getSourceRange();
+ if (negOp != ASSUME_OPERATOR_EXISTS)
+ report(
+- DiagnosticsEngine::Note, "the presumed corresponding negated operator is declared here",
++ DiagnosticsEngine::Note, "the presumed corresponding negated operator for %0 and %1 is declared here",
+ negOp->getLocation())
++ << binaryOp->getArg(reversed ? 1 : 0)->IgnoreImpCasts()->getType()
++ << binaryOp->getArg(reversed ? 0 : 1)->IgnoreImpCasts()->getType()
+ << negOp->getSourceRange();
+ }
+ return true;
+--- cui/source/tabpages/tpline.cxx 2019-12-05 20:59:23.000000000 +0100
++++ cui/source/tabpages/tpline.cxx 2020-03-17 22:06:49.493222000 +0100
+@@ -491,7 +491,7 @@
+ else if( m_pLineEndList->Count() > static_cast<long>( nPos - 1 ) )
+ pItem.reset(new XLineStartItem( m_xLbStartStyle->get_active_text(), m_pLineEndList->GetLineEnd( nPos - 1 )->GetLineEnd() ));
+ pOld = GetOldItem( *rAttrs, XATTR_LINESTART );
+- if( pItem && ( !pOld || !( *static_cast<const XLineEndItem*>(pOld) == *pItem ) ) )
++ if( pItem && ( !pOld || *pOld != *pItem ) )
+ {
+ rAttrs->Put( *pItem );
+ bModified = true;
+--- sc/source/ui/view/viewfunc.cxx.orig 2019-12-05 20:59:23.000000000 +0100
++++ sc/source/ui/view/viewfunc.cxx 2020-03-17 23:58:50.978995000 +0100
+@@ -958,7 +958,7 @@
+
+ // this should be intercepted by the pool: ?!??!??
+
+- if (bFrame && rNewOuter == rOldOuter && rNewInner == rOldInner)
++ if (bFrame && &rNewOuter == &rOldOuter && &rNewInner == &rOldInner)
+ bFrame = false;
+
+ bFrame = bFrame
+--- sc/source/core/opencl/formulagroupcl.cxx 2019-12-05 20:59:23.000000000 +0100
++++ sc/source/core/opencl/formulagroupcl.cxx 2020-03-18 00:44:08.091710000 +0100
+@@ -1026,9 +1026,6 @@
+ /// Handling a Double Vector that is used as a sliding window input
+ /// to either a sliding window average or sum-of-products
+ /// Generate a sequential loop for reductions
+-class OpAverage;
+-class OpCount;
+-
+ template<class Base>
+ class DynamicKernelSlidingArgument : public Base
+ {
+@@ -1335,186 +1332,8 @@
+ }
+
+ /// Emit the definition for the auxiliary reduction kernel
+- virtual void GenSlidingWindowFunction( std::stringstream& ss )
+- {
+- if (!dynamic_cast<OpAverage*>(mpCodeGen.get()))
+- {
+- std::string name = Base::GetName();
+- ss << "__kernel void " << name;
+- ss << "_reduction(__global double* A, "
+- "__global double *result,int arrayLength,int windowSize){\n";
+- ss << " double tmp, current_result =" <<
+- mpCodeGen->GetBottom();
+- ss << ";\n";
+- ss << " int writePos = get_group_id(1);\n";
+- ss << " int lidx = get_local_id(0);\n";
+- ss << " __local double shm_buf[256];\n";
+- if (mpDVR->IsStartFixed())
+- ss << " int offset = 0;\n";
+- else // if (!mpDVR->IsStartFixed())
+- ss << " int offset = get_group_id(1);\n";
+- if (mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
+- ss << " int end = windowSize;\n";
+- else if (!mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
+- ss << " int end = offset + windowSize;\n";
+- else if (mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
+- ss << " int end = windowSize + get_group_id(1);\n";
+- else if (!mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
+- ss << " int end = windowSize;\n";
+- ss << " end = min(end, arrayLength);\n";
++ virtual void GenSlidingWindowFunction( std::stringstream& ss );
+
+- ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
+- ss << " int loop = arrayLength/512 + 1;\n";
+- ss << " for (int l=0; l<loop; l++){\n";
+- ss << " tmp = " << mpCodeGen->GetBottom() << ";\n";
+- ss << " int loopOffset = l*512;\n";
+- ss << " if((loopOffset + lidx + offset + 256) < end) {\n";
+- ss << " tmp = legalize(" << mpCodeGen->Gen2(
+- "A[loopOffset + lidx + offset]", "tmp") << ", tmp);\n";
+- ss << " tmp = legalize(" << mpCodeGen->Gen2(
+- "A[loopOffset + lidx + offset + 256]", "tmp") << ", tmp);\n";
+- ss << " } else if ((loopOffset + lidx + offset) < end)\n";
+- ss << " tmp = legalize(" << mpCodeGen->Gen2(
+- "A[loopOffset + lidx + offset]", "tmp") << ", tmp);\n";
+- ss << " shm_buf[lidx] = tmp;\n";
+- ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
+- ss << " for (int i = 128; i >0; i/=2) {\n";
+- ss << " if (lidx < i)\n";
+- ss << " shm_buf[lidx] = ";
+- // Special case count
+- if (dynamic_cast<OpCount*>(mpCodeGen.get()))
+- ss << "shm_buf[lidx] + shm_buf[lidx + i];\n";
+- else
+- ss << mpCodeGen->Gen2("shm_buf[lidx]", "shm_buf[lidx + i]") << ";\n";
+- ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
+- ss << " }\n";
+- ss << " if (lidx == 0)\n";
+- ss << " current_result =";
+- if (dynamic_cast<OpCount*>(mpCodeGen.get()))
+- ss << "current_result + shm_buf[0]";
+- else
+- ss << mpCodeGen->Gen2("current_result", "shm_buf[0]");
+- ss << ";\n";
+- ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
+- ss << " }\n";
+- ss << " if (lidx == 0)\n";
+- ss << " result[writePos] = current_result;\n";
+- ss << "}\n";
+- }
+- else
+- {
+- std::string name = Base::GetName();
+- /*sum reduction*/
+- ss << "__kernel void " << name << "_sum";
+- ss << "_reduction(__global double* A, "
+- "__global double *result,int arrayLength,int windowSize){\n";
+- ss << " double tmp, current_result =" <<
+- mpCodeGen->GetBottom();
+- ss << ";\n";
+- ss << " int writePos = get_group_id(1);\n";
+- ss << " int lidx = get_local_id(0);\n";
+- ss << " __local double shm_buf[256];\n";
+- if (mpDVR->IsStartFixed())
+- ss << " int offset = 0;\n";
+- else // if (!mpDVR->IsStartFixed())
+- ss << " int offset = get_group_id(1);\n";
+- if (mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
+- ss << " int end = windowSize;\n";
+- else if (!mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
+- ss << " int end = offset + windowSize;\n";
+- else if (mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
+- ss << " int end = windowSize + get_group_id(1);\n";
+- else if (!mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
+- ss << " int end = windowSize;\n";
+- ss << " end = min(end, arrayLength);\n";
+- ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
+- ss << " int loop = arrayLength/512 + 1;\n";
+- ss << " for (int l=0; l<loop; l++){\n";
+- ss << " tmp = " << mpCodeGen->GetBottom() << ";\n";
+- ss << " int loopOffset = l*512;\n";
+- ss << " if((loopOffset + lidx + offset + 256) < end) {\n";
+- ss << " tmp = legalize(";
+- ss << "(A[loopOffset + lidx + offset]+ tmp)";
+- ss << ", tmp);\n";
+- ss << " tmp = legalize((A[loopOffset + lidx + offset + 256]+ tmp)";
+- ss << ", tmp);\n";
+- ss << " } else if ((loopOffset + lidx + offset) < end)\n";
+- ss << " tmp = legalize((A[loopOffset + lidx + offset] + tmp)";
+- ss << ", tmp);\n";
+- ss << " shm_buf[lidx] = tmp;\n";
+- ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
+- ss << " for (int i = 128; i >0; i/=2) {\n";
+- ss << " if (lidx < i)\n";
+- ss << " shm_buf[lidx] = ";
+- ss << "shm_buf[lidx] + shm_buf[lidx + i];\n";
+- ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
+- ss << " }\n";
+- ss << " if (lidx == 0)\n";
+- ss << " current_result =";
+- ss << "current_result + shm_buf[0]";
+- ss << ";\n";
+- ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
+- ss << " }\n";
+- ss << " if (lidx == 0)\n";
+- ss << " result[writePos] = current_result;\n";
+- ss << "}\n";
+- /*count reduction*/
+- ss << "__kernel void " << name << "_count";
+- ss << "_reduction(__global double* A, "
+- "__global double *result,int arrayLength,int windowSize){\n";
+- ss << " double tmp, current_result =" <<
+- mpCodeGen->GetBottom();
+- ss << ";\n";
+- ss << " int writePos = get_group_id(1);\n";
+- ss << " int lidx = get_local_id(0);\n";
+- ss << " __local double shm_buf[256];\n";
+- if (mpDVR->IsStartFixed())
+- ss << " int offset = 0;\n";
+- else // if (!mpDVR->IsStartFixed())
+- ss << " int offset = get_group_id(1);\n";
+- if (mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
+- ss << " int end = windowSize;\n";
+- else if (!mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
+- ss << " int end = offset + windowSize;\n";
+- else if (mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
+- ss << " int end = windowSize + get_group_id(1);\n";
+- else if (!mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
+- ss << " int end = windowSize;\n";
+- ss << " end = min(end, arrayLength);\n";
+- ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
+- ss << " int loop = arrayLength/512 + 1;\n";
+- ss << " for (int l=0; l<loop; l++){\n";
+- ss << " tmp = " << mpCodeGen->GetBottom() << ";\n";
+- ss << " int loopOffset = l*512;\n";
+- ss << " if((loopOffset + lidx + offset + 256) < end) {\n";
+- ss << " tmp = legalize((isnan(A[loopOffset + lidx + offset])?tmp:tmp+1.0)";
+- ss << ", tmp);\n";
+- ss << " tmp = legalize((isnan(A[loopOffset + lidx + offset+256])?tmp:tmp+1.0)";
+- ss << ", tmp);\n";
+- ss << " } else if ((loopOffset + lidx + offset) < end)\n";
+- ss << " tmp = legalize((isnan(A[loopOffset + lidx + offset])?tmp:tmp+1.0)";
+- ss << ", tmp);\n";
+- ss << " shm_buf[lidx] = tmp;\n";
+- ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
+- ss << " for (int i = 128; i >0; i/=2) {\n";
+- ss << " if (lidx < i)\n";
+- ss << " shm_buf[lidx] = ";
+- ss << "shm_buf[lidx] + shm_buf[lidx + i];\n";
+- ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
+- ss << " }\n";
+- ss << " if (lidx == 0)\n";
+- ss << " current_result =";
+- ss << "current_result + shm_buf[0];";
+- ss << ";\n";
+- ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
+- ss << " }\n";
+- ss << " if (lidx == 0)\n";
+- ss << " result[writePos] = current_result;\n";
+- ss << "}\n";
+- }
+-
+- }
+-
+ virtual std::string GenSlidingWindowDeclRef( bool ) const
+ {
+ std::stringstream ss;
+@@ -1527,195 +1346,10 @@
+
+ /// Controls how the elements in the DoubleVectorRef are traversed
+ size_t GenReductionLoopHeader(
+- std::stringstream& ss, int nResultSize, bool& needBody )
+- {
+- assert(mpDVR);
+- size_t nCurWindowSize = mpDVR->GetRefRowSize();
+- std::string temp = Base::GetName() + "[gid0]";
+- ss << "tmp = ";
+- // Special case count
+- if (dynamic_cast<OpAverage*>(mpCodeGen.get()))
+- {
+- ss << mpCodeGen->Gen2(temp, "tmp") << ";\n";
+- ss << "nCount = nCount-1;\n";
+- ss << "nCount = nCount +"; /*re-assign nCount from count reduction*/
+- ss << Base::GetName() << "[gid0+" << nResultSize << "]" << ";\n";
+- }
+- else if (dynamic_cast<OpCount*>(mpCodeGen.get()))
+- ss << temp << "+ tmp";
+- else
+- ss << mpCodeGen->Gen2(temp, "tmp");
+- ss << ";\n\t";
+- needBody = false;
+- return nCurWindowSize;
+- }
++ std::stringstream& ss, int nResultSize, bool& needBody );
+
+- virtual size_t Marshal( cl_kernel k, int argno, int w, cl_program mpProgram )
+- {
+- assert(Base::mpClmem == nullptr);
++ virtual size_t Marshal( cl_kernel k, int argno, int w, cl_program mpProgram );
+
+- openclwrapper::KernelEnv kEnv;
+- openclwrapper::setKernelEnv(&kEnv);
+- cl_int err;
+- size_t nInput = mpDVR->GetArrayLength();
+- size_t nCurWindowSize = mpDVR->GetRefRowSize();
+- // create clmem buffer
+- if (mpDVR->GetArrays()[Base::mnIndex].mpNumericArray == nullptr)
+- throw Unhandled(__FILE__, __LINE__);
+- double* pHostBuffer = const_cast<double*>(
+- mpDVR->GetArrays()[Base::mnIndex].mpNumericArray);
+- size_t szHostBuffer = nInput * sizeof(double);
+- Base::mpClmem = clCreateBuffer(kEnv.mpkContext,
+- cl_mem_flags(CL_MEM_READ_ONLY) | CL_MEM_USE_HOST_PTR,
+- szHostBuffer,
+- pHostBuffer, &err);
+- SAL_INFO("sc.opencl", "Created buffer " << Base::mpClmem << " size " << nInput << "*" << sizeof(double) << "=" << szHostBuffer << " using host buffer " << pHostBuffer);
+-
+- mpClmem2 = clCreateBuffer(kEnv.mpkContext,
+- CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
+- sizeof(double) * w, nullptr, nullptr);
+- if (CL_SUCCESS != err)
+- throw OpenCLError("clCreateBuffer", err, __FILE__, __LINE__);
+- SAL_INFO("sc.opencl", "Created buffer " << mpClmem2 << " size " << sizeof(double) << "*" << w << "=" << (sizeof(double)*w));
+-
+- // reproduce the reduction function name
+- std::string kernelName;
+- if (!dynamic_cast<OpAverage*>(mpCodeGen.get()))
+- kernelName = Base::GetName() + "_reduction";
+- else
+- kernelName = Base::GetName() + "_sum_reduction";
+- cl_kernel redKernel = clCreateKernel(mpProgram, kernelName.c_str(), &err);
+- if (err != CL_SUCCESS)
+- throw OpenCLError("clCreateKernel", err, __FILE__, __LINE__);
+- SAL_INFO("sc.opencl", "Created kernel " << redKernel << " with name " << kernelName << " in program " << mpProgram);
+-
+- // set kernel arg of reduction kernel
+- // TODO(Wei Wei): use unique name for kernel
+- cl_mem buf = Base::GetCLBuffer();
+- SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 0 << ": cl_mem: " << buf);
+- err = clSetKernelArg(redKernel, 0, sizeof(cl_mem),
+- static_cast<void*>(&buf));
+- if (CL_SUCCESS != err)
+- throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
+-
+- SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 1 << ": cl_mem: " << mpClmem2);
+- err = clSetKernelArg(redKernel, 1, sizeof(cl_mem), &mpClmem2);
+- if (CL_SUCCESS != err)
+- throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
+-
+- SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 2 << ": cl_int: " << nInput);
+- err = clSetKernelArg(redKernel, 2, sizeof(cl_int), static_cast<void*>(&nInput));
+- if (CL_SUCCESS != err)
+- throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
+-
+- SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 3 << ": cl_int: " << nCurWindowSize);
+- err = clSetKernelArg(redKernel, 3, sizeof(cl_int), static_cast<void*>(&nCurWindowSize));
+- if (CL_SUCCESS != err)
+- throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
+-
+- // set work group size and execute
+- size_t global_work_size[] = { 256, static_cast<size_t>(w) };
+- size_t const local_work_size[] = { 256, 1 };
+- SAL_INFO("sc.opencl", "Enqueing kernel " << redKernel);
+- err = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, redKernel, 2, nullptr,
+- global_work_size, local_work_size, 0, nullptr, nullptr);
+- if (CL_SUCCESS != err)
+- throw OpenCLError("clEnqueueNDRangeKernel", err, __FILE__, __LINE__);
+- err = clFinish(kEnv.mpkCmdQueue);
+- if (CL_SUCCESS != err)
+- throw OpenCLError("clFinish", err, __FILE__, __LINE__);
+- if (dynamic_cast<OpAverage*>(mpCodeGen.get()))
+- {
+- /*average need more reduction kernel for count computing*/
+- std::unique_ptr<double[]> pAllBuffer(new double[2 * w]);
+- double* resbuf = static_cast<double*>(clEnqueueMapBuffer(kEnv.mpkCmdQueue,
+- mpClmem2,
+- CL_TRUE, CL_MAP_READ, 0,
+- sizeof(double) * w, 0, nullptr, nullptr,
+- &err));
+- if (err != CL_SUCCESS)
+- throw OpenCLError("clEnqueueMapBuffer", err, __FILE__, __LINE__);
+-
+- for (int i = 0; i < w; i++)
+- pAllBuffer[i] = resbuf[i];
+- err = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue, mpClmem2, resbuf, 0, nullptr, nullptr);
+- if (err != CL_SUCCESS)
+- throw OpenCLError("clEnqueueUnmapMemObject", err, __FILE__, __LINE__);
+-
+- kernelName = Base::GetName() + "_count_reduction";
+- redKernel = clCreateKernel(mpProgram, kernelName.c_str(), &err);
+- if (err != CL_SUCCESS)
+- throw OpenCLError("clCreateKernel", err, __FILE__, __LINE__);
+- SAL_INFO("sc.opencl", "Created kernel " << redKernel << " with name " << kernelName << " in program " << mpProgram);
+-
+- // set kernel arg of reduction kernel
+- buf = Base::GetCLBuffer();
+- SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 0 << ": cl_mem: " << buf);
+- err = clSetKernelArg(redKernel, 0, sizeof(cl_mem),
+- static_cast<void*>(&buf));
+- if (CL_SUCCESS != err)
+- throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
+-
+- SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 1 << ": cl_mem: " << mpClmem2);
+- err = clSetKernelArg(redKernel, 1, sizeof(cl_mem), &mpClmem2);
+- if (CL_SUCCESS != err)
+- throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
+-
+- SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 2 << ": cl_int: " << nInput);
+- err = clSetKernelArg(redKernel, 2, sizeof(cl_int), static_cast<void*>(&nInput));
+- if (CL_SUCCESS != err)
+- throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
+-
+- SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 3 << ": cl_int: " << nCurWindowSize);
+- err = clSetKernelArg(redKernel, 3, sizeof(cl_int), static_cast<void*>(&nCurWindowSize));
+- if (CL_SUCCESS != err)
+- throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
+-
+- // set work group size and execute
+- size_t global_work_size1[] = { 256, static_cast<size_t>(w) };
+- size_t const local_work_size1[] = { 256, 1 };
+- SAL_INFO("sc.opencl", "Enqueing kernel " << redKernel);
+- err = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, redKernel, 2, nullptr,
+- global_work_size1, local_work_size1, 0, nullptr, nullptr);
+- if (CL_SUCCESS != err)
+- throw OpenCLError("clEnqueueNDRangeKernel", err, __FILE__, __LINE__);
+- err = clFinish(kEnv.mpkCmdQueue);
+- if (CL_SUCCESS != err)
+- throw OpenCLError("clFinish", err, __FILE__, __LINE__);
+- resbuf = static_cast<double*>(clEnqueueMapBuffer(kEnv.mpkCmdQueue,
+- mpClmem2,
+- CL_TRUE, CL_MAP_READ, 0,
+- sizeof(double) * w, 0, nullptr, nullptr,
+- &err));
+- if (err != CL_SUCCESS)
+- throw OpenCLError("clEnqueueMapBuffer", err, __FILE__, __LINE__);
+- for (int i = 0; i < w; i++)
+- pAllBuffer[i + w] = resbuf[i];
+- err = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue, mpClmem2, resbuf, 0, nullptr, nullptr);
+- // FIXME: Is it intentional to not throw an OpenCLError even if the clEnqueueUnmapMemObject() fails?
+- if (CL_SUCCESS != err)
+- SAL_WARN("sc.opencl", "clEnqueueUnmapMemObject failed: " << openclwrapper::errorString(err));
+- if (mpClmem2)
+- {
+- err = clReleaseMemObject(mpClmem2);
+- SAL_WARN_IF(err != CL_SUCCESS, "sc.opencl", "clReleaseMemObject failed: " << openclwrapper::errorString(err));
+- mpClmem2 = nullptr;
+- }
+- mpClmem2 = clCreateBuffer(kEnv.mpkContext,
+- cl_mem_flags(CL_MEM_READ_WRITE) | CL_MEM_COPY_HOST_PTR,
+- w * sizeof(double) * 2, pAllBuffer.get(), &err);
+- if (CL_SUCCESS != err)
+- throw OpenCLError("clCreateBuffer", err, __FILE__, __LINE__);
+- SAL_INFO("sc.opencl", "Created buffer " << mpClmem2 << " size " << w << "*" << sizeof(double) << "=" << (w*sizeof(double)) << " copying host buffer " << pAllBuffer.get());
+- }
+- // set kernel arg
+- SAL_INFO("sc.opencl", "Kernel " << k << " arg " << argno << ": cl_mem: " << mpClmem2);
+- err = clSetKernelArg(k, argno, sizeof(cl_mem), &mpClmem2);
+- if (CL_SUCCESS != err)
+- throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
+- return 1;
+- }
+-
+ ~ParallelReductionVectorRef()
+ {
+ if (mpClmem2)
+@@ -2314,6 +1948,380 @@
+ }
+ virtual std::string BinFuncName() const override { return "fsop"; }
+ };
++
++template<class Base>
++void ParallelReductionVectorRef<Base>::GenSlidingWindowFunction( std::stringstream& ss )
++{
++ if (!dynamic_cast<OpAverage*>(mpCodeGen.get()))
++ {
++ std::string name = Base::GetName();
++ ss << "__kernel void " << name;
++ ss << "_reduction(__global double* A, "
++ "__global double *result,int arrayLength,int windowSize){\n";
++ ss << " double tmp, current_result =" <<
++ mpCodeGen->GetBottom();
++ ss << ";\n";
++ ss << " int writePos = get_group_id(1);\n";
++ ss << " int lidx = get_local_id(0);\n";
++ ss << " __local double shm_buf[256];\n";
++ if (mpDVR->IsStartFixed())
++ ss << " int offset = 0;\n";
++ else // if (!mpDVR->IsStartFixed())
++ ss << " int offset = get_group_id(1);\n";
++ if (mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
++ ss << " int end = windowSize;\n";
++ else if (!mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
++ ss << " int end = offset + windowSize;\n";
++ else if (mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
++ ss << " int end = windowSize + get_group_id(1);\n";
++ else if (!mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
++ ss << " int end = windowSize;\n";
++ ss << " end = min(end, arrayLength);\n";
++
++ ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
++ ss << " int loop = arrayLength/512 + 1;\n";
++ ss << " for (int l=0; l<loop; l++){\n";
++ ss << " tmp = " << mpCodeGen->GetBottom() << ";\n";
++ ss << " int loopOffset = l*512;\n";
++ ss << " if((loopOffset + lidx + offset + 256) < end) {\n";
++ ss << " tmp = legalize(" << mpCodeGen->Gen2(
++ "A[loopOffset + lidx + offset]", "tmp") << ", tmp);\n";
++ ss << " tmp = legalize(" << mpCodeGen->Gen2(
++ "A[loopOffset + lidx + offset + 256]", "tmp") << ", tmp);\n";
++ ss << " } else if ((loopOffset + lidx + offset) < end)\n";
++ ss << " tmp = legalize(" << mpCodeGen->Gen2(
++ "A[loopOffset + lidx + offset]", "tmp") << ", tmp);\n";
++ ss << " shm_buf[lidx] = tmp;\n";
++ ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
++ ss << " for (int i = 128; i >0; i/=2) {\n";
++ ss << " if (lidx < i)\n";
++ ss << " shm_buf[lidx] = ";
++ // Special case count
++ if (dynamic_cast<OpCount*>(mpCodeGen.get()))
++ ss << "shm_buf[lidx] + shm_buf[lidx + i];\n";
++ else
++ ss << mpCodeGen->Gen2("shm_buf[lidx]", "shm_buf[lidx + i]") << ";\n";
++ ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
++ ss << " }\n";
++ ss << " if (lidx == 0)\n";
++ ss << " current_result =";
++ if (dynamic_cast<OpCount*>(mpCodeGen.get()))
++ ss << "current_result + shm_buf[0]";
++ else
++ ss << mpCodeGen->Gen2("current_result", "shm_buf[0]");
++ ss << ";\n";
++ ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
++ ss << " }\n";
++ ss << " if (lidx == 0)\n";
++ ss << " result[writePos] = current_result;\n";
++ ss << "}\n";
++ }
++ else
++ {
++ std::string name = Base::GetName();
++ /*sum reduction*/
++ ss << "__kernel void " << name << "_sum";
++ ss << "_reduction(__global double* A, "
++ "__global double *result,int arrayLength,int windowSize){\n";
++ ss << " double tmp, current_result =" <<
++ mpCodeGen->GetBottom();
++ ss << ";\n";
++ ss << " int writePos = get_group_id(1);\n";
++ ss << " int lidx = get_local_id(0);\n";
++ ss << " __local double shm_buf[256];\n";
++ if (mpDVR->IsStartFixed())
++ ss << " int offset = 0;\n";
++ else // if (!mpDVR->IsStartFixed())
++ ss << " int offset = get_group_id(1);\n";
++ if (mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
++ ss << " int end = windowSize;\n";
++ else if (!mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
++ ss << " int end = offset + windowSize;\n";
++ else if (mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
++ ss << " int end = windowSize + get_group_id(1);\n";
++ else if (!mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
++ ss << " int end = windowSize;\n";
++ ss << " end = min(end, arrayLength);\n";
++ ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
++ ss << " int loop = arrayLength/512 + 1;\n";
++ ss << " for (int l=0; l<loop; l++){\n";
++ ss << " tmp = " << mpCodeGen->GetBottom() << ";\n";
++ ss << " int loopOffset = l*512;\n";
++ ss << " if((loopOffset + lidx + offset + 256) < end) {\n";
++ ss << " tmp = legalize(";
++ ss << "(A[loopOffset + lidx + offset]+ tmp)";
++ ss << ", tmp);\n";
++ ss << " tmp = legalize((A[loopOffset + lidx + offset + 256]+ tmp)";
++ ss << ", tmp);\n";
++ ss << " } else if ((loopOffset + lidx + offset) < end)\n";
++ ss << " tmp = legalize((A[loopOffset + lidx + offset] + tmp)";
++ ss << ", tmp);\n";
++ ss << " shm_buf[lidx] = tmp;\n";
++ ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
++ ss << " for (int i = 128; i >0; i/=2) {\n";
++ ss << " if (lidx < i)\n";
++ ss << " shm_buf[lidx] = ";
++ ss << "shm_buf[lidx] + shm_buf[lidx + i];\n";
++ ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
++ ss << " }\n";
++ ss << " if (lidx == 0)\n";
++ ss << " current_result =";
++ ss << "current_result + shm_buf[0]";
++ ss << ";\n";
++ ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
++ ss << " }\n";
++ ss << " if (lidx == 0)\n";
++ ss << " result[writePos] = current_result;\n";
++ ss << "}\n";
++ /*count reduction*/
++ ss << "__kernel void " << name << "_count";
++ ss << "_reduction(__global double* A, "
++ "__global double *result,int arrayLength,int windowSize){\n";
++ ss << " double tmp, current_result =" <<
++ mpCodeGen->GetBottom();
++ ss << ";\n";
++ ss << " int writePos = get_group_id(1);\n";
++ ss << " int lidx = get_local_id(0);\n";
++ ss << " __local double shm_buf[256];\n";
++ if (mpDVR->IsStartFixed())
++ ss << " int offset = 0;\n";
++ else // if (!mpDVR->IsStartFixed())
++ ss << " int offset = get_group_id(1);\n";
++ if (mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
++ ss << " int end = windowSize;\n";
++ else if (!mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
++ ss << " int end = offset + windowSize;\n";
++ else if (mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
++ ss << " int end = windowSize + get_group_id(1);\n";
++ else if (!mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
++ ss << " int end = windowSize;\n";
++ ss << " end = min(end, arrayLength);\n";
++ ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
++ ss << " int loop = arrayLength/512 + 1;\n";
++ ss << " for (int l=0; l<loop; l++){\n";
++ ss << " tmp = " << mpCodeGen->GetBottom() << ";\n";
++ ss << " int loopOffset = l*512;\n";
++ ss << " if((loopOffset + lidx + offset + 256) < end) {\n";
++ ss << " tmp = legalize((isnan(A[loopOffset + lidx + offset])?tmp:tmp+1.0)";
++ ss << ", tmp);\n";
++ ss << " tmp = legalize((isnan(A[loopOffset + lidx + offset+256])?tmp:tmp+1.0)";
++ ss << ", tmp);\n";
++ ss << " } else if ((loopOffset + lidx + offset) < end)\n";
++ ss << " tmp = legalize((isnan(A[loopOffset + lidx + offset])?tmp:tmp+1.0)";
++ ss << ", tmp);\n";
++ ss << " shm_buf[lidx] = tmp;\n";
++ ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
++ ss << " for (int i = 128; i >0; i/=2) {\n";
++ ss << " if (lidx < i)\n";
++ ss << " shm_buf[lidx] = ";
++ ss << "shm_buf[lidx] + shm_buf[lidx + i];\n";
++ ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
++ ss << " }\n";
++ ss << " if (lidx == 0)\n";
++ ss << " current_result =";
++ ss << "current_result + shm_buf[0];";
++ ss << ";\n";
++ ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
++ ss << " }\n";
++ ss << " if (lidx == 0)\n";
++ ss << " result[writePos] = current_result;\n";
++ ss << "}\n";
++ }
++
++}
++
++template<class Base>
++size_t ParallelReductionVectorRef<Base>::GenReductionLoopHeader(
++ std::stringstream& ss, int nResultSize, bool& needBody )
++{
++ assert(mpDVR);
++ size_t nCurWindowSize = mpDVR->GetRefRowSize();
++ std::string temp = Base::GetName() + "[gid0]";
++ ss << "tmp = ";
++ // Special case count
++ if (dynamic_cast<OpAverage*>(mpCodeGen.get()))
++ {
++ ss << mpCodeGen->Gen2(temp, "tmp") << ";\n";
++ ss << "nCount = nCount-1;\n";
++ ss << "nCount = nCount +"; /*re-assign nCount from count reduction*/
++ ss << Base::GetName() << "[gid0+" << nResultSize << "]" << ";\n";
++ }
++ else if (dynamic_cast<OpCount*>(mpCodeGen.get()))
++ ss << temp << "+ tmp";
++ else
++ ss << mpCodeGen->Gen2(temp, "tmp");
++ ss << ";\n\t";
++ needBody = false;
++ return nCurWindowSize;
++}
++
++template<class Base>
++size_t ParallelReductionVectorRef<Base>::Marshal( cl_kernel k, int argno, int w, cl_program mpProgram )
++{
++ assert(Base::mpClmem == nullptr);
++
++ openclwrapper::KernelEnv kEnv;
++ openclwrapper::setKernelEnv(&kEnv);
++ cl_int err;
++ size_t nInput = mpDVR->GetArrayLength();
++ size_t nCurWindowSize = mpDVR->GetRefRowSize();
++ // create clmem buffer
++ if (mpDVR->GetArrays()[Base::mnIndex].mpNumericArray == nullptr)
++ throw Unhandled(__FILE__, __LINE__);
++ double* pHostBuffer = const_cast<double*>(
++ mpDVR->GetArrays()[Base::mnIndex].mpNumericArray);
++ size_t szHostBuffer = nInput * sizeof(double);
++ Base::mpClmem = clCreateBuffer(kEnv.mpkContext,
++ cl_mem_flags(CL_MEM_READ_ONLY) | CL_MEM_USE_HOST_PTR,
++ szHostBuffer,
++ pHostBuffer, &err);
++ SAL_INFO("sc.opencl", "Created buffer " << Base::mpClmem << " size " << nInput << "*" << sizeof(double) << "=" << szHostBuffer << " using host buffer " << pHostBuffer);
++
++ mpClmem2 = clCreateBuffer(kEnv.mpkContext,
++ CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
++ sizeof(double) * w, nullptr, nullptr);
++ if (CL_SUCCESS != err)
++ throw OpenCLError("clCreateBuffer", err, __FILE__, __LINE__);
++ SAL_INFO("sc.opencl", "Created buffer " << mpClmem2 << " size " << sizeof(double) << "*" << w << "=" << (sizeof(double)*w));
++
++ // reproduce the reduction function name
++ std::string kernelName;
++ if (!dynamic_cast<OpAverage*>(mpCodeGen.get()))
++ kernelName = Base::GetName() + "_reduction";
++ else
++ kernelName = Base::GetName() + "_sum_reduction";
++ cl_kernel redKernel = clCreateKernel(mpProgram, kernelName.c_str(), &err);
++ if (err != CL_SUCCESS)
++ throw OpenCLError("clCreateKernel", err, __FILE__, __LINE__);
++ SAL_INFO("sc.opencl", "Created kernel " << redKernel << " with name " << kernelName << " in program " << mpProgram);
++
++ // set kernel arg of reduction kernel
++ // TODO(Wei Wei): use unique name for kernel
++ cl_mem buf = Base::GetCLBuffer();
++ SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 0 << ": cl_mem: " << buf);
++ err = clSetKernelArg(redKernel, 0, sizeof(cl_mem),
++ static_cast<void*>(&buf));
++ if (CL_SUCCESS != err)
++ throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
++
++ SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 1 << ": cl_mem: " << mpClmem2);
++ err = clSetKernelArg(redKernel, 1, sizeof(cl_mem), &mpClmem2);
++ if (CL_SUCCESS != err)
++ throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
++
++ SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 2 << ": cl_int: " << nInput);
++ err = clSetKernelArg(redKernel, 2, sizeof(cl_int), static_cast<void*>(&nInput));
++ if (CL_SUCCESS != err)
++ throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
++
++ SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 3 << ": cl_int: " << nCurWindowSize);
++ err = clSetKernelArg(redKernel, 3, sizeof(cl_int), static_cast<void*>(&nCurWindowSize));
++ if (CL_SUCCESS != err)
++ throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
++
++ // set work group size and execute
++ size_t global_work_size[] = { 256, static_cast<size_t>(w) };
++ size_t const local_work_size[] = { 256, 1 };
++ SAL_INFO("sc.opencl", "Enqueuing kernel " << redKernel);
++ err = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, redKernel, 2, nullptr,
++ global_work_size, local_work_size, 0, nullptr, nullptr);
++ if (CL_SUCCESS != err)
++ throw OpenCLError("clEnqueueNDRangeKernel", err, __FILE__, __LINE__);
++ err = clFinish(kEnv.mpkCmdQueue);
++ if (CL_SUCCESS != err)
++ throw OpenCLError("clFinish", err, __FILE__, __LINE__);
++ if (dynamic_cast<OpAverage*>(mpCodeGen.get()))
++ {
++ /*average need more reduction kernel for count computing*/
++ std::unique_ptr<double[]> pAllBuffer(new double[2 * w]);
++ double* resbuf = static_cast<double*>(clEnqueueMapBuffer(kEnv.mpkCmdQueue,
++ mpClmem2,
++ CL_TRUE, CL_MAP_READ, 0,
++ sizeof(double) * w, 0, nullptr, nullptr,
++ &err));
++ if (err != CL_SUCCESS)
++ throw OpenCLError("clEnqueueMapBuffer", err, __FILE__, __LINE__);
++
++ for (int i = 0; i < w; i++)
++ pAllBuffer[i] = resbuf[i];
++ err = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue, mpClmem2, resbuf, 0, nullptr, nullptr);
++ if (err != CL_SUCCESS)
++ throw OpenCLError("clEnqueueUnmapMemObject", err, __FILE__, __LINE__);
++
++ kernelName = Base::GetName() + "_count_reduction";
++ redKernel = clCreateKernel(mpProgram, kernelName.c_str(), &err);
++ if (err != CL_SUCCESS)
++ throw OpenCLError("clCreateKernel", err, __FILE__, __LINE__);
++ SAL_INFO("sc.opencl", "Created kernel " << redKernel << " with name " << kernelName << " in program " << mpProgram);
++
++ // set kernel arg of reduction kernel
++ buf = Base::GetCLBuffer();
++ SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 0 << ": cl_mem: " << buf);
++ err = clSetKernelArg(redKernel, 0, sizeof(cl_mem),
++ static_cast<void*>(&buf));
++ if (CL_SUCCESS != err)
++ throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
++
++ SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 1 << ": cl_mem: " << mpClmem2);
++ err = clSetKernelArg(redKernel, 1, sizeof(cl_mem), &mpClmem2);
++ if (CL_SUCCESS != err)
++ throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
++
++ SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 2 << ": cl_int: " << nInput);
++ err = clSetKernelArg(redKernel, 2, sizeof(cl_int), static_cast<void*>(&nInput));
++ if (CL_SUCCESS != err)
++ throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
++
++ SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 3 << ": cl_int: " << nCurWindowSize);
++ err = clSetKernelArg(redKernel, 3, sizeof(cl_int), static_cast<void*>(&nCurWindowSize));
++ if (CL_SUCCESS != err)
++ throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
++
++ // set work group size and execute
++ size_t global_work_size1[] = { 256, static_cast<size_t>(w) };
++ size_t const local_work_size1[] = { 256, 1 };
++ SAL_INFO("sc.opencl", "Enqueuing kernel " << redKernel);
++ err = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, redKernel, 2, nullptr,
++ global_work_size1, local_work_size1, 0, nullptr, nullptr);
++ if (CL_SUCCESS != err)
++ throw OpenCLError("clEnqueueNDRangeKernel", err, __FILE__, __LINE__);
++ err = clFinish(kEnv.mpkCmdQueue);
++ if (CL_SUCCESS != err)
++ throw OpenCLError("clFinish", err, __FILE__, __LINE__);
++ resbuf = static_cast<double*>(clEnqueueMapBuffer(kEnv.mpkCmdQueue,
++ mpClmem2,
++ CL_TRUE, CL_MAP_READ, 0,
++ sizeof(double) * w, 0, nullptr, nullptr,
++ &err));
++ if (err != CL_SUCCESS)
++ throw OpenCLError("clEnqueueMapBuffer", err, __FILE__, __LINE__);
++ for (int i = 0; i < w; i++)
++ pAllBuffer[i + w] = resbuf[i];
++ err = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue, mpClmem2, resbuf, 0, nullptr, nullptr);
++ // FIXME: Is it intentional to not throw an OpenCLError even if the clEnqueueUnmapMemObject() fails?
++ if (CL_SUCCESS != err)
++ SAL_WARN("sc.opencl", "clEnqueueUnmapMemObject failed: " << openclwrapper::errorString(err));
++ if (mpClmem2)
++ {
++ err = clReleaseMemObject(mpClmem2);
++ SAL_WARN_IF(err != CL_SUCCESS, "sc.opencl", "clReleaseMemObject failed: " << openclwrapper::errorString(err));
++ mpClmem2 = nullptr;
++ }
++ mpClmem2 = clCreateBuffer(kEnv.mpkContext,
++ cl_mem_flags(CL_MEM_READ_WRITE) | CL_MEM_COPY_HOST_PTR,
++ w * sizeof(double) * 2, pAllBuffer.get(), &err);
++ if (CL_SUCCESS != err)
++ throw OpenCLError("clCreateBuffer", err, __FILE__, __LINE__);
++ SAL_INFO("sc.opencl", "Created buffer " << mpClmem2 << " size " << w << "*" << sizeof(double) << "=" << (w*sizeof(double)) << " copying host buffer " << pAllBuffer.get());
++ }
++ // set kernel arg
++ SAL_INFO("sc.opencl", "Kernel " << k << " arg " << argno << ": cl_mem: " << mpClmem2);
++ err = clSetKernelArg(k, argno, sizeof(cl_mem), &mpClmem2);
++ if (CL_SUCCESS != err)
++ throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
++ return 1;
++}
++
+ namespace {
+ struct SumIfsArgs
+ {