[Libreoffice-commits] core.git: sc/source

Libreoffice Gerrit user logerrit at kemper.freedesktop.org
Mon Dec 3 14:32:31 UTC 2018


 sc/source/core/opencl/formulagroupcl.cxx |  372 +++++++++++++++++++++++++++++++
 1 file changed, 372 insertions(+)

New commits:
commit 1b489b74fe28007749bec8a3ebd56901a7652734
Author:     Luboš Luňák <l.lunak at collabora.com>
AuthorDate: Wed Nov 28 13:49:01 2018 +0100
Commit:     Luboš Luňák <l.lunak at collabora.com>
CommitDate: Mon Dec 3 15:32:05 2018 +0100

    restore functionality of OpenCL ParallelReductionVectorRef class
    
    These functions were removed in 149a4d7566, although they quite clearly
    do serve a purpose, and then e9586cf0b5 removed the no longer used
    variables.
    Without this, ParallelReductionVectorRef class simply does not work.
    E.g. SUM(B1:B250) would not sum all the cells, but just one (testcase
    is Test::testFormulaRefUpdateRange with OpenCL forced).
    
    Change-Id: Iad1aeba2fbae2b4fc1d4236030adbb9ce6d972a1
    Reviewed-on: https://gerrit.libreoffice.org/64235
    Tested-by: Jenkins
    Reviewed-by: Luboš Luňák <l.lunak at collabora.com>

diff --git a/sc/source/core/opencl/formulagroupcl.cxx b/sc/source/core/opencl/formulagroupcl.cxx
index 71acf15e9590..4db3fb4ebe11 100644
--- a/sc/source/core/opencl/formulagroupcl.cxx
+++ b/sc/source/core/opencl/formulagroupcl.cxx
@@ -1221,6 +1221,7 @@ public:
                                std::shared_ptr<SlidingFunctionBase>& CodeGen, int index)
         : Base(config, s, ft, index)
         , mpCodeGen(CodeGen)
+        , mpClmem2(nullptr)
     {
         FormulaToken* t = ft->GetFormulaToken();
         if (t->GetType() != formula::svDoubleVectorRef)
@@ -1229,6 +1230,198 @@ public:
         bIsStartFixed = mpDVR->IsStartFixed();
         bIsEndFixed = mpDVR->IsEndFixed();
     }
+
+    /// Emit the definition for the auxiliary reduction kernel
+    virtual void GenSlidingWindowFunction( std::stringstream& ss )
+    {
+        if (!dynamic_cast<OpAverage*>(mpCodeGen.get()))
+        {
+            std::string name = Base::GetName();
+            ss << "__kernel void " << name;
+            ss << "_reduction(__global double* A, "
+                "__global double *result,int arrayLength,int windowSize){\n";
+            ss << "    double tmp, current_result =" <<
+                mpCodeGen->GetBottom();
+            ss << ";\n";
+            ss << "    int writePos = get_group_id(1);\n";
+            ss << "    int lidx = get_local_id(0);\n";
+            ss << "    __local double shm_buf[256];\n";
+            if (mpDVR->IsStartFixed())
+                ss << "    int offset = 0;\n";
+            else // if (!mpDVR->IsStartFixed())
+                ss << "    int offset = get_group_id(1);\n";
+            if (mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
+                ss << "    int end = windowSize;\n";
+            else if (!mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
+                ss << "    int end = offset + windowSize;\n";
+            else if (mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
+                ss << "    int end = windowSize + get_group_id(1);\n";
+            else if (!mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
+                ss << "    int end = windowSize;\n";
+            ss << "    end = min(end, arrayLength);\n";
+
+            ss << "    barrier(CLK_LOCAL_MEM_FENCE);\n";
+            ss << "    int loop = arrayLength/512 + 1;\n";
+            ss << "    for (int l=0; l<loop; l++){\n";
+            ss << "    tmp = " << mpCodeGen->GetBottom() << ";\n";
+            ss << "    int loopOffset = l*512;\n";
+            ss << "    if((loopOffset + lidx + offset + 256) < end) {\n";
+            ss << "        tmp = legalize(" << mpCodeGen->Gen2(
+                "A[loopOffset + lidx + offset]", "tmp") << ", tmp);\n";
+            ss << "        tmp = legalize(" << mpCodeGen->Gen2(
+                "A[loopOffset + lidx + offset + 256]", "tmp") << ", tmp);\n";
+            ss << "    } else if ((loopOffset + lidx + offset) < end)\n";
+            ss << "        tmp = legalize(" << mpCodeGen->Gen2(
+                "A[loopOffset + lidx + offset]", "tmp") << ", tmp);\n";
+            ss << "    shm_buf[lidx] = tmp;\n";
+            ss << "    barrier(CLK_LOCAL_MEM_FENCE);\n";
+            ss << "    for (int i = 128; i >0; i/=2) {\n";
+            ss << "        if (lidx < i)\n";
+            ss << "            shm_buf[lidx] = ";
+            // Special case count
+            if (dynamic_cast<OpCount*>(mpCodeGen.get()))
+                ss << "shm_buf[lidx] + shm_buf[lidx + i];\n";
+            else
+                ss << mpCodeGen->Gen2("shm_buf[lidx]", "shm_buf[lidx + i]") << ";\n";
+            ss << "        barrier(CLK_LOCAL_MEM_FENCE);\n";
+            ss << "    }\n";
+            ss << "        if (lidx == 0)\n";
+            ss << "            current_result =";
+            if (dynamic_cast<OpCount*>(mpCodeGen.get()))
+                ss << "current_result + shm_buf[0]";
+            else
+                ss << mpCodeGen->Gen2("current_result", "shm_buf[0]");
+            ss << ";\n";
+            ss << "        barrier(CLK_LOCAL_MEM_FENCE);\n";
+            ss << "    }\n";
+            ss << "    if (lidx == 0)\n";
+            ss << "        result[writePos] = current_result;\n";
+            ss << "}\n";
+        }
+        else
+        {
+            std::string name = Base::GetName();
+            /*sum reduction*/
+            ss << "__kernel void " << name << "_sum";
+            ss << "_reduction(__global double* A, "
+                "__global double *result,int arrayLength,int windowSize){\n";
+            ss << "    double tmp, current_result =" <<
+                mpCodeGen->GetBottom();
+            ss << ";\n";
+            ss << "    int writePos = get_group_id(1);\n";
+            ss << "    int lidx = get_local_id(0);\n";
+            ss << "    __local double shm_buf[256];\n";
+            if (mpDVR->IsStartFixed())
+                ss << "    int offset = 0;\n";
+            else // if (!mpDVR->IsStartFixed())
+                ss << "    int offset = get_group_id(1);\n";
+            if (mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
+                ss << "    int end = windowSize;\n";
+            else if (!mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
+                ss << "    int end = offset + windowSize;\n";
+            else if (mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
+                ss << "    int end = windowSize + get_group_id(1);\n";
+            else if (!mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
+                ss << "    int end = windowSize;\n";
+            ss << "    end = min(end, arrayLength);\n";
+            ss << "    barrier(CLK_LOCAL_MEM_FENCE);\n";
+            ss << "    int loop = arrayLength/512 + 1;\n";
+            ss << "    for (int l=0; l<loop; l++){\n";
+            ss << "    tmp = " << mpCodeGen->GetBottom() << ";\n";
+            ss << "    int loopOffset = l*512;\n";
+            ss << "    if((loopOffset + lidx + offset + 256) < end) {\n";
+            ss << "        tmp = legalize(";
+            ss << "(A[loopOffset + lidx + offset]+ tmp)";
+            ss << ", tmp);\n";
+            ss << "        tmp = legalize((A[loopOffset + lidx + offset + 256]+ tmp)";
+            ss << ", tmp);\n";
+            ss << "    } else if ((loopOffset + lidx + offset) < end)\n";
+            ss << "        tmp = legalize((A[loopOffset + lidx + offset] + tmp)";
+            ss << ", tmp);\n";
+            ss << "    shm_buf[lidx] = tmp;\n";
+            ss << "    barrier(CLK_LOCAL_MEM_FENCE);\n";
+            ss << "    for (int i = 128; i >0; i/=2) {\n";
+            ss << "        if (lidx < i)\n";
+            ss << "            shm_buf[lidx] = ";
+            ss << "shm_buf[lidx] + shm_buf[lidx + i];\n";
+            ss << "        barrier(CLK_LOCAL_MEM_FENCE);\n";
+            ss << "    }\n";
+            ss << "        if (lidx == 0)\n";
+            ss << "            current_result =";
+            ss << "current_result + shm_buf[0]";
+            ss << ";\n";
+            ss << "        barrier(CLK_LOCAL_MEM_FENCE);\n";
+            ss << "    }\n";
+            ss << "    if (lidx == 0)\n";
+            ss << "        result[writePos] = current_result;\n";
+            ss << "}\n";
+            /*count reduction*/
+            ss << "__kernel void " << name << "_count";
+            ss << "_reduction(__global double* A, "
+                "__global double *result,int arrayLength,int windowSize){\n";
+            ss << "    double tmp, current_result =" <<
+                mpCodeGen->GetBottom();
+            ss << ";\n";
+            ss << "    int writePos = get_group_id(1);\n";
+            ss << "    int lidx = get_local_id(0);\n";
+            ss << "    __local double shm_buf[256];\n";
+            if (mpDVR->IsStartFixed())
+                ss << "    int offset = 0;\n";
+            else // if (!mpDVR->IsStartFixed())
+                ss << "    int offset = get_group_id(1);\n";
+            if (mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
+                ss << "    int end = windowSize;\n";
+            else if (!mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
+                ss << "    int end = offset + windowSize;\n";
+            else if (mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
+                ss << "    int end = windowSize + get_group_id(1);\n";
+            else if (!mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
+                ss << "    int end = windowSize;\n";
+            ss << "    end = min(end, arrayLength);\n";
+            ss << "    barrier(CLK_LOCAL_MEM_FENCE);\n";
+            ss << "    int loop = arrayLength/512 + 1;\n";
+            ss << "    for (int l=0; l<loop; l++){\n";
+            ss << "    tmp = " << mpCodeGen->GetBottom() << ";\n";
+            ss << "    int loopOffset = l*512;\n";
+            ss << "    if((loopOffset + lidx + offset + 256) < end) {\n";
+            ss << "        tmp = legalize((isnan(A[loopOffset + lidx + offset])?tmp:tmp+1.0)";
+            ss << ", tmp);\n";
+            ss << "        tmp = legalize((isnan(A[loopOffset + lidx + offset+256])?tmp:tmp+1.0)";
+            ss << ", tmp);\n";
+            ss << "    } else if ((loopOffset + lidx + offset) < end)\n";
+            ss << "        tmp = legalize((isnan(A[loopOffset + lidx + offset])?tmp:tmp+1.0)";
+            ss << ", tmp);\n";
+            ss << "    shm_buf[lidx] = tmp;\n";
+            ss << "    barrier(CLK_LOCAL_MEM_FENCE);\n";
+            ss << "    for (int i = 128; i >0; i/=2) {\n";
+            ss << "        if (lidx < i)\n";
+            ss << "            shm_buf[lidx] = ";
+            ss << "shm_buf[lidx] + shm_buf[lidx + i];\n";
+            ss << "        barrier(CLK_LOCAL_MEM_FENCE);\n";
+            ss << "    }\n";
+            ss << "        if (lidx == 0)\n";
+            ss << "            current_result =";
+            ss << "current_result + shm_buf[0];";
+            ss << ";\n";
+            ss << "        barrier(CLK_LOCAL_MEM_FENCE);\n";
+            ss << "    }\n";
+            ss << "    if (lidx == 0)\n";
+            ss << "        result[writePos] = current_result;\n";
+            ss << "}\n";
+        }
+
+    }
+
+    virtual std::string GenSlidingWindowDeclRef( bool ) const
+    {
+        std::stringstream ss;
+        if (!bIsStartFixed && !bIsEndFixed)
+            ss << Base::GetName() << "[i + gid0]";
+        else
+            ss << Base::GetName() << "[i]";
+        return ss.str();
+    }
+
     /// Controls how the elements in the DoubleVectorRef are traversed
     size_t GenReductionLoopHeader(
         std::stringstream& ss, int nResultSize, bool& needBody )
@@ -1254,6 +1447,183 @@ public:
         return nCurWindowSize;
     }
 
+    virtual size_t Marshal( cl_kernel k, int argno, int w, cl_program mpProgram )
+    {
+        assert(Base::mpClmem == nullptr);
+
+        openclwrapper::KernelEnv kEnv;
+        openclwrapper::setKernelEnv(&kEnv);
+        cl_int err;
+        size_t nInput = mpDVR->GetArrayLength();
+        size_t nCurWindowSize = mpDVR->GetRefRowSize();
+        // create clmem buffer
+        if (mpDVR->GetArrays()[Base::mnIndex].mpNumericArray == nullptr)
+            throw Unhandled(__FILE__, __LINE__);
+        double* pHostBuffer = const_cast<double*>(
+            mpDVR->GetArrays()[Base::mnIndex].mpNumericArray);
+        size_t szHostBuffer = nInput * sizeof(double);
+        Base::mpClmem = clCreateBuffer(kEnv.mpkContext,
+            cl_mem_flags(CL_MEM_READ_ONLY) | CL_MEM_USE_HOST_PTR,
+            szHostBuffer,
+            pHostBuffer, &err);
+        SAL_INFO("sc.opencl", "Created buffer " << Base::mpClmem << " size " << nInput << "*" << sizeof(double) << "=" << szHostBuffer << " using host buffer " << pHostBuffer);
+
+        mpClmem2 = clCreateBuffer(kEnv.mpkContext,
+            CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
+            sizeof(double) * w, nullptr, nullptr);
+        if (CL_SUCCESS != err)
+            throw OpenCLError("clCreateBuffer", err, __FILE__, __LINE__);
+        SAL_INFO("sc.opencl", "Created buffer " << mpClmem2 << " size " << sizeof(double) << "*" << w << "=" << (sizeof(double)*w));
+
+        // reproduce the reduction function name
+        std::string kernelName;
+        if (!dynamic_cast<OpAverage*>(mpCodeGen.get()))
+            kernelName = Base::GetName() + "_reduction";
+        else
+            kernelName = Base::GetName() + "_sum_reduction";
+        cl_kernel redKernel = clCreateKernel(mpProgram, kernelName.c_str(), &err);
+        if (err != CL_SUCCESS)
+            throw OpenCLError("clCreateKernel", err, __FILE__, __LINE__);
+        SAL_INFO("sc.opencl", "Created kernel " << redKernel << " with name " << kernelName << " in program " << mpProgram);
+
+        // set kernel arg of reduction kernel
+        // TODO(Wei Wei): use unique name for kernel
+        cl_mem buf = Base::GetCLBuffer();
+        SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 0 << ": cl_mem: " << buf);
+        err = clSetKernelArg(redKernel, 0, sizeof(cl_mem),
+            static_cast<void*>(&buf));
+        if (CL_SUCCESS != err)
+            throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
+
+        SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 1 << ": cl_mem: " << mpClmem2);
+        err = clSetKernelArg(redKernel, 1, sizeof(cl_mem), &mpClmem2);
+        if (CL_SUCCESS != err)
+            throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
+
+        SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 2 << ": cl_int: " << nInput);
+        err = clSetKernelArg(redKernel, 2, sizeof(cl_int), static_cast<void*>(&nInput));
+        if (CL_SUCCESS != err)
+            throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
+
+        SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 3 << ": cl_int: " << nCurWindowSize);
+        err = clSetKernelArg(redKernel, 3, sizeof(cl_int), static_cast<void*>(&nCurWindowSize));
+        if (CL_SUCCESS != err)
+            throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
+
+        // set work group size and execute
+        size_t global_work_size[] = { 256, static_cast<size_t>(w) };
+        size_t const local_work_size[] = { 256, 1 };
+        SAL_INFO("sc.opencl", "Enqueing kernel " << redKernel);
+        err = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, redKernel, 2, nullptr,
+            global_work_size, local_work_size, 0, nullptr, nullptr);
+        if (CL_SUCCESS != err)
+            throw OpenCLError("clEnqueueNDRangeKernel", err, __FILE__, __LINE__);
+        err = clFinish(kEnv.mpkCmdQueue);
+        if (CL_SUCCESS != err)
+            throw OpenCLError("clFinish", err, __FILE__, __LINE__);
+        if (dynamic_cast<OpAverage*>(mpCodeGen.get()))
+        {
+            /*average need more reduction kernel for count computing*/
+            std::unique_ptr<double[]> pAllBuffer(new double[2 * w]);
+            double* resbuf = static_cast<double*>(clEnqueueMapBuffer(kEnv.mpkCmdQueue,
+                mpClmem2,
+                CL_TRUE, CL_MAP_READ, 0,
+                sizeof(double) * w, 0, nullptr, nullptr,
+                &err));
+            if (err != CL_SUCCESS)
+                throw OpenCLError("clEnqueueMapBuffer", err, __FILE__, __LINE__);
+
+            for (int i = 0; i < w; i++)
+                pAllBuffer[i] = resbuf[i];
+            err = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue, mpClmem2, resbuf, 0, nullptr, nullptr);
+            if (err != CL_SUCCESS)
+                throw OpenCLError("clEnqueueUnmapMemObject", err, __FILE__, __LINE__);
+
+            kernelName = Base::GetName() + "_count_reduction";
+            redKernel = clCreateKernel(mpProgram, kernelName.c_str(), &err);
+            if (err != CL_SUCCESS)
+                throw OpenCLError("clCreateKernel", err, __FILE__, __LINE__);
+            SAL_INFO("sc.opencl", "Created kernel " << redKernel << " with name " << kernelName << " in program " << mpProgram);
+
+            // set kernel arg of reduction kernel
+            buf = Base::GetCLBuffer();
+            SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 0 << ": cl_mem: " << buf);
+            err = clSetKernelArg(redKernel, 0, sizeof(cl_mem),
+                static_cast<void*>(&buf));
+            if (CL_SUCCESS != err)
+                throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
+
+            SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 1 << ": cl_mem: " << mpClmem2);
+            err = clSetKernelArg(redKernel, 1, sizeof(cl_mem), &mpClmem2);
+            if (CL_SUCCESS != err)
+                throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
+
+            SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 2 << ": cl_int: " << nInput);
+            err = clSetKernelArg(redKernel, 2, sizeof(cl_int), static_cast<void*>(&nInput));
+            if (CL_SUCCESS != err)
+                throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
+
+            SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 3 << ": cl_int: " << nCurWindowSize);
+            err = clSetKernelArg(redKernel, 3, sizeof(cl_int), static_cast<void*>(&nCurWindowSize));
+            if (CL_SUCCESS != err)
+                throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
+
+            // set work group size and execute
+            size_t global_work_size1[] = { 256, static_cast<size_t>(w) };
+            size_t const local_work_size1[] = { 256, 1 };
+            SAL_INFO("sc.opencl", "Enqueing kernel " << redKernel);
+            err = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, redKernel, 2, nullptr,
+                global_work_size1, local_work_size1, 0, nullptr, nullptr);
+            if (CL_SUCCESS != err)
+                throw OpenCLError("clEnqueueNDRangeKernel", err, __FILE__, __LINE__);
+            err = clFinish(kEnv.mpkCmdQueue);
+            if (CL_SUCCESS != err)
+                throw OpenCLError("clFinish", err, __FILE__, __LINE__);
+            resbuf = static_cast<double*>(clEnqueueMapBuffer(kEnv.mpkCmdQueue,
+                mpClmem2,
+                CL_TRUE, CL_MAP_READ, 0,
+                sizeof(double) * w, 0, nullptr, nullptr,
+                &err));
+            if (err != CL_SUCCESS)
+                throw OpenCLError("clEnqueueMapBuffer", err, __FILE__, __LINE__);
+            for (int i = 0; i < w; i++)
+                pAllBuffer[i + w] = resbuf[i];
+            err = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue, mpClmem2, resbuf, 0, nullptr, nullptr);
+            // FIXME: Is it intentional to not throw an OpenCLError even if the clEnqueueUnmapMemObject() fails?
+            if (CL_SUCCESS != err)
+                SAL_WARN("sc.opencl", "clEnqueueUnmapMemObject failed: " << openclwrapper::errorString(err));
+            if (mpClmem2)
+            {
+                err = clReleaseMemObject(mpClmem2);
+                SAL_WARN_IF(err != CL_SUCCESS, "sc.opencl", "clReleaseMemObject failed: " << openclwrapper::errorString(err));
+                mpClmem2 = nullptr;
+            }
+            mpClmem2 = clCreateBuffer(kEnv.mpkContext,
+                cl_mem_flags(CL_MEM_READ_WRITE) | CL_MEM_COPY_HOST_PTR,
+                w * sizeof(double) * 2, pAllBuffer.get(), &err);
+            if (CL_SUCCESS != err)
+                throw OpenCLError("clCreateBuffer", err, __FILE__, __LINE__);
+            SAL_INFO("sc.opencl", "Created buffer " << mpClmem2 << " size " << w << "*" << sizeof(double) << "=" << (w*sizeof(double)) << " copying host buffer " << pAllBuffer.get());
+        }
+        // set kernel arg
+        SAL_INFO("sc.opencl", "Kernel " << k << " arg " << argno << ": cl_mem: " << mpClmem2);
+        err = clSetKernelArg(k, argno, sizeof(cl_mem), &(mpClmem2));
+        if (CL_SUCCESS != err)
+            throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
+        return 1;
+    }
+
+    ~ParallelReductionVectorRef()
+    {
+        if (mpClmem2)
+        {
+            cl_int err;
+            err = clReleaseMemObject(mpClmem2);
+            SAL_WARN_IF(err != CL_SUCCESS, "sc.opencl", "clReleaseMemObject failed: " << openclwrapper::errorString(err));
+            mpClmem2 = nullptr;
+        }
+    }
+
     size_t GetArrayLength() const { return mpDVR->GetArrayLength(); }
 
     size_t GetWindowSize() const { return mpDVR->GetRefRowSize(); }
@@ -1267,6 +1637,8 @@ protected:
     const formula::DoubleVectorRefToken* mpDVR;
     // from parent nodes
     std::shared_ptr<SlidingFunctionBase> mpCodeGen;
+    // controls whether to invoke the reduction kernel during marshaling or not
+    cl_mem mpClmem2;
 };
 
 class Reduction : public SlidingFunctionBase


More information about the Libreoffice-commits mailing list