[Libreoffice-commits] core.git: Branch 'feature/calc-pluggable-opencl' - 4 commits - sc/inc sc/Library_scfilt.mk sc/source

Kohei Yoshida kohei.yoshida at collabora.com
Tue Sep 17 12:13:22 PDT 2013


 sc/Library_scfilt.mk                       |    2 
 sc/inc/formulagroup.hxx                    |    1 
 sc/source/core/opencl/formulagroupcl.cxx   |    5 
 sc/source/core/opencl/openclwrapper.cxx    |  251 +++++++++--------------------
 sc/source/core/opencl/openclwrapper.hxx    |    4 
 sc/source/core/tool/formulagroup.cxx       |   19 ++
 sc/source/filter/excel/excform.cxx         |    5 
 sc/source/filter/excel/read.cxx            |    6 
 sc/source/filter/ftools/clkernelthread.cxx |   28 +++
 sc/source/filter/inc/clkernelthread.hxx    |   26 +++
 sc/source/filter/inc/imp_op.hxx            |    5 
 11 files changed, 185 insertions(+), 167 deletions(-)

New commits:
commit 945c7e6ab8c36307acb61e807c75d278179a5ed4
Author: Kohei Yoshida <kohei.yoshida at collabora.com>
Date:   Tue Sep 17 15:14:00 2013 -0400

    Treat oclMatrixSolve like any other kernel: obtain it via fetchKernel() with no special handling.
    
    Change-Id: I79d36ad7c95bf4cc8cd6bb4fd55dcedd5cd70684

diff --git a/sc/source/core/opencl/openclwrapper.cxx b/sc/source/core/opencl/openclwrapper.cxx
index ce0f662..7a8b205 100644
--- a/sc/source/core/opencl/openclwrapper.cxx
+++ b/sc/source/core/opencl/openclwrapper.cxx
@@ -125,7 +125,9 @@ const char* pKernelNames[] = {
     "oclMaxDiv",
     "oclAverageDiv"
     "oclMinDiv",
-    "oclSub"
+    "oclSub",
+
+    "oclMatrixSolve"
 };
 
 }
@@ -2341,20 +2343,22 @@ bool OclCalc::oclHostMatrixInverse64Bits( const char* aKernelName, double *dpOcl
     }
     clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, mpClmemLeftData, dpOclMatrixSrc, 0, NULL, NULL );
     CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" );
-    cl_kernel kernel_solve = clCreateKernel( kEnv.mpkProgram, "oclMatrixSolve", &clStatus );
-    CHECK_OPENCL( clStatus, "clCreateKernel" );
-    clStatus = clSetKernelArg( kernel_solve, 0, sizeof(cl_mem), (void *)&mpClmemLeftData );
+    Kernel* pKernelMatrix = fetchKernel("oclMatrixSolve");
+    if (!pKernelMatrix)
+        return false;
+
+    clStatus = clSetKernelArg(pKernel->mpKernel, 0, sizeof(cl_mem), (void *)&mpClmemLeftData);
     CHECK_OPENCL( clStatus, "clSetKernelArg" );
-    clStatus = clSetKernelArg( kernel_solve, 1, sizeof(cl_mem), (void *)&mpClmemRightData );
+    clStatus = clSetKernelArg(pKernel->mpKernel, 1, sizeof(cl_mem), (void *)&mpClmemRightData);
     CHECK_OPENCL( clStatus, "clSetKernelArg" );
-    clStatus = clSetKernelArg( kernel_solve, 2, sizeof(cl_mem), (void *)&clpPData );
+    clStatus = clSetKernelArg(pKernel->mpKernel, 2, sizeof(cl_mem), (void *)&clpPData);
     CHECK_OPENCL( clStatus, "clSetKernelArg" );
-    clStatus = clSetKernelArg( kernel_solve, 3, sizeof(cl_mem), (void *)&clpYData );
+    clStatus = clSetKernelArg(pKernel->mpKernel, 3, sizeof(cl_mem), (void *)&clpYData);
     CHECK_OPENCL( clStatus, "clSetKernelArg" );
-    clStatus = clSetKernelArg( kernel_solve, 4, sizeof(cl_mem), (void *)&clpNData );
+    clStatus = clSetKernelArg(pKernel->mpKernel, 4, sizeof(cl_mem), (void *)&clpNData);
     CHECK_OPENCL( clStatus, "clSetKernelArg" );
 
-    clStatus = clEnqueueNDRangeKernel( kEnv.mpkCmdQueue, kernel_solve, 1, NULL, global_work_size, NULL, 0, NULL, NULL );
+    clStatus = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, pKernel->mpKernel, 1, NULL, global_work_size, NULL, 0, NULL, NULL);
     CHECK_OPENCL( clStatus, "clEnqueueNDRangeKernel" );
     clFinish( kEnv.mpkCmdQueue );
     for ( uint i = 0; i < nDim; i++ )
@@ -2370,8 +2374,6 @@ bool OclCalc::oclHostMatrixInverse64Bits( const char* aKernelName, double *dpOcl
     clStatus = clReleaseMemObject( mpClmemRightData );
     CHECK_OPENCL( clStatus, "clReleaseMemObject" );
     mpClmemRightData = NULL;
-    clStatus = clReleaseKernel( kernel_solve );
-    CHECK_OPENCL( clStatus, "clReleaseKernel" );
     clStatus = clReleaseMemObject( clpPData );
     CHECK_OPENCL( clStatus, "clReleaseKernel" );
     clStatus = clReleaseMemObject( clpYData );
@@ -2453,20 +2455,23 @@ bool OclCalc::oclHostMatrixInverse32Bits( const char* aKernelName, float *fpOclM
     clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, mpClmemLeftData, fpOclMatrixSrc, 0, NULL, NULL );
     CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" );
 
-    cl_kernel kernel_solve = clCreateKernel( kEnv.mpkProgram, "oclMatrixSolve", &clStatus );
-    CHECK_OPENCL( clStatus, "clCreateKernel" );
-    clStatus = clSetKernelArg( kernel_solve, 0, sizeof(cl_mem), (void *)&mpClmemLeftData );
+    Kernel* pKernelMatrix = fetchKernel("oclMatrixSolve");
+    if (!pKernelMatrix)
+        return false;
+
+    clStatus = clSetKernelArg(pKernel->mpKernel, 0, sizeof(cl_mem), (void *)&mpClmemLeftData);
     CHECK_OPENCL( clStatus, "clSetKernelArg" );
-    clStatus = clSetKernelArg( kernel_solve, 1, sizeof(cl_mem), (void *)&mpClmemRightData );
+    clStatus = clSetKernelArg(pKernel->mpKernel, 1, sizeof(cl_mem), (void *)&mpClmemRightData);
     CHECK_OPENCL( clStatus, "clSetKernelArg" );
-    clStatus = clSetKernelArg( kernel_solve, 2, sizeof(cl_mem), (void *)&clpPData );
+    clStatus = clSetKernelArg(pKernel->mpKernel, 2, sizeof(cl_mem), (void *)&clpPData);
     CHECK_OPENCL( clStatus, "clSetKernelArg" );
-    clStatus = clSetKernelArg( kernel_solve, 3, sizeof(cl_mem), (void *)&clpYData );
+    clStatus = clSetKernelArg(pKernel->mpKernel, 3, sizeof(cl_mem), (void *)&clpYData);
     CHECK_OPENCL( clStatus, "clSetKernelArg" );
-    clStatus = clSetKernelArg( kernel_solve, 4, sizeof(cl_mem), (void *)&clpNData );
+    clStatus = clSetKernelArg(pKernel->mpKernel, 4, sizeof(cl_mem), (void *)&clpNData);
     CHECK_OPENCL( clStatus, "clSetKernelArg" );
 
-    clStatus = clEnqueueNDRangeKernel( kEnv.mpkCmdQueue, kernel_solve, 1, NULL, global_work_size, NULL, 0, NULL, NULL );
+    clStatus = clEnqueueNDRangeKernel(
+        kEnv.mpkCmdQueue, pKernel->mpKernel, 1, NULL, global_work_size, NULL, 0, NULL, NULL);
     CHECK_OPENCL( clStatus, "clEnqueueNDRangeKernel" );
     clFinish( kEnv.mpkCmdQueue );
     for ( uint i = 0; i < nDim; i++ )
@@ -2482,8 +2487,6 @@ bool OclCalc::oclHostMatrixInverse32Bits( const char* aKernelName, float *fpOclM
     clStatus = clReleaseMemObject( mpClmemRightData );
     CHECK_OPENCL( clStatus, "clReleaseMemObject" );
     mpClmemRightData = NULL;
-    clStatus = clReleaseKernel( kernel_solve );
-    CHECK_OPENCL( clStatus, "clReleaseKernel" );
     clStatus = clReleaseMemObject( clpPData );
     CHECK_OPENCL( clStatus, "clReleaseKernel" );
     clStatus = clReleaseMemObject( clpYData );
commit 7cda070f05aaf120d3045a9235bfc249500a1034
Author: Kohei Yoshida <kohei.yoshida at collabora.com>
Date:   Tue Sep 17 14:59:49 2013 -0400

    Compile kernel when fetching the Kernel instance.
    
    To make the code a bit cleaner.
    
    Change-Id: Id129cea834e950e422e55e6c2504c1f88c5dbeab

diff --git a/sc/source/core/opencl/openclwrapper.cxx b/sc/source/core/opencl/openclwrapper.cxx
index c468669..ce0f662 100644
--- a/sc/source/core/opencl/openclwrapper.cxx
+++ b/sc/source/core/opencl/openclwrapper.cxx
@@ -152,15 +152,25 @@ int OpenclDevice::setKernelEnv( KernelEnv *envInfo )
     return 1;
 }
 
-Kernel* OpenclDevice::checkKernelName( const char *kernelName )
+Kernel* OpenclDevice::fetchKernel( const char *kernelName )
 {
+    cl_int nStatus;
     for (size_t i = 0, n = gpuEnv.maKernels.size(); i < n; ++i)
     {
         Kernel* pKernel = &gpuEnv.maKernels[i];
         if (!strcasecmp(kernelName, pKernel->mpName))
         {
             printf("found the kernel named %s.\n", kernelName);
-            return pKernel;
+            if (!pKernel->mpKernel && gpuEnv.mpArryPrograms[0])
+            {
+                pKernel->mpKernel = clCreateKernel(gpuEnv.mpArryPrograms[0], kernelName, &nStatus);
+                if (nStatus != CL_SUCCESS)
+                    pKernel->mpKernel = NULL;
+
+                printf("Kernel named '%s' has been compiled\n", kernelName);
+            }
+
+            return pKernel->mpKernel ?  pKernel : NULL;
         }
     }
 
@@ -1000,15 +1010,10 @@ bool OclCalc::oclHostArithmeticOperator64Bits( const char* aKernelName, double *
 {
     cl_int clStatus = 0;
     size_t global_work_size[1];
-    Kernel* pKernel = checkKernelName(aKernelName);
+    Kernel* pKernel = fetchKernel(aKernelName);
     if (!pKernel)
         return false;
 
-    if (!pKernel->mpKernel)
-    {
-        pKernel->mpKernel = clCreateKernel(kEnv.mpkProgram, aKernelName, &clStatus);
-        CHECK_OPENCL( clStatus, "clCreateKernel" );
-    }
     clFinish( kEnv.mpkCmdQueue );
     cl_mem clResult = clCreateBuffer( kEnv.mpkContext, CL_MEM_READ_WRITE,
                           nRowSize * sizeof(double), NULL, &clStatus);
@@ -1048,16 +1053,10 @@ bool OclCalc::oclMoreColHostArithmeticOperator64Bits( int nDataSize,int neOpSize
     cl_int clStatus = 0;
     size_t global_work_size[1];
     const char *aKernelName = "oclMoreColArithmeticOperator";
-    Kernel* pKernel = checkKernelName(aKernelName);
+    Kernel* pKernel = fetchKernel(aKernelName);
     if (!pKernel)
         return false;
 
-    if (!pKernel->mpKernel)
-    {
-        pKernel->mpKernel = clCreateKernel(kEnv.mpkProgram, aKernelName, &clStatus);
-        CHECK_OPENCL( clStatus, "clCreateKernel" );
-    }
-
     cl_mem clResult = clCreateBuffer( kEnv.mpkContext, CL_MEM_READ_WRITE, nRowSize * sizeof(double), NULL, &clStatus );
     CHECK_OPENCL( clStatus, "clCreateBuffer" );
     clStatus = clSetKernelArg(pKernel->mpKernel, 0, sizeof(cl_mem), (void *)&mpClmemLeftData);
@@ -1095,15 +1094,10 @@ bool OclCalc::oclHostArithmeticStash64Bits( const char* aKernelName, const doubl
     cl_int clStatus = 0;
     size_t global_work_size[1];
     setKernelEnv( &kEnv );
-    Kernel* pKernel = checkKernelName(aKernelName);
+    Kernel* pKernel = fetchKernel(aKernelName);
     if (!pKernel)
         return false;
 
-    if (!pKernel->mpKernel)
-    {
-        pKernel->mpKernel = clCreateKernel( kEnv.mpkProgram, aKernelName, &clStatus );
-        CHECK_OPENCL( clStatus, "clCreateKernel" );
-    }
     clFinish( kEnv.mpkCmdQueue );
 
     cl_mem clLeftData = clCreateBuffer( kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR),
@@ -1149,16 +1143,10 @@ bool OclCalc::oclHostFormulaStash64Bits( const char* aKernelName, const double*
     cl_int clStatus = 0;
     size_t global_work_size[1];
     setKernelEnv( &kEnv );
-    Kernel* pKernel = checkKernelName(aKernelName);
+    Kernel* pKernel = fetchKernel(aKernelName);
     if (!pKernel)
         return false;
 
-    if (!pKernel->mpKernel)
-    {
-        pKernel->mpKernel = clCreateKernel(kEnv.mpkProgram, aKernelName, &clStatus);
-        CHECK_OPENCL( clStatus, "clCreateKernel" );
-    }
-
     cl_mem clSrcData   = clCreateBuffer( kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR),
         nBufferSize * sizeof(double), (void *)dpSrcData, &clStatus );
     CHECK_OPENCL( clStatus, "clCreateBuffer" );
@@ -1205,16 +1193,10 @@ bool OclCalc::oclHostFormulaStatistics64Bits( const char* aKernelName, double *&
 {
     cl_int clStatus = 0;
     size_t global_work_size[1];
-    Kernel* pKernel = checkKernelName(aKernelName);
+    Kernel* pKernel = fetchKernel(aKernelName);
     if (!pKernel)
         return false;
 
-    if (!pKernel->mpKernel)
-    {
-        pKernel->mpKernel = clCreateKernel(kEnv.mpkProgram, aKernelName, &clStatus);
-        CHECK_OPENCL( clStatus, "clCreateKernel" );
-    }
-
     cl_mem outputCl = clCreateBuffer( kEnv.mpkContext, CL_MEM_READ_WRITE, size * sizeof(double), NULL, &clStatus );
     CHECK_OPENCL( clStatus, "clCreateBuffer" );
     clStatus = clSetKernelArg(pKernel->mpKernel, 0, sizeof(cl_mem),(void *)&mpClmemSrcData);
@@ -1252,18 +1234,12 @@ bool OclCalc::oclHostFormulaStatistics64Bits( const char* aKernelName, double *&
 bool OclCalc::oclHostFormulaCount64Bits( uint *npStartPos, uint *npEndPos, double *&dpOutput, int nSize )
 {
     const char *cpKernelName = "oclFormulaCount";
-    Kernel* pKernel = checkKernelName(cpKernelName);
+    Kernel* pKernel = fetchKernel(cpKernelName);
     if (!pKernel)
         return false;
 
     cl_int clStatus;
 
-    if (!pKernel->mpKernel)
-    {
-        pKernel->mpKernel = clCreateKernel(kEnv.mpkProgram, cpKernelName, &clStatus);
-        CHECK_OPENCL( clStatus, "clCreateKernel" );
-    }
-
     size_t global_work_size[1];
     clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, mpClmemStartPos, npStartPos, 0, NULL, NULL );
     CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" );
@@ -1310,15 +1286,10 @@ bool OclCalc::oclHostFormulaSumProduct64Bits( double *dpSumProMergeLfData, doubl
     memset(dpOutput,0,nSize);
     const char *cpFirstKernelName = "oclSignedMul";
     const char *cpSecondKernelName = "oclFormulaSumproduct";
-    Kernel* pKernel1 = checkKernelName(cpFirstKernelName);
+    Kernel* pKernel1 = fetchKernel(cpFirstKernelName);
     if (!pKernel1)
         return false;
 
-    if (!pKernel1->mpKernel)
-    {
-        pKernel1->mpKernel = clCreateKernel(kEnv.mpkProgram, cpFirstKernelName, &clStatus);
-        CHECK_OPENCL( clStatus, "clCreateKernel" );
-    }
     clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, mpClmemMergeLfData, dpSumProMergeLfData, 0, NULL, NULL );
     CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" );
     clFinish(kEnv.mpkCmdQueue);
@@ -1348,15 +1319,10 @@ bool OclCalc::oclHostFormulaSumProduct64Bits( double *dpSumProMergeLfData, doubl
     clStatus = clReleaseMemObject( mpClmemMergeRtData );
     CHECK_OPENCL( clStatus, "clReleaseMemObject" );
 
-    Kernel* pKernel2 = checkKernelName(cpSecondKernelName);
+    Kernel* pKernel2 = fetchKernel(cpSecondKernelName);
     if (!pKernel2)
         return false;
 
-    if (!pKernel2->mpKernel)
-    {
-        pKernel2->mpKernel = clCreateKernel(kEnv.mpkProgram, cpSecondKernelName, &clStatus);
-        CHECK_OPENCL( clStatus, "clCreateKernel" );
-    }
     cl_mem clpOutput = clCreateBuffer( kEnv.mpkContext, CL_MEM_READ_WRITE, nSize* sizeof(double), NULL, &clStatus );
     CHECK_OPENCL( clStatus, "clCreateBuffer" );
     cl_uint nMatixSize = nFormulaColSize * nFormulaRowSize;
@@ -1576,16 +1542,10 @@ bool OclCalc::oclHostArithmeticOperator32Bits( const char* aKernelName,double *r
     cl_int clStatus = 0;
     size_t global_work_size[1];
 
-    Kernel* pKernel = checkKernelName(aKernelName);
+    Kernel* pKernel = fetchKernel(aKernelName);
     if (!pKernel)
         return false;
 
-    if (!pKernel->mpKernel)
-    {
-        pKernel->mpKernel = clCreateKernel(kEnv.mpkProgram, aKernelName, &clStatus);
-        CHECK_OPENCL( clStatus, "clCreateKernel" );
-    }
-
     cl_mem clResult = clCreateBuffer( kEnv.mpkContext, CL_MEM_READ_WRITE, nRowSize * sizeof(float), NULL, &clStatus );
     CHECK_OPENCL( clStatus, "clCreateBuffer" );
     clStatus = clSetKernelArg(pKernel->mpKernel, 0, sizeof(cl_mem), (void *)&mpClmemLeftData);
@@ -1620,16 +1580,10 @@ bool OclCalc::oclMoreColHostArithmeticOperator32Bits( int nDataSize,int neOpSize
     cl_int clStatus = 0;
     size_t global_work_size[1];
     const char *aKernelName = "oclMoreColArithmeticOperator";
-    Kernel* pKernel = checkKernelName(aKernelName);
+    Kernel* pKernel = fetchKernel(aKernelName);
     if (!pKernel)
         return false;
 
-    if (!pKernel->mpKernel)
-    {
-        pKernel->mpKernel = clCreateKernel( kEnv.mpkProgram, aKernelName, &clStatus );
-        CHECK_OPENCL( clStatus, "clCreateKernel" );
-    }
-
     cl_mem clResult = clCreateBuffer( kEnv.mpkContext, CL_MEM_READ_WRITE, nRowSize * sizeof(float), NULL, &clStatus );
     CHECK_OPENCL( clStatus, "clCreateBuffer" );
     clStatus = clSetKernelArg(pKernel->mpKernel, 0, sizeof(cl_mem), (void *)&mpClmemLeftData);
@@ -1665,18 +1619,11 @@ bool OclCalc::oclMoreColHostArithmeticOperator32Bits( int nDataSize,int neOpSize
 
 bool OclCalc::oclHostFormulaStatistics32Bits(const char* aKernelName,double *output,int size)
 {
-    Kernel* pKernel = checkKernelName(aKernelName);
+    Kernel* pKernel = fetchKernel(aKernelName);
     if (!pKernel)
         return false;
 
     cl_int clStatus = 0;
-
-    if (!pKernel->mpKernel)
-    {
-        pKernel->mpKernel = clCreateKernel(kEnv.mpkProgram, aKernelName, &clStatus);
-        CHECK_OPENCL( clStatus, "clCreateKernel" );
-    }
-
     size_t global_work_size[1];
 
     cl_mem outputCl = clCreateBuffer( kEnv.mpkContext, CL_MEM_READ_WRITE, size * sizeof(float), NULL, &clStatus );
@@ -1713,16 +1660,10 @@ bool OclCalc::oclHostArithmeticStash32Bits( const char* aKernelName, const doubl
     cl_int clStatus = 0;
     size_t global_work_size[1];
     setKernelEnv( &kEnv );
-    Kernel* pKernel = checkKernelName(aKernelName);
+    Kernel* pKernel = fetchKernel(aKernelName);
     if (!pKernel)
         return false;
 
-    if (!pKernel->mpKernel)
-    {
-        pKernel->mpKernel = clCreateKernel(kEnv.mpkProgram, aKernelName, &clStatus);
-        CHECK_OPENCL( clStatus, "clCreateKernel" );
-    }
-
     float *fpLeftData = (float *)malloc( sizeof(float) * nRowSize );
     float *fpRightData = (float *)malloc( sizeof(float) * nRowSize );
     float *fpResult = (float *)malloc( sizeof(float) * nRowSize );
@@ -1782,16 +1723,10 @@ bool OclCalc::oclHostFormulaStash32Bits( const char* aKernelName, const double*
     cl_int clStatus = 0;
     size_t global_work_size[1];
     setKernelEnv( &kEnv );
-    Kernel* pKernel = checkKernelName(aKernelName);
+    Kernel* pKernel = fetchKernel(aKernelName);
     if (!pKernel)
         return false;
 
-    if (!pKernel->mpKernel)
-    {
-        pKernel->mpKernel = clCreateKernel(kEnv.mpkProgram, aKernelName, &clStatus);
-        CHECK_OPENCL( clStatus, "clCreateKernel" );
-    }
-
     float *fpSrcData = (float *)malloc( sizeof(float) * nBufferSize );
     float *fpResult = (float *)malloc( sizeof(float) * size );
     for(int i=0;i<nBufferSize;i++)
@@ -1848,18 +1783,13 @@ bool OclCalc::oclHostFormulaStash32Bits( const char* aKernelName, const double*
 bool OclCalc::oclHostFormulaCount32Bits( uint *npStartPos, uint *npEndPos, double *dpOutput, int nSize )
 {
     const char *cpKernelName = "oclFormulaCount";
-    Kernel* pKernel = checkKernelName(cpKernelName);
+    Kernel* pKernel = fetchKernel(cpKernelName);
     if (!pKernel)
         return false;
 
     cl_int clStatus;
     size_t global_work_size[1];
 
-    if (!pKernel->mpKernel)
-    {
-        pKernel->mpKernel = clCreateKernel(kEnv.mpkProgram, cpKernelName, &clStatus);
-        CHECK_OPENCL( clStatus, "clCreateKernel" );
-    }
     clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, mpClmemStartPos, npStartPos, 0, NULL, NULL );
     CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" );
     clFinish( kEnv.mpkCmdQueue );
@@ -1909,16 +1839,10 @@ bool OclCalc::oclHostFormulaSumProduct32Bits( float *fpSumProMergeLfData, float
     memset(dpOutput,0,nSize);
     const char *cpFirstKernelName = "oclSignedMul";
     const char *cpSecondKernelName = "oclFormulaSumproduct";
-    Kernel* pKernel1 = checkKernelName(cpFirstKernelName);
+    Kernel* pKernel1 = fetchKernel(cpFirstKernelName);
     if (!pKernel1)
         return false;
 
-    if (!pKernel1->mpKernel)
-    {
-        pKernel1->mpKernel = clCreateKernel(kEnv.mpkProgram, cpFirstKernelName, &clStatus);
-        CHECK_OPENCL( clStatus, "clCreateKernel" );
-    }
-
     clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, mpClmemMergeLfData, fpSumProMergeLfData, 0, NULL, NULL );
     CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" );
     clFinish( kEnv.mpkCmdQueue );
@@ -1947,15 +1871,10 @@ bool OclCalc::oclHostFormulaSumProduct32Bits( float *fpSumProMergeLfData, float
     clStatus = clReleaseMemObject( mpClmemMergeRtData );
     CHECK_OPENCL( clStatus, "clReleaseMemObject" );
 
-    Kernel* pKernel2 = checkKernelName(cpSecondKernelName);
+    Kernel* pKernel2 = fetchKernel(cpSecondKernelName);
     if (!pKernel2)
         return false;
 
-    if (!pKernel2->mpKernel)
-    {
-        pKernel2->mpKernel = clCreateKernel(kEnv.mpkProgram, cpSecondKernelName, &clStatus);
-        CHECK_OPENCL( clStatus, "clCreateKernel" );
-    }
     cl_mem clpOutput = clCreateBuffer( kEnv.mpkContext, CL_MEM_READ_WRITE, nSize* sizeof(float), NULL, &clStatus );
     CHECK_OPENCL( clStatus, "clCreateBuffer" );
     cl_uint nMatixSize = nFormulaColSize * nFormulaRowSize;
@@ -2065,18 +1984,11 @@ bool OclCalc::oclGroundWaterGroup( uint *eOp, uint eOpNum, const double *pOpArra
             break;
         }
     }
-    Kernel* pKernel = checkKernelName(kernelName);
+    Kernel* pKernel = fetchKernel(kernelName);
     if (!pKernel)
         return false;
 
     cl_int clStatus;
-
-    if (!pKernel->mpKernel)
-    {
-        pKernel->mpKernel = clCreateKernel(kEnv.mpkProgram, kernelName, &clStatus);
-        CHECK_OPENCL( clStatus, "clCreateKernel" );
-    }
-
     size_t global_work_size[1];
     if ( ( eOpNum == 1 ) && ( eOp[0] == ocSub ) )
         subFlag = true;
@@ -2239,27 +2151,13 @@ double *OclCalc::oclSimpleDeltaOperation( OpCode eOp, const double *pOpArray, co
         assert( false );
     }
 
-    Kernel* pKernel = checkKernelName(kernelName);
+    Kernel* pKernel = fetchKernel(kernelName);
     if (!pKernel)
         return NULL;
 
     cl_int clStatus;
     size_t global_work_size[1];
 
-    if (!pKernel->mpKernel)
-    {
-        pKernel->mpKernel = clCreateKernel(kEnv.mpkProgram, kernelName, &clStatus);
-        CHECK_OPENCL_PTR( clStatus, "clCreateKernel" );
-    }
-
-    if (!pKernel->mpKernel)
-    {
-        fprintf( stderr, "\n\n*** Error: Could not clCreateKernel '%s' ***\n\n", kernelName );
-        fprintf( stderr, "\tprobably your binary cache is out of date\n"
-                "\tplease delete kernel-*.bin in your cwd\n\n\n" );
-        return NULL;
-    }
-
     // Ugh - horrible redundant copying ...
 
     cl_mem valuesCl = NULL, subtractCl = NULL, outputCl = NULL;
@@ -2411,16 +2309,10 @@ bool OclCalc::oclHostMatrixInverse64Bits( const char* aKernelName, double *dpOcl
     clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, clpNData, npDim, 0, NULL, NULL );
 
     CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" );
-    Kernel* pKernel = checkKernelName(aKernelName);
+    Kernel* pKernel = fetchKernel(aKernelName);
     if (!pKernel)
         return false;
 
-    if (!pKernel->mpKernel)
-    {
-        pKernel->mpKernel = clCreateKernel(kEnv.mpkProgram, aKernelName, &clStatus);
-        CHECK_OPENCL( clStatus, "clCreateKernel" );
-    }
-
     clStatus = clSetKernelArg(pKernel->mpKernel, 0, sizeof(cl_mem), (void *)&mpClmemLeftData);
     CHECK_OPENCL( clStatus, "clSetKernelArg" );
     clStatus = clSetKernelArg(pKernel->mpKernel, 1, sizeof(cl_mem), (void *)&clpPData);
@@ -2527,15 +2419,10 @@ bool OclCalc::oclHostMatrixInverse32Bits( const char* aKernelName, float *fpOclM
     for ( uint i = 0; i < nDim; i++ )
         npDim[i] = nDim;
     clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, clpNData, npDim, 0, NULL, NULL );
-    Kernel* pKernel = checkKernelName(aKernelName);
+    Kernel* pKernel = fetchKernel(aKernelName);
     if (!pKernel)
         return false;
 
-    if (!pKernel->mpKernel)
-    {
-        pKernel->mpKernel = clCreateKernel(kEnv.mpkProgram, aKernelName, &clStatus);
-        CHECK_OPENCL( clStatus, "clCreateKernel" );
-    }
     clStatus = clSetKernelArg(pKernel->mpKernel, 0, sizeof(cl_mem), (void *)&mpClmemLeftData);
     CHECK_OPENCL( clStatus, "clSetKernelArg" );
     clStatus = clSetKernelArg(pKernel->mpKernel, 1, sizeof(cl_mem), (void *)&clpPData);
diff --git a/sc/source/core/opencl/openclwrapper.hxx b/sc/source/core/opencl/openclwrapper.hxx
index bf76e51..dfa8fbb 100644
--- a/sc/source/core/opencl/openclwrapper.hxx
+++ b/sc/source/core/opencl/openclwrapper.hxx
@@ -177,7 +177,7 @@ public:
 
     static int initOpenclAttr( OpenCLEnv * env );
     static int setKernelEnv( KernelEnv *envInfo );
-    static Kernel* checkKernelName( const char *kernelName );
+    static Kernel* fetchKernel( const char *kernelName );
 
     static int getOpenclState();
     static void setOpenclState( int state );
commit 2a41e0b317a66e308fbb1947030f776013f844c2
Author: Kohei Yoshida <kohei.yoshida at collabora.com>
Date:   Tue Sep 17 14:43:59 2013 -0400

    Compile kernels for real.
    
    Change-Id: I7c5e6707e6f733b26d5bb6d6b0d48b0f338625bc

diff --git a/sc/source/core/opencl/formulagroupcl.cxx b/sc/source/core/opencl/formulagroupcl.cxx
index 4c29223..5697b1b 100644
--- a/sc/source/core/opencl/formulagroupcl.cxx
+++ b/sc/source/core/opencl/formulagroupcl.cxx
@@ -1096,9 +1096,9 @@ SAL_DLLPUBLIC_EXPORT bool SAL_CALL switchOpenClDevice(const OUString* pDeviceId,
     return sc::opencl::switchOpenclDevice(pDeviceId, bAutoSelect);
 }
 
-SAL_DLLPUBLIC_EXPORT void compileKernels()
+SAL_DLLPUBLIC_EXPORT void compileKernels(const OUString* pDeviceId)
 {
-    sc::opencl::compileKernels();
+    sc::opencl::compileKernels(pDeviceId);
 }
 
 }
diff --git a/sc/source/core/opencl/openclwrapper.cxx b/sc/source/core/opencl/openclwrapper.cxx
index a0c1e7a..c468669 100644
--- a/sc/source/core/opencl/openclwrapper.cxx
+++ b/sc/source/core/opencl/openclwrapper.cxx
@@ -2851,8 +2851,33 @@ bool switchOpenclDevice(const OUString* pDevice, bool bAutoSelect)
     return !OpenclDevice::initOpenclRunEnv(0);
 }
 
-void compileKernels()
+void compileKernels(const OUString* pDeviceId)
 {
+    if (!pDeviceId)
+        return;
+
+    if (pDeviceId->isEmpty())
+        return;
+
+    if (!switchOpenclDevice(pDeviceId, false))
+        return;
+
+    cl_program pProgram = OpenclDevice::gpuEnv.mpArryPrograms[0];
+    if (!pProgram)
+        return;
+
+    cl_int nStatus;
+    for (size_t i = 0, n = OpenclDevice::gpuEnv.maKernels.size(); i < n; ++i)
+    {
+        Kernel& r = OpenclDevice::gpuEnv.maKernels[i];
+        if (r.mpKernel)
+            continue;
+
+        r.mpKernel = clCreateKernel(pProgram, r.mpName, &nStatus);
+        if (nStatus != CL_SUCCESS)
+            r.mpKernel = NULL;
+    }
+
 }
 
 }}
diff --git a/sc/source/core/opencl/openclwrapper.hxx b/sc/source/core/opencl/openclwrapper.hxx
index 30e3838..bf76e51 100644
--- a/sc/source/core/opencl/openclwrapper.hxx
+++ b/sc/source/core/opencl/openclwrapper.hxx
@@ -266,7 +266,7 @@ const std::vector<OpenclPlatformInfo>& fillOpenCLInfo();
  */
 bool switchOpenclDevice(const OUString* pDeviceId, bool bAutoSelect);
 
-void compileKernels();
+void compileKernels(const OUString* pDeviceId);
 
 }}
 
diff --git a/sc/source/core/tool/formulagroup.cxx b/sc/source/core/tool/formulagroup.cxx
index c07d280..6a20a0c 100644
--- a/sc/source/core/tool/formulagroup.cxx
+++ b/sc/source/core/tool/formulagroup.cxx
@@ -338,7 +338,7 @@ typedef FormulaGroupInterpreter* (*__createFormulaGroupOpenCLInterpreter)(void);
 typedef size_t (*__getOpenCLPlatformCount)(void);
 typedef void (*__fillOpenCLInfo)(OpenclPlatformInfo*, size_t);
 typedef bool (*__switchOpenClDevice)(const OUString*, bool);
-typedef void (*__compileKernels)(void);
+typedef void (*__compileKernels)(const OUString*);
 
 #endif
 
@@ -460,7 +460,8 @@ void FormulaGroupInterpreter::switchOpenCLDevice(const OUString& rDeviceId, bool
 
 void FormulaGroupInterpreter::compileKernels()
 {
-    if (!ScInterpreter::GetGlobalConfig().mbOpenCLEnabled)
+    const ScCalcConfig& rConfig = ScInterpreter::GetGlobalConfig();
+    if (!rConfig.mbOpenCLEnabled)
         // OpenCL is not enabled.
         return;
 
@@ -472,7 +473,7 @@ void FormulaGroupInterpreter::compileKernels()
     if (!fn)
         return;
 
-    reinterpret_cast<__compileKernels>(fn)();
+    reinterpret_cast<__compileKernels>(fn)(&rConfig.maOpenCLDevice);
 }
 
 void FormulaGroupInterpreter::generateRPNCode(ScDocument& rDoc, const ScAddress& rPos, ScTokenArray& rCode)
commit 4211e6cd2e3fd91bc6276576af76648de4b07752
Author: Kohei Yoshida <kohei.yoshida at collabora.com>
Date:   Tue Sep 17 12:41:07 2013 -0400

    Set up a thread to compile OpenCL kernels during file load.
    
    It's still a no-op, and we need a smart mechanism to trigger it
    conditionally rather than running it for every document.
    
    Change-Id: Ia875ebb9405b5de5c5d31418de84c5ca7a62f302

diff --git a/sc/Library_scfilt.mk b/sc/Library_scfilt.mk
index 75ee0fe..546b474 100644
--- a/sc/Library_scfilt.mk
+++ b/sc/Library_scfilt.mk
@@ -45,6 +45,7 @@ $(eval $(call gb_Library_use_libraries,scfilt,\
 	msfilter \
 	oox \
 	sal \
+	salhelper \
 	sax \
 	sb \
 	sc \
@@ -130,6 +131,7 @@ $(eval $(call gb_Library_add_exception_objects,scfilt,\
 	sc/source/filter/excel/xltools \
 	sc/source/filter/excel/xltracer \
 	sc/source/filter/excel/xlview \
+	sc/source/filter/ftools/clkernelthread \
 	sc/source/filter/ftools/fapihelper \
 	sc/source/filter/ftools/fprogressbar \
 	sc/source/filter/ftools/ftools \
diff --git a/sc/inc/formulagroup.hxx b/sc/inc/formulagroup.hxx
index bfd4bbc..c6f32ac 100644
--- a/sc/inc/formulagroup.hxx
+++ b/sc/inc/formulagroup.hxx
@@ -56,6 +56,7 @@ class SC_DLLPUBLIC FormulaGroupInterpreter
     static FormulaGroupInterpreter *getStatic();
     static void fillOpenCLInfo(std::vector<OpenclPlatformInfo>& rPlatforms);
     static void switchOpenCLDevice(const OUString& rDeviceId, bool bAutoSelect);
+    static void compileKernels();
 
     virtual ScMatrixRef inverseMatrix(const ScMatrix& rMat) = 0;
     virtual bool interpret(ScDocument& rDoc, const ScAddress& rTopPos, const ScFormulaCellGroupRef& xGroup, ScTokenArray& rCode) = 0;
diff --git a/sc/source/core/opencl/formulagroupcl.cxx b/sc/source/core/opencl/formulagroupcl.cxx
index 567d033..4c29223 100644
--- a/sc/source/core/opencl/formulagroupcl.cxx
+++ b/sc/source/core/opencl/formulagroupcl.cxx
@@ -1096,6 +1096,11 @@ SAL_DLLPUBLIC_EXPORT bool SAL_CALL switchOpenClDevice(const OUString* pDeviceId,
     return sc::opencl::switchOpenclDevice(pDeviceId, bAutoSelect);
 }
 
+SAL_DLLPUBLIC_EXPORT void compileKernels()
+{
+    sc::opencl::compileKernels();
+}
+
 }
 
 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sc/source/core/opencl/openclwrapper.cxx b/sc/source/core/opencl/openclwrapper.cxx
index 3994029..a0c1e7a 100644
--- a/sc/source/core/opencl/openclwrapper.cxx
+++ b/sc/source/core/opencl/openclwrapper.cxx
@@ -2851,6 +2851,10 @@ bool switchOpenclDevice(const OUString* pDevice, bool bAutoSelect)
     return !OpenclDevice::initOpenclRunEnv(0);
 }
 
+void compileKernels()
+{
+}
+
 }}
 
 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sc/source/core/opencl/openclwrapper.hxx b/sc/source/core/opencl/openclwrapper.hxx
index 057f02b..30e3838 100644
--- a/sc/source/core/opencl/openclwrapper.hxx
+++ b/sc/source/core/opencl/openclwrapper.hxx
@@ -266,6 +266,8 @@ const std::vector<OpenclPlatformInfo>& fillOpenCLInfo();
  */
 bool switchOpenclDevice(const OUString* pDeviceId, bool bAutoSelect);
 
+void compileKernels();
+
 }}
 
 #endif
diff --git a/sc/source/core/tool/formulagroup.cxx b/sc/source/core/tool/formulagroup.cxx
index dd765a8..c07d280 100644
--- a/sc/source/core/tool/formulagroup.cxx
+++ b/sc/source/core/tool/formulagroup.cxx
@@ -338,6 +338,7 @@ typedef FormulaGroupInterpreter* (*__createFormulaGroupOpenCLInterpreter)(void);
 typedef size_t (*__getOpenCLPlatformCount)(void);
 typedef void (*__fillOpenCLInfo)(OpenclPlatformInfo*, size_t);
 typedef bool (*__switchOpenClDevice)(const OUString*, bool);
+typedef void (*__compileKernels)(void);
 
 #endif
 
@@ -457,6 +458,23 @@ void FormulaGroupInterpreter::switchOpenCLDevice(const OUString& rDeviceId, bool
 #endif
 }
 
+void FormulaGroupInterpreter::compileKernels()
+{
+    if (!ScInterpreter::GetGlobalConfig().mbOpenCLEnabled)
+        // OpenCL is not enabled.
+        return;
+
+    osl::Module* pModule = getOpenCLModule();
+    if (!pModule)
+        return;
+
+    oslGenericFunction fn = pModule->getFunctionSymbol("compileKernels");
+    if (!fn)
+        return;
+
+    reinterpret_cast<__compileKernels>(fn)();
+}
+
 void FormulaGroupInterpreter::generateRPNCode(ScDocument& rDoc, const ScAddress& rPos, ScTokenArray& rCode)
 {
     // First, generate an RPN (reverse polish notation) token array.
diff --git a/sc/source/filter/excel/excform.cxx b/sc/source/filter/excel/excform.cxx
index c99f67a..49ca6ca 100644
--- a/sc/source/filter/excel/excform.cxx
+++ b/sc/source/filter/excel/excform.cxx
@@ -136,6 +136,11 @@ void ImportExcel::Formula(
         }
     }
 
+    if (!mxCLKernelThread.is())
+    {
+        mxCLKernelThread.set(new sc::CLBuildKernelThread);
+        mxCLKernelThread->launch();
+    }
     ConvErr eErr = pFormConv->Convert( pResult, maStrm, nFormLen, true, FT_CellFormula);
 
     ScFormulaCell* pCell = NULL;
diff --git a/sc/source/filter/excel/read.cxx b/sc/source/filter/excel/read.cxx
index ae89246..14396f6 100644
--- a/sc/source/filter/excel/read.cxx
+++ b/sc/source/filter/excel/read.cxx
@@ -763,6 +763,9 @@ FltError ImportExcel::Read( void )
             eLastErr = SCWARN_IMPORT_COLUMN_OVERFLOW;
     }
 
+    if (mxCLKernelThread.is())
+        mxCLKernelThread->join();
+
     return eLastErr;
 }
 
@@ -1316,6 +1319,9 @@ FltError ImportExcel8::Read( void )
             GetPivotTableManager().MaybeRefreshPivotTables();
     }
 
+    if (mxCLKernelThread.is())
+        mxCLKernelThread->join();
+
     return eLastErr;
 }
 
diff --git a/sc/source/filter/ftools/clkernelthread.cxx b/sc/source/filter/ftools/clkernelthread.cxx
new file mode 100644
index 0000000..f6d8c63
--- /dev/null
+++ b/sc/source/filter/ftools/clkernelthread.cxx
@@ -0,0 +1,28 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#include "clkernelthread.hxx"
+#include "formulagroup.hxx"
+
+using namespace std;
+
+namespace sc {
+
+CLBuildKernelThread::CLBuildKernelThread() : salhelper::Thread("opencl-build-kernel-thread") {}
+
+CLBuildKernelThread::~CLBuildKernelThread() {}
+
+void CLBuildKernelThread::execute()
+{
+    sc::FormulaGroupInterpreter::compileKernels();
+}
+
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sc/source/filter/inc/clkernelthread.hxx b/sc/source/filter/inc/clkernelthread.hxx
new file mode 100644
index 0000000..32586e7
--- /dev/null
+++ b/sc/source/filter/inc/clkernelthread.hxx
@@ -0,0 +1,26 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#include "salhelper/thread.hxx"
+
+namespace sc {
+
+class CLBuildKernelThread : public salhelper::Thread
+{
+public:
+    CLBuildKernelThread();
+    virtual ~CLBuildKernelThread();
+
+protected:
+    virtual void execute();
+};
+
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sc/source/filter/inc/imp_op.hxx b/sc/source/filter/inc/imp_op.hxx
index 8e004f3..613ae30 100644
--- a/sc/source/filter/inc/imp_op.hxx
+++ b/sc/source/filter/inc/imp_op.hxx
@@ -30,6 +30,9 @@
 #include "otlnbuff.hxx"
 #include "colrowst.hxx"
 #include "excdefs.hxx"
+#include "rtl/ref.hxx"
+#include "clkernelthread.hxx"
+
 #include <boost/shared_ptr.hpp>
 #include <boost/ptr_container/ptr_vector.hpp>
 
@@ -79,6 +82,8 @@ private:
 class ImportExcel : public ImportTyp, protected XclImpRoot
 {
 protected:
+    rtl::Reference<sc::CLBuildKernelThread> mxCLKernelThread;
+
     static const double     fExcToTwips;        // Umrechnung 1/256 Zeichen -> Twips
 
     RootData*               pExcRoot;


More information about the Libreoffice-commits mailing list