[Libreoffice-commits] core.git: Branch 'feature/calc-pluggable-opencl' - 4 commits - sc/inc sc/Library_scfilt.mk sc/source
Kohei Yoshida
kohei.yoshida at collabora.com
Tue Sep 17 12:13:22 PDT 2013
sc/Library_scfilt.mk | 2
sc/inc/formulagroup.hxx | 1
sc/source/core/opencl/formulagroupcl.cxx | 5
sc/source/core/opencl/openclwrapper.cxx | 251 +++++++++--------------------
sc/source/core/opencl/openclwrapper.hxx | 4
sc/source/core/tool/formulagroup.cxx | 19 ++
sc/source/filter/excel/excform.cxx | 5
sc/source/filter/excel/read.cxx | 6
sc/source/filter/ftools/clkernelthread.cxx | 28 +++
sc/source/filter/inc/clkernelthread.hxx | 26 +++
sc/source/filter/inc/imp_op.hxx | 5
11 files changed, 185 insertions(+), 167 deletions(-)
New commits:
commit 945c7e6ab8c36307acb61e807c75d278179a5ed4
Author: Kohei Yoshida <kohei.yoshida at collabora.com>
Date: Tue Sep 17 15:14:00 2013 -0400
Treat oclMatrixSolve like every other kernel: register it in pKernelNames and obtain it via fetchKernel() instead of creating and releasing it ad hoc.
Change-Id: I79d36ad7c95bf4cc8cd6bb4fd55dcedd5cd70684
diff --git a/sc/source/core/opencl/openclwrapper.cxx b/sc/source/core/opencl/openclwrapper.cxx
index ce0f662..7a8b205 100644
--- a/sc/source/core/opencl/openclwrapper.cxx
+++ b/sc/source/core/opencl/openclwrapper.cxx
@@ -125,7 +125,9 @@ const char* pKernelNames[] = {
"oclMaxDiv",
"oclAverageDiv"
"oclMinDiv",
- "oclSub"
+ "oclSub",
+
+ "oclMatrixSolve"
};
}
@@ -2341,20 +2343,22 @@ bool OclCalc::oclHostMatrixInverse64Bits( const char* aKernelName, double *dpOcl
}
clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, mpClmemLeftData, dpOclMatrixSrc, 0, NULL, NULL );
CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" );
- cl_kernel kernel_solve = clCreateKernel( kEnv.mpkProgram, "oclMatrixSolve", &clStatus );
- CHECK_OPENCL( clStatus, "clCreateKernel" );
- clStatus = clSetKernelArg( kernel_solve, 0, sizeof(cl_mem), (void *)&mpClmemLeftData );
+ Kernel* pKernelMatrix = fetchKernel("oclMatrixSolve");
+ if (!pKernelMatrix)
+ return false;
+
+ clStatus = clSetKernelArg(pKernel->mpKernel, 0, sizeof(cl_mem), (void *)&mpClmemLeftData);
CHECK_OPENCL( clStatus, "clSetKernelArg" );
- clStatus = clSetKernelArg( kernel_solve, 1, sizeof(cl_mem), (void *)&mpClmemRightData );
+ clStatus = clSetKernelArg(pKernel->mpKernel, 1, sizeof(cl_mem), (void *)&mpClmemRightData);
CHECK_OPENCL( clStatus, "clSetKernelArg" );
- clStatus = clSetKernelArg( kernel_solve, 2, sizeof(cl_mem), (void *)&clpPData );
+ clStatus = clSetKernelArg(pKernel->mpKernel, 2, sizeof(cl_mem), (void *)&clpPData);
CHECK_OPENCL( clStatus, "clSetKernelArg" );
- clStatus = clSetKernelArg( kernel_solve, 3, sizeof(cl_mem), (void *)&clpYData );
+ clStatus = clSetKernelArg(pKernel->mpKernel, 3, sizeof(cl_mem), (void *)&clpYData);
CHECK_OPENCL( clStatus, "clSetKernelArg" );
- clStatus = clSetKernelArg( kernel_solve, 4, sizeof(cl_mem), (void *)&clpNData );
+ clStatus = clSetKernelArg(pKernel->mpKernel, 4, sizeof(cl_mem), (void *)&clpNData);
CHECK_OPENCL( clStatus, "clSetKernelArg" );
- clStatus = clEnqueueNDRangeKernel( kEnv.mpkCmdQueue, kernel_solve, 1, NULL, global_work_size, NULL, 0, NULL, NULL );
+ clStatus = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, pKernel->mpKernel, 1, NULL, global_work_size, NULL, 0, NULL, NULL);
CHECK_OPENCL( clStatus, "clEnqueueNDRangeKernel" );
clFinish( kEnv.mpkCmdQueue );
for ( uint i = 0; i < nDim; i++ )
@@ -2370,8 +2374,6 @@ bool OclCalc::oclHostMatrixInverse64Bits( const char* aKernelName, double *dpOcl
clStatus = clReleaseMemObject( mpClmemRightData );
CHECK_OPENCL( clStatus, "clReleaseMemObject" );
mpClmemRightData = NULL;
- clStatus = clReleaseKernel( kernel_solve );
- CHECK_OPENCL( clStatus, "clReleaseKernel" );
clStatus = clReleaseMemObject( clpPData );
CHECK_OPENCL( clStatus, "clReleaseKernel" );
clStatus = clReleaseMemObject( clpYData );
@@ -2453,20 +2455,23 @@ bool OclCalc::oclHostMatrixInverse32Bits( const char* aKernelName, float *fpOclM
clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, mpClmemLeftData, fpOclMatrixSrc, 0, NULL, NULL );
CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" );
- cl_kernel kernel_solve = clCreateKernel( kEnv.mpkProgram, "oclMatrixSolve", &clStatus );
- CHECK_OPENCL( clStatus, "clCreateKernel" );
- clStatus = clSetKernelArg( kernel_solve, 0, sizeof(cl_mem), (void *)&mpClmemLeftData );
+ Kernel* pKernelMatrix = fetchKernel("oclMatrixSolve");
+ if (!pKernelMatrix)
+ return false;
+
+ clStatus = clSetKernelArg(pKernel->mpKernel, 0, sizeof(cl_mem), (void *)&mpClmemLeftData);
CHECK_OPENCL( clStatus, "clSetKernelArg" );
- clStatus = clSetKernelArg( kernel_solve, 1, sizeof(cl_mem), (void *)&mpClmemRightData );
+ clStatus = clSetKernelArg(pKernel->mpKernel, 1, sizeof(cl_mem), (void *)&mpClmemRightData);
CHECK_OPENCL( clStatus, "clSetKernelArg" );
- clStatus = clSetKernelArg( kernel_solve, 2, sizeof(cl_mem), (void *)&clpPData );
+ clStatus = clSetKernelArg(pKernel->mpKernel, 2, sizeof(cl_mem), (void *)&clpPData);
CHECK_OPENCL( clStatus, "clSetKernelArg" );
- clStatus = clSetKernelArg( kernel_solve, 3, sizeof(cl_mem), (void *)&clpYData );
+ clStatus = clSetKernelArg(pKernel->mpKernel, 3, sizeof(cl_mem), (void *)&clpYData);
CHECK_OPENCL( clStatus, "clSetKernelArg" );
- clStatus = clSetKernelArg( kernel_solve, 4, sizeof(cl_mem), (void *)&clpNData );
+ clStatus = clSetKernelArg(pKernel->mpKernel, 4, sizeof(cl_mem), (void *)&clpNData);
CHECK_OPENCL( clStatus, "clSetKernelArg" );
- clStatus = clEnqueueNDRangeKernel( kEnv.mpkCmdQueue, kernel_solve, 1, NULL, global_work_size, NULL, 0, NULL, NULL );
+ clStatus = clEnqueueNDRangeKernel(
+ kEnv.mpkCmdQueue, pKernel->mpKernel, 1, NULL, global_work_size, NULL, 0, NULL, NULL);
CHECK_OPENCL( clStatus, "clEnqueueNDRangeKernel" );
clFinish( kEnv.mpkCmdQueue );
for ( uint i = 0; i < nDim; i++ )
@@ -2482,8 +2487,6 @@ bool OclCalc::oclHostMatrixInverse32Bits( const char* aKernelName, float *fpOclM
clStatus = clReleaseMemObject( mpClmemRightData );
CHECK_OPENCL( clStatus, "clReleaseMemObject" );
mpClmemRightData = NULL;
- clStatus = clReleaseKernel( kernel_solve );
- CHECK_OPENCL( clStatus, "clReleaseKernel" );
clStatus = clReleaseMemObject( clpPData );
CHECK_OPENCL( clStatus, "clReleaseKernel" );
clStatus = clReleaseMemObject( clpYData );
commit 7cda070f05aaf120d3045a9235bfc249500a1034
Author: Kohei Yoshida <kohei.yoshida at collabora.com>
Date: Tue Sep 17 14:59:49 2013 -0400
Compile the kernel when fetching the Kernel instance.
This makes the code a bit cleaner by removing the repeated clCreateKernel block from each caller.
Change-Id: Id129cea834e950e422e55e6c2504c1f88c5dbeab
diff --git a/sc/source/core/opencl/openclwrapper.cxx b/sc/source/core/opencl/openclwrapper.cxx
index c468669..ce0f662 100644
--- a/sc/source/core/opencl/openclwrapper.cxx
+++ b/sc/source/core/opencl/openclwrapper.cxx
@@ -152,15 +152,25 @@ int OpenclDevice::setKernelEnv( KernelEnv *envInfo )
return 1;
}
-Kernel* OpenclDevice::checkKernelName( const char *kernelName )
+Kernel* OpenclDevice::fetchKernel( const char *kernelName )
{
+ cl_int nStatus;
for (size_t i = 0, n = gpuEnv.maKernels.size(); i < n; ++i)
{
Kernel* pKernel = &gpuEnv.maKernels[i];
if (!strcasecmp(kernelName, pKernel->mpName))
{
printf("found the kernel named %s.\n", kernelName);
- return pKernel;
+ if (!pKernel->mpKernel && gpuEnv.mpArryPrograms[0])
+ {
+ pKernel->mpKernel = clCreateKernel(gpuEnv.mpArryPrograms[0], kernelName, &nStatus);
+ if (nStatus != CL_SUCCESS)
+ pKernel->mpKernel = NULL;
+
+ printf("Kernel named '%s' has been compiled\n", kernelName);
+ }
+
+ return pKernel->mpKernel ? pKernel : NULL;
}
}
@@ -1000,15 +1010,10 @@ bool OclCalc::oclHostArithmeticOperator64Bits( const char* aKernelName, double *
{
cl_int clStatus = 0;
size_t global_work_size[1];
- Kernel* pKernel = checkKernelName(aKernelName);
+ Kernel* pKernel = fetchKernel(aKernelName);
if (!pKernel)
return false;
- if (!pKernel->mpKernel)
- {
- pKernel->mpKernel = clCreateKernel(kEnv.mpkProgram, aKernelName, &clStatus);
- CHECK_OPENCL( clStatus, "clCreateKernel" );
- }
clFinish( kEnv.mpkCmdQueue );
cl_mem clResult = clCreateBuffer( kEnv.mpkContext, CL_MEM_READ_WRITE,
nRowSize * sizeof(double), NULL, &clStatus);
@@ -1048,16 +1053,10 @@ bool OclCalc::oclMoreColHostArithmeticOperator64Bits( int nDataSize,int neOpSize
cl_int clStatus = 0;
size_t global_work_size[1];
const char *aKernelName = "oclMoreColArithmeticOperator";
- Kernel* pKernel = checkKernelName(aKernelName);
+ Kernel* pKernel = fetchKernel(aKernelName);
if (!pKernel)
return false;
- if (!pKernel->mpKernel)
- {
- pKernel->mpKernel = clCreateKernel(kEnv.mpkProgram, aKernelName, &clStatus);
- CHECK_OPENCL( clStatus, "clCreateKernel" );
- }
-
cl_mem clResult = clCreateBuffer( kEnv.mpkContext, CL_MEM_READ_WRITE, nRowSize * sizeof(double), NULL, &clStatus );
CHECK_OPENCL( clStatus, "clCreateBuffer" );
clStatus = clSetKernelArg(pKernel->mpKernel, 0, sizeof(cl_mem), (void *)&mpClmemLeftData);
@@ -1095,15 +1094,10 @@ bool OclCalc::oclHostArithmeticStash64Bits( const char* aKernelName, const doubl
cl_int clStatus = 0;
size_t global_work_size[1];
setKernelEnv( &kEnv );
- Kernel* pKernel = checkKernelName(aKernelName);
+ Kernel* pKernel = fetchKernel(aKernelName);
if (!pKernel)
return false;
- if (!pKernel->mpKernel)
- {
- pKernel->mpKernel = clCreateKernel( kEnv.mpkProgram, aKernelName, &clStatus );
- CHECK_OPENCL( clStatus, "clCreateKernel" );
- }
clFinish( kEnv.mpkCmdQueue );
cl_mem clLeftData = clCreateBuffer( kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR),
@@ -1149,16 +1143,10 @@ bool OclCalc::oclHostFormulaStash64Bits( const char* aKernelName, const double*
cl_int clStatus = 0;
size_t global_work_size[1];
setKernelEnv( &kEnv );
- Kernel* pKernel = checkKernelName(aKernelName);
+ Kernel* pKernel = fetchKernel(aKernelName);
if (!pKernel)
return false;
- if (!pKernel->mpKernel)
- {
- pKernel->mpKernel = clCreateKernel(kEnv.mpkProgram, aKernelName, &clStatus);
- CHECK_OPENCL( clStatus, "clCreateKernel" );
- }
-
cl_mem clSrcData = clCreateBuffer( kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR),
nBufferSize * sizeof(double), (void *)dpSrcData, &clStatus );
CHECK_OPENCL( clStatus, "clCreateBuffer" );
@@ -1205,16 +1193,10 @@ bool OclCalc::oclHostFormulaStatistics64Bits( const char* aKernelName, double *&
{
cl_int clStatus = 0;
size_t global_work_size[1];
- Kernel* pKernel = checkKernelName(aKernelName);
+ Kernel* pKernel = fetchKernel(aKernelName);
if (!pKernel)
return false;
- if (!pKernel->mpKernel)
- {
- pKernel->mpKernel = clCreateKernel(kEnv.mpkProgram, aKernelName, &clStatus);
- CHECK_OPENCL( clStatus, "clCreateKernel" );
- }
-
cl_mem outputCl = clCreateBuffer( kEnv.mpkContext, CL_MEM_READ_WRITE, size * sizeof(double), NULL, &clStatus );
CHECK_OPENCL( clStatus, "clCreateBuffer" );
clStatus = clSetKernelArg(pKernel->mpKernel, 0, sizeof(cl_mem),(void *)&mpClmemSrcData);
@@ -1252,18 +1234,12 @@ bool OclCalc::oclHostFormulaStatistics64Bits( const char* aKernelName, double *&
bool OclCalc::oclHostFormulaCount64Bits( uint *npStartPos, uint *npEndPos, double *&dpOutput, int nSize )
{
const char *cpKernelName = "oclFormulaCount";
- Kernel* pKernel = checkKernelName(cpKernelName);
+ Kernel* pKernel = fetchKernel(cpKernelName);
if (!pKernel)
return false;
cl_int clStatus;
- if (!pKernel->mpKernel)
- {
- pKernel->mpKernel = clCreateKernel(kEnv.mpkProgram, cpKernelName, &clStatus);
- CHECK_OPENCL( clStatus, "clCreateKernel" );
- }
-
size_t global_work_size[1];
clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, mpClmemStartPos, npStartPos, 0, NULL, NULL );
CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" );
@@ -1310,15 +1286,10 @@ bool OclCalc::oclHostFormulaSumProduct64Bits( double *dpSumProMergeLfData, doubl
memset(dpOutput,0,nSize);
const char *cpFirstKernelName = "oclSignedMul";
const char *cpSecondKernelName = "oclFormulaSumproduct";
- Kernel* pKernel1 = checkKernelName(cpFirstKernelName);
+ Kernel* pKernel1 = fetchKernel(cpFirstKernelName);
if (!pKernel1)
return false;
- if (!pKernel1->mpKernel)
- {
- pKernel1->mpKernel = clCreateKernel(kEnv.mpkProgram, cpFirstKernelName, &clStatus);
- CHECK_OPENCL( clStatus, "clCreateKernel" );
- }
clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, mpClmemMergeLfData, dpSumProMergeLfData, 0, NULL, NULL );
CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" );
clFinish(kEnv.mpkCmdQueue);
@@ -1348,15 +1319,10 @@ bool OclCalc::oclHostFormulaSumProduct64Bits( double *dpSumProMergeLfData, doubl
clStatus = clReleaseMemObject( mpClmemMergeRtData );
CHECK_OPENCL( clStatus, "clReleaseMemObject" );
- Kernel* pKernel2 = checkKernelName(cpSecondKernelName);
+ Kernel* pKernel2 = fetchKernel(cpSecondKernelName);
if (!pKernel2)
return false;
- if (!pKernel2->mpKernel)
- {
- pKernel2->mpKernel = clCreateKernel(kEnv.mpkProgram, cpSecondKernelName, &clStatus);
- CHECK_OPENCL( clStatus, "clCreateKernel" );
- }
cl_mem clpOutput = clCreateBuffer( kEnv.mpkContext, CL_MEM_READ_WRITE, nSize* sizeof(double), NULL, &clStatus );
CHECK_OPENCL( clStatus, "clCreateBuffer" );
cl_uint nMatixSize = nFormulaColSize * nFormulaRowSize;
@@ -1576,16 +1542,10 @@ bool OclCalc::oclHostArithmeticOperator32Bits( const char* aKernelName,double *r
cl_int clStatus = 0;
size_t global_work_size[1];
- Kernel* pKernel = checkKernelName(aKernelName);
+ Kernel* pKernel = fetchKernel(aKernelName);
if (!pKernel)
return false;
- if (!pKernel->mpKernel)
- {
- pKernel->mpKernel = clCreateKernel(kEnv.mpkProgram, aKernelName, &clStatus);
- CHECK_OPENCL( clStatus, "clCreateKernel" );
- }
-
cl_mem clResult = clCreateBuffer( kEnv.mpkContext, CL_MEM_READ_WRITE, nRowSize * sizeof(float), NULL, &clStatus );
CHECK_OPENCL( clStatus, "clCreateBuffer" );
clStatus = clSetKernelArg(pKernel->mpKernel, 0, sizeof(cl_mem), (void *)&mpClmemLeftData);
@@ -1620,16 +1580,10 @@ bool OclCalc::oclMoreColHostArithmeticOperator32Bits( int nDataSize,int neOpSize
cl_int clStatus = 0;
size_t global_work_size[1];
const char *aKernelName = "oclMoreColArithmeticOperator";
- Kernel* pKernel = checkKernelName(aKernelName);
+ Kernel* pKernel = fetchKernel(aKernelName);
if (!pKernel)
return false;
- if (!pKernel->mpKernel)
- {
- pKernel->mpKernel = clCreateKernel( kEnv.mpkProgram, aKernelName, &clStatus );
- CHECK_OPENCL( clStatus, "clCreateKernel" );
- }
-
cl_mem clResult = clCreateBuffer( kEnv.mpkContext, CL_MEM_READ_WRITE, nRowSize * sizeof(float), NULL, &clStatus );
CHECK_OPENCL( clStatus, "clCreateBuffer" );
clStatus = clSetKernelArg(pKernel->mpKernel, 0, sizeof(cl_mem), (void *)&mpClmemLeftData);
@@ -1665,18 +1619,11 @@ bool OclCalc::oclMoreColHostArithmeticOperator32Bits( int nDataSize,int neOpSize
bool OclCalc::oclHostFormulaStatistics32Bits(const char* aKernelName,double *output,int size)
{
- Kernel* pKernel = checkKernelName(aKernelName);
+ Kernel* pKernel = fetchKernel(aKernelName);
if (!pKernel)
return false;
cl_int clStatus = 0;
-
- if (!pKernel->mpKernel)
- {
- pKernel->mpKernel = clCreateKernel(kEnv.mpkProgram, aKernelName, &clStatus);
- CHECK_OPENCL( clStatus, "clCreateKernel" );
- }
-
size_t global_work_size[1];
cl_mem outputCl = clCreateBuffer( kEnv.mpkContext, CL_MEM_READ_WRITE, size * sizeof(float), NULL, &clStatus );
@@ -1713,16 +1660,10 @@ bool OclCalc::oclHostArithmeticStash32Bits( const char* aKernelName, const doubl
cl_int clStatus = 0;
size_t global_work_size[1];
setKernelEnv( &kEnv );
- Kernel* pKernel = checkKernelName(aKernelName);
+ Kernel* pKernel = fetchKernel(aKernelName);
if (!pKernel)
return false;
- if (!pKernel->mpKernel)
- {
- pKernel->mpKernel = clCreateKernel(kEnv.mpkProgram, aKernelName, &clStatus);
- CHECK_OPENCL( clStatus, "clCreateKernel" );
- }
-
float *fpLeftData = (float *)malloc( sizeof(float) * nRowSize );
float *fpRightData = (float *)malloc( sizeof(float) * nRowSize );
float *fpResult = (float *)malloc( sizeof(float) * nRowSize );
@@ -1782,16 +1723,10 @@ bool OclCalc::oclHostFormulaStash32Bits( const char* aKernelName, const double*
cl_int clStatus = 0;
size_t global_work_size[1];
setKernelEnv( &kEnv );
- Kernel* pKernel = checkKernelName(aKernelName);
+ Kernel* pKernel = fetchKernel(aKernelName);
if (!pKernel)
return false;
- if (!pKernel->mpKernel)
- {
- pKernel->mpKernel = clCreateKernel(kEnv.mpkProgram, aKernelName, &clStatus);
- CHECK_OPENCL( clStatus, "clCreateKernel" );
- }
-
float *fpSrcData = (float *)malloc( sizeof(float) * nBufferSize );
float *fpResult = (float *)malloc( sizeof(float) * size );
for(int i=0;i<nBufferSize;i++)
@@ -1848,18 +1783,13 @@ bool OclCalc::oclHostFormulaStash32Bits( const char* aKernelName, const double*
bool OclCalc::oclHostFormulaCount32Bits( uint *npStartPos, uint *npEndPos, double *dpOutput, int nSize )
{
const char *cpKernelName = "oclFormulaCount";
- Kernel* pKernel = checkKernelName(cpKernelName);
+ Kernel* pKernel = fetchKernel(cpKernelName);
if (!pKernel)
return false;
cl_int clStatus;
size_t global_work_size[1];
- if (!pKernel->mpKernel)
- {
- pKernel->mpKernel = clCreateKernel(kEnv.mpkProgram, cpKernelName, &clStatus);
- CHECK_OPENCL( clStatus, "clCreateKernel" );
- }
clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, mpClmemStartPos, npStartPos, 0, NULL, NULL );
CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" );
clFinish( kEnv.mpkCmdQueue );
@@ -1909,16 +1839,10 @@ bool OclCalc::oclHostFormulaSumProduct32Bits( float *fpSumProMergeLfData, float
memset(dpOutput,0,nSize);
const char *cpFirstKernelName = "oclSignedMul";
const char *cpSecondKernelName = "oclFormulaSumproduct";
- Kernel* pKernel1 = checkKernelName(cpFirstKernelName);
+ Kernel* pKernel1 = fetchKernel(cpFirstKernelName);
if (!pKernel1)
return false;
- if (!pKernel1->mpKernel)
- {
- pKernel1->mpKernel = clCreateKernel(kEnv.mpkProgram, cpFirstKernelName, &clStatus);
- CHECK_OPENCL( clStatus, "clCreateKernel" );
- }
-
clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, mpClmemMergeLfData, fpSumProMergeLfData, 0, NULL, NULL );
CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" );
clFinish( kEnv.mpkCmdQueue );
@@ -1947,15 +1871,10 @@ bool OclCalc::oclHostFormulaSumProduct32Bits( float *fpSumProMergeLfData, float
clStatus = clReleaseMemObject( mpClmemMergeRtData );
CHECK_OPENCL( clStatus, "clReleaseMemObject" );
- Kernel* pKernel2 = checkKernelName(cpSecondKernelName);
+ Kernel* pKernel2 = fetchKernel(cpSecondKernelName);
if (!pKernel2)
return false;
- if (!pKernel2->mpKernel)
- {
- pKernel2->mpKernel = clCreateKernel(kEnv.mpkProgram, cpSecondKernelName, &clStatus);
- CHECK_OPENCL( clStatus, "clCreateKernel" );
- }
cl_mem clpOutput = clCreateBuffer( kEnv.mpkContext, CL_MEM_READ_WRITE, nSize* sizeof(float), NULL, &clStatus );
CHECK_OPENCL( clStatus, "clCreateBuffer" );
cl_uint nMatixSize = nFormulaColSize * nFormulaRowSize;
@@ -2065,18 +1984,11 @@ bool OclCalc::oclGroundWaterGroup( uint *eOp, uint eOpNum, const double *pOpArra
break;
}
}
- Kernel* pKernel = checkKernelName(kernelName);
+ Kernel* pKernel = fetchKernel(kernelName);
if (!pKernel)
return false;
cl_int clStatus;
-
- if (!pKernel->mpKernel)
- {
- pKernel->mpKernel = clCreateKernel(kEnv.mpkProgram, kernelName, &clStatus);
- CHECK_OPENCL( clStatus, "clCreateKernel" );
- }
-
size_t global_work_size[1];
if ( ( eOpNum == 1 ) && ( eOp[0] == ocSub ) )
subFlag = true;
@@ -2239,27 +2151,13 @@ double *OclCalc::oclSimpleDeltaOperation( OpCode eOp, const double *pOpArray, co
assert( false );
}
- Kernel* pKernel = checkKernelName(kernelName);
+ Kernel* pKernel = fetchKernel(kernelName);
if (!pKernel)
return NULL;
cl_int clStatus;
size_t global_work_size[1];
- if (!pKernel->mpKernel)
- {
- pKernel->mpKernel = clCreateKernel(kEnv.mpkProgram, kernelName, &clStatus);
- CHECK_OPENCL_PTR( clStatus, "clCreateKernel" );
- }
-
- if (!pKernel->mpKernel)
- {
- fprintf( stderr, "\n\n*** Error: Could not clCreateKernel '%s' ***\n\n", kernelName );
- fprintf( stderr, "\tprobably your binary cache is out of date\n"
- "\tplease delete kernel-*.bin in your cwd\n\n\n" );
- return NULL;
- }
-
// Ugh - horrible redundant copying ...
cl_mem valuesCl = NULL, subtractCl = NULL, outputCl = NULL;
@@ -2411,16 +2309,10 @@ bool OclCalc::oclHostMatrixInverse64Bits( const char* aKernelName, double *dpOcl
clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, clpNData, npDim, 0, NULL, NULL );
CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" );
- Kernel* pKernel = checkKernelName(aKernelName);
+ Kernel* pKernel = fetchKernel(aKernelName);
if (!pKernel)
return false;
- if (!pKernel->mpKernel)
- {
- pKernel->mpKernel = clCreateKernel(kEnv.mpkProgram, aKernelName, &clStatus);
- CHECK_OPENCL( clStatus, "clCreateKernel" );
- }
-
clStatus = clSetKernelArg(pKernel->mpKernel, 0, sizeof(cl_mem), (void *)&mpClmemLeftData);
CHECK_OPENCL( clStatus, "clSetKernelArg" );
clStatus = clSetKernelArg(pKernel->mpKernel, 1, sizeof(cl_mem), (void *)&clpPData);
@@ -2527,15 +2419,10 @@ bool OclCalc::oclHostMatrixInverse32Bits( const char* aKernelName, float *fpOclM
for ( uint i = 0; i < nDim; i++ )
npDim[i] = nDim;
clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, clpNData, npDim, 0, NULL, NULL );
- Kernel* pKernel = checkKernelName(aKernelName);
+ Kernel* pKernel = fetchKernel(aKernelName);
if (!pKernel)
return false;
- if (!pKernel->mpKernel)
- {
- pKernel->mpKernel = clCreateKernel(kEnv.mpkProgram, aKernelName, &clStatus);
- CHECK_OPENCL( clStatus, "clCreateKernel" );
- }
clStatus = clSetKernelArg(pKernel->mpKernel, 0, sizeof(cl_mem), (void *)&mpClmemLeftData);
CHECK_OPENCL( clStatus, "clSetKernelArg" );
clStatus = clSetKernelArg(pKernel->mpKernel, 1, sizeof(cl_mem), (void *)&clpPData);
diff --git a/sc/source/core/opencl/openclwrapper.hxx b/sc/source/core/opencl/openclwrapper.hxx
index bf76e51..dfa8fbb 100644
--- a/sc/source/core/opencl/openclwrapper.hxx
+++ b/sc/source/core/opencl/openclwrapper.hxx
@@ -177,7 +177,7 @@ public:
static int initOpenclAttr( OpenCLEnv * env );
static int setKernelEnv( KernelEnv *envInfo );
- static Kernel* checkKernelName( const char *kernelName );
+ static Kernel* fetchKernel( const char *kernelName );
static int getOpenclState();
static void setOpenclState( int state );
commit 2a41e0b317a66e308fbb1947030f776013f844c2
Author: Kohei Yoshida <kohei.yoshida at collabora.com>
Date: Tue Sep 17 14:43:59 2013 -0400
Compile kernels for real.
Change-Id: I7c5e6707e6f733b26d5bb6d6b0d48b0f338625bc
diff --git a/sc/source/core/opencl/formulagroupcl.cxx b/sc/source/core/opencl/formulagroupcl.cxx
index 4c29223..5697b1b 100644
--- a/sc/source/core/opencl/formulagroupcl.cxx
+++ b/sc/source/core/opencl/formulagroupcl.cxx
@@ -1096,9 +1096,9 @@ SAL_DLLPUBLIC_EXPORT bool SAL_CALL switchOpenClDevice(const OUString* pDeviceId,
return sc::opencl::switchOpenclDevice(pDeviceId, bAutoSelect);
}
-SAL_DLLPUBLIC_EXPORT void compileKernels()
+SAL_DLLPUBLIC_EXPORT void compileKernels(const OUString* pDeviceId)
{
- sc::opencl::compileKernels();
+ sc::opencl::compileKernels(pDeviceId);
}
}
diff --git a/sc/source/core/opencl/openclwrapper.cxx b/sc/source/core/opencl/openclwrapper.cxx
index a0c1e7a..c468669 100644
--- a/sc/source/core/opencl/openclwrapper.cxx
+++ b/sc/source/core/opencl/openclwrapper.cxx
@@ -2851,8 +2851,33 @@ bool switchOpenclDevice(const OUString* pDevice, bool bAutoSelect)
return !OpenclDevice::initOpenclRunEnv(0);
}
-void compileKernels()
+void compileKernels(const OUString* pDeviceId)
{
+ if (!pDeviceId)
+ return;
+
+ if (pDeviceId->isEmpty())
+ return;
+
+ if (!switchOpenclDevice(pDeviceId, false))
+ return;
+
+ cl_program pProgram = OpenclDevice::gpuEnv.mpArryPrograms[0];
+ if (!pProgram)
+ return;
+
+ cl_int nStatus;
+ for (size_t i = 0, n = OpenclDevice::gpuEnv.maKernels.size(); i < n; ++i)
+ {
+ Kernel& r = OpenclDevice::gpuEnv.maKernels[i];
+ if (r.mpKernel)
+ continue;
+
+ r.mpKernel = clCreateKernel(pProgram, r.mpName, &nStatus);
+ if (nStatus != CL_SUCCESS)
+ r.mpKernel = NULL;
+ }
+
}
}}
diff --git a/sc/source/core/opencl/openclwrapper.hxx b/sc/source/core/opencl/openclwrapper.hxx
index 30e3838..bf76e51 100644
--- a/sc/source/core/opencl/openclwrapper.hxx
+++ b/sc/source/core/opencl/openclwrapper.hxx
@@ -266,7 +266,7 @@ const std::vector<OpenclPlatformInfo>& fillOpenCLInfo();
*/
bool switchOpenclDevice(const OUString* pDeviceId, bool bAutoSelect);
-void compileKernels();
+void compileKernels(const OUString* pDeviceId);
}}
diff --git a/sc/source/core/tool/formulagroup.cxx b/sc/source/core/tool/formulagroup.cxx
index c07d280..6a20a0c 100644
--- a/sc/source/core/tool/formulagroup.cxx
+++ b/sc/source/core/tool/formulagroup.cxx
@@ -338,7 +338,7 @@ typedef FormulaGroupInterpreter* (*__createFormulaGroupOpenCLInterpreter)(void);
typedef size_t (*__getOpenCLPlatformCount)(void);
typedef void (*__fillOpenCLInfo)(OpenclPlatformInfo*, size_t);
typedef bool (*__switchOpenClDevice)(const OUString*, bool);
-typedef void (*__compileKernels)(void);
+typedef void (*__compileKernels)(const OUString*);
#endif
@@ -460,7 +460,8 @@ void FormulaGroupInterpreter::switchOpenCLDevice(const OUString& rDeviceId, bool
void FormulaGroupInterpreter::compileKernels()
{
- if (!ScInterpreter::GetGlobalConfig().mbOpenCLEnabled)
+ const ScCalcConfig& rConfig = ScInterpreter::GetGlobalConfig();
+ if (!rConfig.mbOpenCLEnabled)
// OpenCL is not enabled.
return;
@@ -472,7 +473,7 @@ void FormulaGroupInterpreter::compileKernels()
if (!fn)
return;
- reinterpret_cast<__compileKernels>(fn)();
+ reinterpret_cast<__compileKernels>(fn)(&rConfig.maOpenCLDevice);
}
void FormulaGroupInterpreter::generateRPNCode(ScDocument& rDoc, const ScAddress& rPos, ScTokenArray& rCode)
commit 4211e6cd2e3fd91bc6276576af76648de4b07752
Author: Kohei Yoshida <kohei.yoshida at collabora.com>
Date: Tue Sep 17 12:41:07 2013 -0400
Set up a thread to compile OpenCL kernels during file load.
It is still a no-op, and we need a smart mechanism to trigger it
conditionally rather than running it for every document.
Change-Id: Ia875ebb9405b5de5c5d31418de84c5ca7a62f302
diff --git a/sc/Library_scfilt.mk b/sc/Library_scfilt.mk
index 75ee0fe..546b474 100644
--- a/sc/Library_scfilt.mk
+++ b/sc/Library_scfilt.mk
@@ -45,6 +45,7 @@ $(eval $(call gb_Library_use_libraries,scfilt,\
msfilter \
oox \
sal \
+ salhelper \
sax \
sb \
sc \
@@ -130,6 +131,7 @@ $(eval $(call gb_Library_add_exception_objects,scfilt,\
sc/source/filter/excel/xltools \
sc/source/filter/excel/xltracer \
sc/source/filter/excel/xlview \
+ sc/source/filter/ftools/clkernelthread \
sc/source/filter/ftools/fapihelper \
sc/source/filter/ftools/fprogressbar \
sc/source/filter/ftools/ftools \
diff --git a/sc/inc/formulagroup.hxx b/sc/inc/formulagroup.hxx
index bfd4bbc..c6f32ac 100644
--- a/sc/inc/formulagroup.hxx
+++ b/sc/inc/formulagroup.hxx
@@ -56,6 +56,7 @@ class SC_DLLPUBLIC FormulaGroupInterpreter
static FormulaGroupInterpreter *getStatic();
static void fillOpenCLInfo(std::vector<OpenclPlatformInfo>& rPlatforms);
static void switchOpenCLDevice(const OUString& rDeviceId, bool bAutoSelect);
+ static void compileKernels();
virtual ScMatrixRef inverseMatrix(const ScMatrix& rMat) = 0;
virtual bool interpret(ScDocument& rDoc, const ScAddress& rTopPos, const ScFormulaCellGroupRef& xGroup, ScTokenArray& rCode) = 0;
diff --git a/sc/source/core/opencl/formulagroupcl.cxx b/sc/source/core/opencl/formulagroupcl.cxx
index 567d033..4c29223 100644
--- a/sc/source/core/opencl/formulagroupcl.cxx
+++ b/sc/source/core/opencl/formulagroupcl.cxx
@@ -1096,6 +1096,11 @@ SAL_DLLPUBLIC_EXPORT bool SAL_CALL switchOpenClDevice(const OUString* pDeviceId,
return sc::opencl::switchOpenclDevice(pDeviceId, bAutoSelect);
}
+SAL_DLLPUBLIC_EXPORT void compileKernels()
+{
+ sc::opencl::compileKernels();
+}
+
}
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sc/source/core/opencl/openclwrapper.cxx b/sc/source/core/opencl/openclwrapper.cxx
index 3994029..a0c1e7a 100644
--- a/sc/source/core/opencl/openclwrapper.cxx
+++ b/sc/source/core/opencl/openclwrapper.cxx
@@ -2851,6 +2851,10 @@ bool switchOpenclDevice(const OUString* pDevice, bool bAutoSelect)
return !OpenclDevice::initOpenclRunEnv(0);
}
+void compileKernels()
+{
+}
+
}}
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sc/source/core/opencl/openclwrapper.hxx b/sc/source/core/opencl/openclwrapper.hxx
index 057f02b..30e3838 100644
--- a/sc/source/core/opencl/openclwrapper.hxx
+++ b/sc/source/core/opencl/openclwrapper.hxx
@@ -266,6 +266,8 @@ const std::vector<OpenclPlatformInfo>& fillOpenCLInfo();
*/
bool switchOpenclDevice(const OUString* pDeviceId, bool bAutoSelect);
+void compileKernels();
+
}}
#endif
diff --git a/sc/source/core/tool/formulagroup.cxx b/sc/source/core/tool/formulagroup.cxx
index dd765a8..c07d280 100644
--- a/sc/source/core/tool/formulagroup.cxx
+++ b/sc/source/core/tool/formulagroup.cxx
@@ -338,6 +338,7 @@ typedef FormulaGroupInterpreter* (*__createFormulaGroupOpenCLInterpreter)(void);
typedef size_t (*__getOpenCLPlatformCount)(void);
typedef void (*__fillOpenCLInfo)(OpenclPlatformInfo*, size_t);
typedef bool (*__switchOpenClDevice)(const OUString*, bool);
+typedef void (*__compileKernels)(void);
#endif
@@ -457,6 +458,23 @@ void FormulaGroupInterpreter::switchOpenCLDevice(const OUString& rDeviceId, bool
#endif
}
+void FormulaGroupInterpreter::compileKernels()
+{
+ if (!ScInterpreter::GetGlobalConfig().mbOpenCLEnabled)
+ // OpenCL is not enabled.
+ return;
+
+ osl::Module* pModule = getOpenCLModule();
+ if (!pModule)
+ return;
+
+ oslGenericFunction fn = pModule->getFunctionSymbol("compileKernels");
+ if (!fn)
+ return;
+
+ reinterpret_cast<__compileKernels>(fn)();
+}
+
void FormulaGroupInterpreter::generateRPNCode(ScDocument& rDoc, const ScAddress& rPos, ScTokenArray& rCode)
{
// First, generate an RPN (reverse polish notation) token array.
diff --git a/sc/source/filter/excel/excform.cxx b/sc/source/filter/excel/excform.cxx
index c99f67a..49ca6ca 100644
--- a/sc/source/filter/excel/excform.cxx
+++ b/sc/source/filter/excel/excform.cxx
@@ -136,6 +136,11 @@ void ImportExcel::Formula(
}
}
+ if (!mxCLKernelThread.is())
+ {
+ mxCLKernelThread.set(new sc::CLBuildKernelThread);
+ mxCLKernelThread->launch();
+ }
ConvErr eErr = pFormConv->Convert( pResult, maStrm, nFormLen, true, FT_CellFormula);
ScFormulaCell* pCell = NULL;
diff --git a/sc/source/filter/excel/read.cxx b/sc/source/filter/excel/read.cxx
index ae89246..14396f6 100644
--- a/sc/source/filter/excel/read.cxx
+++ b/sc/source/filter/excel/read.cxx
@@ -763,6 +763,9 @@ FltError ImportExcel::Read( void )
eLastErr = SCWARN_IMPORT_COLUMN_OVERFLOW;
}
+ if (mxCLKernelThread.is())
+ mxCLKernelThread->join();
+
return eLastErr;
}
@@ -1316,6 +1319,9 @@ FltError ImportExcel8::Read( void )
GetPivotTableManager().MaybeRefreshPivotTables();
}
+ if (mxCLKernelThread.is())
+ mxCLKernelThread->join();
+
return eLastErr;
}
diff --git a/sc/source/filter/ftools/clkernelthread.cxx b/sc/source/filter/ftools/clkernelthread.cxx
new file mode 100644
index 0000000..f6d8c63
--- /dev/null
+++ b/sc/source/filter/ftools/clkernelthread.cxx
@@ -0,0 +1,28 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#include "clkernelthread.hxx"
+#include "formulagroup.hxx"
+
+using namespace std;
+
+namespace sc {
+
+CLBuildKernelThread::CLBuildKernelThread() : salhelper::Thread("opencl-build-kernel-thread") {}
+
+CLBuildKernelThread::~CLBuildKernelThread() {}
+
+void CLBuildKernelThread::execute()
+{
+ sc::FormulaGroupInterpreter::compileKernels();
+}
+
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sc/source/filter/inc/clkernelthread.hxx b/sc/source/filter/inc/clkernelthread.hxx
new file mode 100644
index 0000000..32586e7
--- /dev/null
+++ b/sc/source/filter/inc/clkernelthread.hxx
@@ -0,0 +1,26 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#include "salhelper/thread.hxx"
+
+namespace sc {
+
+class CLBuildKernelThread : public salhelper::Thread
+{
+public:
+ CLBuildKernelThread();
+ virtual ~CLBuildKernelThread();
+
+protected:
+ virtual void execute();
+};
+
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sc/source/filter/inc/imp_op.hxx b/sc/source/filter/inc/imp_op.hxx
index 8e004f3..613ae30 100644
--- a/sc/source/filter/inc/imp_op.hxx
+++ b/sc/source/filter/inc/imp_op.hxx
@@ -30,6 +30,9 @@
#include "otlnbuff.hxx"
#include "colrowst.hxx"
#include "excdefs.hxx"
+#include "rtl/ref.hxx"
+#include "clkernelthread.hxx"
+
#include <boost/shared_ptr.hpp>
#include <boost/ptr_container/ptr_vector.hpp>
@@ -79,6 +82,8 @@ private:
class ImportExcel : public ImportTyp, protected XclImpRoot
{
protected:
+ rtl::Reference<sc::CLBuildKernelThread> mxCLKernelThread;
+
static const double fExcToTwips; // Umrechnung 1/256 Zeichen -> Twips
RootData* pExcRoot;
More information about the Libreoffice-commits
mailing list