[Libreoffice-commits] core.git: Branch 'feature/calc-group-interpreter' - sc/source
Michael Meeks
michael.meeks at suse.com
Mon Jul 8 02:47:18 PDT 2013
sc/source/core/opencl/formulagroupcl.cxx | 142 +-
sc/source/core/opencl/oclkernels.hxx | 181 +-
sc/source/core/opencl/openclwrapper.cxx | 1982 ++++++++++++++++++++-----------
sc/source/core/opencl/openclwrapper.hxx | 137 +-
4 files changed, 1581 insertions(+), 861 deletions(-)
New commits:
commit c963a6f0e655e951a189e88b0bec2f6808b5736f
Author: Michael Meeks <michael.meeks at suse.com>
Date: Mon Jul 8 10:49:05 2013 +0100
Latest cleanup and improvements of opencl backend.
diff --git a/sc/source/core/opencl/formulagroupcl.cxx b/sc/source/core/opencl/formulagroupcl.cxx
old mode 100644
new mode 100755
index ca064fe..857f045
--- a/sc/source/core/opencl/formulagroupcl.cxx
+++ b/sc/source/core/opencl/formulagroupcl.cxx
@@ -57,7 +57,7 @@ public:
const ScFormulaCellGroupRef& xGroup, ScTokenArray& rCode);
};
-ScMatrixRef FormulaGroupInterpreterOpenCL::inverseMatrix(const ScMatrix& rMat)
+ScMatrixRef FormulaGroupInterpreterOpenCL::inverseMatrix(const ScMatrix& /* rMat */)
{
return ScMatrixRef();
}
@@ -65,35 +65,32 @@ ScMatrixRef FormulaGroupInterpreterOpenCL::inverseMatrix(const ScMatrix& rMat)
bool FormulaGroupInterpreterOpenCL::interpret(ScDocument& rDoc, const ScAddress& rTopPos,
const ScFormulaCellGroupRef& xGroup, ScTokenArray& rCode)
{
- size_t rowSize = xGroup->mnLength; //, srcSize = 0;
+ size_t rowSize = xGroup->mnLength;
fprintf(stderr,"rowSize at begin is ...%ld.\n",(long)rowSize);
- int *rangeStart =NULL; // The first position for calculation,for example,the A1 in (=MAX(A1:A100))
- int *rangeEnd = NULL; // The last position for calculation,for example, the A100 in (=MAX(A1:A100))
// The row quantity can be gotten from p2->GetArrayLength()
- int count1 =0,count2 =0,count3=0;
- int oclOp=0;
- double *srcData = NULL; // Point to the input data from CPU
- double *rResult=NULL; // Point to the output data from GPU
- double *leftData=NULL; // Left input for binary operator(+,-,*,/),for example,(=leftData+rightData)
- double *rightData=NULL; // Right input for binary operator(+,-,*,/),for example,(=leftData/rightData)
- // The rightData can't be zero for "/"
-
- leftData = (double *)malloc(sizeof(double) * rowSize);
- rightData = (double *)malloc(sizeof(double) * rowSize);
- rResult = (double *)malloc(sizeof(double) * rowSize*2);// For 2 columns(B,C)
- srcData = (double *)calloc(rowSize,sizeof(double));
-
- rangeStart =(int *)malloc(sizeof(int) * rowSize);
- rangeEnd =(int *)malloc(sizeof(int) * rowSize);
-
- memset(rResult,0,rowSize);
- if(NULL==leftData||NULL==rightData||
- NULL==rResult||NULL==rangeStart||NULL==rangeEnd)
+ int nCount1 = 0, nCount2 = 0, nCount3 = 0;
+ int nOclOp = 0;
+ double *rResult = NULL; // Point to the output data from GPU
+ rResult = (double *)malloc(sizeof(double) * rowSize*2);// For 2 columns(B,C)
+ if(NULL==rResult)
{
printf("malloc err\n");
return false;
}
- // printf("rowSize is %d.\n",rowsize);
+ memset(rResult,0,rowSize);
+ float * fpOclSrcData = NULL; // Point to the input data from CPU
+ uint * npOclStartPos = NULL; // The first position for calculation,for example,the A1 in (=MAX(A1:A100))
+ uint * npOclEndPos = NULL; // The last position for calculation,for example, the A100 in (=MAX(A1:A100))
+ float * fpLeftData = NULL; // Left input for binary operator(+,-,*,/),for example,(=leftData+rightData)
+ float * fpRightData = NULL; // Right input for binary operator(+,-,*,/),for example,(=leftData/rightData)
+ // The rightData can't be zero for "/"
+ static OclCalc ocl_calc;
+ // Don't know how large the size will be applied previously, so create them as the rowSize or 65536
+ // Don't know which formulae will be used previously, so create buffers for different formulae used probably
+ ocl_calc.CreateBuffer(fpOclSrcData,npOclStartPos,npOclEndPos,rowSize);
+ ocl_calc.CreateBuffer(fpLeftData,fpRightData,rowSize);
+ //printf("pptrr is %d,%d,%d\n",fpOclSrcData,npOclStartPos,npOclEndPos);
+///////////////////////////////////////////////////////////////////////////////////////////
// Until we implement group calculation for real, decompose the group into
// individual formula token arrays for individual calculation.
@@ -125,26 +122,23 @@ bool FormulaGroupInterpreterOpenCL::interpret(ScDocument& rDoc, const ScAddress&
size_t nRowSize = nRowEnd - nRowStart + 1;
ScMatrixRef pMat(new ScMatrix(nColSize, nRowSize, 0.0));
- //srcSize = rowSize+nRowSize-rowSize%nRowSize;//align as nRowSize
- //srcData = (double *)calloc(srcSize,sizeof(double));
- rangeStart[i] = nRowStart;//record the start position
- rangeEnd[i] = nRowEnd;//record the end position
+ npOclStartPos[i] = nRowStart; // record the start position
+ npOclEndPos[i] = nRowEnd; // record the end position
for (size_t nCol = 0; nCol < nColSize; ++nCol)
{
const double* pArray = rArrays[nCol];
-
- //printf("pArray is %p.\n",pArray);
if( NULL==pArray )
{
fprintf(stderr,"Error: pArray is NULL!\n");
return false;
}
- //fprintf(stderr,"(rowSize+nRowSize-1) is %d.\n",rowSize+nRowSize-1);
+
for( size_t u=0; u<rowSize; u++ )
{
- srcData[u] = pArray[u];// note:rowSize<=srcSize
- //fprintf(stderr,"srcData[%d] is %f.\n",u,srcData[u]);
+ // Many video cards can't support double type in kernel, so need transfer the double to float
+ fpOclSrcData[u] = (float)pArray[u];
+ //fprintf(stderr,"fpOclSrcData[%d] is %f.\n",u,fpOclSrcData[u]);
}
for (size_t nRow = 0; nRow < nRowSize; ++nRow)
@@ -177,26 +171,26 @@ bool FormulaGroupInterpreterOpenCL::interpret(ScDocument& rDoc, const ScAddress&
OpCode eOp = pCur->GetOpCode();
if(eOp==0)
{
- if(count3%2==0)
- leftData[count1++] = pCur->GetDouble();
- else
- rightData[count2++] = pCur->GetDouble();
- count3++;
- }
- else if( eOp!=ocOpen && eOp!=ocClose )
- oclOp = eOp;
-
-// if(count1>0){//dbg
-// fprintf(stderr,"leftData is %f.\n",leftData[count1-1]);
-// count1--;
-// }
-// if(count2>0){//dbg
-// fprintf(stderr,"rightData is %f.\n",rightData[count2-1]);
-// count2--;
-// }
+ if(nCount3%2==0)
+ fpLeftData[nCount1++] = (float)pCur->GetDouble();
+ else
+ fpRightData[nCount2++] = (float)pCur->GetDouble();
+ nCount3++;
+ }
+ else if( eOp!=ocOpen && eOp!=ocClose )
+ nOclOp = eOp;
+
+// if(count1>0){//dbg
+// fprintf(stderr,"leftData is %f.\n",leftData[count1-1]);
+// count1--;
+// }
+// if(count2>0){//dbg
+// fprintf(stderr,"rightData is %f.\n",rightData[count2-1]);
+// count2--;
+// }
}
- if(!getenv("SC_GPU"))
+ if(!getenv("SC_GPU")||!ocl_calc.GetOpenclState())
{
fprintf(stderr,"ccCPU flow...\n\n");
ScCompiler aComp(&rDoc, aTmpPos, aCode2);
@@ -211,34 +205,42 @@ bool FormulaGroupInterpreterOpenCL::interpret(ScDocument& rDoc, const ScAddress&
} // for loop end (xGroup->mnLength)
// For GPU calculation
- if(getenv("SC_GPU"))
+ if(getenv("SC_GPU")&&ocl_calc.GetOpenclState())
{
fprintf(stderr,"ggGPU flow...\n\n");
- printf(" oclOp is... %d\n",oclOp);
+ printf(" oclOp is... %d\n",nOclOp);
osl_getSystemTime(&aTimeBefore); //timer
- static OclCalc ocl_calc;
- switch(oclOp)
+ switch(nOclOp)
{
case ocAdd:
- ocl_calc.OclHostSignedAdd(leftData,rightData,rResult,count1);
+ ocl_calc.OclHostSignedAdd32Bits(fpLeftData,fpRightData,rResult,nCount1);
break;
case ocSub:
- ocl_calc.OclHostSignedSub(leftData,rightData,rResult,count1);
+ ocl_calc.OclHostSignedSub32Bits(fpLeftData,fpRightData,rResult,nCount1);
break;
case ocMul:
- ocl_calc.OclHostSignedMul(leftData,rightData,rResult,count1);
+ ocl_calc.OclHostSignedMul32Bits(fpLeftData,fpRightData,rResult,nCount1);
break;
case ocDiv:
- ocl_calc.OclHostSignedDiv(leftData,rightData,rResult,count1);
+ ocl_calc.OclHostSignedDiv32Bits(fpLeftData,fpRightData,rResult,nCount1);
break;
case ocMax:
- ocl_calc.OclHostFormulaMax(srcData,rangeStart,rangeEnd,rResult,rowSize);
+ ocl_calc.OclHostFormulaMax32Bits(fpOclSrcData,npOclStartPos,npOclEndPos,rResult,rowSize);
break;
case ocMin:
- ocl_calc.OclHostFormulaMin(srcData,rangeStart,rangeEnd,rResult,rowSize);
+ ocl_calc.OclHostFormulaMin32Bits(fpOclSrcData,npOclStartPos,npOclEndPos,rResult,rowSize);
break;
case ocAverage:
- ocl_calc.OclHostFormulaAverage(srcData,rangeStart,rangeEnd,rResult,rowSize);
+ ocl_calc.OclHostFormulaAverage32Bits(fpOclSrcData,npOclStartPos,npOclEndPos,rResult,rowSize);
+ break;
+ case ocSum:
+ //ocl_calc.OclHostFormulaSum(srcData,rangeStart,rangeEnd,rResult,rowSize);
+ break;
+ case ocCount:
+ //ocl_calc.OclHostFormulaCount(rangeStart,rangeEnd,rResult,rowSize);
+ break;
+ case ocSumProduct:
+ //ocl_calc.OclHostFormulaSumProduct(srcData,rangeStart,rangeEnd,rResult,rowSize);
break;
default:
fprintf(stderr,"No OpenCL function for this calculation.\n");
@@ -254,26 +256,16 @@ bool FormulaGroupInterpreterOpenCL::interpret(ScDocument& rDoc, const ScAddress&
/////////////////////////////////////////////////////
//rResult[i];
-// for(sal_Int32 i = 0; i < rowSize; ++i){//dbg output results
-// fprintf(stderr,"After GPU,rRsults[%d] is ...%f\n",i,rResult[i]);
-// }
+// for(sal_Int32 i = 0; i < rowSize; ++i){//dbg output results
+// fprintf(stderr,"After GPU,rRsults[%d] is ...%f\n",i,rResult[i]);
+// }
// Insert the double data, in rResult[i] back into the document
rDoc.SetFormulaResults(rTopPos, rResult, xGroup->mnLength);
}
- if(leftData)
- free(leftData);
- if(rightData)
- free(rightData);
- if(rangeStart)
- free(rangeStart);
- if(rangeEnd)
- free(rangeEnd);
if(rResult)
free(rResult);
- if(srcData)
- free(srcData);
if(getenv("SC_GPUSAMPLE")){
//fprintf(stderr,"FormulaGroupInterpreter::interpret(),iniflag...%d\n",ocl_calc.GetOpenclState());
diff --git a/sc/source/core/opencl/oclkernels.hxx b/sc/source/core/opencl/oclkernels.hxx
old mode 100644
new mode 100755
index 3269f3a..6c90126
--- a/sc/source/core/opencl/oclkernels.hxx
+++ b/sc/source/core/opencl/oclkernels.hxx
@@ -6,153 +6,158 @@
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/
+
#ifndef _OCL_KERNEL_H_
#define _OCL_KERNEL_H_
#ifndef USE_EXTERNAL_KERNEL
#define KERNEL( ... )# __VA_ARGS__
-
/////////////////////////////////////////////
const char *kernel_src = KERNEL(
__kernel void hello(__global uint *buffer)
{
-size_t idx = get_global_id(0);
-
-buffer[idx]=idx;
-
+ size_t idx = get_global_id(0);
+ buffer[idx]=idx;
}
__kernel void oclformula(__global float *data,
- const uint type)
+ const uint type)
{
- const unsigned int i = get_global_id(0);
-
- switch (type)
- {
- case 0: //MAX
- {
- //printf("%i ?%f>%f\n",i,data[2*i],data[2*i+1]);
- if(data[2*i]>data[2*i+1])
- data[i] = data[2*i];
- else
- data[i] = data[2*i+1];
- break;
- }
- case 1: //MIN
- {
- //printf("%d ?%d<%d\n",i,data[2*i],data[2*i+1]);
- if(data[2*i]<data[2*i+1])
- data[i] = data[2*i];
- else
- data[i] = data[2*i+1];
- break;
- }
- case 2: //SUM
- case 3: //AVG
- {
- //printf("%d %d+%d\n",i,data[2*i],data[2*i+1]);
- data[i] = data[2*i] + data[2*i+1];
- break;
- }
- default:
- break;
-
- }
+ const unsigned int i = get_global_id(0);
+
+ switch (type)
+ {
+ case 0: //MAX
+ {
+ //printf("%i ?%f>%f\n",i,data[2*i],data[2*i+1]);
+ if(data[2*i]>data[2*i+1])
+ data[i] = data[2*i];
+ else
+ data[i] = data[2*i+1];
+ break;
+ }
+ case 1: //MIN
+ {
+ //printf("%d ?%d<%d\n",i,data[2*i],data[2*i+1]);
+ if(data[2*i]<data[2*i+1])
+ data[i] = data[2*i];
+ else
+ data[i] = data[2*i+1];
+ break;
+ }
+ case 2: //SUM
+ case 3: //AVG
+ {
+ //printf("%d %d+%d\n",i,data[2*i],data[2*i+1]);
+ data[i] = data[2*i] + data[2*i+1];
+ break;
+ }
+ default:
+ break;
+
+ }
}
__kernel void oclSignedAdd(__global float *ltData,__global float *rtData,__global float *otData)
{
- const unsigned int id = get_global_id(0);
- otData[id] = ltData[id] + rtData[id];
+ const unsigned int id = get_global_id(0);
+ otData[id] = ltData[id] + rtData[id];
}
__kernel void oclSignedSub(__global float *ltData,__global float *rtData,__global float *otData)
{
- const unsigned int id = get_global_id(0);
- otData[id] = ltData[id] - rtData[id];
+ const unsigned int id = get_global_id(0);
+ otData[id] = ltData[id] - rtData[id];
}
__kernel void oclSignedMul(__global float *ltData,__global float *rtData,__global float *otData)
{
- int id = get_global_id(0);
- otData[id] =ltData[id] * rtData[id];
+ int id = get_global_id(0);
+ otData[id] =ltData[id] * rtData[id];
}
__kernel void oclSignedDiv(__global float *ltData,__global float *rtData,__global float *otData)
{
- const unsigned int id = get_global_id(0);
- otData[id] = ltData[id] / rtData[id];
+ const unsigned int id = get_global_id(0);
+ otData[id] = ltData[id] / rtData[id];
}
__kernel void oclFormulaMin(__global float *input,__global int *start,__global int *end,__global float *output)
{
- const unsigned int id = get_global_id(0);
- int i=0;
- unsigned int startFlag = start[id];
- unsigned int endFlag = end[id];
- float min = input[startFlag];
- for(i=startFlag;i<=endFlag;i++)
- {
- if(input[i]<min)
- min = input[i];
- }
- output[id] = min;
+ const unsigned int id = get_global_id(0);
+ int i=0;
+ unsigned int startFlag = start[id];
+ unsigned int endFlag = end[id];
+ float min = input[startFlag];
+ for(i=startFlag;i<=endFlag;i++)
+ {
+ if(input[i]<min)
+ min = input[i];
+ }
+ output[id] = min;
}
__kernel void oclFormulaMax(__global float *input,__global int *start,__global int *end,__global float *output)
{
- const unsigned int id = get_global_id(0);
- int i=0;
- unsigned int startFlag = start[id];
- unsigned int endFlag = end[id];
- float max = input[startFlag];
- for(i=startFlag;i<=endFlag;i++)
- {
- if(input[i]>max)
- max = input[i];
- }
- output[id] = max;
+ const unsigned int id = get_global_id(0);
+ int i=0;
+ unsigned int startFlag = start[id];
+ unsigned int endFlag = end[id];
+ float max = input[startFlag];
+ for(i=startFlag;i<=endFlag;i++)
+ {
+ if(input[i]>max)
+ max = input[i];
+ }
+ output[id] = max;
}
-
-__kernel void oclFormulaSum(__global float *data,
- const uint type)
+//Sum
+__kernel void oclFormulaSum(__global float *input,__global int *start,__global int *end,__global float *output)
{
-
+ const unsigned int nId = get_global_id(0);
+ float fSum = 0.0f;
+ for(int i = start[nId]; i<=end[nId]; i++)
+ fSum += input[i];
+ output[nId] = fSum ;
}
-
-__kernel void oclFormulaCount(__global float *data,
- const uint type)
+//Count
+__kernel void oclFormulaCount(__global int *start,__global int *end,__global float *output)
{
-
+ const unsigned int nId = get_global_id(0);
+ output[nId] = end[nId] - start[nId] + 1;
}
__kernel void oclFormulaAverage(__global float *input,__global int *start,__global int *end,__global float *output)
{
- const unsigned int id = get_global_id(0);
- int i=0;
- float sum=0;
- for(i = start[id];i<=end[id];i++)
- sum += input[i];
- output[id] = sum / (end[id]-start[id]+1);
+ const unsigned int id = get_global_id(0);
+ int i=0;
+ float sum=0;
+ for(i = start[id];i<=end[id];i++)
+ sum += input[i];
+ output[id] = sum / (end[id]-start[id]+1);
}
-
-__kernel void oclFormulaSumproduct(__global float *data,
- const uint type)
+//Sumproduct
+__kernel void oclFormulaSumproduct(__global float *firstCol,__global float *secondCol,__global int *start,__global int *end,__global float *output)
{
-
+ const int nId = get_global_id(0);
+ int nCount = start[nId] - end[nId] + 1;
+ int nStartA = start[nId*2];
+ int nStartB = start[nId*2+1];
+ for(int i = 0; i<nCount; i++)
+ output[nId] += firstCol[nStartA+i]*secondCol[nStartB+i];
}
__kernel void oclFormulaMinverse(__global float *data,
- const uint type)
+ const uint type)
{
}
diff --git a/sc/source/core/opencl/openclwrapper.cxx b/sc/source/core/opencl/openclwrapper.cxx
old mode 100644
new mode 100755
index 20d60e2..b008346
--- a/sc/source/core/opencl/openclwrapper.cxx
+++ b/sc/source/core/opencl/openclwrapper.cxx
@@ -22,6 +22,7 @@ using namespace std;
GPUEnv OpenclDevice::gpuEnv;
int OpenclDevice::isInited =0;
+
#ifdef SAL_WIN32
#define OPENCL_DLL_NAME "opencllo.dll"
@@ -32,62 +33,62 @@ HINSTANCE HOpenclDll = NULL;
int OpenclDevice::LoadOpencl()
{
- //fprintf(stderr, " LoadOpenclDllxx... \n");
- OpenclDll = static_cast<HINSTANCE>(HOpenclDll);
- OpenclDll = LoadLibrary(OPENCL_DLL_NAME);
- if (!static_cast<HINSTANCE>(OpenclDll))
- {
- fprintf(stderr, " Load opencllo.dll failed! \n");
- FreeLibrary(static_cast<HINSTANCE>(OpenclDll));
- return OCLERR;
- }
- fprintf(stderr, " Load opencllo.dll successfully!\n");
- return OCLSUCCESS;
+ //fprintf(stderr, " LoadOpenclDllxx... \n");
+ OpenclDll = static_cast<HINSTANCE>(HOpenclDll);
+ OpenclDll = LoadLibrary(OPENCL_DLL_NAME);
+ if (!static_cast<HINSTANCE>(OpenclDll))
+ {
+ fprintf(stderr, " Load opencllo.dll failed! \n");
+ FreeLibrary(static_cast<HINSTANCE>(OpenclDll));
+ return OCLERR;
+ }
+ fprintf(stderr, " Load opencllo.dll successfully!\n");
+ return OCLSUCCESS;
}
void OpenclDevice::FreeOpenclDll()
{
- fprintf(stderr, " Free opencllo.dll ... \n");
- if(!static_cast<HINSTANCE>(OpenclDll))
- FreeLibrary(static_cast<HINSTANCE>(OpenclDll));
+ fprintf(stderr, " Free opencllo.dll ... \n");
+ if(!static_cast<HINSTANCE>(OpenclDll))
+ FreeLibrary(static_cast<HINSTANCE>(OpenclDll));
}
#endif
int OpenclDevice::InitEnv()
{
#ifdef SAL_WIN32
- while(1)
+ while(1)
{
- if(1==LoadOpencl())
- break;
- }
+ if(1==LoadOpencl())
+ break;
+ }
#endif
- InitOpenclRunEnv(0,NULL);
- return 1;
+ InitOpenclRunEnv(0,NULL);
+ return 1;
}
int OpenclDevice::ReleaseOpenclRunEnv() {
- ReleaseOpenclEnv(&gpuEnv);
+ ReleaseOpenclEnv(&gpuEnv);
#ifdef SAL_WIN32
- FreeOpenclDll();
+ FreeOpenclDll();
#endif
return 1;
}
///////////////////////////////////////////////////////
///////////////////////////////////////////////////////
inline int OpenclDevice::AddKernelConfig(int kCount, const char *kName) {
- strcpy(gpuEnv.kernelNames[kCount], kName);
- gpuEnv.kernelCount++;
+ strcpy(gpuEnv.mArrykernelNames[kCount], kName);
+ gpuEnv.mnKernelCount++;
return 0;
}
int OpenclDevice::RegistOpenclKernel() {
- if (!gpuEnv.isUserCreated) {
+ if (!gpuEnv.mnIsUserCreated) {
memset(&gpuEnv, 0, sizeof(gpuEnv));
}
- gpuEnv.fileCount = 0; //argc;
- gpuEnv.kernelCount = 0UL;
+ gpuEnv.mnFileCount = 0; //argc;
+ gpuEnv.mnKernelCount = 0UL;
AddKernelConfig(0, (const char*) "hello");
AddKernelConfig(1, (const char*) "oclformula");
@@ -99,34 +100,39 @@ int OpenclDevice::RegistOpenclKernel() {
AddKernelConfig(7, (const char*) "oclFormulaSumproduct");
AddKernelConfig(8, (const char*) "oclFormulaMinverse");
- AddKernelConfig(9, (const char*) "oclSignedAdd");
+ AddKernelConfig(9, (const char*) "oclSignedAdd");
AddKernelConfig(10, (const char*) "oclSignedSub");
AddKernelConfig(11, (const char*) "oclSignedMul");
AddKernelConfig(12, (const char*) "oclSignedDiv");
- return 0;
+ return 0;
}
OpenclDevice::OpenclDevice(){
- //InitEnv();
+ //InitEnv();
}
OpenclDevice::~OpenclDevice() {
- //ReleaseOpenclRunEnv();
+ //ReleaseOpenclRunEnv();
}
+int OpenclDevice::SetKernelEnv(KernelEnv *envInfo)
+{
+ envInfo->mpkContext = gpuEnv.mpContext;
+ envInfo->mpkCmdQueue = gpuEnv.mpCmdQueue;
+ envInfo->mpkProgram = gpuEnv.mpArryPrograms[0];
+
+ return 1;
+}
int OpenclDevice::CheckKernelName(KernelEnv *envInfo,const char *kernelName){
//printf("CheckKernelName,total count of kernels...%d\n", gpuEnv.kernelCount);
int kCount;
- for(kCount=0; kCount < gpuEnv.kernelCount; kCount++) {
- if(strcasecmp(kernelName, gpuEnv.kernelNames[kCount]) == 0) {
- printf("match %s kernel right\n",kernelName);
- break;
+ for(kCount=0; kCount < gpuEnv.mnKernelCount; kCount++) {
+ if(strcasecmp(kernelName, gpuEnv.mArrykernelNames[kCount]) == 0) {
+ printf("match %s kernel right\n",kernelName);
+ break;
}
}
- envInfo->context = gpuEnv.context;
- envInfo->commandQueue = gpuEnv.commandQueue;
- envInfo->program = gpuEnv.programs[0];
- envInfo->kernel = gpuEnv.kernels[kCount];
- strcpy(envInfo->kernelName, kernelName);
+ envInfo->mpkKernel = gpuEnv.mpArryKernels[kCount];
+ strcpy(envInfo->mckKernelName, kernelName);
if (envInfo == (KernelEnv *) NULL)
{
printf("get err func and env\n");
@@ -145,7 +151,7 @@ int OpenclDevice::ConvertToString(const char *filename, char **source) {
printf("open kernel file %s.\n",filename);
if (file != NULL) {
- printf("Open ok!\n");
+ printf("Open ok!\n");
fseek(file, 0, SEEK_END);
file_size = ftell(file);
@@ -169,35 +175,35 @@ int OpenclDevice::ConvertToString(const char *filename, char **source) {
}
int OpenclDevice::BinaryGenerated(const char * clFileName, FILE ** fhandle) {
- unsigned int i = 0;
- cl_int status;
- char *str = NULL;
- FILE *fd = NULL;
- cl_uint numDevices=0;
- status = clGetDeviceIDs(gpuEnv.platform, // platform
- CL_DEVICE_TYPE_GPU, // device_type
- 0, // num_entries
- NULL, // devices
- &numDevices);
- for (i = 0; i <numDevices; i++) {
- char fileName[256] = { 0 }, cl_name[128] = { 0 };
- if (gpuEnv.devices[i] != 0) {
- char deviceName[1024];
- status = clGetDeviceInfo(gpuEnv.devices[i], CL_DEVICE_NAME,sizeof(deviceName), deviceName, NULL);
- CHECK_OPENCL(status);
- str = (char*) strstr(clFileName, (char*) ".cl");
- memcpy(cl_name, clFileName, str - clFileName);
- cl_name[str - clFileName] = '\0';
- sprintf(fileName, "./%s-%s.bin", cl_name, deviceName);
- fd = fopen(fileName, "rb");
- status = (fd != NULL) ? 1 : 0;
- }
- }
- if (fd != NULL) {
- *fhandle = fd;
- }
-
- return status;
+ unsigned int i = 0;
+ cl_int status;
+ char *str = NULL;
+ FILE *fd = NULL;
+ cl_uint numDevices=0;
+ status = clGetDeviceIDs(gpuEnv.mpPlatformID, // platform
+ CL_DEVICE_TYPE_GPU, // device_type
+ 0, // num_entries
+ NULL, // devices ID
+ &numDevices);
+ for (i = 0; i <numDevices; i++) {
+ char fileName[256] = { 0 }, cl_name[128] = { 0 };
+ if (gpuEnv.mpArryDevsID[i] != 0) {
+ char deviceName[1024];
+ status = clGetDeviceInfo(gpuEnv.mpArryDevsID[i], CL_DEVICE_NAME,sizeof(deviceName), deviceName, NULL);
+ CHECK_OPENCL(status);
+ str = (char*) strstr(clFileName, (char*) ".cl");
+ memcpy(cl_name, clFileName, str - clFileName);
+ cl_name[str - clFileName] = '\0';
+ sprintf(fileName, "./%s-%s.bin", cl_name, deviceName);
+ fd = fopen(fileName, "rb");
+ status = (fd != NULL) ? 1 : 0;
+ }
+ }
+ if (fd != NULL) {
+ *fhandle = fd;
+ }
+
+ return status;
}
@@ -221,20 +227,20 @@ int OpenclDevice::GeneratBinFromKernelSource(cl_program program,
unsigned int i = 0;
cl_int status;
size_t *binarySizes, numDevices;
- cl_device_id *devices;
+ cl_device_id *mpArryDevsID;
char **binaries, *str = NULL;
status = clGetProgramInfo(program, CL_PROGRAM_NUM_DEVICES,
sizeof(numDevices), &numDevices, NULL);
CHECK_OPENCL(status)
- devices = (cl_device_id*) malloc(sizeof(cl_device_id) * numDevices);
- if (devices == NULL) {
+ mpArryDevsID = (cl_device_id*) malloc(sizeof(cl_device_id) * numDevices);
+ if (mpArryDevsID == NULL) {
return 0;
}
/* grab the handles to all of the devices in the program. */
status = clGetProgramInfo(program, CL_PROGRAM_DEVICES,
- sizeof(cl_device_id) * numDevices, devices, NULL);
+ sizeof(cl_device_id) * numDevices, mpArryDevsID, NULL);
CHECK_OPENCL(status)
/* figure out the sizes of each of the binaries. */
@@ -271,7 +277,7 @@ int OpenclDevice::GeneratBinFromKernelSource(cl_program program,
if (binarySizes[i] != 0) {
char deviceName[1024];
- status = clGetDeviceInfo(devices[i], CL_DEVICE_NAME,
+ status = clGetDeviceInfo(mpArryDevsID[i], CL_DEVICE_NAME,
sizeof(deviceName), deviceName, NULL);
CHECK_OPENCL(status)
@@ -306,24 +312,24 @@ int OpenclDevice::GeneratBinFromKernelSource(cl_program program,
binarySizes = NULL;
}
- if (devices != NULL) {
- free(devices);
- devices = NULL;
+ if (mpArryDevsID != NULL) {
+ free(mpArryDevsID);
+ mpArryDevsID = NULL;
}
return 1;
}
int OpenclDevice::InitOpenclAttr(OpenCLEnv * env) {
- if (gpuEnv.isUserCreated) {
+ if (gpuEnv.mnIsUserCreated) {
return 1;
}
- gpuEnv.context = env->context;
- gpuEnv.platform = env->platform;
- gpuEnv.dev = env->devices;
- gpuEnv.commandQueue = env->commandQueue;
+ gpuEnv.mpContext = env->mpOclContext;
+ gpuEnv.mpPlatformID = env->mpOclPlatformID;
+ gpuEnv.mpDevID = env->mpOclDevsID;
+ gpuEnv.mpCmdQueue = env->mpOclCmdQueue;
- gpuEnv.isUserCreated = 1;
+ gpuEnv.mnIsUserCreated = 1;
return 0;
}
@@ -331,14 +337,14 @@ int OpenclDevice::InitOpenclAttr(OpenCLEnv * env) {
int OpenclDevice::CreateKernel(char * kernelname, KernelEnv * env) {
int status;
- env->kernel = clCreateKernel(gpuEnv.programs[0], kernelname, &status);
- env->context = gpuEnv.context;
- env->commandQueue = gpuEnv.commandQueue;
+ env->mpkKernel = clCreateKernel(gpuEnv.mpArryPrograms[0], kernelname, &status);
+ env->mpkContext = gpuEnv.mpContext;
+ env->mpkCmdQueue = gpuEnv.mpCmdQueue;
return status != CL_SUCCESS ? 1 : 0;
}
int OpenclDevice::ReleaseKernel(KernelEnv * env) {
- int status = clReleaseKernel(env->kernel);
+ int status = clReleaseKernel(env->mpkKernel);
return status != CL_SUCCESS ? 1 : 0;
}
@@ -350,24 +356,24 @@ int OpenclDevice::ReleaseOpenclEnv(GPUEnv *gpuInfo) {
return 1;
}
- for (i = 0; i < gpuEnv.fileCount; i++) {
- if (gpuEnv.programs[i]) {
- status = clReleaseProgram(gpuEnv.programs[i]);
+ for (i = 0; i < gpuEnv.mnFileCount; i++) {
+ if (gpuEnv.mpArryPrograms[i]) {
+ status = clReleaseProgram(gpuEnv.mpArryPrograms[i]);
CHECK_OPENCL(status)
- gpuEnv.programs[i] = NULL;
+ gpuEnv.mpArryPrograms[i] = NULL;
}
}
- if (gpuEnv.commandQueue) {
- clReleaseCommandQueue(gpuEnv.commandQueue);
- gpuEnv.commandQueue = NULL;
+ if (gpuEnv.mpCmdQueue) {
+ clReleaseCommandQueue(gpuEnv.mpCmdQueue);
+ gpuEnv.mpCmdQueue = NULL;
}
- if (gpuEnv.context) {
- clReleaseContext(gpuEnv.context);
- gpuEnv.context = NULL;
+ if (gpuEnv.mpContext) {
+ clReleaseContext(gpuEnv.mpContext);
+ gpuEnv.mpContext = NULL;
}
isInited = 0;
- gpuInfo->isUserCreated = 0;
- free(gpuInfo->devices);
+ gpuInfo->mnIsUserCreated = 0;
+ free(gpuInfo->mpArryDevsID);
return 1;
}
@@ -385,9 +391,9 @@ int OpenclDevice::RunKernelWrapper(cl_kernel_function function,
int OpenclDevice::CachedOfKernerPrg(const GPUEnv *gpuEnvCached,
const char * clFileName) {
int i;
- for (i = 0; i < gpuEnvCached->fileCount; i++) {
- if (strcasecmp(gpuEnvCached->kernelSrcFile[i], clFileName) == 0) {
- if (gpuEnvCached->programs[i] != NULL) {
+ for (i = 0; i < gpuEnvCached->mnFileCount; i++) {
+ if (strcasecmp(gpuEnvCached->mArryKnelSrcFile[i], clFileName) == 0) {
+ if (gpuEnvCached->mpArryPrograms[i] != NULL) {
return 1;
}
}
@@ -404,27 +410,27 @@ int OpenclDevice::CompileKernelFile(GPUEnv *gpuInfo, const char *buildOption) {
size_t source_size[1];
int b_error, binary_status, binaryExisted, idx;
size_t numDevices;
- cl_device_id *devices;
+ cl_device_id *mpArryDevsID;
FILE *fd, *fd1;
const char* filename = "kernel.cl";
- fprintf(stderr, "CompileKernelFile ... \n");
+ fprintf(stderr, "CompileKernelFile ... \n");
if (CachedOfKernerPrg(gpuInfo, filename) == 1) {
return 1;
}
- idx = gpuInfo->fileCount;
+ idx = gpuInfo->mnFileCount;
source = kernel_src;
source_size[0] = strlen(source);
binaryExisted = 0;
if ((binaryExisted = BinaryGenerated(filename, &fd)) == 1) {
- status = clGetContextInfo(gpuInfo->context, CL_CONTEXT_NUM_DEVICES,
+ status = clGetContextInfo(gpuInfo->mpContext, CL_CONTEXT_NUM_DEVICES,
sizeof(numDevices), &numDevices, NULL);
CHECK_OPENCL(status)
- devices = (cl_device_id*) malloc(sizeof(cl_device_id) * numDevices);
- if (devices == NULL) {
+ mpArryDevsID = (cl_device_id*) malloc(sizeof(cl_device_id) * numDevices);
+ if (mpArryDevsID == NULL) {
return 0;
}
@@ -451,50 +457,50 @@ int OpenclDevice::CompileKernelFile(GPUEnv *gpuInfo, const char *buildOption) {
fclose(fd);
fd = NULL;
// grab the handles to all of the devices in the context.
- status = clGetContextInfo(gpuInfo->context, CL_CONTEXT_DEVICES,
- sizeof(cl_device_id) * numDevices, devices, NULL);
+ status = clGetContextInfo(gpuInfo->mpContext, CL_CONTEXT_DEVICES,
+ sizeof(cl_device_id) * numDevices, mpArryDevsID, NULL);
CHECK_OPENCL(status)
- gpuInfo->programs[idx] = clCreateProgramWithBinary(gpuInfo->context,
- numDevices, devices, &length, (const unsigned char**) &binary,
+ gpuInfo->mpArryPrograms[idx] = clCreateProgramWithBinary(gpuInfo->mpContext,
+ numDevices, mpArryDevsID, &length, (const unsigned char**) &binary,
&binary_status, &status);
CHECK_OPENCL(status)
free(binary);
- free(devices);
- devices = NULL;
+ free(mpArryDevsID);
+ mpArryDevsID = NULL;
} else {
// create a CL program using the kernel source
- gpuEnv.programs[idx] = clCreateProgramWithSource(gpuEnv.context,
+ gpuEnv.mpArryPrograms[idx] = clCreateProgramWithSource(gpuEnv.mpContext,
1, &source, source_size, &status);
CHECK_OPENCL(status);
}
- if (gpuInfo->programs[idx] == (cl_program) NULL) {
+ if (gpuInfo->mpArryPrograms[idx] == (cl_program) NULL) {
return 0;
}
//char options[512];
// create a cl program executable for all the devices specified
- if (!gpuInfo->isUserCreated) {
- status = clBuildProgram(gpuInfo->programs[idx], 1, gpuInfo->devices,
+ if (!gpuInfo->mnIsUserCreated) {
+ status = clBuildProgram(gpuInfo->mpArryPrograms[idx], 1, gpuInfo->mpArryDevsID,
buildOption, NULL, NULL);
CHECK_OPENCL(status)
} else {
- status = clBuildProgram(gpuInfo->programs[idx], 1, &(gpuInfo->dev),
+ status = clBuildProgram(gpuInfo->mpArryPrograms[idx], 1, &(gpuInfo->mpDevID),
buildOption, NULL, NULL);
CHECK_OPENCL(status)
}
printf("BuildProgram.\n");
if (status != CL_SUCCESS) {
- if (!gpuInfo->isUserCreated) {
- status = clGetProgramBuildInfo(gpuInfo->programs[idx],
- gpuInfo->devices[0], CL_PROGRAM_BUILD_LOG, 0, NULL,
+ if (!gpuInfo->mnIsUserCreated) {
+ status = clGetProgramBuildInfo(gpuInfo->mpArryPrograms[idx],
+ gpuInfo->mpArryDevsID[0], CL_PROGRAM_BUILD_LOG, 0, NULL,
&length);
} else {
- status = clGetProgramBuildInfo(gpuInfo->programs[idx],
- gpuInfo->dev, CL_PROGRAM_BUILD_LOG, 0, NULL, &length);
+ status = clGetProgramBuildInfo(gpuInfo->mpArryPrograms[idx],
+ gpuInfo->mpDevID, CL_PROGRAM_BUILD_LOG, 0, NULL, &length);
}
if (status != CL_SUCCESS) {
printf("opencl create build log fail\n");
@@ -504,13 +510,13 @@ int OpenclDevice::CompileKernelFile(GPUEnv *gpuInfo, const char *buildOption) {
if (buildLog == (char*) NULL) {
return 0;
}
- if (!gpuInfo->isUserCreated) {
- status = clGetProgramBuildInfo(gpuInfo->programs[idx],
- gpuInfo->devices[0], CL_PROGRAM_BUILD_LOG, length,
+ if (!gpuInfo->mnIsUserCreated) {
+ status = clGetProgramBuildInfo(gpuInfo->mpArryPrograms[idx],
+ gpuInfo->mpArryDevsID[0], CL_PROGRAM_BUILD_LOG, length,
buildLog, &length);
} else {
- status = clGetProgramBuildInfo(gpuInfo->programs[idx],
- gpuInfo->dev, CL_PROGRAM_BUILD_LOG, length, buildLog,
+ status = clGetProgramBuildInfo(gpuInfo->mpArryPrograms[idx],
+ gpuInfo->mpDevID, CL_PROGRAM_BUILD_LOG, length, buildLog,
&length);
}
@@ -524,12 +530,12 @@ int OpenclDevice::CompileKernelFile(GPUEnv *gpuInfo, const char *buildOption) {
return 0;
}
- strcpy(gpuEnv.kernelSrcFile[idx], filename);
+ strcpy(gpuEnv.mArryKnelSrcFile[idx], filename);
if (binaryExisted == 0)
- GeneratBinFromKernelSource(gpuEnv.programs[idx], filename);
+ GeneratBinFromKernelSource(gpuEnv.mpArryPrograms[idx], filename);
- gpuInfo->fileCount += 1;
+ gpuInfo->mnFileCount += 1;
return 1;
@@ -538,14 +544,14 @@ int OpenclDevice::CompileKernelFile(GPUEnv *gpuInfo, const char *buildOption) {
int OpenclDevice::GetKernelEnvAndFunc(const char *kernelName,
KernelEnv *env, cl_kernel_function *function) {
int i; //,program_idx ;
- printf("----------------OpenclDevice::GetKernelEnvAndFunc\n");
- for (i = 0; i < gpuEnv.kernelCount; i++) {
- if (strcasecmp(kernelName, gpuEnv.kernelNames[i]) == 0) {
- env->context = gpuEnv.context;
- env->commandQueue = gpuEnv.commandQueue;
- env->program = gpuEnv.programs[0];
- env->kernel = gpuEnv.kernels[i];
- *function = gpuEnv.kernelFunctions[i];
+ //printf("----------------OpenclDevice::GetKernelEnvAndFunc\n");
+ for (i = 0; i < gpuEnv.mnKernelCount; i++) {
+ if (strcasecmp(kernelName, gpuEnv.mArrykernelNames[i]) == 0) {
+ env->mpkContext = gpuEnv.mpContext;
+ env->mpkCmdQueue = gpuEnv.mpCmdQueue;
+ env->mpkProgram = gpuEnv.mpArryPrograms[0];
+ env->mpkKernel = gpuEnv.mpArryKernels[i];
+ *function = gpuEnv.mpArryKnelFuncs[i];
return 1;
}
}
@@ -553,21 +559,21 @@ int OpenclDevice::GetKernelEnvAndFunc(const char *kernelName,
}
int OpenclDevice::RunKernel(const char *kernelName, void **userdata) {
- KernelEnv env;
+ KernelEnv kEnv;
cl_kernel_function function;
int status;
- memset(&env, 0, sizeof(KernelEnv));
- status = GetKernelEnvAndFunc(kernelName, &env, &function);
- strcpy(env.kernelName, kernelName);
+ memset(&kEnv, 0, sizeof(KernelEnv));
+ status = GetKernelEnvAndFunc(kernelName, &kEnv, &function);
+ strcpy(kEnv.mckKernelName, kernelName);
if (status == 1) {
- if (&env == (KernelEnv *) NULL
+ if (&kEnv == (KernelEnv *) NULL
|| &function == (cl_kernel_function *) NULL) {
return 0;
}
- return (function(userdata, &env));
+ return (function(userdata, &kEnv));
}
return 0;
}
@@ -592,7 +598,7 @@ int OpenclDevice::InitOpenclRunEnv(int argc, const char *buildOptionKernelfiles)
printf("init_opencl_env successed.\n");
//initialize program, kernelName, kernelCount
status = CompileKernelFile( &gpuEnv, buildOptionKernelfiles);
- if (status == 0 || gpuEnv.kernelCount == 0) {
+ if (status == 0 || gpuEnv.mnKernelCount == 0) {
printf("CompileKernelFile failed.\n");
return 1;
}
@@ -614,12 +620,12 @@ int OpenclDevice::InitOpenclRunEnv(GPUEnv *gpuInfo)
// Have a look at the available platforms.
- if (!gpuInfo->isUserCreated) {
+ if (!gpuInfo->mnIsUserCreated) {
status = clGetPlatformIDs(0, NULL, &numPlatforms);
if (status != CL_SUCCESS) {
return 1;
}
- gpuInfo->platform = NULL;
+ gpuInfo->mpPlatformID = NULL;
if (0 < numPlatforms) {
platforms = (cl_platform_id*) malloc(
@@ -640,18 +646,18 @@ int OpenclDevice::InitOpenclRunEnv(GPUEnv *gpuInfo)
if (status != CL_SUCCESS) {
return 1;
}
- gpuInfo->platform = platforms[i];
+ gpuInfo->mpPlatformID = platforms[i];
//if (!strcmp(platformName, "Intel(R) Coporation"))
//if( !strcmp( platformName, "Advanced Micro Devices, Inc." ))
{
- gpuInfo->platform = platforms[i];
+ gpuInfo->mpPlatformID = platforms[i];
- status = clGetDeviceIDs(gpuInfo->platform, // platform
- CL_DEVICE_TYPE_GPU, // device_type
- 0, // num_entries
- NULL, // devices
- &numDevices);
+ status = clGetDeviceIDs(gpuInfo->mpPlatformID, // platform
+ CL_DEVICE_TYPE_GPU, // device_type
+ 0, // num_entries
+ NULL, // devices
+ &numDevices);
if (status != CL_SUCCESS) {
return 1;
@@ -664,65 +670,65 @@ int OpenclDevice::InitOpenclRunEnv(GPUEnv *gpuInfo)
}
free(platforms);
}
- if (NULL == gpuInfo->platform) {
+ if (NULL == gpuInfo->mpPlatformID) {
return 1;
}
// Use available platform.
cps[0] = CL_CONTEXT_PLATFORM;
- cps[1] = (cl_context_properties) gpuInfo->platform;
+ cps[1] = (cl_context_properties) gpuInfo->mpPlatformID;
cps[2] = 0;
// Check for GPU.
- gpuInfo->dType = CL_DEVICE_TYPE_GPU;
- gpuInfo->context = clCreateContextFromType(cps, gpuInfo->dType, NULL,
+ gpuInfo->mDevType = CL_DEVICE_TYPE_GPU;
+ gpuInfo->mpContext = clCreateContextFromType(cps, gpuInfo->mDevType, NULL,
NULL, &status);
- if ((gpuInfo->context == (cl_context) NULL)
+ if ((gpuInfo->mpContext == (cl_context) NULL)
|| (status != CL_SUCCESS)) {
- gpuInfo->dType = CL_DEVICE_TYPE_CPU;
- gpuInfo->context = clCreateContextFromType(cps, gpuInfo->dType,
+ gpuInfo->mDevType = CL_DEVICE_TYPE_CPU;
+ gpuInfo->mpContext = clCreateContextFromType(cps, gpuInfo->mDevType,
NULL, NULL, &status);
}
- if ((gpuInfo->context == (cl_context) NULL)
+ if ((gpuInfo->mpContext == (cl_context) NULL)
|| (status != CL_SUCCESS)) {
- gpuInfo->dType = CL_DEVICE_TYPE_DEFAULT;
- gpuInfo->context = clCreateContextFromType(cps, gpuInfo->dType,
+ gpuInfo->mDevType = CL_DEVICE_TYPE_DEFAULT;
+ gpuInfo->mpContext = clCreateContextFromType(cps, gpuInfo->mDevType,
NULL, NULL, &status);
}
- if ((gpuInfo->context == (cl_context) NULL)
+ if ((gpuInfo->mpContext == (cl_context) NULL)
|| (status != CL_SUCCESS)) {
return 1;
}
// Detect OpenCL devices.
// First, get the size of device list data
- status = clGetContextInfo(gpuInfo->context, CL_CONTEXT_DEVICES, 0,
+ status = clGetContextInfo(gpuInfo->mpContext, CL_CONTEXT_DEVICES, 0,
NULL, &length);
if ((status != CL_SUCCESS) || (length == 0)) {
return 1;
}
// Now allocate memory for device list based on the size we got earlier
- gpuInfo->devices = (cl_device_id*) malloc(length);
- if (gpuInfo->devices == (cl_device_id*) NULL) {
+ gpuInfo->mpArryDevsID = (cl_device_id*) malloc(length);
+ if (gpuInfo->mpArryDevsID == (cl_device_id*) NULL) {
return 1;
}
// Now, get the device list data
- status = clGetContextInfo(gpuInfo->context, CL_CONTEXT_DEVICES, length,
- gpuInfo->devices, NULL);
+ status = clGetContextInfo(gpuInfo->mpContext, CL_CONTEXT_DEVICES, length,
+ gpuInfo->mpArryDevsID, NULL);
if (status != CL_SUCCESS) {
return 1;
}
// Create OpenCL command queue.
- gpuInfo->commandQueue = clCreateCommandQueue(gpuInfo->context,
- gpuInfo->devices[0], 0, &status);
+ gpuInfo->mpCmdQueue = clCreateCommandQueue(gpuInfo->mpContext,
+ gpuInfo->mpArryDevsID[0], 0, &status);
if (status != CL_SUCCESS) {
return 1;
}
}
- status = clGetCommandQueueInfo(gpuInfo->commandQueue,
+ status = clGetCommandQueueInfo(gpuInfo->mpCmdQueue,
CL_QUEUE_THREAD_HANDLE_AMD, 0, NULL, NULL);
return 0;
@@ -730,16 +736,16 @@ int OpenclDevice::InitOpenclRunEnv(GPUEnv *gpuInfo)
}
int OpenclDevice::RegisterKernelWrapper(const char *kernelName,cl_kernel_function function)
{
- int i;
- printf("oclwrapper:RegisterKernelWrapper...%d\n", gpuEnv.kernelCount);
- for (i = 0; i < gpuEnv.kernelCount; i++)
- {
- if (strcasecmp(kernelName, gpuEnv.kernelNames[i]) == 0)
- {
- gpuEnv.kernelFunctions[i] = function;
- return 1;
- }
- }
+ int i;
+ //printf("oclwrapper:RegisterKernelWrapper...%d\n", gpuEnv.mnKernelCount);
+ for (i = 0; i < gpuEnv.mnKernelCount; i++)
+ {
+ if (strcasecmp(kernelName, gpuEnv.mArrykernelNames[i]) == 0)
+ {
+ gpuEnv.mpArryKnelFuncs[i] = function;
+ return 1;
+ }
+ }
return 0;
}
@@ -771,20 +777,20 @@ int OclFormulax(void ** usrdata, KernelEnv *env) {
tdata[i] = (float) data[i];
}
- env->kernel = clCreateKernel(env->program, "oclformula", &clStatus);
+ env->mpkKernel = clCreateKernel(env->mpkProgram, "oclformula", &clStatus);
//printf("ScInterpreter::IterateParameters...after clCreateKernel.\n");
//fprintf(stderr, "\nIn OpenclDevice,...after clCreateKernel\n");
int size = NUM;
- cl_mem formula_data = clCreateBuffer(env->context,
+ cl_mem formula_data = clCreateBuffer(env->mpkContext,
(cl_mem_flags) (CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR),
size * sizeof(float), (void *) tdata, &clStatus);
//fprintf(stderr, "\nIn OpenclDevice,...after clCreateBuffer\n");
- status = clSetKernelArg(env->kernel, 0, sizeof(cl_mem),
+ status = clSetKernelArg(env->mpkKernel, 0, sizeof(cl_mem),
(void *) &formula_data);
CHECK_OPENCL(status)
- status = clSetKernelArg(env->kernel, 1, sizeof(unsigned int),
+ status = clSetKernelArg(env->mpkKernel, 1, sizeof(unsigned int),
(void *) &type);
CHECK_OPENCL(status)
@@ -794,21 +800,21 @@ int OclFormulax(void ** usrdata, KernelEnv *env) {
while (global_work_size[0] != 1) {
global_work_size[0] = global_work_size[0] / 2;
- status = clEnqueueNDRangeKernel(env->commandQueue, env->kernel, 1,
+ status = clEnqueueNDRangeKernel(env->mpkCmdQueue, env->mpkKernel, 1,
NULL, global_work_size, NULL, 0, NULL, NULL);
CHECK_OPENCL(status)
}
//fprintf(stderr, "\nIn OpenclDevice,...before clEnqueueReadBuffer\n");
- status = clEnqueueReadBuffer(env->commandQueue, formula_data, CL_FALSE, 0,
+ status = clEnqueueReadBuffer(env->mpkCmdQueue, formula_data, CL_FALSE, 0,
sizeof(float), (void *) &tdata, 0, NULL, NULL);
CHECK_OPENCL(status)
- status = clFinish(env->commandQueue);
+ status = clFinish(env->mpkCmdQueue);
CHECK_OPENCL(status)
//PPAStopCpuEvent(ppa_proc);
//fprintf(stderr, "\nIn OpenclDevice,...before clReleaseKernel\n");
- status = clReleaseKernel(env->kernel);
+ status = clReleaseKernel(env->mpkKernel);
CHECK_OPENCL(status)
status = clReleaseMemObject(formula_data);
CHECK_OPENCL(status)
@@ -839,20 +845,20 @@ int OclFormulaxDll(void ** usrdata, KernelEnv *env) {
tdata[i] = (float) data[i];
}
- env->kernel = clCreateKernel(env->program, "oclformula", &clStatus);
+ env->mpkKernel = clCreateKernel(env->mpkProgram, "oclformula", &clStatus);
//printf("ScInterpreter::IterateParameters...after clCreateKernel.\n");
//fprintf(stderr, "\nIn OpenclDevice,...after clCreateKernel\n");
int size = NUM;
- cl_mem formula_data = clCreateBuffer(env->context,
+ cl_mem formula_data = clCreateBuffer(env->mpkContext,
(cl_mem_flags) (CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR),
size * sizeof(float), (void *) tdata, &clStatus);
//fprintf(stderr, "\nIn OpenclDevice,...after clCreateBuffer\n");
- status = clSetKernelArg(env->kernel, 0, sizeof(cl_mem),
+ status = clSetKernelArg(env->mpkKernel, 0, sizeof(cl_mem),
(void *) &formula_data);
CHECK_OPENCL(status)
- status = clSetKernelArg(env->kernel, 1, sizeof(unsigned int),
+ status = clSetKernelArg(env->mpkKernel, 1, sizeof(unsigned int),
(void *) &type);
CHECK_OPENCL(status)
@@ -862,21 +868,21 @@ int OclFormulaxDll(void ** usrdata, KernelEnv *env) {
while (global_work_size[0] != 1) {
global_work_size[0] = global_work_size[0] / 2;
- status = clEnqueueNDRangeKernel(env->commandQueue, env->kernel, 1,
+ status = clEnqueueNDRangeKernel(env->mpkCmdQueue, env->mpkKernel, 1,
NULL, global_work_size, NULL, 0, NULL, NULL);
CHECK_OPENCL(status)
}
//fprintf(stderr, "\nIn OpenclDevice,...before clEnqueueReadBuffer\n");
- status = clEnqueueReadBuffer(env->commandQueue, formula_data, CL_FALSE, 0,
+ status = clEnqueueReadBuffer(env->mpkCmdQueue, formula_data, CL_FALSE, 0,
sizeof(float), (void *) &tdata, 0, NULL, NULL);
CHECK_OPENCL(status)
- status = clFinish(env->commandQueue);
+ status = clFinish(env->mpkCmdQueue);
CHECK_OPENCL(status)
//PPAStopCpuEvent(ppa_proc);
//fprintf(stderr, "\nIn OpenclDevice,...before clReleaseKernel\n");
- status = clReleaseKernel(env->kernel);
+ status = clReleaseKernel(env->mpkKernel);
CHECK_OPENCL(status)
status = clReleaseMemObject(formula_data);
CHECK_OPENCL(status)
@@ -893,13 +899,13 @@ int OclFormulaxDll(void ** usrdata, KernelEnv *env) {
double OclCalc::OclProcess(cl_kernel_function function, double *data, formulax type)
{
- fprintf(stderr, "\n OpenclDevice, proc...begin\n");
- double ret = 0;
- void *usrdata[2];
- usrdata[0] = (void *) data;
- usrdata[1] = (void *) &type;
- RunKernelWrapper(function, "oclformula", usrdata);
- return ret;
+ fprintf(stderr, "\n OpenclDevice, proc...begin\n");
+ double ret = 0;
+ void *usrdata[2];
+ usrdata[0] = (void *) data;
+ usrdata[1] = (void *) &type;
+ RunKernelWrapper(function, "oclformula", usrdata);
+ return ret;
}
double OclCalc::OclTest() {
@@ -926,467 +932,1141 @@ double OclCalc::OclTestDll() {
OclCalc::OclCalc()
{
- OpenclDevice::SetOpenclState(1);
- fprintf(stderr,"OclCalc:: init opencl ok.\n");
+ fprintf(stderr,"OclCalc:: init opencl ...\n");
}
OclCalc::~OclCalc()
{
- OpenclDevice::SetOpenclState(0);
- fprintf(stderr,"OclCalc:: opencl end ok.\n");
+ fprintf(stderr,"OclCalc:: opencl end ...\n");
}
/////////////////////////////////////////////////////////////////////////////
-int OclCalc::OclHostFormulaMax(double *srcData,int *start,int *end,double *output,int size) {
- KernelEnv env;
- const char *kernelName = "oclFormulaMax";
- CheckKernelName(&env,kernelName);
- cl_int clStatus;
- size_t global_work_size[1];
- int alignSize = size + end[0]-start[0];
-
- env.kernel = clCreateKernel(env.program,kernelName, &clStatus);
- cl_int ret=0;
- cl_mem inputCl = clCreateBuffer(env.context,(cl_mem_flags) (CL_MEM_READ_WRITE),
- alignSize * sizeof(float), NULL, &clStatus);
- cl_mem startCl = clCreateBuffer(env.context, (cl_mem_flags) (CL_MEM_READ_WRITE),
- size * sizeof(unsigned int), NULL, &ret);
- cl_mem endCl = clCreateBuffer(env.context, (cl_mem_flags) (CL_MEM_READ_WRITE),
- size * sizeof(unsigned int), NULL, &ret);
- cl_mem outputCl = clCreateBuffer(env.context, (cl_mem_flags) (CL_MEM_READ_WRITE),
- size* sizeof(float), NULL, &ret);
-
- float * hostMapSrc = (float *)clEnqueueMapBuffer(env.commandQueue,inputCl,CL_TRUE,CL_MAP_WRITE,0,alignSize * sizeof(float),0,NULL,NULL,NULL);
- int * hostMapStart = (int *)clEnqueueMapBuffer(env.commandQueue,startCl,CL_TRUE,CL_MAP_WRITE,0,size * sizeof(unsigned int),0,NULL,NULL,NULL);
- int * hostMapEnd = (int *)clEnqueueMapBuffer(env.commandQueue,endCl,CL_TRUE,CL_MAP_WRITE,0,size * sizeof(unsigned int),0,NULL,NULL,NULL);
- for(int i=0;i<size;i++)
- {
- hostMapStart[i] = start[i];
- hostMapEnd[i] = end[i];
- }
- for(int i=0;i<alignSize;i++)
- hostMapSrc[i] = (float)srcData[i];
- clEnqueueUnmapMemObject(env.commandQueue,inputCl,hostMapSrc,0,NULL,NULL);
- clEnqueueUnmapMemObject(env.commandQueue,startCl,hostMapStart,0,NULL,NULL);
- clEnqueueUnmapMemObject(env.commandQueue,endCl,hostMapEnd,0,NULL,NULL);
-
- clStatus = clSetKernelArg(env.kernel, 0, sizeof(cl_mem),
- (void *)&inputCl);
- clStatus = clSetKernelArg(env.kernel, 1, sizeof(cl_mem),
- (void *)&startCl);
- clStatus = clSetKernelArg(env.kernel, 2, sizeof(cl_mem),
- (void *)&endCl);
- clStatus = clSetKernelArg(env.kernel, 3, sizeof(cl_mem),
- (void *)&outputCl);
- CHECK_OPENCL(clStatus);
-
- global_work_size[0] = size;
- clStatus = clEnqueueNDRangeKernel(env.commandQueue, env.kernel, 1,
- NULL, global_work_size, NULL, 0, NULL, NULL);
- CHECK_OPENCL(clStatus)
-
- float * outPutMap = (float *)clEnqueueMapBuffer(env.commandQueue,outputCl,CL_TRUE,CL_MAP_READ,0,size*sizeof(float),0,NULL,NULL,NULL);
- for(int i=0;i<size;i++)
- output[i]=outPutMap[i];
-
- clEnqueueUnmapMemObject(env.commandQueue,outputCl,outPutMap,0,NULL,NULL);
- clStatus = clFinish(env.commandQueue);
-
- CHECK_OPENCL(clStatus);
- clStatus = clReleaseKernel(env.kernel);
- CHECK_OPENCL(clStatus);
- clStatus = clReleaseMemObject(inputCl);
- CHECK_OPENCL(clStatus);
- clStatus = clReleaseMemObject(startCl);
- CHECK_OPENCL(clStatus);
- clStatus = clReleaseMemObject(endCl);
- CHECK_OPENCL(clStatus);
- clStatus = clReleaseMemObject(outputCl);
- CHECK_OPENCL(clStatus);
- return 0;
+#ifdef GPU_64BITS
+int OclCalc::OclHostFormulaMax(double *srcData,int *start,int *end,double *output,int size)
+{
+ KernelEnv kEnv;
+ const char *kernelName = "oclFormulaMax";
+ CheckKernelName(&kEnv,kernelName);
+ cl_int clStatus;
+ size_t global_work_size[1];
+ int alignSize = size + end[0]-start[0];
+
+ kEnv.mpkKernel = clCreateKernel(kEnv.mpkProgram,kernelName, &clStatus);
+ cl_int ret=0;
+ cl_mem inputCl = clCreateBuffer(kEnv.mpkContext,(cl_mem_flags) (CL_MEM_READ_WRITE),
+ alignSize * sizeof(float), NULL, &clStatus);
+ cl_mem startCl = clCreateBuffer(kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE),
+ size * sizeof(unsigned int), NULL, &ret);
+ cl_mem endCl = clCreateBuffer(kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE),
+ size * sizeof(unsigned int), NULL, &ret);
+ cl_mem outputCl = clCreateBuffer(kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE),
+ size* sizeof(float), NULL, &ret);
+
+ float * hostMapSrc = (float *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,inputCl,CL_TRUE,CL_MAP_WRITE,0,alignSize * sizeof(float),0,NULL,NULL,NULL);
+ int * hostMapStart = (int *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,startCl,CL_TRUE,CL_MAP_WRITE,0,size * sizeof(unsigned int),0,NULL,NULL,NULL);
+ int * hostMapEnd = (int *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,endCl,CL_TRUE,CL_MAP_WRITE,0,size * sizeof(unsigned int),0,NULL,NULL,NULL);
+ for(int i=0;i<size;i++)
+ {
+ hostMapStart[i] = start[i];
+ hostMapEnd[i] = end[i];
+ }
+ for(int i=0;i<alignSize;i++)
+ hostMapSrc[i] = (float)srcData[i];
+ clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,inputCl,hostMapSrc,0,NULL,NULL);
+ clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,startCl,hostMapStart,0,NULL,NULL);
+ clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,endCl,hostMapEnd,0,NULL,NULL);
+
+ clStatus = clSetKernelArg(kEnv.mpkKernel, 0, sizeof(cl_mem),
+ (void *)&inputCl);
+ clStatus = clSetKernelArg(kEnv.mpkKernel, 1, sizeof(cl_mem),
+ (void *)&startCl);
+ clStatus = clSetKernelArg(kEnv.mpkKernel, 2, sizeof(cl_mem),
+ (void *)&endCl);
+ clStatus = clSetKernelArg(kEnv.mpkKernel, 3, sizeof(cl_mem),
+ (void *)&outputCl);
+ CHECK_OPENCL(clStatus);
+
+ global_work_size[0] = size;
+ clStatus = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, kEnv.mpkKernel, 1,
+ NULL, global_work_size, NULL, 0, NULL, NULL);
+ CHECK_OPENCL(clStatus);
+
+ float * outPutMap = (float *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,outputCl,CL_TRUE,CL_MAP_READ,0,size*sizeof(float),0,NULL,NULL,NULL);
+ for(int i=0;i<size;i++)
+ output[i]=outPutMap[i];
+
+ clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,outputCl,outPutMap,0,NULL,NULL);
+ clStatus = clFinish(kEnv.mpkCmdQueue);
+ CHECK_OPENCL(clStatus);
+ clStatus = clReleaseKernel(kEnv.mpkKernel);
+ CHECK_OPENCL(clStatus);
+ clStatus = clReleaseMemObject(inputCl);
+ CHECK_OPENCL(clStatus);
+ clStatus = clReleaseMemObject(startCl);
+ CHECK_OPENCL(clStatus);
+ clStatus = clReleaseMemObject(endCl);
+ CHECK_OPENCL(clStatus);
+ clStatus = clReleaseMemObject(outputCl);
+ CHECK_OPENCL(clStatus);
+ return 0;
}
-int OclCalc::OclHostFormulaMin(double *srcData,int *start,int *end,double *output,int size) {
- KernelEnv env;
- const char *kernelName = "oclFormulaMin";
- CheckKernelName(&env,kernelName);
-
- cl_int clStatus;
- size_t global_work_size[1];
- int alignSize = size + end[0]-start[0];
-
- env.kernel = clCreateKernel(env.program,kernelName, &clStatus);
- cl_int ret=0;
- cl_mem inputCl = clCreateBuffer(env.context,(cl_mem_flags) (CL_MEM_READ_WRITE),
- alignSize * sizeof(float), NULL, &clStatus);
- cl_mem startCl = clCreateBuffer(env.context, (cl_mem_flags) (CL_MEM_READ_WRITE),
- size * sizeof(unsigned int), NULL, &ret);
- cl_mem endCl = clCreateBuffer(env.context, (cl_mem_flags) (CL_MEM_READ_WRITE),
- size * sizeof(unsigned int), NULL, &ret);
- cl_mem outputCl = clCreateBuffer(env.context, (cl_mem_flags) (CL_MEM_READ_WRITE),
- size* sizeof(float), NULL, &ret);
-
- float * hostMapSrc = (float *)clEnqueueMapBuffer(env.commandQueue,inputCl,CL_TRUE,CL_MAP_WRITE,0,alignSize * sizeof(float),0,NULL,NULL,NULL);
- int * hostMapStart = (int *)clEnqueueMapBuffer(env.commandQueue,startCl,CL_TRUE,CL_MAP_WRITE,0,size * sizeof(unsigned int),0,NULL,NULL,NULL);
- int * hostMapEnd = (int *)clEnqueueMapBuffer(env.commandQueue,endCl,CL_TRUE,CL_MAP_WRITE,0,size * sizeof(unsigned int),0,NULL,NULL,NULL);
- for(int i=0;i<size;i++)
- {
- hostMapStart[i] = start[i];
- hostMapEnd[i] = end[i];
- }
- for(int i=0;i<alignSize;i++)
- hostMapSrc[i] = (float)srcData[i];
- clEnqueueUnmapMemObject(env.commandQueue,inputCl,hostMapSrc,0,NULL,NULL);
- clEnqueueUnmapMemObject(env.commandQueue,startCl,hostMapStart,0,NULL,NULL);
- clEnqueueUnmapMemObject(env.commandQueue,endCl,hostMapEnd,0,NULL,NULL);
-
- clStatus = clSetKernelArg(env.kernel, 0, sizeof(cl_mem),
- (void *)&inputCl);
- clStatus = clSetKernelArg(env.kernel, 1, sizeof(cl_mem),
- (void *)&startCl);
- clStatus = clSetKernelArg(env.kernel, 2, sizeof(cl_mem),
- (void *)&endCl);
- clStatus = clSetKernelArg(env.kernel, 3, sizeof(cl_mem),
- (void *)&outputCl);
- CHECK_OPENCL(clStatus);
-
- global_work_size[0] = size;
- clStatus = clEnqueueNDRangeKernel(env.commandQueue, env.kernel, 1,
- NULL, global_work_size, NULL, 0, NULL, NULL);
- CHECK_OPENCL(clStatus)
-
- float * outPutMap = (float *)clEnqueueMapBuffer(env.commandQueue,outputCl,CL_TRUE,CL_MAP_READ,0,size*sizeof(float),0,NULL,NULL,NULL);
- for(int i=0;i<size;i++)
- output[i]=outPutMap[i];
-
- clEnqueueUnmapMemObject(env.commandQueue,outputCl,outPutMap,0,NULL,NULL);
- clStatus = clFinish(env.commandQueue);
-
- CHECK_OPENCL(clStatus);
- clStatus = clReleaseKernel(env.kernel);
- CHECK_OPENCL(clStatus);
- clStatus = clReleaseMemObject(inputCl);
- CHECK_OPENCL(clStatus);
- clStatus = clReleaseMemObject(startCl);
- CHECK_OPENCL(clStatus);
- clStatus = clReleaseMemObject(endCl);
- CHECK_OPENCL(clStatus);
- clStatus = clReleaseMemObject(outputCl);
- CHECK_OPENCL(clStatus);
- return 0;
+int OclCalc::OclHostFormulaMin(double *srcData,int *start,int *end,double *output,int size)
+{
+ KernelEnv kEnv;
+ const char *kernelName = "oclFormulaMin";
+ CheckKernelName(&kEnv,kernelName);
+ cl_int clStatus;
+ size_t global_work_size[1];
+ int alignSize = size + end[0]-start[0];
+
+ kEnv.mpkKernel = clCreateKernel(kEnv.mpkProgram,kernelName, &clStatus);
+ cl_int ret=0;
+ cl_mem inputCl = clCreateBuffer(kEnv.mpkContext,(cl_mem_flags) (CL_MEM_READ_WRITE),
+ alignSize * sizeof(float), NULL, &clStatus);
+ cl_mem startCl = clCreateBuffer(kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE),
+ size * sizeof(unsigned int), NULL, &ret);
+ cl_mem endCl = clCreateBuffer(kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE),
+ size * sizeof(unsigned int), NULL, &ret);
+ cl_mem outputCl = clCreateBuffer(kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE),
+ size* sizeof(float), NULL, &ret);
+
+ float * hostMapSrc = (float *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,inputCl,CL_TRUE,CL_MAP_WRITE,0,alignSize * sizeof(float),0,NULL,NULL,NULL);
+ int * hostMapStart = (int *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,startCl,CL_TRUE,CL_MAP_WRITE,0,size * sizeof(unsigned int),0,NULL,NULL,NULL);
+ int * hostMapEnd = (int *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,endCl,CL_TRUE,CL_MAP_WRITE,0,size * sizeof(unsigned int),0,NULL,NULL,NULL);
+ for(int i=0;i<size;i++)
+ {
+ hostMapStart[i] = start[i];
+ hostMapEnd[i] = end[i];
+ }
+ for(int i=0;i<alignSize;i++)
+ hostMapSrc[i] = (float)srcData[i];
+ clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,inputCl,hostMapSrc,0,NULL,NULL);
+ clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,startCl,hostMapStart,0,NULL,NULL);
+ clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,endCl,hostMapEnd,0,NULL,NULL);
+
+ clStatus = clSetKernelArg(kEnv.mpkKernel, 0, sizeof(cl_mem),
+ (void *)&inputCl);
+ clStatus = clSetKernelArg(kEnv.mpkKernel, 1, sizeof(cl_mem),
+ (void *)&startCl);
+ clStatus = clSetKernelArg(kEnv.mpkKernel, 2, sizeof(cl_mem),
+ (void *)&endCl);
+ clStatus = clSetKernelArg(kEnv.mpkKernel, 3, sizeof(cl_mem),
+ (void *)&outputCl);
+ CHECK_OPENCL(clStatus);
+
+ global_work_size[0] = size;
+ clStatus = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, kEnv.mpkKernel, 1,
+ NULL, global_work_size, NULL, 0, NULL, NULL);
+ CHECK_OPENCL(clStatus);
+
+ float * outPutMap = (float *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,outputCl,CL_TRUE,CL_MAP_READ,0,size*sizeof(float),0,NULL,NULL,NULL);
+ for(int i=0;i<size;i++)
+ output[i]=outPutMap[i];
+
+ clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,outputCl,outPutMap,0,NULL,NULL);
+ clStatus = clFinish(kEnv.mpkCmdQueue);
+ CHECK_OPENCL(clStatus);
+ clStatus = clReleaseKernel(kEnv.mpkKernel);
+ CHECK_OPENCL(clStatus);
+ clStatus = clReleaseMemObject(inputCl);
+ CHECK_OPENCL(clStatus);
+ clStatus = clReleaseMemObject(startCl);
+ CHECK_OPENCL(clStatus);
+ clStatus = clReleaseMemObject(endCl);
+ CHECK_OPENCL(clStatus);
+ clStatus = clReleaseMemObject(outputCl);
+ CHECK_OPENCL(clStatus);
+ return 0;
}
-int OclCalc::OclHostFormulaAverage(double *srcData,int *start,int *end,double *output,int size) {
- KernelEnv env;
- const char *kernelName = "oclFormulaAverage";
- CheckKernelName(&env,kernelName);
-
- cl_int clStatus;
- size_t global_work_size[1];
- int alignSize = size + end[0]-start[0];
-
- env.kernel = clCreateKernel(env.program, kernelName, &clStatus);
- cl_int ret=0;
- cl_mem inputCl = clCreateBuffer(env.context,(cl_mem_flags) (CL_MEM_READ_WRITE),
- alignSize * sizeof(float), NULL, &clStatus);
- cl_mem startCl = clCreateBuffer(env.context, (cl_mem_flags) (CL_MEM_READ_WRITE),
- size * sizeof(unsigned int), NULL, &ret);
- cl_mem endCl = clCreateBuffer(env.context, (cl_mem_flags) (CL_MEM_READ_WRITE),
- size * sizeof(unsigned int), NULL, &ret);
- cl_mem outputCl = clCreateBuffer(env.context, (cl_mem_flags) (CL_MEM_READ_WRITE),
- size* sizeof(float), NULL, &ret);
-
- float * hostMapSrc = (float *)clEnqueueMapBuffer(env.commandQueue,inputCl,CL_TRUE,CL_MAP_WRITE,0,alignSize * sizeof(float),0,NULL,NULL,NULL);
- int * hostMapStart = (int *)clEnqueueMapBuffer(env.commandQueue,startCl,CL_TRUE,CL_MAP_WRITE,0,size * sizeof(unsigned int),0,NULL,NULL,NULL);
- int * hostMapEnd = (int *)clEnqueueMapBuffer(env.commandQueue,endCl,CL_TRUE,CL_MAP_WRITE,0,size * sizeof(unsigned int),0,NULL,NULL,NULL);
- for(int i=0;i<size;i++)
- {
- hostMapStart[i] = start[i];
- hostMapEnd[i] = end[i];
- }
- for(int i=0;i<alignSize;i++)
- hostMapSrc[i] = (float)srcData[i];
- clEnqueueUnmapMemObject(env.commandQueue,inputCl,hostMapSrc,0,NULL,NULL);
- clEnqueueUnmapMemObject(env.commandQueue,startCl,hostMapStart,0,NULL,NULL);
- clEnqueueUnmapMemObject(env.commandQueue,endCl,hostMapEnd,0,NULL,NULL);
-
- clStatus = clSetKernelArg(env.kernel, 0, sizeof(cl_mem),
- (void *)&inputCl);
- clStatus = clSetKernelArg(env.kernel, 1, sizeof(cl_mem),
- (void *)&startCl);
- clStatus = clSetKernelArg(env.kernel, 2, sizeof(cl_mem),
- (void *)&endCl);
- clStatus = clSetKernelArg(env.kernel, 3, sizeof(cl_mem),
- (void *)&outputCl);
- CHECK_OPENCL(clStatus);
-
- global_work_size[0] = size;
- clStatus = clEnqueueNDRangeKernel(env.commandQueue, env.kernel, 1,
- NULL, global_work_size, NULL, 0, NULL, NULL);
- CHECK_OPENCL(clStatus)
-
- float * outPutMap = (float *)clEnqueueMapBuffer(env.commandQueue,outputCl,CL_TRUE,CL_MAP_READ,0,size*sizeof(float),0,NULL,NULL,NULL);
- for(int i=0;i<size;i++)
- output[i]=outPutMap[i];
-
- clEnqueueUnmapMemObject(env.commandQueue,outputCl,outPutMap,0,NULL,NULL);
- clStatus = clFinish(env.commandQueue);
-
- CHECK_OPENCL(clStatus);
- clStatus = clReleaseKernel(env.kernel);
- CHECK_OPENCL(clStatus);
- clStatus = clReleaseMemObject(inputCl);
- CHECK_OPENCL(clStatus);
- clStatus = clReleaseMemObject(startCl);
- CHECK_OPENCL(clStatus);
- clStatus = clReleaseMemObject(endCl);
- CHECK_OPENCL(clStatus);
- clStatus = clReleaseMemObject(outputCl);
- CHECK_OPENCL(clStatus);
- return 0;
+int OclCalc::OclHostFormulaAverage(double *srcData,int *start,int *end,double *output,int size)
+{
+ KernelEnv kEnv;
+ const char *kernelName = "oclFormulaAverage";
+ CheckKernelName(&kEnv,kernelName);
+ cl_int clStatus;
+ size_t global_work_size[1];
+ int alignSize = size + end[0]-start[0];
+
+ kEnv.mpkKernel = clCreateKernel(kEnv.mpkProgram,kernelName, &clStatus);
+ cl_int ret=0;
+ cl_mem inputCl = clCreateBuffer(kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR),
+ alignSize * sizeof(float), NULL, &clStatus);
+ cl_mem startCl = clCreateBuffer(kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR),
+ size * sizeof(unsigned int), NULL, &ret);
+ cl_mem endCl = clCreateBuffer(kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR),
+ size * sizeof(unsigned int), NULL, &ret);
+ cl_mem outputCl = clCreateBuffer(kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE),
+ size* sizeof(float), NULL, &ret);
+
+ float * hostMapSrc = (float *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,inputCl,CL_TRUE,CL_MAP_WRITE_INVALIDATE_REGION,0,alignSize * sizeof(float),0,NULL,NULL,NULL);
+ int * hostMapStart = (int *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,startCl,CL_TRUE,CL_MAP_WRITE_INVALIDATE_REGION,0,size * sizeof(unsigned int),0,NULL,NULL,NULL);
+ int * hostMapEnd = (int *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,endCl,CL_TRUE,CL_MAP_WRITE_INVALIDATE_REGION,0,size * sizeof(unsigned int),0,NULL,NULL,NULL);
+#if 1
+ for(int i=0;i<size;i++)
+ {
+ hostMapStart[i] = start[i];
+ hostMapEnd[i] = end[i];
+ }
+ for(int i=0;i<alignSize;i++)
+ hostMapSrc[i] = (float)srcData[i];
+ //memcpy(hostMapSrc,srcData,alignSize * sizeof(float));
+#endif
+ for(sal_Int32 i = 0; i < alignSize; ++i){//dbg
+ fprintf(stderr,"In avg host,hostMapSrc[%d] is ...%f\n",i,hostMapSrc[i]);
+ }
-}
+ clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,inputCl,hostMapSrc, 0,NULL,NULL);
+ clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,startCl,hostMapStart,0,NULL,NULL);
+ clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,endCl, hostMapEnd, 0,NULL,NULL);
-int OclCalc::OclHostSignedAdd(double *lData,double *rData,double *rResult,int dSize) {
- KernelEnv env;
- int status;
- const char *kernelName = "oclSignedAdd";
- CheckKernelName(&env,kernelName);
-
-
- cl_int clStatus;
- size_t global_work_size[1];
-
- env.kernel = clCreateKernel(env.program,kernelName, &clStatus);
- cl_mem clLiftData = clCreateBuffer(env.context,
- (cl_mem_flags) (CL_MEM_READ_WRITE),
- dSize * sizeof(float), NULL, &clStatus);
- cl_mem clRightData = clCreateBuffer(env.context,
- (cl_mem_flags) (CL_MEM_READ_WRITE),
- dSize * sizeof(float), NULL, &clStatus);
- cl_mem clResult = clCreateBuffer(env.context,
- (cl_mem_flags) (CL_MEM_READ_WRITE),
- dSize * sizeof(float), NULL, &clStatus);
-
- float * hostMapLeftData = (float *)clEnqueueMapBuffer(env.commandQueue,clLiftData,CL_TRUE,CL_MAP_WRITE,0,dSize*sizeof(float),0,NULL,NULL,NULL);
- float * hostMapRightData = (float *)clEnqueueMapBuffer(env.commandQueue,clRightData,CL_TRUE,CL_MAP_WRITE,0,dSize*sizeof(float),0,NULL,NULL,NULL);
- for(int i=0;i<dSize;i++)
- {
- hostMapLeftData[i] = (float)lData[i];
- hostMapRightData[i] = (float)rData[i];
- }
- clEnqueueUnmapMemObject(env.commandQueue,clLiftData,hostMapLeftData,0,NULL,NULL);
- clEnqueueUnmapMemObject(env.commandQueue,clRightData,hostMapRightData,0,NULL,NULL);
-
- status = clSetKernelArg(env.kernel, 0, sizeof(cl_mem),
- (void *)&clLiftData);
- status = clSetKernelArg(env.kernel, 1, sizeof(cl_mem),
- (void *)&clRightData);
- status = clSetKernelArg(env.kernel, 2, sizeof(cl_mem),
- (void *)&clResult);
- CHECK_OPENCL(status)
- global_work_size[0] = dSize;
- status = clEnqueueNDRangeKernel(env.commandQueue, env.kernel, 1,
- NULL, global_work_size, NULL, 0, NULL, NULL);
- CHECK_OPENCL(status);
-
- float * hostMapResult = (float *)clEnqueueMapBuffer(env.commandQueue,clResult,CL_TRUE,CL_MAP_READ,0,dSize*sizeof(float),0,NULL,NULL,NULL);
- for(int i=0;i<dSize;i++)
- rResult[i]=hostMapResult[i];
- clEnqueueUnmapMemObject(env.commandQueue,clResult,hostMapResult,0,NULL,NULL);
-
- CHECK_OPENCL(status);
- status = clFinish(env.commandQueue);
- CHECK_OPENCL(status);
- status = clReleaseKernel(env.kernel);
- CHECK_OPENCL(status);
- status = clReleaseMemObject(clLiftData);
- CHECK_OPENCL(status);
- status = clReleaseMemObject(clRightData);
- CHECK_OPENCL(status);
- status = clReleaseMemObject(clResult);
- CHECK_OPENCL(status);
- return 0;
+ clStatus = clSetKernelArg(kEnv.mpkKernel, 0, sizeof(cl_mem),
+ (void *)&inputCl);
+ clStatus = clSetKernelArg(kEnv.mpkKernel, 1, sizeof(cl_mem),
+ (void *)&startCl);
+ clStatus = clSetKernelArg(kEnv.mpkKernel, 2, sizeof(cl_mem),
+ (void *)&endCl);
+ clStatus = clSetKernelArg(kEnv.mpkKernel, 3, sizeof(cl_mem),
+ (void *)&outputCl);
+ CHECK_OPENCL(clStatus);
+
+ global_work_size[0] = size;
+ clStatus = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, kEnv.mpkKernel, 1,
+ NULL, global_work_size, NULL, 0, NULL, NULL);
+ CHECK_OPENCL(clStatus);
+
+ float * outPutMap = (float *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,outputCl,CL_TRUE,CL_MAP_READ,0,size*sizeof(float),0,NULL,NULL,NULL);
+
+ for(int i=0;i<size;i++){
+ //fprintf(stderr,"In avg host,outPutMap[%d] is ...%f\n",i,outPutMap[i]);
+ output[i]=outPutMap[i];
+ }
+
+ clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,outputCl,outPutMap,0,NULL,NULL);
+ clStatus = clFinish(kEnv.mpkCmdQueue);
+
+ CHECK_OPENCL(clStatus);
+ clStatus = clReleaseKernel(kEnv.mpkKernel);
+ CHECK_OPENCL(clStatus);
+ clStatus = clReleaseMemObject(inputCl);
+ CHECK_OPENCL(clStatus);
+ clStatus = clReleaseMemObject(startCl);
+ CHECK_OPENCL(clStatus);
+ clStatus = clReleaseMemObject(endCl);
+ CHECK_OPENCL(clStatus);
+ clStatus = clReleaseMemObject(outputCl);
+ CHECK_OPENCL(clStatus);
+ return 0;
+
+
}
-int OclCalc::OclHostSignedMul(double *lData,double *rData,double *rResult,int dSize) {
- KernelEnv env;
- int status;
- const char *kernelName = "oclSignedMul";
- CheckKernelName(&env,kernelName);
-
-
- size_t global_work_size[1];
- cl_int clStatus;
- env.kernel = clCreateKernel(env.program, kernelName, &clStatus);
- cl_mem clLiftData = clCreateBuffer(env.context,
- (cl_mem_flags) (CL_MEM_READ_WRITE),
- dSize * sizeof(float), NULL, &clStatus);
- cl_mem clRightData = clCreateBuffer(env.context,
- (cl_mem_flags) (CL_MEM_READ_WRITE),
- dSize * sizeof(float), NULL, &clStatus);
- cl_mem clResult = clCreateBuffer(env.context,
- (cl_mem_flags) (CL_MEM_READ_WRITE),
- dSize * sizeof(float), NULL, &clStatus);
-
- float * hostMapLeftData = (float *)clEnqueueMapBuffer(env.commandQueue,clLiftData,CL_TRUE,CL_MAP_WRITE,0,dSize*sizeof(float),0,NULL,NULL,NULL);
- float * hostMapRightData = (float *)clEnqueueMapBuffer(env.commandQueue,clRightData,CL_TRUE,CL_MAP_WRITE,0,dSize*sizeof(float),0,NULL,NULL,NULL);
- for(int i=0;i<dSize;i++)
- {
- hostMapLeftData[i] = (float)lData[i];
- hostMapRightData[i] = (float)rData[i];
- }
- clEnqueueUnmapMemObject(env.commandQueue,clLiftData,hostMapLeftData,0,NULL,NULL);
- clEnqueueUnmapMemObject(env.commandQueue,clRightData,hostMapRightData,0,NULL,NULL);
-
- status = clSetKernelArg(env.kernel, 0, sizeof(cl_mem),
- (void *)&clLiftData);
- status = clSetKernelArg(env.kernel, 1, sizeof(cl_mem),
- (void *)&clRightData);
- status = clSetKernelArg(env.kernel, 2, sizeof(cl_mem),
- (void *)&clResult);
- CHECK_OPENCL(status)
- global_work_size[0] = dSize;
- status = clEnqueueNDRangeKernel(env.commandQueue, env.kernel, 1,
- NULL, global_work_size, NULL, 0, NULL, NULL);
- CHECK_OPENCL(status);
-
- float * hostMapResult = (float *)clEnqueueMapBuffer(env.commandQueue,clResult,CL_TRUE,CL_MAP_READ,0,dSize*sizeof(float),0,NULL,NULL,NULL);
- for(int i=0;i<dSize;i++)
- rResult[i]=hostMapResult[i];
- clEnqueueUnmapMemObject(env.commandQueue,clResult,hostMapResult,0,NULL,NULL);
-
- CHECK_OPENCL(status);
- status = clFinish(env.commandQueue);
- CHECK_OPENCL(status);
- status = clReleaseKernel(env.kernel);
- CHECK_OPENCL(status);
- status = clReleaseMemObject(clLiftData);
- CHECK_OPENCL(status);
- status = clReleaseMemObject(clRightData);
- CHECK_OPENCL(status);
- status = clReleaseMemObject(clResult);
- CHECK_OPENCL(status);
- return 0;
+
+int OclCalc::OclHostSignedAdd(double *lData,double *rData,double *rResult,int dSize) {
+
+ KernelEnv kEnv;
+ int status;
+ const char *kernelName = "oclSignedAdd";
+ CheckKernelName(&kEnv,kernelName);
+
+
+ cl_int clStatus;
+ size_t global_work_size[1];
+
+ kEnv.mpkKernel = clCreateKernel(kEnv.mpkProgram,kernelName, &clStatus);
+ cl_mem clLiftData = clCreateBuffer(kEnv.mpkContext,
+ (cl_mem_flags) (CL_MEM_READ_WRITE),
+ dSize * sizeof(float), NULL, &clStatus);
+ cl_mem clRightData = clCreateBuffer(kEnv.mpkContext,
+ (cl_mem_flags) (CL_MEM_READ_WRITE),
+ dSize * sizeof(float), NULL, &clStatus);
+ cl_mem clResult = clCreateBuffer(kEnv.mpkContext,
+ (cl_mem_flags) (CL_MEM_READ_WRITE),
+ dSize * sizeof(float), NULL, &clStatus);
+
+ float * hostMapLeftData = (float *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,clLiftData,CL_TRUE,CL_MAP_WRITE,0,dSize*sizeof(float),0,NULL,NULL,NULL);
+ float * hostMapRightData = (float *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,clRightData,CL_TRUE,CL_MAP_WRITE,0,dSize*sizeof(float),0,NULL,NULL,NULL);
+ for(int i=0;i<dSize;i++)
+ {
+ hostMapLeftData[i] = (float)lData[i];
+ hostMapRightData[i] = (float)rData[i];
+ }
+ clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,clLiftData,hostMapLeftData,0,NULL,NULL);
+ clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,clRightData,hostMapRightData,0,NULL,NULL);
+
+ status = clSetKernelArg(kEnv.mpkKernel, 0, sizeof(cl_mem),
+ (void *)&clLiftData);
+ status = clSetKernelArg(kEnv.mpkKernel, 1, sizeof(cl_mem),
+ (void *)&clRightData);
+ status = clSetKernelArg(kEnv.mpkKernel, 2, sizeof(cl_mem),
+ (void *)&clResult);
+ CHECK_OPENCL(status)
+ global_work_size[0] = dSize;
+ status = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, kEnv.mpkKernel, 1,
+ NULL, global_work_size, NULL, 0, NULL, NULL);
+ CHECK_OPENCL(status);
+
+ float * hostMapResult = (float *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,clResult,CL_TRUE,CL_MAP_READ,0,dSize*sizeof(float),0,NULL,NULL,NULL);
+ for(int i=0;i<dSize;i++)
+ rResult[i]=hostMapResult[i];
+ clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,clResult,hostMapResult,0,NULL,NULL);
+
+ CHECK_OPENCL(status);
+ status = clFinish(kEnv.mpkCmdQueue);
+ CHECK_OPENCL(status);
+ status = clReleaseKernel(kEnv.mpkKernel);
+ CHECK_OPENCL(status);
+ status = clReleaseMemObject(clLiftData);
+ CHECK_OPENCL(status);
+ status = clReleaseMemObject(clRightData);
+ CHECK_OPENCL(status);
+ status = clReleaseMemObject(clResult);
+ CHECK_OPENCL(status);
+ return 0;
}
+
int OclCalc::OclHostSignedSub(double *lData,double *rData,double *rResult,int dSize) {
- KernelEnv env;
- int status;
- const char *kernelName = "oclSignedSub";
- CheckKernelName(&env,kernelName);
-
- cl_int clStatus;
- size_t global_work_size[1];
- env.kernel = clCreateKernel(env.program,kernelName, &clStatus);
- cl_mem clLiftData = clCreateBuffer(env.context,
- (cl_mem_flags) (CL_MEM_READ_WRITE),
- dSize * sizeof(float), NULL, &clStatus);
- cl_mem clRightData = clCreateBuffer(env.context,
- (cl_mem_flags) (CL_MEM_READ_WRITE),
- dSize * sizeof(float), NULL, &clStatus);
- cl_mem clResult = clCreateBuffer(env.context,
- (cl_mem_flags) (CL_MEM_READ_WRITE),
- dSize * sizeof(float), NULL, &clStatus);
-
- float * hostMapLeftData = (float *)clEnqueueMapBuffer(env.commandQueue,clLiftData,CL_TRUE,CL_MAP_WRITE,0,dSize*sizeof(float),0,NULL,NULL,NULL);
- float * hostMapRightData = (float *)clEnqueueMapBuffer(env.commandQueue,clRightData,CL_TRUE,CL_MAP_WRITE,0,dSize*sizeof(float),0,NULL,NULL,NULL);
- for(int i=0;i<dSize;i++)
- {
- hostMapLeftData[i] = (float)lData[i];
- hostMapRightData[i] = (float)rData[i];
- }
- clEnqueueUnmapMemObject(env.commandQueue,clLiftData,hostMapLeftData,0,NULL,NULL);
- clEnqueueUnmapMemObject(env.commandQueue,clRightData,hostMapRightData,0,NULL,NULL);
-
- status = clSetKernelArg(env.kernel, 0, sizeof(cl_mem),
- (void *)&clLiftData);
- status = clSetKernelArg(env.kernel, 1, sizeof(cl_mem),
- (void *)&clRightData);
- status = clSetKernelArg(env.kernel, 2, sizeof(cl_mem),
- (void *)&clResult);
- CHECK_OPENCL(status)
- global_work_size[0] = dSize;
- status = clEnqueueNDRangeKernel(env.commandQueue, env.kernel, 1,
- NULL, global_work_size, NULL, 0, NULL, NULL);
- CHECK_OPENCL(status);
-
- float * hostMapResult = (float *)clEnqueueMapBuffer(env.commandQueue,clResult,CL_TRUE,CL_MAP_READ,0,dSize*sizeof(float),0,NULL,NULL,NULL);
- for(int i=0;i<dSize;i++)
- rResult[i]=hostMapResult[i];
- clEnqueueUnmapMemObject(env.commandQueue,clResult,hostMapResult,0,NULL,NULL);
-
- CHECK_OPENCL(status);
- status = clFinish(env.commandQueue);
- CHECK_OPENCL(status);
- status = clReleaseKernel(env.kernel);
- CHECK_OPENCL(status);
- status = clReleaseMemObject(clLiftData);
- CHECK_OPENCL(status);
- status = clReleaseMemObject(clRightData);
- CHECK_OPENCL(status);
- status = clReleaseMemObject(clResult);
- CHECK_OPENCL(status);
- return 0;
+ KernelEnv kEnv;
+ int status;
+ const char *kernelName = "oclSignedSub";
+ CheckKernelName(&kEnv,kernelName);
+
+ cl_int clStatus;
+ size_t global_work_size[1];
+ kEnv.mpkKernel = clCreateKernel(kEnv.mpkProgram,kernelName, &clStatus);
+ cl_mem clLiftData = clCreateBuffer(kEnv.mpkContext,
+ (cl_mem_flags) (CL_MEM_READ_WRITE),
+ dSize * sizeof(float), NULL, &clStatus);
+ cl_mem clRightData = clCreateBuffer(kEnv.mpkContext,
+ (cl_mem_flags) (CL_MEM_READ_WRITE),
+ dSize * sizeof(float), NULL, &clStatus);
+ cl_mem clResult = clCreateBuffer(kEnv.mpkContext,
+ (cl_mem_flags) (CL_MEM_READ_WRITE),
+ dSize * sizeof(float), NULL, &clStatus);
+
+ float * hostMapLeftData = (float *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,clLiftData,CL_TRUE,CL_MAP_WRITE,0,dSize*sizeof(float),0,NULL,NULL,NULL);
+ float * hostMapRightData = (float *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,clRightData,CL_TRUE,CL_MAP_WRITE,0,dSize*sizeof(float),0,NULL,NULL,NULL);
+ for(int i=0;i<dSize;i++)
+ {
+ hostMapLeftData[i] = (float)lData[i];
+ hostMapRightData[i] = (float)rData[i];
+ }
+ clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,clLiftData,hostMapLeftData,0,NULL,NULL);
+ clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,clRightData,hostMapRightData,0,NULL,NULL);
+
+ status = clSetKernelArg(kEnv.mpkKernel, 0, sizeof(cl_mem),
+ (void *)&clLiftData);
+ status = clSetKernelArg(kEnv.mpkKernel, 1, sizeof(cl_mem),
+ (void *)&clRightData);
+ status = clSetKernelArg(kEnv.mpkKernel, 2, sizeof(cl_mem),
+ (void *)&clResult);
+ CHECK_OPENCL(status)
+ global_work_size[0] = dSize;
+ status = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, kEnv.mpkKernel, 1,
+ NULL, global_work_size, NULL, 0, NULL, NULL);
+ CHECK_OPENCL(status);
+
+ float * hostMapResult = (float *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,clResult,CL_TRUE,CL_MAP_READ,0,dSize*sizeof(float),0,NULL,NULL,NULL);
+ for(int i=0;i<dSize;i++)
+ rResult[i]=hostMapResult[i];
+ clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,clResult,hostMapResult,0,NULL,NULL);
+
+ CHECK_OPENCL(status);
+ status = clFinish(kEnv.mpkCmdQueue);
+ CHECK_OPENCL(status);
+ status = clReleaseKernel(kEnv.mpkKernel);
+ CHECK_OPENCL(status);
+ status = clReleaseMemObject(clLiftData);
+ CHECK_OPENCL(status);
+ status = clReleaseMemObject(clRightData);
+ CHECK_OPENCL(status);
+ status = clReleaseMemObject(clResult);
+ CHECK_OPENCL(status);
+ return 0;
+}
+
+int OclCalc::OclHostSignedMul(double *lData,double *rData,double *rResult,int dSize) {
+ KernelEnv kEnv;
+ int status;
+ const char *kernelName = "oclSignedMul";
+ CheckKernelName(&kEnv,kernelName);
+
+
+ size_t global_work_size[1];
+ cl_int clStatus;
+ kEnv.mpkKernel = clCreateKernel(kEnv.mpkProgram, kernelName, &clStatus);
+ cl_mem clLiftData = clCreateBuffer(kEnv.mpkContext,
+ (cl_mem_flags) (CL_MEM_READ_WRITE),
+ dSize * sizeof(float), NULL, &clStatus);
+ cl_mem clRightData = clCreateBuffer(kEnv.mpkContext,
+ (cl_mem_flags) (CL_MEM_READ_WRITE),
+ dSize * sizeof(float), NULL, &clStatus);
+ cl_mem clResult = clCreateBuffer(kEnv.mpkContext,
+ (cl_mem_flags) (CL_MEM_READ_WRITE),
+ dSize * sizeof(float), NULL, &clStatus);
+
+ float * hostMapLeftData = (float *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,clLiftData,CL_TRUE,CL_MAP_WRITE,0,dSize*sizeof(float),0,NULL,NULL,NULL);
+ float * hostMapRightData = (float *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,clRightData,CL_TRUE,CL_MAP_WRITE,0,dSize*sizeof(float),0,NULL,NULL,NULL);
+ for(int i=0;i<dSize;i++)
+ {
+ hostMapLeftData[i] = (float)lData[i];
+ hostMapRightData[i] = (float)rData[i];
+ }
+ clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,clLiftData,hostMapLeftData,0,NULL,NULL);
+ clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,clRightData,hostMapRightData,0,NULL,NULL);
+
+ status = clSetKernelArg(kEnv.mpkKernel, 0, sizeof(cl_mem),
+ (void *)&clLiftData);
+ status = clSetKernelArg(kEnv.mpkKernel, 1, sizeof(cl_mem),
+ (void *)&clRightData);
+ status = clSetKernelArg(kEnv.mpkKernel, 2, sizeof(cl_mem),
+ (void *)&clResult);
+ CHECK_OPENCL(status)
+ global_work_size[0] = dSize;
+ status = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, kEnv.mpkKernel, 1,
+ NULL, global_work_size, NULL, 0, NULL, NULL);
+ CHECK_OPENCL(status);
+
+ float * hostMapResult = (float *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,clResult,CL_TRUE,CL_MAP_READ,0,dSize*sizeof(float),0,NULL,NULL,NULL);
+ for(int i=0;i<dSize;i++)
+ rResult[i]=hostMapResult[i];
+ clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,clResult,hostMapResult,0,NULL,NULL);
+
+ CHECK_OPENCL(status);
+ status = clFinish(kEnv.mpkCmdQueue);
+ CHECK_OPENCL(status);
+ status = clReleaseKernel(kEnv.mpkKernel);
+ CHECK_OPENCL(status);
+ status = clReleaseMemObject(clLiftData);
+ CHECK_OPENCL(status);
+ status = clReleaseMemObject(clRightData);
+ CHECK_OPENCL(status);
+ status = clReleaseMemObject(clResult);
+ CHECK_OPENCL(status);
+ return 0;
}
+
int OclCalc::OclHostSignedDiv(double *lData,double *rData,double *rResult,int dSize) {
- KernelEnv env;
- int status;
- const char *kernelName = "oclSignedDiv";
- CheckKernelName(&env,kernelName);
-
-
- size_t global_work_size[1];
- cl_int clStatus;
- env.kernel = clCreateKernel(env.program,kernelName, &clStatus);
- cl_mem clLiftData = clCreateBuffer(env.context,
- (cl_mem_flags) (CL_MEM_READ_WRITE),
- dSize * sizeof(float), NULL, &clStatus);
- cl_mem clRightData = clCreateBuffer(env.context,
- (cl_mem_flags) (CL_MEM_READ_WRITE),
- dSize * sizeof(float), NULL, &clStatus);
- cl_mem clResult = clCreateBuffer(env.context,
- (cl_mem_flags) (CL_MEM_READ_WRITE),
- dSize * sizeof(float), NULL, &clStatus);
-
- float * hostMapLeftData = (float *)clEnqueueMapBuffer(env.commandQueue,clLiftData,CL_TRUE,CL_MAP_WRITE,0,dSize*sizeof(float),0,NULL,NULL,NULL);
- float * hostMapRightData = (float *)clEnqueueMapBuffer(env.commandQueue,clRightData,CL_TRUE,CL_MAP_WRITE,0,dSize*sizeof(float),0,NULL,NULL,NULL);
- for(int i=0;i<dSize;i++)
- {
- hostMapLeftData[i] = (float)lData[i];
- hostMapRightData[i] = (float)rData[i];
- }
- clEnqueueUnmapMemObject(env.commandQueue,clLiftData,hostMapLeftData,0,NULL,NULL);
- clEnqueueUnmapMemObject(env.commandQueue,clRightData,hostMapRightData,0,NULL,NULL);
-
- status = clSetKernelArg(env.kernel, 0, sizeof(cl_mem),
- (void *)&clLiftData);
- status = clSetKernelArg(env.kernel, 1, sizeof(cl_mem),
- (void *)&clRightData);
- status = clSetKernelArg(env.kernel, 2, sizeof(cl_mem),
- (void *)&clResult);
- CHECK_OPENCL(status)
- global_work_size[0] = dSize;
- status = clEnqueueNDRangeKernel(env.commandQueue, env.kernel, 1,
- NULL, global_work_size, NULL, 0, NULL, NULL);
- CHECK_OPENCL(status);
-
- float * hostMapResult = (float *)clEnqueueMapBuffer(env.commandQueue,clResult,CL_TRUE,CL_MAP_READ,0,dSize*sizeof(float),0,NULL,NULL,NULL);
- for(int i=0;i<dSize;i++)
- rResult[i]=hostMapResult[i];
- clEnqueueUnmapMemObject(env.commandQueue,clResult,hostMapResult,0,NULL,NULL);
-
- CHECK_OPENCL(status);
- status = clFinish(env.commandQueue);
- CHECK_OPENCL(status);
- status = clReleaseKernel(env.kernel);
- CHECK_OPENCL(status);
- status = clReleaseMemObject(clLiftData);
- CHECK_OPENCL(status);
- status = clReleaseMemObject(clRightData);
- CHECK_OPENCL(status);
- status = clReleaseMemObject(clResult);
- CHECK_OPENCL(status);
- return 0;
+ KernelEnv kEnv;
+ int status;
+ const char *kernelName = "oclSignedDiv";
+ CheckKernelName(&kEnv,kernelName);
+
+
+ cl_int clStatus;
+ size_t global_work_size[1];
+ kEnv.mpkKernel = clCreateKernel(kEnv.mpkProgram,kernelName, &clStatus);
+ cl_mem clLiftData = clCreateBuffer(kEnv.mpkContext,
+ (cl_mem_flags) (CL_MEM_READ_WRITE),
+ dSize * sizeof(float), NULL, &clStatus);
+ cl_mem clRightData = clCreateBuffer(kEnv.mpkContext,
+ (cl_mem_flags) (CL_MEM_READ_WRITE),
+ dSize * sizeof(float), NULL, &clStatus);
+ cl_mem clResult = clCreateBuffer(kEnv.mpkContext,
+ (cl_mem_flags) (CL_MEM_READ_WRITE),
+ dSize * sizeof(float), NULL, &clStatus);
+
+ float * hostMapLeftData = (float *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,clLiftData,CL_TRUE,CL_MAP_WRITE,0,dSize*sizeof(float),0,NULL,NULL,NULL);
+ float * hostMapRightData = (float *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,clRightData,CL_TRUE,CL_MAP_WRITE,0,dSize*sizeof(float),0,NULL,NULL,NULL);
+ for(int i=0;i<dSize;i++)
+ {
+ hostMapLeftData[i] = (float)lData[i];
+ hostMapRightData[i] = (float)rData[i];
+ }
+ clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,clLiftData,hostMapLeftData,0,NULL,NULL);
+ clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,clRightData,hostMapRightData,0,NULL,NULL);
+
+ status = clSetKernelArg(kEnv.mpkKernel, 0, sizeof(cl_mem),
+ (void *)&clLiftData);
+ status = clSetKernelArg(kEnv.mpkKernel, 1, sizeof(cl_mem),
+ (void *)&clRightData);
+ status = clSetKernelArg(kEnv.mpkKernel, 2, sizeof(cl_mem),
+ (void *)&clResult);
+ CHECK_OPENCL(status);
+ global_work_size[0] = dSize;
+ status = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, kEnv.mpkKernel, 1,
+ NULL, global_work_size, NULL, 0, NULL, NULL);
+ CHECK_OPENCL(status);
+
+ float * hostMapResult = (float *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,clResult,CL_TRUE,CL_MAP_READ,0,dSize*sizeof(float),0,NULL,NULL,NULL);
+ for(int i=0;i<dSize;i++)
+ rResult[i]=hostMapResult[i];
+ clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,clResult,hostMapResult,0,NULL,NULL);
+
+ CHECK_OPENCL(status);
+ status = clFinish(kEnv.mpkCmdQueue);
+ CHECK_OPENCL(status);
+ status = clReleaseKernel(kEnv.mpkKernel);
+ CHECK_OPENCL(status);
+ status = clReleaseMemObject(clLiftData);
+ CHECK_OPENCL(status);
+ status = clReleaseMemObject(clRightData);
+ CHECK_OPENCL(status);
+ status = clReleaseMemObject(clResult);
+ CHECK_OPENCL(status);
+ return 0;
+}
+#endif // GPU_64BITS
+int OclCalc::CreateBuffer(float *&fpSrcData,uint *&npStartPos,uint *&npEndPos,int nBufferSize)
+{
+ cl_int clStatus = 0;
+ SetKernelEnv(&kEnv);
+
+ mpClmemSrcData = clCreateBuffer(kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR),
+ nBufferSize * sizeof(float), NULL, &clStatus);
+ CHECK_OPENCL(clStatus);
+ mpClmemStartPos = clCreateBuffer(kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR),
+ nBufferSize * sizeof(unsigned int), NULL, &clStatus);
+ CHECK_OPENCL(clStatus);
+ mpClmemEndPos = clCreateBuffer(kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR),
+ nBufferSize * sizeof(unsigned int), NULL, &clStatus);
+ CHECK_OPENCL(clStatus);
+
+ fpSrcData = (float *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,mpClmemSrcData,CL_TRUE,CL_MAP_WRITE_INVALIDATE_REGION,0,nBufferSize * sizeof(float),0,NULL,NULL,&clStatus);
+ CHECK_OPENCL(clStatus);
+ npStartPos = (uint *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,mpClmemStartPos,CL_TRUE,CL_MAP_WRITE_INVALIDATE_REGION,0,nBufferSize * sizeof(uint),0,NULL,NULL,&clStatus);
+ CHECK_OPENCL(clStatus);
+ npEndPos = (uint *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,mpClmemEndPos,CL_TRUE,CL_MAP_WRITE_INVALIDATE_REGION,0,nBufferSize * sizeof(uint),0,NULL,NULL,&clStatus);
+ CHECK_OPENCL(clStatus);
+ //printf("In CreateBuffer, pptrr is %d,%d,%d\n",fpSrcData,npStartPos,npEndPos);
+ return 0;
+}
+
+int OclCalc::CreateBuffer(float *&fpLeftData,float *&fpRightData,int nBufferSize)
+{
+ cl_int clStatus = 0;
+ SetKernelEnv(&kEnv);
+
+ mpClmemLeftData = clCreateBuffer(kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR),
+ nBufferSize * sizeof(float), NULL, &clStatus);
+ CHECK_OPENCL(clStatus);
+ mpClmemRightData = clCreateBuffer(kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR),
+ nBufferSize * sizeof(unsigned int), NULL, &clStatus);
+ CHECK_OPENCL(clStatus);
+ fpLeftData = (float *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,mpClmemLeftData,CL_TRUE,CL_MAP_WRITE_INVALIDATE_REGION,0,nBufferSize * sizeof(float),0,NULL,NULL,&clStatus);
+ CHECK_OPENCL(clStatus);
+ fpRightData = (float *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,mpClmemRightData,CL_TRUE,CL_MAP_WRITE_INVALIDATE_REGION,0,nBufferSize * sizeof(float),0,NULL,NULL,&clStatus);
+ CHECK_OPENCL(clStatus);
+ //printf("In CreateBuffer, pptrr is %d,%d,%d\n",fpSrcData,npStartPos,npEndPos);
+ return 0;
+}
+
+int OclCalc::OclHostFormulaMax32Bits(float *fpSrcData,uint *npStartPos,uint *npEndPos,double *output,int size)
+{
+ cl_int clStatus;
+ size_t global_work_size[1];
+ //int alignSize = size + end[0]-start[0];
+ //for(int u=0;u < size;u++)
+ //fprintf(stderr,"fpOclSrcData[%d] is %f.\n",u,fpSrcData[u]);
+ const char *kernelName = "oclFormulaMax";
+ CheckKernelName(&kEnv,kernelName);
+
+ kEnv.mpkKernel = clCreateKernel(kEnv.mpkProgram,kEnv.mckKernelName,&clStatus);
+ CHECK_OPENCL(clStatus);
+ clStatus = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,mpClmemSrcData, fpSrcData, 0,NULL,NULL);
+ CHECK_OPENCL(clStatus);
+ clStatus = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,mpClmemStartPos,npStartPos,0,NULL,NULL);
+ CHECK_OPENCL(clStatus);
+ clStatus = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,mpClmemEndPos, npEndPos, 0,NULL,NULL);
+ CHECK_OPENCL(clStatus);
+
+ cl_mem outputCl = clCreateBuffer(kEnv.mpkContext,
+ CL_MEM_READ_WRITE,
+ size* sizeof(float),
+ NULL,
+ &clStatus);
+ CHECK_OPENCL(clStatus);
+
+ clStatus = clSetKernelArg(kEnv.mpkKernel, 0, sizeof(cl_mem),(void *)&mpClmemSrcData);
+ CHECK_OPENCL(clStatus);
+ clStatus = clSetKernelArg(kEnv.mpkKernel, 1, sizeof(cl_mem),(void *)&mpClmemStartPos);
+ CHECK_OPENCL(clStatus);
+ clStatus = clSetKernelArg(kEnv.mpkKernel, 2, sizeof(cl_mem),(void *)&mpClmemEndPos);
+ CHECK_OPENCL(clStatus);
+ clStatus = clSetKernelArg(kEnv.mpkKernel, 3, sizeof(cl_mem),(void *)&outputCl);
+ CHECK_OPENCL(clStatus);
+
+ global_work_size[0] = size;
+ clStatus = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue,
+ kEnv.mpkKernel,
+ 1,
+ NULL,
+ global_work_size,
+ NULL, 0, NULL, NULL);
+ CHECK_OPENCL(clStatus);
+
+ float * outputMap = (float *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,
+ outputCl,
+ CL_TRUE,CL_MAP_READ,
+ 0,
+ size*sizeof(float),
+ 0,NULL,NULL,&clStatus);
+ CHECK_OPENCL(clStatus);
+ for(int i=0;i<size;i++)
+ output[i]=outputMap[i];// from gpu float type to cpu double type
+ clStatus = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,outputCl,outputMap,0,NULL,NULL);
+ CHECK_OPENCL(clStatus);
+ clStatus = clFinish(kEnv.mpkCmdQueue);
+ CHECK_OPENCL(clStatus);
+
+ clStatus = clReleaseKernel(kEnv.mpkKernel);
+ CHECK_OPENCL(clStatus);
+ clStatus = clReleaseMemObject(mpClmemSrcData);
+ CHECK_OPENCL(clStatus);
+ clStatus = clReleaseMemObject(mpClmemStartPos);
+ CHECK_OPENCL(clStatus);
+ clStatus = clReleaseMemObject(mpClmemEndPos);
+ CHECK_OPENCL(clStatus);
+ clStatus = clReleaseMemObject(outputCl);
+ CHECK_OPENCL(clStatus);
+ return 0;
+
+}
+
+int OclCalc::OclHostFormulaMin32Bits(float *fpSrcData,uint *npStartPos,uint *npEndPos,double *output,int size)
+{
+ cl_int clStatus;
+ size_t global_work_size[1];
+ //int alignSize = size + end[0]-start[0];
+ //for(int u=0;u < size;u++)
+ //fprintf(stderr,"fpOclSrcData[%d] is %f.\n",u,fpSrcData[u]);
+ const char *kernelName = "oclFormulaMin";
+ CheckKernelName(&kEnv,kernelName);
+
+ kEnv.mpkKernel = clCreateKernel(kEnv.mpkProgram,kEnv.mckKernelName,&clStatus);
+ CHECK_OPENCL(clStatus);
+ clStatus = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,mpClmemSrcData, fpSrcData, 0,NULL,NULL);
+ CHECK_OPENCL(clStatus);
+ clStatus = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,mpClmemStartPos,npStartPos,0,NULL,NULL);
+ CHECK_OPENCL(clStatus);
+ clStatus = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,mpClmemEndPos, npEndPos, 0,NULL,NULL);
+ CHECK_OPENCL(clStatus);
+
+ cl_mem outputCl = clCreateBuffer(kEnv.mpkContext,
+ CL_MEM_READ_WRITE,
+ size* sizeof(float),
+ NULL,
+ &clStatus);
+ CHECK_OPENCL(clStatus);
+
+ clStatus = clSetKernelArg(kEnv.mpkKernel, 0, sizeof(cl_mem),(void *)&mpClmemSrcData);
+ CHECK_OPENCL(clStatus);
+ clStatus = clSetKernelArg(kEnv.mpkKernel, 1, sizeof(cl_mem),(void *)&mpClmemStartPos);
+ CHECK_OPENCL(clStatus);
+ clStatus = clSetKernelArg(kEnv.mpkKernel, 2, sizeof(cl_mem),(void *)&mpClmemEndPos);
+ CHECK_OPENCL(clStatus);
+ clStatus = clSetKernelArg(kEnv.mpkKernel, 3, sizeof(cl_mem),(void *)&outputCl);
+ CHECK_OPENCL(clStatus);
+
+ global_work_size[0] = size;
+ clStatus = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue,
+ kEnv.mpkKernel,
+ 1,
+ NULL,
+ global_work_size,
+ NULL, 0, NULL, NULL);
+ CHECK_OPENCL(clStatus);
+
+ float * outputMap = (float *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,
+ outputCl,
+ CL_TRUE,CL_MAP_READ,
+ 0,
+ size*sizeof(float),
+ 0,NULL,NULL,&clStatus);
+ CHECK_OPENCL(clStatus);
+ for(int i=0;i<size;i++)
+ output[i]=outputMap[i];// from gpu float type to cpu double type
+ clStatus = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,outputCl,outputMap,0,NULL,NULL);
+ CHECK_OPENCL(clStatus);
+ clStatus = clFinish(kEnv.mpkCmdQueue);
+ CHECK_OPENCL(clStatus);
+
+ clStatus = clReleaseKernel(kEnv.mpkKernel);
+ CHECK_OPENCL(clStatus);
+ clStatus = clReleaseMemObject(mpClmemSrcData);
+ CHECK_OPENCL(clStatus);
+ clStatus = clReleaseMemObject(mpClmemStartPos);
+ CHECK_OPENCL(clStatus);
+ clStatus = clReleaseMemObject(mpClmemEndPos);
+ CHECK_OPENCL(clStatus);
+ clStatus = clReleaseMemObject(outputCl);
+ CHECK_OPENCL(clStatus);
+ return 0;
+
+}
+
+int OclCalc::OclHostFormulaAverage32Bits(float *fpSrcData,uint *npStartPos,uint *npEndPos,double *output,int size)
+{
+ cl_int clStatus;
+ size_t global_work_size[1];
+ //int alignSize = size + end[0]-start[0];
+ //for(int u=0;u < size;u++)
+ //fprintf(stderr,"fpOclSrcData[%d] is %f.\n",u,fpSrcData[u]);
+ const char *kernelName = "oclFormulaAverage";
+ CheckKernelName(&kEnv,kernelName);
+
+ kEnv.mpkKernel = clCreateKernel(kEnv.mpkProgram,kEnv.mckKernelName,&clStatus);
+ CHECK_OPENCL(clStatus);
+ clStatus = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,mpClmemSrcData, fpSrcData, 0,NULL,NULL);
+ CHECK_OPENCL(clStatus);
+ clStatus = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,mpClmemStartPos,npStartPos,0,NULL,NULL);
+ CHECK_OPENCL(clStatus);
+ clStatus = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,mpClmemEndPos, npEndPos, 0,NULL,NULL);
+ CHECK_OPENCL(clStatus);
+
+ cl_mem outputCl = clCreateBuffer(kEnv.mpkContext,
+ CL_MEM_READ_WRITE,
+ size* sizeof(float),
+ NULL,
+ &clStatus);
+ CHECK_OPENCL(clStatus);
+
... etc. - the rest is truncated
More information about the Libreoffice-commits
mailing list