[Libreoffice-commits] core.git: Branch 'feature/formula-core-rework' - 6 commits - sc/source
Michael Meeks
michael.meeks at suse.com
Wed Jul 10 09:11:13 PDT 2013
sc/source/core/data/formulacell.cxx | 8
sc/source/core/opencl/formulagroupcl.cxx | 264 ++-
sc/source/core/opencl/oclkernels.hxx | 207 +-
sc/source/core/opencl/openclwrapper.cxx | 2159 +++++++++++++++++++++----------
sc/source/core/opencl/openclwrapper.hxx | 145 +-
sc/source/core/tool/formulagroup.cxx | 1
6 files changed, 1894 insertions(+), 890 deletions(-)
New commits:
commit 342e6908e22edf3b9443b2997d82d4ff75ad495d
Author: Michael Meeks <michael.meeks at suse.com>
Date: Tue Jul 9 12:02:43 2013 +0100
better opencl error reporting / diagnostics.
diff --git a/sc/source/core/opencl/openclwrapper.cxx b/sc/source/core/opencl/openclwrapper.cxx
index 31f1589..0c94721 100755
--- a/sc/source/core/opencl/openclwrapper.cxx
+++ b/sc/source/core/opencl/openclwrapper.cxx
@@ -2131,6 +2131,7 @@ double *OclCalc::OclSimpleDeltaOperation(OpCode eOp, const double *pOpArray,
size_t global_work_size[1];
kEnv.mpkKernel = clCreateKernel(kEnv.mpkProgram,kernelName, &clStatus);
+ CHECK_OPENCL(clStatus);
// Ugh - horrible redundant copying ...
cl_mem valuesCl = allocateDoubleBuffer(kEnv, pOpArray, nElements, &clStatus);
@@ -2147,12 +2148,16 @@ double *OclCalc::OclSimpleDeltaOperation(OpCode eOp, const double *pOpArray,
clStatus = clSetKernelArg(kEnv.mpkKernel, 0, sizeof(cl_mem),
(void *)&valuesCl);
+ CHECK_OPENCL(clStatus);
clStatus = clSetKernelArg(kEnv.mpkKernel, 1, sizeof(cl_mem),
(void *)&subtractCl);
- clStatus = clSetKernelArg(kEnv.mpkKernel, 2, sizeof(cl_mem),
+ CHECK_OPENCL(clStatus);
+ clStatus = clSetKernelArg(kEnv.mpkKernel, 2, sizeof(cl_int),
(void *)&start);
- clStatus = clSetKernelArg(kEnv.mpkKernel, 3, sizeof(cl_mem),
+ CHECK_OPENCL(clStatus);
+ clStatus = clSetKernelArg(kEnv.mpkKernel, 3, sizeof(cl_int),
(void *)&end);
+ CHECK_OPENCL(clStatus);
clStatus = clSetKernelArg(kEnv.mpkKernel, 4, sizeof(cl_mem),
(void *)&outputCl);
CHECK_OPENCL(clStatus);
diff --git a/sc/source/core/opencl/openclwrapper.hxx b/sc/source/core/opencl/openclwrapper.hxx
index a0c132a..fe62554 100755
--- a/sc/source/core/opencl/openclwrapper.hxx
+++ b/sc/source/core/opencl/openclwrapper.hxx
@@ -12,6 +12,7 @@
#include <config_features.h>
#include <formula/opcode.hxx>
+#include <sal/detail/log.h>
#include <cassert>
#include <CL/cl.h>
#endif
@@ -55,7 +56,7 @@ typedef int (*cl_kernel_function)(void **userdata, KernelEnv *kenv);
#define CHECK_OPENCL(status) \
if(status != CL_SUCCESS) \
{ \
- printf ("error code is %d.\n",status); \
+ printf ("OpenCL error code is %d at " SAL_DETAIL_WHERE "\n", status); \
return 0; \
}
commit 8e60c2319b1f959200033bc3d89dca7d92815988
Author: Michael Meeks <michael.meeks at suse.com>
Date: Mon Jul 8 21:49:31 2013 +0100
try harder to setup the kernel environment.
diff --git a/sc/source/core/opencl/openclwrapper.cxx b/sc/source/core/opencl/openclwrapper.cxx
index 3030a2e..31f1589 100755
--- a/sc/source/core/opencl/openclwrapper.cxx
+++ b/sc/source/core/opencl/openclwrapper.cxx
@@ -2111,6 +2111,7 @@ double *OclCalc::OclSimpleDeltaOperation(OpCode eOp, const double *pOpArray,
const double *pSubtractSingle, size_t nElements)
{
KernelEnv kEnv;
+ SetKernelEnv(&kEnv);
// select a kernel: cut & paste coding is utterly evil.
const char *kernelName;
commit 7fe2888b5a4f8d434d2d662bbf948bac2501b695
Author: Michael Meeks <michael.meeks at suse.com>
Date: Mon Jul 8 21:35:26 2013 +0100
cleanup formulagroupcl and add opencl kernel for averagedelta.
Conflicts:
sc/source/core/opencl/openclwrapper.hxx
Change-Id: Id4777d3854d34ab34dd29b050cd329a803023a39
diff --git a/sc/source/core/opencl/formulagroupcl.cxx b/sc/source/core/opencl/formulagroupcl.cxx
index d92a471..8bc0224 100755
--- a/sc/source/core/opencl/formulagroupcl.cxx
+++ b/sc/source/core/opencl/formulagroupcl.cxx
@@ -310,11 +310,8 @@ bool FormulaGroupInterpreterGroundwater::interpret(ScDocument& rDoc, const ScAdd
OpCode eOp; // type of operation: ocAverage, ocMax, ocMin
const double *pArrayToSubtractOneElementFrom;
const double *pGroundWaterDataArray;
- size_t nGroundWaterDataArrayLen;
// Output:
- double *pResult = new double[xGroup->mnLength];
- RETURN_IF_FAIL(pResult != NULL, "buffer alloc failed");
std::vector<double> aMatrixContent;
const formula::FormulaToken *p;
@@ -338,36 +335,13 @@ bool FormulaGroupInterpreterGroundwater::interpret(ScDocument& rDoc, const ScAdd
p = rCode.NextNoSpaces();
RETURN_IF_FAIL(p != NULL, "no function argument");
- if (p->GetType() == formula::svDoubleVectorRef)
- {
- // FIXME: this is what I would expect; but table1.cxx's
- // ScColumn::ResolveStaticReference as called from
- // GroupTokenConverter::convert returns an ScMatrixToken un-conditionally
- const formula::DoubleVectorRefToken* pDvr = static_cast<const formula::DoubleVectorRefToken*>(p);
- const std::vector<const double*>& rArrays = pDvr->GetArrays();
- RETURN_IF_FAIL(rArrays.size() == 1, "unexpectedly large double ref array");
- RETURN_IF_FAIL(pDvr->GetArrayLength() == (size_t)xGroup->mnLength, "wrong double ref length");
- RETURN_IF_FAIL(pDvr->IsStartFixed() && pDvr->IsEndFixed(), "non-fixed ranges )");
- pGroundWaterDataArray = rArrays[0];
- nGroundWaterDataArrayLen = xGroup->mnLength;
- }
- else
- {
- RETURN_IF_FAIL(p->GetType() == formula::svMatrix, "unexpected fn. param type");
- const ScMatrixToken *pMatTok = static_cast<const ScMatrixToken *>(p);
- pMatTok->GetMatrix()->GetDoubleArray( aMatrixContent );
- // FIXME: horrible hackery: the legacy / excel shared formula oddness,
- // such that the 1st entry is not truly shared, making these a different
- // shape.
- if (aMatrixContent.size() > (size_t)xGroup->mnLength + 1)
- {
- fprintf(stderr, "Error size range mismatch: %ld vs %ld\n",
- (long)aMatrixContent.size(), (long)xGroup->mnLength);
- return false;
- }
- pGroundWaterDataArray = &aMatrixContent[0];
- nGroundWaterDataArrayLen = aMatrixContent.size();
- }
+ RETURN_IF_FAIL(p->GetType() == formula::svDoubleVectorRef, "wrong type of fn argument");
+ const formula::DoubleVectorRefToken* pDvr = static_cast<const formula::DoubleVectorRefToken*>(p);
+ const std::vector<const double*>& rArrays = pDvr->GetArrays();
+ RETURN_IF_FAIL(rArrays.size() == 1, "unexpectedly large double ref array");
+ RETURN_IF_FAIL(pDvr->GetArrayLength() == (size_t)xGroup->mnLength, "wrong double ref length");
+ RETURN_IF_FAIL(pDvr->IsStartFixed() && pDvr->IsEndFixed(), "non-fixed ranges )");
+ pGroundWaterDataArray = rArrays[0];
p = rCode.NextNoSpaces();
RETURN_IF_FAIL(p != NULL && p->GetOpCode() == ocClose, "missing closing )");
@@ -400,17 +374,22 @@ bool FormulaGroupInterpreterGroundwater::interpret(ScDocument& rDoc, const ScAdd
// =AVERAGE(L$6:L$7701) - L6
// we would get:
// eOp => ocAverage
- // pGroundWaterDataArray => contains L$6:L$7701
- // pGroundWaterDataArrayLen => 7701 - 6 + 1
- // pArrayToSubtractOneElementFrom => contains L$5:L$7701 (overlapping)
+ // pGroundWaterDataArray => contains L$5:L$7701
+ // pArrayToSubtractOneElementFrom => contains L$5:L$7701 (ie. a copy)
// length of this array -> xGroup->mnLength
fprintf (stderr, "Calculate !\n");
+ double *pResult = ocl_calc.OclSimpleDeltaOperation( eOp, pGroundWaterDataArray,
+ pArrayToSubtractOneElementFrom,
+ (size_t) xGroup->mnLength );
+ RETURN_IF_FAIL(pResult != NULL, "buffer alloc / calculaton failed");
+
// Insert the double data, in rResult[i] back into the document
rDoc.SetFormulaResults(rTopPos, pResult, xGroup->mnLength);
delete [] pResult;
+
SAL_DEBUG ("exit cleanly !");
return true;
}
diff --git a/sc/source/core/opencl/oclkernels.hxx b/sc/source/core/opencl/oclkernels.hxx
index 6c90126..e13c24a 100755
--- a/sc/source/core/opencl/oclkernels.hxx
+++ b/sc/source/core/opencl/oclkernels.hxx
@@ -142,7 +142,6 @@ __kernel void oclFormulaAverage(__global float *input,__global int *start,__glob
for(i = start[id];i<=end[id];i++)
sum += input[i];
output[id] = sum / (end[id]-start[id]+1);
-
}
//Sumproduct
@@ -162,6 +161,33 @@ __kernel void oclFormulaMinverse(__global float *data,
}
+// Double precision is a requirement of spreadsheets
+#if 0
+#if defined(cl_khr_fp64) // Khronos extension
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+#elif defined(cl_amd_fp64) // AMD extension
+#pragma OPENCL EXTENSION cl_amd_fp64 : enable
+#endif
+typedef double fp_t;
+#else
+typedef float fp_t;
+#endif
+
+__kernel void oclAverageDelta(__global fp_t *values, __global fp_t *subtract, __global int start, __global int end, __global fp_t *output)
+{
+ const unsigned int id = get_global_id(0);
+
+ // Average
+ int i;
+ fp_t sum = 0.0;
+ for(i = start; i < end; i++)
+ sum += values[i];
+ fp_t val = sum/(end-start);
+
+ // Subtract & output
+ output[id] = val - subtract[id];
+}
+
);
#endif // USE_EXTERNAL_KERNEL
diff --git a/sc/source/core/opencl/openclwrapper.cxx b/sc/source/core/opencl/openclwrapper.cxx
index 597f370..3030a2e 100755
--- a/sc/source/core/opencl/openclwrapper.cxx
+++ b/sc/source/core/opencl/openclwrapper.cxx
@@ -76,16 +76,17 @@ int OpenclDevice::ReleaseOpenclRunEnv() {
}
///////////////////////////////////////////////////////
///////////////////////////////////////////////////////
-inline int OpenclDevice::AddKernelConfig(int kCount, const char *kName) {
+inline int OpenclDevice::AddKernelConfig(int kCount, const char *kName)
+{
strcpy(gpuEnv.mArrykernelNames[kCount], kName);
gpuEnv.mnKernelCount++;
return 0;
}
-int OpenclDevice::RegistOpenclKernel() {
- if (!gpuEnv.mnIsUserCreated) {
+int OpenclDevice::RegistOpenclKernel()
+{
+ if (!gpuEnv.mnIsUserCreated)
memset(&gpuEnv, 0, sizeof(gpuEnv));
- }
gpuEnv.mnFileCount = 0; //argc;
gpuEnv.mnKernelCount = 0UL;
@@ -100,17 +101,22 @@ int OpenclDevice::RegistOpenclKernel() {
AddKernelConfig(7, (const char*) "oclFormulaSumproduct");
AddKernelConfig(8, (const char*) "oclFormulaMinverse");
- AddKernelConfig(9, (const char*) "oclSignedAdd");
+ AddKernelConfig(9, (const char*) "oclSignedAdd");
AddKernelConfig(10, (const char*) "oclSignedSub");
AddKernelConfig(11, (const char*) "oclSignedMul");
AddKernelConfig(12, (const char*) "oclSignedDiv");
+ AddKernelConfig(13, (const char*) "oclAverageDelta");
+
return 0;
}
-OpenclDevice::OpenclDevice(){
+
+OpenclDevice::OpenclDevice()
+{
//InitEnv();
}
-OpenclDevice::~OpenclDevice() {
+OpenclDevice::~OpenclDevice()
+{
//ReleaseOpenclRunEnv();
}
@@ -122,13 +128,15 @@ int OpenclDevice::SetKernelEnv(KernelEnv *envInfo)
return 1;
}
-int OpenclDevice::CheckKernelName(KernelEnv *envInfo,const char *kernelName){
+
+int OpenclDevice::CheckKernelName(KernelEnv *envInfo,const char *kernelName)
+{
//printf("CheckKernelName,total count of kernels...%d\n", gpuEnv.kernelCount);
int kCount;
for(kCount=0; kCount < gpuEnv.mnKernelCount; kCount++) {
if(strcasecmp(kernelName, gpuEnv.mArrykernelNames[kCount]) == 0) {
- printf("match %s kernel right\n",kernelName);
- break;
+ printf("match %s kernel right\n",kernelName);
+ break;
}
}
envInfo->mpkKernel = gpuEnv.mpArryKernels[kCount];
@@ -141,7 +149,8 @@ int OpenclDevice::CheckKernelName(KernelEnv *envInfo,const char *kernelName){
return 1;
}
-int OpenclDevice::ConvertToString(const char *filename, char **source) {
+int OpenclDevice::ConvertToString(const char *filename, char **source)
+{
int file_size;
size_t result;
FILE *file = NULL;
@@ -174,8 +183,9 @@ int OpenclDevice::ConvertToString(const char *filename, char **source) {
return 0;
}
-int OpenclDevice::BinaryGenerated(const char * clFileName, FILE ** fhandle) {
- unsigned int i = 0;
+int OpenclDevice::BinaryGenerated(const char * clFileName, FILE ** fhandle)
+{
+ unsigned int i = 0;
cl_int status;
char *str = NULL;
FILE *fd = NULL;
@@ -208,7 +218,8 @@ int OpenclDevice::BinaryGenerated(const char * clFileName, FILE ** fhandle) {
}
int OpenclDevice::WriteBinaryToFile(const char* fileName, const char* birary,
- size_t numBytes) {
+ size_t numBytes)
+{
FILE *output = NULL;
output = fopen(fileName, "wb");
if (output == NULL) {
@@ -223,7 +234,8 @@ int OpenclDevice::WriteBinaryToFile(const char* fileName, const char* birary,
}
int OpenclDevice::GeneratBinFromKernelSource(cl_program program,
- const char * clFileName) {
+ const char * clFileName)
+{
unsigned int i = 0;
cl_int status;
size_t *binarySizes, numDevices;
@@ -319,10 +331,10 @@ int OpenclDevice::GeneratBinFromKernelSource(cl_program program,
return 1;
}
-int OpenclDevice::InitOpenclAttr(OpenCLEnv * env) {
- if (gpuEnv.mnIsUserCreated) {
+int OpenclDevice::InitOpenclAttr(OpenCLEnv * env)
+{
+ if (gpuEnv.mnIsUserCreated)
return 1;
- }
gpuEnv.mpContext = env->mpOclContext;
gpuEnv.mpPlatformID = env->mpOclPlatformID;
@@ -334,21 +346,24 @@ int OpenclDevice::InitOpenclAttr(OpenCLEnv * env) {
return 0;
}
-int OpenclDevice::CreateKernel(char * kernelname, KernelEnv * env) {
+int OpenclDevice::CreateKernel(char * kernelname, KernelEnv * env)
+{
int status;
- env->mpkKernel = clCreateKernel(gpuEnv.mpArryPrograms[0], kernelname, &status);
+ env->mpkKernel = clCreateKernel(gpuEnv.mpArryPrograms[0], kernelname, &status);
env->mpkContext = gpuEnv.mpContext;
env->mpkCmdQueue = gpuEnv.mpCmdQueue;
return status != CL_SUCCESS ? 1 : 0;
}
-int OpenclDevice::ReleaseKernel(KernelEnv * env) {
+int OpenclDevice::ReleaseKernel(KernelEnv * env)
+{
int status = clReleaseKernel(env->mpkKernel);
return status != CL_SUCCESS ? 1 : 0;
}
-int OpenclDevice::ReleaseOpenclEnv(GPUEnv *gpuInfo) {
+int OpenclDevice::ReleaseOpenclEnv(GPUEnv *gpuInfo)
+{
int i = 0;
int status = 0;
@@ -378,7 +393,8 @@ int OpenclDevice::ReleaseOpenclEnv(GPUEnv *gpuInfo) {
}
int OpenclDevice::RunKernelWrapper(cl_kernel_function function,
- const char * kernelName, void **usrdata) {
+ const char * kernelName, void **usrdata)
+{
printf("oclwrapper:RunKernel_wrapper...\n");
if (RegisterKernelWrapper(kernelName, function) != 1) {
fprintf(stderr,
@@ -389,8 +405,9 @@ int OpenclDevice::RunKernelWrapper(cl_kernel_function function,
}
int OpenclDevice::CachedOfKernerPrg(const GPUEnv *gpuEnvCached,
- const char * clFileName) {
- int i;
+ const char * clFileName)
+{
+ int i;
for (i = 0; i < gpuEnvCached->mnFileCount; i++) {
if (strcasecmp(gpuEnvCached->mArryKnelSrcFile[i], clFileName) == 0) {
if (gpuEnvCached->mpArryPrograms[i] != NULL) {
@@ -574,6 +591,7 @@ int OpenclDevice::RunKernel(const char *kernelName, void **userdata) {
}
return 0;
}
+
int OpenclDevice::InitOpenclRunEnv(int argc, const char *buildOptionKernelfiles)
{
int status = 0;
@@ -1007,6 +1025,7 @@ int OclCalc::OclHostFormulaMax(double *srcData,int *start,int *end,double *outpu
CHECK_OPENCL(clStatus);
return 0;
}
+
int OclCalc::OclHostFormulaMin(double *srcData,int *start,int *end,double *output,int size)
{
KernelEnv kEnv;
@@ -1590,7 +1609,6 @@ int OclCalc::OclHostFormulaMin32Bits(float *fpSrcData,uint *npStartPos,uint *npE
clStatus = clReleaseMemObject(outputCl);
CHECK_OPENCL(clStatus);
return 0;
-
}
int OclCalc::OclHostFormulaAverage32Bits(float *fpSrcData,uint *npStartPos,uint *npEndPos,double *output,int size)
@@ -2066,4 +2084,111 @@ int OclCalc::OclHostFormulaSumProduct(float *dpSrcData,int *npStart,int *npEnd,f
}
#endif
+#if 0
+typedef double fp_;
+#else
+typedef float fp_t;
+#endif
+
+// FIXME: should be templatised in <double> - double buffering [sic] rocks
+static cl_mem allocateDoubleBuffer(KernelEnv &rEnv, const double *_pValues,
+ size_t nElements, cl_int *pStatus)
+{
+ // Ugh - horrible redundant copying ...
+ cl_mem xValues = clCreateBuffer(rEnv.mpkContext,(cl_mem_flags) (CL_MEM_READ_WRITE),
+ nElements * sizeof(double), NULL, pStatus);
+ fp_t *pValues = (fp_t *)clEnqueueMapBuffer(rEnv.mpkCmdQueue,xValues,CL_TRUE,CL_MAP_WRITE,0,
+ nElements * sizeof(fp_t),0,NULL,NULL,NULL);
+ for(int i=0;i<nElements;i++)
+ pValues[i] = (fp_t)_pValues[i];
+
+ clEnqueueUnmapMemObject(rEnv.mpkCmdQueue,xValues,pValues,0,NULL,NULL);
+
+ return xValues;
+}
+
+double *OclCalc::OclSimpleDeltaOperation(OpCode eOp, const double *pOpArray,
+ const double *pSubtractSingle, size_t nElements)
+{
+ KernelEnv kEnv;
+
+ // select a kernel: cut & paste coding is utterly evil.
+ const char *kernelName;
+ switch (eOp) {
+ case ocMax:
+ case ocMin:
+ ; // FIXME: fallthrough for now
+ case ocAverage:
+ kernelName = "oclAverageDelta";
+ break;
+ default:
+ assert(false);
+ }
+ CheckKernelName(&kEnv,kernelName);
+
+ cl_int clStatus;
+ size_t global_work_size[1];
+
+ kEnv.mpkKernel = clCreateKernel(kEnv.mpkProgram,kernelName, &clStatus);
+
+ // Ugh - horrible redundant copying ...
+ cl_mem valuesCl = allocateDoubleBuffer(kEnv, pOpArray, nElements, &clStatus);
+ cl_mem subtractCl = allocateDoubleBuffer(kEnv, pSubtractSingle, nElements, &clStatus);
+
+ cl_int start = 0;
+ cl_int end = (cl_int) nElements;
+ cl_mem outputCl = clCreateBuffer(kEnv.mpkContext,
+ CL_MEM_READ_WRITE,
+ nElements * sizeof(fp_t),
+ NULL,
+ &clStatus);
+ CHECK_OPENCL(clStatus);
+
+ clStatus = clSetKernelArg(kEnv.mpkKernel, 0, sizeof(cl_mem),
+ (void *)&valuesCl);
+ clStatus = clSetKernelArg(kEnv.mpkKernel, 1, sizeof(cl_mem),
+ (void *)&subtractCl);
+ clStatus = clSetKernelArg(kEnv.mpkKernel, 2, sizeof(cl_mem),
+ (void *)&start);
+ clStatus = clSetKernelArg(kEnv.mpkKernel, 3, sizeof(cl_mem),
+ (void *)&end);
+ clStatus = clSetKernelArg(kEnv.mpkKernel, 4, sizeof(cl_mem),
+ (void *)&outputCl);
+ CHECK_OPENCL(clStatus);
+
+ fprintf(stderr, "prior to enqueue range kernel\n");
+
+ global_work_size[0] = nElements;
+ clStatus = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, kEnv.mpkKernel, 1,
+ NULL, global_work_size, NULL, 0, NULL, NULL);
+ CHECK_OPENCL(clStatus);
+
+ double *pResult = new double[nElements];
+ if(!pResult)
+ return NULL; // leak.
+
+ fp_t *pOutput = (fp_t *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,outputCl,CL_TRUE,
+ CL_MAP_READ,0,nElements*sizeof(fp_t),
+ 0,NULL,NULL,NULL);
+ for(int i = 0; i < nElements; i++)
+ pResult[i] = (double)pOutput[i];
+
+ clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,outputCl,pOutput,0,NULL,NULL);
+
+ clStatus = clFinish(kEnv.mpkCmdQueue);
+ CHECK_OPENCL(clStatus);
+ clStatus = clReleaseKernel(kEnv.mpkKernel);
+ CHECK_OPENCL(clStatus);
+ clStatus = clReleaseMemObject(valuesCl);
+ CHECK_OPENCL(clStatus);
+ clStatus = clReleaseMemObject(subtractCl);
+ CHECK_OPENCL(clStatus);
+ clStatus = clReleaseMemObject(outputCl);
+ CHECK_OPENCL(clStatus);
+
+ fprintf(stderr, "completed opencl delta operation\n");
+
+ return pResult;
+}
+
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sc/source/core/opencl/openclwrapper.hxx b/sc/source/core/opencl/openclwrapper.hxx
index 3e87f84..a0c132a 100755
--- a/sc/source/core/opencl/openclwrapper.hxx
+++ b/sc/source/core/opencl/openclwrapper.hxx
@@ -11,10 +11,8 @@
#define _OPENCL_WRAPPER_H_
#include <config_features.h>
-
-#ifdef __APPLE__
-#include <OpenCL/cl.h>
-#else
+#include <formula/opcode.hxx>
+#include <cassert>
#include <CL/cl.h>
#endif
@@ -212,6 +210,7 @@ public:
int OclHostFormulaMax32Bits(float *fpSrcData,uint *npStartPos,uint *npEndPos,double *output,int outputSize);
int OclHostFormulaMin32Bits(float *fpSrcData,uint *npStartPos,uint *npEndPos,double *output,int outputSize);
int OclHostFormulaAverage32Bits(float *fpSrcData,uint *npStartPos,uint *npEndPos,double *output,int outputSize);
+ double *OclSimpleDeltaOperation(OpCode eOp, const double *pOpArray, const double *pSubtractSingle, size_t nElements);
//int OclHostFormulaCount(int *startPos,int *endPos,float *output,int outputSize);
//int OclHostFormulaSum(float *srcData,int *startPos,int *endPos,float *output,int outputSize);
commit ca05948520906052d8bd608231970873ebf01414
Author: Michael Meeks <michael.meeks at suse.com>
Date: Mon Jul 8 14:17:35 2013 +0100
Add new opencl placeholder backend for specific scenarios.
diff --git a/sc/source/core/data/formulacell.cxx b/sc/source/core/data/formulacell.cxx
index 17653c9..55f5010 100644
--- a/sc/source/core/data/formulacell.cxx
+++ b/sc/source/core/data/formulacell.cxx
@@ -3064,6 +3064,14 @@ public:
bool convert(ScTokenArray& rCode)
{
+ { // debug to start with:
+ ScCompiler aComp( &mrDoc, mrPos, rCode);
+ aComp.SetGrammar(formula::FormulaGrammar::GRAM_NATIVE_XL_R1C1);
+ OUStringBuffer aAsString;
+ aComp.CreateStringFromTokenArray(aAsString);
+ SAL_DEBUG("interpret formula: " << aAsString.makeStringAndClear());
+ }
+
rCode.Reset();
for (const formula::FormulaToken* p = rCode.First(); p; p = rCode.Next())
{
diff --git a/sc/source/core/opencl/formulagroupcl.cxx b/sc/source/core/opencl/formulagroupcl.cxx
index 857f045..d92a471 100755
--- a/sc/source/core/opencl/formulagroupcl.cxx
+++ b/sc/source/core/opencl/formulagroupcl.cxx
@@ -275,10 +275,153 @@ bool FormulaGroupInterpreterOpenCL::interpret(ScDocument& rDoc, const ScAddress&
return true;
}
+/// Special case of formula compiler for groundwatering
+class FormulaGroupInterpreterGroundwater : public FormulaGroupInterpreterSoftware
+{
+public:
+ FormulaGroupInterpreterGroundwater() :
+ FormulaGroupInterpreterSoftware()
+ {
+ fprintf(stderr,"\n\n ***** Groundwater Backend *****\n\n\n");
+ OclCalc::InitEnv();
+ }
+ virtual ~FormulaGroupInterpreterGroundwater()
+ {
+ OclCalc::ReleaseOpenclRunEnv();
+ }
+
+ virtual ScMatrixRef inverseMatrix(const ScMatrix& /* rMat */) { return ScMatrixRef(); }
+ virtual bool interpret(ScDocument& rDoc, const ScAddress& rTopPos,
+ const ScFormulaCellGroupRef& xGroup, ScTokenArray& rCode);
+};
+
+#define RETURN_IF_FAIL(a,b) do { if (!(a)) { fprintf (stderr,b); return false; } } while (0)
+
+#include "compiler.hxx"
+
+// FIXME: really we should compile the formula and operate on the
+// RPN representation which -should- be more compact and have no Open / Close
+// or precedence issues; cf. rCode.FirstRPN() etc.
+bool FormulaGroupInterpreterGroundwater::interpret(ScDocument& rDoc, const ScAddress& rTopPos,
+ const ScFormulaCellGroupRef& xGroup,
+ ScTokenArray& rCode)
+{
+ // Inputs: both of length xGroup->mnLength
+ OpCode eOp; // type of operation: ocAverage, ocMax, ocMin
+ const double *pArrayToSubtractOneElementFrom;
+ const double *pGroundWaterDataArray;
+ size_t nGroundWaterDataArrayLen;
+
+ // Output:
+ double *pResult = new double[xGroup->mnLength];
+ RETURN_IF_FAIL(pResult != NULL, "buffer alloc failed");
+ std::vector<double> aMatrixContent;
+
+ const formula::FormulaToken *p;
+
+ // special cased formula parser:
+
+ p = rCode.FirstNoSpaces();
+ RETURN_IF_FAIL(p != NULL && p->GetOpCode() == ocOpen, "no opening (");
+
+ {
+ p = rCode.NextNoSpaces();
+ RETURN_IF_FAIL(p != NULL, "no operator");
+
+ // Function:
+ eOp = p->GetOpCode();
+ RETURN_IF_FAIL(eOp == ocAverage || eOp == ocMax || eOp == ocMin, "unexpected opcode");
+
+ { // function arguments
+ p = rCode.NextNoSpaces();
+ RETURN_IF_FAIL(p != NULL && p->GetOpCode() == ocOpen, "missing opening (");
+
+ p = rCode.NextNoSpaces();
+ RETURN_IF_FAIL(p != NULL, "no function argument");
+ if (p->GetType() == formula::svDoubleVectorRef)
+ {
+ // FIXME: this is what I would expect; but table1.cxx's
+ // ScColumn::ResolveStaticReference as called from
+ // GroupTokenConverter::convert returns an ScMatrixToken un-conditionally
+ const formula::DoubleVectorRefToken* pDvr = static_cast<const formula::DoubleVectorRefToken*>(p);
+ const std::vector<const double*>& rArrays = pDvr->GetArrays();
+ RETURN_IF_FAIL(rArrays.size() == 1, "unexpectedly large double ref array");
+ RETURN_IF_FAIL(pDvr->GetArrayLength() == (size_t)xGroup->mnLength, "wrong double ref length");
+ RETURN_IF_FAIL(pDvr->IsStartFixed() && pDvr->IsEndFixed(), "non-fixed ranges )");
+ pGroundWaterDataArray = rArrays[0];
+ nGroundWaterDataArrayLen = xGroup->mnLength;
+ }
+ else
+ {
+ RETURN_IF_FAIL(p->GetType() == formula::svMatrix, "unexpected fn. param type");
+ const ScMatrixToken *pMatTok = static_cast<const ScMatrixToken *>(p);
+ pMatTok->GetMatrix()->GetDoubleArray( aMatrixContent );
+ // FIXME: horrible hackery: the legacy / excel shared formula oddness,
+ // such that the 1st entry is not truly shared, making these a different
+ // shape.
+ if (aMatrixContent.size() > (size_t)xGroup->mnLength + 1)
+ {
+ fprintf(stderr, "Error size range mismatch: %ld vs %ld\n",
+ (long)aMatrixContent.size(), (long)xGroup->mnLength);
+ return false;
+ }
+ pGroundWaterDataArray = &aMatrixContent[0];
+ nGroundWaterDataArrayLen = aMatrixContent.size();
+ }
+
+ p = rCode.NextNoSpaces();
+ RETURN_IF_FAIL(p != NULL && p->GetOpCode() == ocClose, "missing closing )");
+ }
+
+ // Subtract operator
+ p = rCode.NextNoSpaces();
+ RETURN_IF_FAIL(p != NULL && p->GetOpCode() == ocSub, "missing subtract opcode");
+
+ { // subtract parameter
+ p = rCode.NextNoSpaces();
+ RETURN_IF_FAIL(p != NULL, "no tokens");
+ RETURN_IF_FAIL(p->GetType() == formula::svSingleVectorRef, "not a single ref");
+ const formula::SingleVectorRefToken* pSvr = static_cast<const formula::SingleVectorRefToken*>(p);
+ pArrayToSubtractOneElementFrom = pSvr->GetArray();
+ RETURN_IF_FAIL(pSvr->GetArrayLength() == (size_t)xGroup->mnLength, "wrong single ref length");
+ }
+
+ p = rCode.NextNoSpaces();
+ RETURN_IF_FAIL(p != NULL && p->GetOpCode() == ocClose, "missing closing )");
+ }
+
+ p = rCode.NextNoSpaces();
+ RETURN_IF_FAIL(p == NULL, "has 5th");
+
+ static OclCalc ocl_calc;
+
+ // Here we have all the data we need to dispatch our openCL kernel [ I hope ]
+ // so for:
+ // =AVERAGE(L$6:L$7701) - L6
+ // we would get:
+ // eOp => ocAverage
+ // pGroundWaterDataArray => contains L$6:L$7701
+ // pGroundWaterDataArrayLen => 7701 - 6 + 1
+ // pArrayToSubtractOneElementFrom => contains L$5:L$7701 (overlapping)
+ // length of this array -> xGroup->mnLength
+
+ fprintf (stderr, "Calculate !\n");
+
+ // Insert the double data, in rResult[i] back into the document
+ rDoc.SetFormulaResults(rTopPos, pResult, xGroup->mnLength);
+
+ delete [] pResult;
+ SAL_DEBUG ("exit cleanly !");
+ return true;
+}
+
namespace opencl {
sc::FormulaGroupInterpreter *createFormulaGroupInterpreter()
{
- return new sc::FormulaGroupInterpreterOpenCL();
+ if (getenv("SC_GROUNDWATER"))
+ return new sc::FormulaGroupInterpreterGroundwater();
+ else
+ return new sc::FormulaGroupInterpreterOpenCL();
}
} // namespace opencl
commit b13900ec67eca34307d8294a874b5946bffa110d
Author: Michael Meeks <michael.meeks at suse.com>
Date: Mon Jul 8 11:57:39 2013 +0100
avoid regular re-creation of the formulagroup interpreter.
diff --git a/sc/source/core/tool/formulagroup.cxx b/sc/source/core/tool/formulagroup.cxx
index 221a768..627c5f5 100644
--- a/sc/source/core/tool/formulagroup.cxx
+++ b/sc/source/core/tool/formulagroup.cxx
@@ -172,6 +172,7 @@ FormulaGroupInterpreter *FormulaGroupInterpreter::getStatic()
if ( msInstance &&
bOpenCLEnabled != ScInterpreter::GetGlobalConfig().mbOpenCLEnabled )
{
+ bOpenCLEnabled = ScInterpreter::GetGlobalConfig().mbOpenCLEnabled;
delete msInstance;
msInstance = NULL;
}
commit 65dcb6eba8009fef722a5cd714088107d6d37016
Author: Michael Meeks <michael.meeks at suse.com>
Date: Mon Jul 8 10:49:05 2013 +0100
Latest cleanup and improvements of opencl backend.
Conflicts:
sc/source/core/opencl/openclwrapper.cxx
Change-Id: I3fdc90570e90a156ccecb511fc04b473752018bd
diff --git a/sc/source/core/opencl/formulagroupcl.cxx b/sc/source/core/opencl/formulagroupcl.cxx
old mode 100644
new mode 100755
index 6a96129..857f045
--- a/sc/source/core/opencl/formulagroupcl.cxx
+++ b/sc/source/core/opencl/formulagroupcl.cxx
@@ -65,35 +65,32 @@ ScMatrixRef FormulaGroupInterpreterOpenCL::inverseMatrix(const ScMatrix& /* rMat
bool FormulaGroupInterpreterOpenCL::interpret(ScDocument& rDoc, const ScAddress& rTopPos,
const ScFormulaCellGroupRef& xGroup, ScTokenArray& rCode)
{
- size_t rowSize = xGroup->mnLength; //, srcSize = 0;
+ size_t rowSize = xGroup->mnLength;
fprintf(stderr,"rowSize at begin is ...%ld.\n",(long)rowSize);
- int *rangeStart =NULL; // The first position for calculation,for example,the A1 in (=MAX(A1:A100))
- int *rangeEnd = NULL; // The last position for calculation,for example, the A100 in (=MAX(A1:A100))
// The row quantity can be gotten from p2->GetArrayLength()
- int count1 =0,count2 =0,count3=0;
- int oclOp=0;
- double *srcData = NULL; // Point to the input data from CPU
- double *rResult=NULL; // Point to the output data from GPU
- double *leftData=NULL; // Left input for binary operator(+,-,*,/),for example,(=leftData+rightData)
- double *rightData=NULL; // Right input for binary operator(+,-,*,/),for example,(=leftData/rightData)
- // The rightData can't be zero for "/"
-
- leftData = (double *)malloc(sizeof(double) * rowSize);
- rightData = (double *)malloc(sizeof(double) * rowSize);
- rResult = (double *)malloc(sizeof(double) * rowSize*2);// For 2 columns(B,C)
- srcData = (double *)calloc(rowSize,sizeof(double));
-
- rangeStart =(int *)malloc(sizeof(int) * rowSize);
- rangeEnd =(int *)malloc(sizeof(int) * rowSize);
-
- memset(rResult,0,rowSize);
- if(NULL==leftData||NULL==rightData||
- NULL==rResult||NULL==rangeStart||NULL==rangeEnd)
+ int nCount1 = 0, nCount2 = 0, nCount3 = 0;
+ int nOclOp = 0;
+ double *rResult = NULL; // Point to the output data from GPU
+ rResult = (double *)malloc(sizeof(double) * rowSize*2);// For 2 columns(B,C)
+ if(NULL==rResult)
{
printf("malloc err\n");
return false;
}
- // printf("rowSize is %d.\n",rowsize);
+ memset(rResult,0,rowSize);
+ float * fpOclSrcData = NULL; // Point to the input data from CPU
+ uint * npOclStartPos = NULL; // The first position for calculation,for example,the A1 in (=MAX(A1:A100))
+ uint * npOclEndPos = NULL; // The last position for calculation,for example, the A100 in (=MAX(A1:A100))
+ float * fpLeftData = NULL; // Left input for binary operator(+,-,*,/),for example,(=leftData+rightData)
+ float * fpRightData = NULL; // Right input for binary operator(+,-,*,/),for example,(=leftData/rightData)
+ // The rightData can't be zero for "/"
+ static OclCalc ocl_calc;
+ // Don't know how large the size will be applied previously, so create them as the rowSize or 65536
+ // Don't know which formulae will be used previously, so create buffers for different formulae used probably
+ ocl_calc.CreateBuffer(fpOclSrcData,npOclStartPos,npOclEndPos,rowSize);
+ ocl_calc.CreateBuffer(fpLeftData,fpRightData,rowSize);
+ //printf("pptrr is %d,%d,%d\n",fpOclSrcData,npOclStartPos,npOclEndPos);
+///////////////////////////////////////////////////////////////////////////////////////////
// Until we implement group calculation for real, decompose the group into
// individual formula token arrays for individual calculation.
@@ -125,26 +122,23 @@ bool FormulaGroupInterpreterOpenCL::interpret(ScDocument& rDoc, const ScAddress&
size_t nRowSize = nRowEnd - nRowStart + 1;
ScMatrixRef pMat(new ScMatrix(nColSize, nRowSize, 0.0));
- //srcSize = rowSize+nRowSize-rowSize%nRowSize;//align as nRowSize
- //srcData = (double *)calloc(srcSize,sizeof(double));
- rangeStart[i] = nRowStart;//record the start position
- rangeEnd[i] = nRowEnd;//record the end position
+ npOclStartPos[i] = nRowStart; // record the start position
+ npOclEndPos[i] = nRowEnd; // record the end position
for (size_t nCol = 0; nCol < nColSize; ++nCol)
{
const double* pArray = rArrays[nCol];
-
- //printf("pArray is %p.\n",pArray);
if( NULL==pArray )
{
fprintf(stderr,"Error: pArray is NULL!\n");
return false;
}
- //fprintf(stderr,"(rowSize+nRowSize-1) is %d.\n",rowSize+nRowSize-1);
+
for( size_t u=0; u<rowSize; u++ )
{
- srcData[u] = pArray[u];// note:rowSize<=srcSize
- //fprintf(stderr,"srcData[%d] is %f.\n",u,srcData[u]);
+ // Many video cards can't support double type in kernel, so need transfer the double to float
+ fpOclSrcData[u] = (float)pArray[u];
+ //fprintf(stderr,"fpOclSrcData[%d] is %f.\n",u,fpOclSrcData[u]);
}
for (size_t nRow = 0; nRow < nRowSize; ++nRow)
@@ -177,26 +171,26 @@ bool FormulaGroupInterpreterOpenCL::interpret(ScDocument& rDoc, const ScAddress&
OpCode eOp = pCur->GetOpCode();
if(eOp==0)
{
- if(count3%2==0)
- leftData[count1++] = pCur->GetDouble();
- else
- rightData[count2++] = pCur->GetDouble();
- count3++;
- }
- else if( eOp!=ocOpen && eOp!=ocClose )
- oclOp = eOp;
-
-// if(count1>0){//dbg
-// fprintf(stderr,"leftData is %f.\n",leftData[count1-1]);
-// count1--;
-// }
-// if(count2>0){//dbg
-// fprintf(stderr,"rightData is %f.\n",rightData[count2-1]);
-// count2--;
-// }
+ if(nCount3%2==0)
+ fpLeftData[nCount1++] = (float)pCur->GetDouble();
+ else
+ fpRightData[nCount2++] = (float)pCur->GetDouble();
+ nCount3++;
+ }
+ else if( eOp!=ocOpen && eOp!=ocClose )
+ nOclOp = eOp;
+
+// if(count1>0){//dbg
+// fprintf(stderr,"leftData is %f.\n",leftData[count1-1]);
+// count1--;
+// }
+// if(count2>0){//dbg
+// fprintf(stderr,"rightData is %f.\n",rightData[count2-1]);
+// count2--;
+// }
}
- if(!getenv("SC_GPU"))
+ if(!getenv("SC_GPU")||!ocl_calc.GetOpenclState())
{
fprintf(stderr,"ccCPU flow...\n\n");
ScCompiler aComp(&rDoc, aTmpPos, aCode2);
@@ -211,34 +205,42 @@ bool FormulaGroupInterpreterOpenCL::interpret(ScDocument& rDoc, const ScAddress&
} // for loop end (xGroup->mnLength)
// For GPU calculation
- if(getenv("SC_GPU"))
+ if(getenv("SC_GPU")&&ocl_calc.GetOpenclState())
{
fprintf(stderr,"ggGPU flow...\n\n");
- printf(" oclOp is... %d\n",oclOp);
+ printf(" oclOp is... %d\n",nOclOp);
osl_getSystemTime(&aTimeBefore); //timer
- static OclCalc ocl_calc;
- switch(oclOp)
+ switch(nOclOp)
{
case ocAdd:
- ocl_calc.OclHostSignedAdd(leftData,rightData,rResult,count1);
+ ocl_calc.OclHostSignedAdd32Bits(fpLeftData,fpRightData,rResult,nCount1);
break;
case ocSub:
- ocl_calc.OclHostSignedSub(leftData,rightData,rResult,count1);
+ ocl_calc.OclHostSignedSub32Bits(fpLeftData,fpRightData,rResult,nCount1);
break;
case ocMul:
- ocl_calc.OclHostSignedMul(leftData,rightData,rResult,count1);
+ ocl_calc.OclHostSignedMul32Bits(fpLeftData,fpRightData,rResult,nCount1);
break;
case ocDiv:
- ocl_calc.OclHostSignedDiv(leftData,rightData,rResult,count1);
+ ocl_calc.OclHostSignedDiv32Bits(fpLeftData,fpRightData,rResult,nCount1);
break;
case ocMax:
- ocl_calc.OclHostFormulaMax(srcData,rangeStart,rangeEnd,rResult,rowSize);
+ ocl_calc.OclHostFormulaMax32Bits(fpOclSrcData,npOclStartPos,npOclEndPos,rResult,rowSize);
break;
case ocMin:
- ocl_calc.OclHostFormulaMin(srcData,rangeStart,rangeEnd,rResult,rowSize);
+ ocl_calc.OclHostFormulaMin32Bits(fpOclSrcData,npOclStartPos,npOclEndPos,rResult,rowSize);
break;
case ocAverage:
- ocl_calc.OclHostFormulaAverage(srcData,rangeStart,rangeEnd,rResult,rowSize);
+ ocl_calc.OclHostFormulaAverage32Bits(fpOclSrcData,npOclStartPos,npOclEndPos,rResult,rowSize);
+ break;
+ case ocSum:
+ //ocl_calc.OclHostFormulaSum(srcData,rangeStart,rangeEnd,rResult,rowSize);
+ break;
+ case ocCount:
+ //ocl_calc.OclHostFormulaCount(rangeStart,rangeEnd,rResult,rowSize);
+ break;
+ case ocSumProduct:
+ //ocl_calc.OclHostFormulaSumProduct(srcData,rangeStart,rangeEnd,rResult,rowSize);
break;
default:
fprintf(stderr,"No OpenCL function for this calculation.\n");
@@ -254,26 +256,16 @@ bool FormulaGroupInterpreterOpenCL::interpret(ScDocument& rDoc, const ScAddress&
/////////////////////////////////////////////////////
//rResult[i];
-// for(sal_Int32 i = 0; i < rowSize; ++i){//dbg output results
-// fprintf(stderr,"After GPU,rRsults[%d] is ...%f\n",i,rResult[i]);
-// }
+// for(sal_Int32 i = 0; i < rowSize; ++i){//dbg output results
+// fprintf(stderr,"After GPU,rRsults[%d] is ...%f\n",i,rResult[i]);
+// }
// Insert the double data, in rResult[i] back into the document
rDoc.SetFormulaResults(rTopPos, rResult, xGroup->mnLength);
}
- if(leftData)
- free(leftData);
- if(rightData)
- free(rightData);
- if(rangeStart)
- free(rangeStart);
- if(rangeEnd)
- free(rangeEnd);
if(rResult)
free(rResult);
- if(srcData)
- free(srcData);
if(getenv("SC_GPUSAMPLE")){
//fprintf(stderr,"FormulaGroupInterpreter::interpret(),iniflag...%d\n",ocl_calc.GetOpenclState());
diff --git a/sc/source/core/opencl/oclkernels.hxx b/sc/source/core/opencl/oclkernels.hxx
old mode 100644
new mode 100755
index 3269f3a..6c90126
--- a/sc/source/core/opencl/oclkernels.hxx
+++ b/sc/source/core/opencl/oclkernels.hxx
@@ -6,153 +6,158 @@
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/
+
#ifndef _OCL_KERNEL_H_
#define _OCL_KERNEL_H_
#ifndef USE_EXTERNAL_KERNEL
#define KERNEL( ... )# __VA_ARGS__
-
/////////////////////////////////////////////
const char *kernel_src = KERNEL(
__kernel void hello(__global uint *buffer)
{
-size_t idx = get_global_id(0);
-
-buffer[idx]=idx;
-
+ size_t idx = get_global_id(0);
+ buffer[idx]=idx;
}
__kernel void oclformula(__global float *data,
- const uint type)
+ const uint type)
{
- const unsigned int i = get_global_id(0);
-
- switch (type)
- {
- case 0: //MAX
- {
- //printf("%i ?%f>%f\n",i,data[2*i],data[2*i+1]);
- if(data[2*i]>data[2*i+1])
- data[i] = data[2*i];
- else
- data[i] = data[2*i+1];
- break;
- }
- case 1: //MIN
- {
- //printf("%d ?%d<%d\n",i,data[2*i],data[2*i+1]);
- if(data[2*i]<data[2*i+1])
- data[i] = data[2*i];
- else
- data[i] = data[2*i+1];
- break;
- }
- case 2: //SUM
- case 3: //AVG
- {
- //printf("%d %d+%d\n",i,data[2*i],data[2*i+1]);
- data[i] = data[2*i] + data[2*i+1];
- break;
- }
- default:
- break;
-
- }
+ const unsigned int i = get_global_id(0);
+
+ switch (type)
+ {
+ case 0: //MAX
+ {
+ //printf("%i ?%f>%f\n",i,data[2*i],data[2*i+1]);
+ if(data[2*i]>data[2*i+1])
+ data[i] = data[2*i];
+ else
+ data[i] = data[2*i+1];
+ break;
+ }
+ case 1: //MIN
+ {
+ //printf("%d ?%d<%d\n",i,data[2*i],data[2*i+1]);
+ if(data[2*i]<data[2*i+1])
+ data[i] = data[2*i];
+ else
+ data[i] = data[2*i+1];
+ break;
+ }
+ case 2: //SUM
+ case 3: //AVG
+ {
+ //printf("%d %d+%d\n",i,data[2*i],data[2*i+1]);
+ data[i] = data[2*i] + data[2*i+1];
+ break;
+ }
+ default:
+ break;
+
+ }
}
__kernel void oclSignedAdd(__global float *ltData,__global float *rtData,__global float *otData)
{
- const unsigned int id = get_global_id(0);
- otData[id] = ltData[id] + rtData[id];
+ const unsigned int id = get_global_id(0);
+ otData[id] = ltData[id] + rtData[id];
}
__kernel void oclSignedSub(__global float *ltData,__global float *rtData,__global float *otData)
{
- const unsigned int id = get_global_id(0);
- otData[id] = ltData[id] - rtData[id];
+ const unsigned int id = get_global_id(0);
+ otData[id] = ltData[id] - rtData[id];
}
__kernel void oclSignedMul(__global float *ltData,__global float *rtData,__global float *otData)
{
- int id = get_global_id(0);
- otData[id] =ltData[id] * rtData[id];
+ int id = get_global_id(0);
+ otData[id] =ltData[id] * rtData[id];
}
__kernel void oclSignedDiv(__global float *ltData,__global float *rtData,__global float *otData)
{
- const unsigned int id = get_global_id(0);
- otData[id] = ltData[id] / rtData[id];
+ const unsigned int id = get_global_id(0);
+ otData[id] = ltData[id] / rtData[id];
}
__kernel void oclFormulaMin(__global float *input,__global int *start,__global int *end,__global float *output)
{
- const unsigned int id = get_global_id(0);
- int i=0;
- unsigned int startFlag = start[id];
- unsigned int endFlag = end[id];
- float min = input[startFlag];
- for(i=startFlag;i<=endFlag;i++)
- {
- if(input[i]<min)
- min = input[i];
- }
- output[id] = min;
+ const unsigned int id = get_global_id(0);
+ int i=0;
+ unsigned int startFlag = start[id];
+ unsigned int endFlag = end[id];
+ float min = input[startFlag];
+ for(i=startFlag;i<=endFlag;i++)
+ {
+ if(input[i]<min)
+ min = input[i];
+ }
+ output[id] = min;
}
__kernel void oclFormulaMax(__global float *input,__global int *start,__global int *end,__global float *output)
{
- const unsigned int id = get_global_id(0);
- int i=0;
- unsigned int startFlag = start[id];
- unsigned int endFlag = end[id];
- float max = input[startFlag];
- for(i=startFlag;i<=endFlag;i++)
- {
- if(input[i]>max)
- max = input[i];
- }
- output[id] = max;
+ const unsigned int id = get_global_id(0);
+ int i=0;
+ unsigned int startFlag = start[id];
+ unsigned int endFlag = end[id];
+ float max = input[startFlag];
+ for(i=startFlag;i<=endFlag;i++)
+ {
+ if(input[i]>max)
+ max = input[i];
+ }
+ output[id] = max;
}
-
-__kernel void oclFormulaSum(__global float *data,
- const uint type)
+//Sum
+__kernel void oclFormulaSum(__global float *input,__global int *start,__global int *end,__global float *output)
{
-
+ const unsigned int nId = get_global_id(0);
+ float fSum = 0.0f;
+ for(int i = start[nId]; i<=end[nId]; i++)
+ fSum += input[i];
+ output[nId] = fSum ;
}
-
-__kernel void oclFormulaCount(__global float *data,
- const uint type)
+//Count
+__kernel void oclFormulaCount(__global int *start,__global int *end,__global float *output)
{
-
+ const unsigned int nId = get_global_id(0);
+ output[nId] = end[nId] - start[nId] + 1;
}
__kernel void oclFormulaAverage(__global float *input,__global int *start,__global int *end,__global float *output)
{
- const unsigned int id = get_global_id(0);
- int i=0;
- float sum=0;
- for(i = start[id];i<=end[id];i++)
- sum += input[i];
- output[id] = sum / (end[id]-start[id]+1);
+ const unsigned int id = get_global_id(0);
+ int i=0;
+ float sum=0;
+ for(i = start[id];i<=end[id];i++)
+ sum += input[i];
+ output[id] = sum / (end[id]-start[id]+1);
}
-
-__kernel void oclFormulaSumproduct(__global float *data,
- const uint type)
+//Sumproduct
+__kernel void oclFormulaSumproduct(__global float *firstCol,__global float *secondCol,__global int *start,__global int *end,__global float *output)
{
-
+ const int nId = get_global_id(0);
+ int nCount = start[nId] - end[nId] + 1;
+ int nStartA = start[nId*2];
+ int nStartB = start[nId*2+1];
+ for(int i = 0; i<nCount; i++)
+ output[nId] += firstCol[nStartA+i]*secondCol[nStartB+i];
}
__kernel void oclFormulaMinverse(__global float *data,
- const uint type)
+ const uint type)
{
}
diff --git a/sc/source/core/opencl/openclwrapper.cxx b/sc/source/core/opencl/openclwrapper.cxx
old mode 100644
new mode 100755
index aa3ce69..597f370
--- a/sc/source/core/opencl/openclwrapper.cxx
+++ b/sc/source/core/opencl/openclwrapper.cxx
@@ -22,6 +22,7 @@
GPUEnv OpenclDevice::gpuEnv;
int OpenclDevice::isInited =0;
+
#ifdef SAL_WIN32
#define OPENCL_DLL_NAME "opencllo.dll"
@@ -32,62 +33,62 @@ HINSTANCE HOpenclDll = NULL;
int OpenclDevice::LoadOpencl()
{
- //fprintf(stderr, " LoadOpenclDllxx... \n");
- OpenclDll = static_cast<HINSTANCE>(HOpenclDll);
- OpenclDll = LoadLibrary(OPENCL_DLL_NAME);
- if (!static_cast<HINSTANCE>(OpenclDll))
- {
- fprintf(stderr, " Load opencllo.dll failed! \n");
- FreeLibrary(static_cast<HINSTANCE>(OpenclDll));
- return OCLERR;
- }
- fprintf(stderr, " Load opencllo.dll successfully!\n");
- return OCLSUCCESS;
+ //fprintf(stderr, " LoadOpenclDllxx... \n");
+ OpenclDll = static_cast<HINSTANCE>(HOpenclDll);
+ OpenclDll = LoadLibrary(OPENCL_DLL_NAME);
+ if (!static_cast<HINSTANCE>(OpenclDll))
+ {
+ fprintf(stderr, " Load opencllo.dll failed! \n");
+ FreeLibrary(static_cast<HINSTANCE>(OpenclDll));
+ return OCLERR;
+ }
+ fprintf(stderr, " Load opencllo.dll successfully!\n");
+ return OCLSUCCESS;
}
void OpenclDevice::FreeOpenclDll()
{
- fprintf(stderr, " Free opencllo.dll ... \n");
- if(!static_cast<HINSTANCE>(OpenclDll))
- FreeLibrary(static_cast<HINSTANCE>(OpenclDll));
+ fprintf(stderr, " Free opencllo.dll ... \n");
+ if(!static_cast<HINSTANCE>(OpenclDll))
+ FreeLibrary(static_cast<HINSTANCE>(OpenclDll));
}
#endif
int OpenclDevice::InitEnv()
{
#ifdef SAL_WIN32
- while(1)
+ while(1)
{
- if(1==LoadOpencl())
- break;
- }
+ if(1==LoadOpencl())
+ break;
+ }
#endif
- InitOpenclRunEnv(0,NULL);
- return 1;
+ InitOpenclRunEnv(0,NULL);
+ return 1;
}
int OpenclDevice::ReleaseOpenclRunEnv() {
- ReleaseOpenclEnv(&gpuEnv);
+ ReleaseOpenclEnv(&gpuEnv);
#ifdef SAL_WIN32
- FreeOpenclDll();
+ FreeOpenclDll();
#endif
return 1;
}
///////////////////////////////////////////////////////
///////////////////////////////////////////////////////
inline int OpenclDevice::AddKernelConfig(int kCount, const char *kName) {
- strcpy(gpuEnv.kernelNames[kCount], kName);
- gpuEnv.kernelCount++;
+ strcpy(gpuEnv.mArrykernelNames[kCount], kName);
+ gpuEnv.mnKernelCount++;
return 0;
}
int OpenclDevice::RegistOpenclKernel() {
- if (!gpuEnv.isUserCreated) {
+ if (!gpuEnv.mnIsUserCreated) {
memset(&gpuEnv, 0, sizeof(gpuEnv));
}
- gpuEnv.fileCount = 0; //argc;
- gpuEnv.kernelCount = 0UL;
+ gpuEnv.mnFileCount = 0; //argc;
+ gpuEnv.mnKernelCount = 0UL;
AddKernelConfig(0, (const char*) "hello");
AddKernelConfig(1, (const char*) "oclformula");
@@ -99,34 +100,39 @@ int OpenclDevice::RegistOpenclKernel() {
AddKernelConfig(7, (const char*) "oclFormulaSumproduct");
AddKernelConfig(8, (const char*) "oclFormulaMinverse");
- AddKernelConfig(9, (const char*) "oclSignedAdd");
+ AddKernelConfig(9, (const char*) "oclSignedAdd");
AddKernelConfig(10, (const char*) "oclSignedSub");
AddKernelConfig(11, (const char*) "oclSignedMul");
AddKernelConfig(12, (const char*) "oclSignedDiv");
- return 0;
+ return 0;
}
OpenclDevice::OpenclDevice(){
- //InitEnv();
+ //InitEnv();
}
OpenclDevice::~OpenclDevice() {
- //ReleaseOpenclRunEnv();
+ //ReleaseOpenclRunEnv();
}
+int OpenclDevice::SetKernelEnv(KernelEnv *envInfo)
+{
+ envInfo->mpkContext = gpuEnv.mpContext;
+ envInfo->mpkCmdQueue = gpuEnv.mpCmdQueue;
+ envInfo->mpkProgram = gpuEnv.mpArryPrograms[0];
+
+ return 1;
+}
int OpenclDevice::CheckKernelName(KernelEnv *envInfo,const char *kernelName){
//printf("CheckKernelName,total count of kernels...%d\n", gpuEnv.kernelCount);
int kCount;
- for(kCount=0; kCount < gpuEnv.kernelCount; kCount++) {
- if(strcasecmp(kernelName, gpuEnv.kernelNames[kCount]) == 0) {
- printf("match %s kernel right\n",kernelName);
- break;
+ for(kCount=0; kCount < gpuEnv.mnKernelCount; kCount++) {
+ if(strcasecmp(kernelName, gpuEnv.mArrykernelNames[kCount]) == 0) {
+ printf("match %s kernel right\n",kernelName);
+ break;
}
}
- envInfo->context = gpuEnv.context;
- envInfo->commandQueue = gpuEnv.commandQueue;
- envInfo->program = gpuEnv.programs[0];
- envInfo->kernel = gpuEnv.kernels[kCount];
- strcpy(envInfo->kernelName, kernelName);
+ envInfo->mpkKernel = gpuEnv.mpArryKernels[kCount];
+ strcpy(envInfo->mckKernelName, kernelName);
if (envInfo == (KernelEnv *) NULL)
{
printf("get err func and env\n");
@@ -145,7 +151,7 @@ int OpenclDevice::ConvertToString(const char *filename, char **source) {
printf("open kernel file %s.\n",filename);
if (file != NULL) {
- printf("Open ok!\n");
+ printf("Open ok!\n");
fseek(file, 0, SEEK_END);
file_size = ftell(file);
@@ -169,35 +175,35 @@ int OpenclDevice::ConvertToString(const char *filename, char **source) {
}
int OpenclDevice::BinaryGenerated(const char * clFileName, FILE ** fhandle) {
- unsigned int i = 0;
- cl_int status;
- char *str = NULL;
- FILE *fd = NULL;
- cl_uint numDevices=0;
- status = clGetDeviceIDs(gpuEnv.platform, // platform
- CL_DEVICE_TYPE_ALL, // device_type
- 0, // num_entries
- NULL, // devices
- &numDevices);
- for (i = 0; i <numDevices; i++) {
- char fileName[256] = { 0 }, cl_name[128] = { 0 };
- if (gpuEnv.devices[i] != 0) {
- char deviceName[1024];
- status = clGetDeviceInfo(gpuEnv.devices[i], CL_DEVICE_NAME,sizeof(deviceName), deviceName, NULL);
- CHECK_OPENCL(status);
- str = (char*) strstr(clFileName, (char*) ".cl");
- memcpy(cl_name, clFileName, str - clFileName);
- cl_name[str - clFileName] = '\0';
- sprintf(fileName, "./%s-%s.bin", cl_name, deviceName);
- fd = fopen(fileName, "rb");
- status = (fd != NULL) ? 1 : 0;
- }
- }
- if (fd != NULL) {
- *fhandle = fd;
- }
-
- return status;
+ unsigned int i = 0;
+ cl_int status;
+ char *str = NULL;
+ FILE *fd = NULL;
+ cl_uint numDevices=0;
+ status = clGetDeviceIDs(gpuEnv.mpPlatformID, // platform
+ CL_DEVICE_TYPE_GPU, // device_type
+ 0, // num_entries
+ NULL, // devices ID
+ &numDevices);
+ for (i = 0; i <numDevices; i++) {
+ char fileName[256] = { 0 }, cl_name[128] = { 0 };
+ if (gpuEnv.mpArryDevsID[i] != 0) {
+ char deviceName[1024];
+ status = clGetDeviceInfo(gpuEnv.mpArryDevsID[i], CL_DEVICE_NAME,sizeof(deviceName), deviceName, NULL);
+ CHECK_OPENCL(status);
+ str = (char*) strstr(clFileName, (char*) ".cl");
+ memcpy(cl_name, clFileName, str - clFileName);
+ cl_name[str - clFileName] = '\0';
+ sprintf(fileName, "./%s-%s.bin", cl_name, deviceName);
+ fd = fopen(fileName, "rb");
+ status = (fd != NULL) ? 1 : 0;
+ }
+ }
+ if (fd != NULL) {
+ *fhandle = fd;
+ }
+
+ return status;
}
@@ -220,22 +226,21 @@ int OpenclDevice::GeneratBinFromKernelSource(cl_program program,
const char * clFileName) {
unsigned int i = 0;
cl_int status;
- size_t *binarySizes;
- cl_uint numDevices;
- cl_device_id *devices;
+ size_t *binarySizes, numDevices;
+ cl_device_id *mpArryDevsID;
char **binaries, *str = NULL;
status = clGetProgramInfo(program, CL_PROGRAM_NUM_DEVICES,
sizeof(numDevices), &numDevices, NULL);
CHECK_OPENCL(status)
- devices = (cl_device_id*) malloc(sizeof(cl_device_id) * numDevices);
- if (devices == NULL) {
+ mpArryDevsID = (cl_device_id*) malloc(sizeof(cl_device_id) * numDevices);
+ if (mpArryDevsID == NULL) {
return 0;
}
/* grab the handles to all of the devices in the program. */
status = clGetProgramInfo(program, CL_PROGRAM_DEVICES,
- sizeof(cl_device_id) * numDevices, devices, NULL);
+ sizeof(cl_device_id) * numDevices, mpArryDevsID, NULL);
CHECK_OPENCL(status)
/* figure out the sizes of each of the binaries. */
@@ -272,7 +277,7 @@ int OpenclDevice::GeneratBinFromKernelSource(cl_program program,
if (binarySizes[i] != 0) {
char deviceName[1024];
- status = clGetDeviceInfo(devices[i], CL_DEVICE_NAME,
+ status = clGetDeviceInfo(mpArryDevsID[i], CL_DEVICE_NAME,
sizeof(deviceName), deviceName, NULL);
CHECK_OPENCL(status)
@@ -307,24 +312,24 @@ int OpenclDevice::GeneratBinFromKernelSource(cl_program program,
binarySizes = NULL;
}
- if (devices != NULL) {
- free(devices);
- devices = NULL;
+ if (mpArryDevsID != NULL) {
+ free(mpArryDevsID);
+ mpArryDevsID = NULL;
}
return 1;
}
int OpenclDevice::InitOpenclAttr(OpenCLEnv * env) {
- if (gpuEnv.isUserCreated) {
+ if (gpuEnv.mnIsUserCreated) {
return 1;
}
- gpuEnv.context = env->context;
- gpuEnv.platform = env->platform;
- gpuEnv.dev = env->devices;
- gpuEnv.commandQueue = env->commandQueue;
+ gpuEnv.mpContext = env->mpOclContext;
+ gpuEnv.mpPlatformID = env->mpOclPlatformID;
+ gpuEnv.mpDevID = env->mpOclDevsID;
+ gpuEnv.mpCmdQueue = env->mpOclCmdQueue;
- gpuEnv.isUserCreated = 1;
+ gpuEnv.mnIsUserCreated = 1;
return 0;
}
@@ -332,14 +337,14 @@ int OpenclDevice::InitOpenclAttr(OpenCLEnv * env) {
int OpenclDevice::CreateKernel(char * kernelname, KernelEnv * env) {
int status;
- env->kernel = clCreateKernel(gpuEnv.programs[0], kernelname, &status);
- env->context = gpuEnv.context;
- env->commandQueue = gpuEnv.commandQueue;
+ env->mpkKernel = clCreateKernel(gpuEnv.mpArryPrograms[0], kernelname, &status);
+ env->mpkContext = gpuEnv.mpContext;
+ env->mpkCmdQueue = gpuEnv.mpCmdQueue;
return status != CL_SUCCESS ? 1 : 0;
}
int OpenclDevice::ReleaseKernel(KernelEnv * env) {
- int status = clReleaseKernel(env->kernel);
+ int status = clReleaseKernel(env->mpkKernel);
return status != CL_SUCCESS ? 1 : 0;
}
@@ -351,24 +356,24 @@ int OpenclDevice::ReleaseOpenclEnv(GPUEnv *gpuInfo) {
return 1;
}
- for (i = 0; i < gpuEnv.fileCount; i++) {
- if (gpuEnv.programs[i]) {
- status = clReleaseProgram(gpuEnv.programs[i]);
+ for (i = 0; i < gpuEnv.mnFileCount; i++) {
+ if (gpuEnv.mpArryPrograms[i]) {
+ status = clReleaseProgram(gpuEnv.mpArryPrograms[i]);
CHECK_OPENCL(status)
- gpuEnv.programs[i] = NULL;
+ gpuEnv.mpArryPrograms[i] = NULL;
}
}
- if (gpuEnv.commandQueue) {
- clReleaseCommandQueue(gpuEnv.commandQueue);
- gpuEnv.commandQueue = NULL;
+ if (gpuEnv.mpCmdQueue) {
+ clReleaseCommandQueue(gpuEnv.mpCmdQueue);
+ gpuEnv.mpCmdQueue = NULL;
}
- if (gpuEnv.context) {
- clReleaseContext(gpuEnv.context);
- gpuEnv.context = NULL;
+ if (gpuEnv.mpContext) {
+ clReleaseContext(gpuEnv.mpContext);
+ gpuEnv.mpContext = NULL;
}
isInited = 0;
- gpuInfo->isUserCreated = 0;
- free(gpuInfo->devices);
+ gpuInfo->mnIsUserCreated = 0;
+ free(gpuInfo->mpArryDevsID);
return 1;
}
@@ -386,9 +391,9 @@ int OpenclDevice::RunKernelWrapper(cl_kernel_function function,
int OpenclDevice::CachedOfKernerPrg(const GPUEnv *gpuEnvCached,
const char * clFileName) {
int i;
- for (i = 0; i < gpuEnvCached->fileCount; i++) {
- if (strcasecmp(gpuEnvCached->kernelSrcFile[i], clFileName) == 0) {
- if (gpuEnvCached->programs[i] != NULL) {
+ for (i = 0; i < gpuEnvCached->mnFileCount; i++) {
+ if (strcasecmp(gpuEnvCached->mArryKnelSrcFile[i], clFileName) == 0) {
+ if (gpuEnvCached->mpArryPrograms[i] != NULL) {
return 1;
}
}
@@ -404,31 +409,28 @@ int OpenclDevice::CompileKernelFile(GPUEnv *gpuInfo, const char *buildOption) {
const char *source;
size_t source_size[1];
int b_error, binary_status, binaryExisted, idx;
- cl_uint numDevices;
- cl_device_id *devices;
+ size_t numDevices;
+ cl_device_id *mpArryDevsID;
FILE *fd, *fd1;
const char* filename = "kernel.cl";
- fprintf(stderr, "CompileKernelFile ... \n");
+ fprintf(stderr, "CompileKernelFile ... \n");
if (CachedOfKernerPrg(gpuInfo, filename) == 1) {
return 1;
}
- idx = gpuInfo->fileCount;
+ idx = gpuInfo->mnFileCount;
source = kernel_src;
source_size[0] = strlen(source);
binaryExisted = 0;
if ((binaryExisted = BinaryGenerated(filename, &fd)) == 1) {
-#ifdef CL_CONTEXT_NUM_DEVICES
- status = clGetContextInfo(gpuInfo->context, CL_CONTEXT_NUM_DEVICES,
+ status = clGetContextInfo(gpuInfo->mpContext, CL_CONTEXT_NUM_DEVICES,
sizeof(numDevices), &numDevices, NULL);
CHECK_OPENCL(status)
-#else
- numDevices = 1; // ???
-#endif
- devices = (cl_device_id*) malloc(sizeof(cl_device_id) * numDevices);
- if (devices == NULL) {
+
+ mpArryDevsID = (cl_device_id*) malloc(sizeof(cl_device_id) * numDevices);
+ if (mpArryDevsID == NULL) {
return 0;
}
@@ -452,50 +454,50 @@ int OpenclDevice::CompileKernelFile(GPUEnv *gpuInfo, const char *buildOption) {
fclose(fd);
fd = NULL;
// grab the handles to all of the devices in the context.
- status = clGetContextInfo(gpuInfo->context, CL_CONTEXT_DEVICES,
- sizeof(cl_device_id) * numDevices, devices, NULL);
+ status = clGetContextInfo(gpuInfo->mpContext, CL_CONTEXT_DEVICES,
+ sizeof(cl_device_id) * numDevices, mpArryDevsID, NULL);
CHECK_OPENCL(status)
- gpuInfo->programs[idx] = clCreateProgramWithBinary(gpuInfo->context,
- numDevices, devices, &length, (const unsigned char**) &binary,
+ gpuInfo->mpArryPrograms[idx] = clCreateProgramWithBinary(gpuInfo->mpContext,
+ numDevices, mpArryDevsID, &length, (const unsigned char**) &binary,
&binary_status, &status);
CHECK_OPENCL(status)
free(binary);
- free(devices);
- devices = NULL;
+ free(mpArryDevsID);
+ mpArryDevsID = NULL;
} else {
// create a CL program using the kernel source
- gpuEnv.programs[idx] = clCreateProgramWithSource(gpuEnv.context,
+ gpuEnv.mpArryPrograms[idx] = clCreateProgramWithSource(gpuEnv.mpContext,
1, &source, source_size, &status);
CHECK_OPENCL(status);
}
- if (gpuInfo->programs[idx] == (cl_program) NULL) {
+ if (gpuInfo->mpArryPrograms[idx] == (cl_program) NULL) {
return 0;
}
//char options[512];
// create a cl program executable for all the devices specified
- if (!gpuInfo->isUserCreated) {
- status = clBuildProgram(gpuInfo->programs[idx], 1, gpuInfo->devices,
+ if (!gpuInfo->mnIsUserCreated) {
+ status = clBuildProgram(gpuInfo->mpArryPrograms[idx], 1, gpuInfo->mpArryDevsID,
buildOption, NULL, NULL);
CHECK_OPENCL(status)
} else {
- status = clBuildProgram(gpuInfo->programs[idx], 1, &(gpuInfo->dev),
+ status = clBuildProgram(gpuInfo->mpArryPrograms[idx], 1, &(gpuInfo->mpDevID),
buildOption, NULL, NULL);
CHECK_OPENCL(status)
}
printf("BuildProgram.\n");
if (status != CL_SUCCESS) {
- if (!gpuInfo->isUserCreated) {
- status = clGetProgramBuildInfo(gpuInfo->programs[idx],
- gpuInfo->devices[0], CL_PROGRAM_BUILD_LOG, 0, NULL,
+ if (!gpuInfo->mnIsUserCreated) {
+ status = clGetProgramBuildInfo(gpuInfo->mpArryPrograms[idx],
+ gpuInfo->mpArryDevsID[0], CL_PROGRAM_BUILD_LOG, 0, NULL,
&length);
} else {
- status = clGetProgramBuildInfo(gpuInfo->programs[idx],
- gpuInfo->dev, CL_PROGRAM_BUILD_LOG, 0, NULL, &length);
+ status = clGetProgramBuildInfo(gpuInfo->mpArryPrograms[idx],
+ gpuInfo->mpDevID, CL_PROGRAM_BUILD_LOG, 0, NULL, &length);
}
if (status != CL_SUCCESS) {
printf("opencl create build log fail\n");
@@ -505,13 +507,13 @@ int OpenclDevice::CompileKernelFile(GPUEnv *gpuInfo, const char *buildOption) {
if (buildLog == (char*) NULL) {
return 0;
}
- if (!gpuInfo->isUserCreated) {
- status = clGetProgramBuildInfo(gpuInfo->programs[idx],
- gpuInfo->devices[0], CL_PROGRAM_BUILD_LOG, length,
+ if (!gpuInfo->mnIsUserCreated) {
+ status = clGetProgramBuildInfo(gpuInfo->mpArryPrograms[idx],
+ gpuInfo->mpArryDevsID[0], CL_PROGRAM_BUILD_LOG, length,
buildLog, &length);
} else {
- status = clGetProgramBuildInfo(gpuInfo->programs[idx],
- gpuInfo->dev, CL_PROGRAM_BUILD_LOG, length, buildLog,
+ status = clGetProgramBuildInfo(gpuInfo->mpArryPrograms[idx],
+ gpuInfo->mpDevID, CL_PROGRAM_BUILD_LOG, length, buildLog,
&length);
}
@@ -525,12 +527,12 @@ int OpenclDevice::CompileKernelFile(GPUEnv *gpuInfo, const char *buildOption) {
return 0;
}
- strcpy(gpuEnv.kernelSrcFile[idx], filename);
+ strcpy(gpuEnv.mArryKnelSrcFile[idx], filename);
if (binaryExisted == 0)
- GeneratBinFromKernelSource(gpuEnv.programs[idx], filename);
+ GeneratBinFromKernelSource(gpuEnv.mpArryPrograms[idx], filename);
- gpuInfo->fileCount += 1;
+ gpuInfo->mnFileCount += 1;
return 1;
@@ -539,14 +541,14 @@ int OpenclDevice::CompileKernelFile(GPUEnv *gpuInfo, const char *buildOption) {
int OpenclDevice::GetKernelEnvAndFunc(const char *kernelName,
KernelEnv *env, cl_kernel_function *function) {
int i; //,program_idx ;
- printf("----------------OpenclDevice::GetKernelEnvAndFunc\n");
- for (i = 0; i < gpuEnv.kernelCount; i++) {
- if (strcasecmp(kernelName, gpuEnv.kernelNames[i]) == 0) {
- env->context = gpuEnv.context;
- env->commandQueue = gpuEnv.commandQueue;
- env->program = gpuEnv.programs[0];
- env->kernel = gpuEnv.kernels[i];
- *function = gpuEnv.kernelFunctions[i];
+ //printf("----------------OpenclDevice::GetKernelEnvAndFunc\n");
+ for (i = 0; i < gpuEnv.mnKernelCount; i++) {
+ if (strcasecmp(kernelName, gpuEnv.mArrykernelNames[i]) == 0) {
+ env->mpkContext = gpuEnv.mpContext;
+ env->mpkCmdQueue = gpuEnv.mpCmdQueue;
+ env->mpkProgram = gpuEnv.mpArryPrograms[0];
+ env->mpkKernel = gpuEnv.mpArryKernels[i];
+ *function = gpuEnv.mpArryKnelFuncs[i];
return 1;
}
}
@@ -554,21 +556,21 @@ int OpenclDevice::GetKernelEnvAndFunc(const char *kernelName,
}
int OpenclDevice::RunKernel(const char *kernelName, void **userdata) {
- KernelEnv env;
+ KernelEnv kEnv;
cl_kernel_function function;
int status;
- memset(&env, 0, sizeof(KernelEnv));
- status = GetKernelEnvAndFunc(kernelName, &env, &function);
- strcpy(env.kernelName, kernelName);
+ memset(&kEnv, 0, sizeof(KernelEnv));
+ status = GetKernelEnvAndFunc(kernelName, &kEnv, &function);
+ strcpy(kEnv.mckKernelName, kernelName);
if (status == 1) {
- if (&env == (KernelEnv *) NULL
+ if (&kEnv == (KernelEnv *) NULL
|| &function == (cl_kernel_function *) NULL) {
return 0;
}
- return (function(userdata, &env));
+ return (function(userdata, &kEnv));
}
return 0;
}
@@ -593,7 +595,7 @@ int OpenclDevice::InitOpenclRunEnv(int argc, const char *buildOptionKernelfiles)
printf("init_opencl_env successed.\n");
//initialize program, kernelName, kernelCount
status = CompileKernelFile( &gpuEnv, buildOptionKernelfiles);
- if (status == 0 || gpuEnv.kernelCount == 0) {
+ if (status == 0 || gpuEnv.mnKernelCount == 0) {
printf("CompileKernelFile failed.\n");
return 1;
}
@@ -615,12 +617,12 @@ int OpenclDevice::InitOpenclRunEnv(GPUEnv *gpuInfo)
// Have a look at the available platforms.
- if (!gpuInfo->isUserCreated) {
+ if (!gpuInfo->mnIsUserCreated) {
status = clGetPlatformIDs(0, NULL, &numPlatforms);
if (status != CL_SUCCESS) {
return 1;
}
- gpuInfo->platform = NULL;
+ gpuInfo->mpPlatformID = NULL;
if (0 < numPlatforms) {
platforms = (cl_platform_id*) malloc(
@@ -641,18 +643,18 @@ int OpenclDevice::InitOpenclRunEnv(GPUEnv *gpuInfo)
if (status != CL_SUCCESS) {
return 1;
}
- gpuInfo->platform = platforms[i];
+ gpuInfo->mpPlatformID = platforms[i];
//if (!strcmp(platformName, "Intel(R) Coporation"))
//if( !strcmp( platformName, "Advanced Micro Devices, Inc." ))
{
- gpuInfo->platform = platforms[i];
+ gpuInfo->mpPlatformID = platforms[i];
- status = clGetDeviceIDs(gpuInfo->platform, // platform
- CL_DEVICE_TYPE_ALL, // device_type
- 0, // num_entries
- NULL, // devices
- &numDevices);
+ status = clGetDeviceIDs(gpuInfo->mpPlatformID, // platform
+ CL_DEVICE_TYPE_GPU, // device_type
+ 0, // num_entries
+ NULL, // devices
+ &numDevices);
if (status != CL_SUCCESS) {
return 1;
@@ -665,82 +667,82 @@ int OpenclDevice::InitOpenclRunEnv(GPUEnv *gpuInfo)
}
free(platforms);
}
- if (NULL == gpuInfo->platform) {
+ if (NULL == gpuInfo->mpPlatformID) {
return 1;
}
// Use available platform.
cps[0] = CL_CONTEXT_PLATFORM;
- cps[1] = (cl_context_properties) gpuInfo->platform;
+ cps[1] = (cl_context_properties) gpuInfo->mpPlatformID;
cps[2] = 0;
// Check for GPU.
- gpuInfo->dType = CL_DEVICE_TYPE_GPU;
- gpuInfo->context = clCreateContextFromType(cps, gpuInfo->dType, NULL,
+ gpuInfo->mDevType = CL_DEVICE_TYPE_GPU;
+ gpuInfo->mpContext = clCreateContextFromType(cps, gpuInfo->mDevType, NULL,
NULL, &status);
- // If no GPU, check for CPU.
- if ((gpuInfo->context == (cl_context) NULL)
+ if ((gpuInfo->mpContext == (cl_context) NULL)
|| (status != CL_SUCCESS)) {
- gpuInfo->dType = CL_DEVICE_TYPE_CPU;
- gpuInfo->context = clCreateContextFromType(cps, gpuInfo->dType,
+ gpuInfo->mDevType = CL_DEVICE_TYPE_CPU;
+ gpuInfo->mpContext = clCreateContextFromType(cps, gpuInfo->mDevType,
NULL, NULL, &status);
}
-
- // If no GPU or CPU, check for a "default" type.
- if ((gpuInfo->context == (cl_context) NULL)
+ if ((gpuInfo->mpContext == (cl_context) NULL)
|| (status != CL_SUCCESS)) {
- gpuInfo->dType = CL_DEVICE_TYPE_DEFAULT;
- gpuInfo->context = clCreateContextFromType(cps, gpuInfo->dType,
+ gpuInfo->mDevType = CL_DEVICE_TYPE_DEFAULT;
+ gpuInfo->mpContext = clCreateContextFromType(cps, gpuInfo->mDevType,
NULL, NULL, &status);
}
- if ((gpuInfo->context == (cl_context) NULL)
+ if ((gpuInfo->mpContext == (cl_context) NULL)
|| (status != CL_SUCCESS)) {
return 1;
}
// Detect OpenCL devices.
// First, get the size of device list data
- status = clGetContextInfo(gpuInfo->context, CL_CONTEXT_DEVICES, 0,
+ status = clGetContextInfo(gpuInfo->mpContext, CL_CONTEXT_DEVICES, 0,
NULL, &length);
if ((status != CL_SUCCESS) || (length == 0)) {
return 1;
}
// Now allocate memory for device list based on the size we got earlier
- gpuInfo->devices = (cl_device_id*) malloc(length);
- if (gpuInfo->devices == (cl_device_id*) NULL) {
+ gpuInfo->mpArryDevsID = (cl_device_id*) malloc(length);
+ if (gpuInfo->mpArryDevsID == (cl_device_id*) NULL) {
return 1;
}
// Now, get the device list data
- status = clGetContextInfo(gpuInfo->context, CL_CONTEXT_DEVICES, length,
- gpuInfo->devices, NULL);
+ status = clGetContextInfo(gpuInfo->mpContext, CL_CONTEXT_DEVICES, length,
+ gpuInfo->mpArryDevsID, NULL);
if (status != CL_SUCCESS) {
return 1;
}
// Create OpenCL command queue.
- gpuInfo->commandQueue = clCreateCommandQueue(gpuInfo->context,
- gpuInfo->devices[0], 0, &status);
+ gpuInfo->mpCmdQueue = clCreateCommandQueue(gpuInfo->mpContext,
+ gpuInfo->mpArryDevsID[0], 0, &status);
if (status != CL_SUCCESS) {
return 1;
}
}
+ status = clGetCommandQueueInfo(gpuInfo->mpCmdQueue,
+ CL_QUEUE_THREAD_HANDLE_AMD, 0, NULL, NULL);
+
return 0;
}
int OpenclDevice::RegisterKernelWrapper(const char *kernelName,cl_kernel_function function)
{
- int i;
- printf("oclwrapper:RegisterKernelWrapper...%d\n", gpuEnv.kernelCount);
- for (i = 0; i < gpuEnv.kernelCount; i++)
- {
- if (strcasecmp(kernelName, gpuEnv.kernelNames[i]) == 0)
- {
- gpuEnv.kernelFunctions[i] = function;
- return 1;
- }
- }
+ int i;
+ //printf("oclwrapper:RegisterKernelWrapper...%d\n", gpuEnv.mnKernelCount);
+ for (i = 0; i < gpuEnv.mnKernelCount; i++)
+ {
+ if (strcasecmp(kernelName, gpuEnv.mArrykernelNames[i]) == 0)
+ {
+ gpuEnv.mpArryKnelFuncs[i] = function;
+ return 1;
+ }
+ }
return 0;
}
@@ -772,20 +774,20 @@ int OclFormulax(void ** usrdata, KernelEnv *env) {
tdata[i] = (float) data[i];
}
- env->kernel = clCreateKernel(env->program, "oclformula", &clStatus);
+ env->mpkKernel = clCreateKernel(env->mpkProgram, "oclformula", &clStatus);
//printf("ScInterpreter::IterateParameters...after clCreateKernel.\n");
//fprintf(stderr, "\nIn OpenclDevice,...after clCreateKernel\n");
int size = NUM;
- cl_mem formula_data = clCreateBuffer(env->context,
+ cl_mem formula_data = clCreateBuffer(env->mpkContext,
(cl_mem_flags) (CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR),
size * sizeof(float), (void *) tdata, &clStatus);
//fprintf(stderr, "\nIn OpenclDevice,...after clCreateBuffer\n");
- status = clSetKernelArg(env->kernel, 0, sizeof(cl_mem),
+ status = clSetKernelArg(env->mpkKernel, 0, sizeof(cl_mem),
(void *) &formula_data);
CHECK_OPENCL(status)
- status = clSetKernelArg(env->kernel, 1, sizeof(unsigned int),
+ status = clSetKernelArg(env->mpkKernel, 1, sizeof(unsigned int),
(void *) &type);
CHECK_OPENCL(status)
@@ -795,21 +797,21 @@ int OclFormulax(void ** usrdata, KernelEnv *env) {
while (global_work_size[0] != 1) {
global_work_size[0] = global_work_size[0] / 2;
- status = clEnqueueNDRangeKernel(env->commandQueue, env->kernel, 1,
+ status = clEnqueueNDRangeKernel(env->mpkCmdQueue, env->mpkKernel, 1,
NULL, global_work_size, NULL, 0, NULL, NULL);
CHECK_OPENCL(status)
}
//fprintf(stderr, "\nIn OpenclDevice,...before clEnqueueReadBuffer\n");
- status = clEnqueueReadBuffer(env->commandQueue, formula_data, CL_FALSE, 0,
+ status = clEnqueueReadBuffer(env->mpkCmdQueue, formula_data, CL_FALSE, 0,
sizeof(float), (void *) &tdata, 0, NULL, NULL);
CHECK_OPENCL(status)
- status = clFinish(env->commandQueue);
+ status = clFinish(env->mpkCmdQueue);
CHECK_OPENCL(status)
//PPAStopCpuEvent(ppa_proc);
//fprintf(stderr, "\nIn OpenclDevice,...before clReleaseKernel\n");
- status = clReleaseKernel(env->kernel);
+ status = clReleaseKernel(env->mpkKernel);
CHECK_OPENCL(status)
status = clReleaseMemObject(formula_data);
CHECK_OPENCL(status)
@@ -840,20 +842,20 @@ int OclFormulaxDll(void ** usrdata, KernelEnv *env) {
tdata[i] = (float) data[i];
}
- env->kernel = clCreateKernel(env->program, "oclformula", &clStatus);
+ env->mpkKernel = clCreateKernel(env->mpkProgram, "oclformula", &clStatus);
//printf("ScInterpreter::IterateParameters...after clCreateKernel.\n");
//fprintf(stderr, "\nIn OpenclDevice,...after clCreateKernel\n");
int size = NUM;
- cl_mem formula_data = clCreateBuffer(env->context,
+ cl_mem formula_data = clCreateBuffer(env->mpkContext,
(cl_mem_flags) (CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR),
size * sizeof(float), (void *) tdata, &clStatus);
//fprintf(stderr, "\nIn OpenclDevice,...after clCreateBuffer\n");
- status = clSetKernelArg(env->kernel, 0, sizeof(cl_mem),
+ status = clSetKernelArg(env->mpkKernel, 0, sizeof(cl_mem),
(void *) &formula_data);
CHECK_OPENCL(status)
- status = clSetKernelArg(env->kernel, 1, sizeof(unsigned int),
+ status = clSetKernelArg(env->mpkKernel, 1, sizeof(unsigned int),
(void *) &type);
CHECK_OPENCL(status)
@@ -863,21 +865,21 @@ int OclFormulaxDll(void ** usrdata, KernelEnv *env) {
while (global_work_size[0] != 1) {
global_work_size[0] = global_work_size[0] / 2;
- status = clEnqueueNDRangeKernel(env->commandQueue, env->kernel, 1,
+ status = clEnqueueNDRangeKernel(env->mpkCmdQueue, env->mpkKernel, 1,
NULL, global_work_size, NULL, 0, NULL, NULL);
CHECK_OPENCL(status)
}
//fprintf(stderr, "\nIn OpenclDevice,...before clEnqueueReadBuffer\n");
- status = clEnqueueReadBuffer(env->commandQueue, formula_data, CL_FALSE, 0,
+ status = clEnqueueReadBuffer(env->mpkCmdQueue, formula_data, CL_FALSE, 0,
sizeof(float), (void *) &tdata, 0, NULL, NULL);
CHECK_OPENCL(status)
- status = clFinish(env->commandQueue);
+ status = clFinish(env->mpkCmdQueue);
CHECK_OPENCL(status)
//PPAStopCpuEvent(ppa_proc);
//fprintf(stderr, "\nIn OpenclDevice,...before clReleaseKernel\n");
- status = clReleaseKernel(env->kernel);
+ status = clReleaseKernel(env->mpkKernel);
CHECK_OPENCL(status)
status = clReleaseMemObject(formula_data);
CHECK_OPENCL(status)
@@ -894,13 +896,13 @@ int OclFormulaxDll(void ** usrdata, KernelEnv *env) {
double OclCalc::OclProcess(cl_kernel_function function, double *data, formulax type)
{
- fprintf(stderr, "\n OpenclDevice, proc...begin\n");
- double ret = 0;
- void *usrdata[2];
- usrdata[0] = (void *) data;
- usrdata[1] = (void *) &type;
- RunKernelWrapper(function, "oclformula", usrdata);
- return ret;
+ fprintf(stderr, "\n OpenclDevice, proc...begin\n");
+ double ret = 0;
+ void *usrdata[2];
+ usrdata[0] = (void *) data;
+ usrdata[1] = (void *) &type;
+ RunKernelWrapper(function, "oclformula", usrdata);
+ return ret;
}
double OclCalc::OclTest() {
@@ -927,467 +929,1141 @@ double OclCalc::OclTestDll() {
OclCalc::OclCalc()
{
- OpenclDevice::SetOpenclState(1);
- fprintf(stderr,"OclCalc:: init opencl ok.\n");
+ fprintf(stderr,"OclCalc:: init opencl ...\n");
}
OclCalc::~OclCalc()
{
- OpenclDevice::SetOpenclState(0);
- fprintf(stderr,"OclCalc:: opencl end ok.\n");
+ fprintf(stderr,"OclCalc:: opencl end ...\n");
}
/////////////////////////////////////////////////////////////////////////////
-int OclCalc::OclHostFormulaMax(double *srcData,int *start,int *end,double *output,int size) {
- KernelEnv env;
- const char *kernelName = "oclFormulaMax";
- CheckKernelName(&env,kernelName);
- cl_int clStatus;
- size_t global_work_size[1];
- int alignSize = size + end[0]-start[0];
-
- env.kernel = clCreateKernel(env.program,kernelName, &clStatus);
- cl_int ret=0;
- cl_mem inputCl = clCreateBuffer(env.context,(cl_mem_flags) (CL_MEM_READ_WRITE),
- alignSize * sizeof(float), NULL, &clStatus);
- cl_mem startCl = clCreateBuffer(env.context, (cl_mem_flags) (CL_MEM_READ_WRITE),
- size * sizeof(unsigned int), NULL, &ret);
- cl_mem endCl = clCreateBuffer(env.context, (cl_mem_flags) (CL_MEM_READ_WRITE),
- size * sizeof(unsigned int), NULL, &ret);
- cl_mem outputCl = clCreateBuffer(env.context, (cl_mem_flags) (CL_MEM_READ_WRITE),
- size* sizeof(float), NULL, &ret);
-
- float * hostMapSrc = (float *)clEnqueueMapBuffer(env.commandQueue,inputCl,CL_TRUE,CL_MAP_WRITE,0,alignSize * sizeof(float),0,NULL,NULL,NULL);
- int * hostMapStart = (int *)clEnqueueMapBuffer(env.commandQueue,startCl,CL_TRUE,CL_MAP_WRITE,0,size * sizeof(unsigned int),0,NULL,NULL,NULL);
- int * hostMapEnd = (int *)clEnqueueMapBuffer(env.commandQueue,endCl,CL_TRUE,CL_MAP_WRITE,0,size * sizeof(unsigned int),0,NULL,NULL,NULL);
- for(int i=0;i<size;i++)
- {
- hostMapStart[i] = start[i];
- hostMapEnd[i] = end[i];
- }
- for(int i=0;i<alignSize;i++)
- hostMapSrc[i] = (float)srcData[i];
- clEnqueueUnmapMemObject(env.commandQueue,inputCl,hostMapSrc,0,NULL,NULL);
- clEnqueueUnmapMemObject(env.commandQueue,startCl,hostMapStart,0,NULL,NULL);
- clEnqueueUnmapMemObject(env.commandQueue,endCl,hostMapEnd,0,NULL,NULL);
-
- clStatus = clSetKernelArg(env.kernel, 0, sizeof(cl_mem),
- (void *)&inputCl);
- clStatus = clSetKernelArg(env.kernel, 1, sizeof(cl_mem),
- (void *)&startCl);
- clStatus = clSetKernelArg(env.kernel, 2, sizeof(cl_mem),
- (void *)&endCl);
- clStatus = clSetKernelArg(env.kernel, 3, sizeof(cl_mem),
- (void *)&outputCl);
- CHECK_OPENCL(clStatus);
-
- global_work_size[0] = size;
- clStatus = clEnqueueNDRangeKernel(env.commandQueue, env.kernel, 1,
- NULL, global_work_size, NULL, 0, NULL, NULL);
- CHECK_OPENCL(clStatus)
-
- float * outPutMap = (float *)clEnqueueMapBuffer(env.commandQueue,outputCl,CL_TRUE,CL_MAP_READ,0,size*sizeof(float),0,NULL,NULL,NULL);
- for(int i=0;i<size;i++)
- output[i]=outPutMap[i];
-
- clEnqueueUnmapMemObject(env.commandQueue,outputCl,outPutMap,0,NULL,NULL);
- clStatus = clFinish(env.commandQueue);
-
- CHECK_OPENCL(clStatus);
- clStatus = clReleaseKernel(env.kernel);
- CHECK_OPENCL(clStatus);
- clStatus = clReleaseMemObject(inputCl);
- CHECK_OPENCL(clStatus);
- clStatus = clReleaseMemObject(startCl);
- CHECK_OPENCL(clStatus);
- clStatus = clReleaseMemObject(endCl);
- CHECK_OPENCL(clStatus);
- clStatus = clReleaseMemObject(outputCl);
- CHECK_OPENCL(clStatus);
- return 0;
+#ifdef GPU_64BITS
+int OclCalc::OclHostFormulaMax(double *srcData,int *start,int *end,double *output,int size)
+{
+ KernelEnv kEnv;
+ const char *kernelName = "oclFormulaMax";
+ CheckKernelName(&kEnv,kernelName);
+ cl_int clStatus;
+ size_t global_work_size[1];
+ int alignSize = size + end[0]-start[0];
+
+ kEnv.mpkKernel = clCreateKernel(kEnv.mpkProgram,kernelName, &clStatus);
+ cl_int ret=0;
+ cl_mem inputCl = clCreateBuffer(kEnv.mpkContext,(cl_mem_flags) (CL_MEM_READ_WRITE),
+ alignSize * sizeof(float), NULL, &clStatus);
+ cl_mem startCl = clCreateBuffer(kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE),
+ size * sizeof(unsigned int), NULL, &ret);
+ cl_mem endCl = clCreateBuffer(kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE),
+ size * sizeof(unsigned int), NULL, &ret);
+ cl_mem outputCl = clCreateBuffer(kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE),
+ size* sizeof(float), NULL, &ret);
+
+ float * hostMapSrc = (float *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,inputCl,CL_TRUE,CL_MAP_WRITE,0,alignSize * sizeof(float),0,NULL,NULL,NULL);
+ int * hostMapStart = (int *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,startCl,CL_TRUE,CL_MAP_WRITE,0,size * sizeof(unsigned int),0,NULL,NULL,NULL);
+ int * hostMapEnd = (int *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,endCl,CL_TRUE,CL_MAP_WRITE,0,size * sizeof(unsigned int),0,NULL,NULL,NULL);
+ for(int i=0;i<size;i++)
+ {
+ hostMapStart[i] = start[i];
+ hostMapEnd[i] = end[i];
+ }
+ for(int i=0;i<alignSize;i++)
+ hostMapSrc[i] = (float)srcData[i];
+ clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,inputCl,hostMapSrc,0,NULL,NULL);
+ clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,startCl,hostMapStart,0,NULL,NULL);
+ clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,endCl,hostMapEnd,0,NULL,NULL);
+
+ clStatus = clSetKernelArg(kEnv.mpkKernel, 0, sizeof(cl_mem),
+ (void *)&inputCl);
+ clStatus = clSetKernelArg(kEnv.mpkKernel, 1, sizeof(cl_mem),
+ (void *)&startCl);
+ clStatus = clSetKernelArg(kEnv.mpkKernel, 2, sizeof(cl_mem),
+ (void *)&endCl);
+ clStatus = clSetKernelArg(kEnv.mpkKernel, 3, sizeof(cl_mem),
+ (void *)&outputCl);
+ CHECK_OPENCL(clStatus);
+
+ global_work_size[0] = size;
+ clStatus = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, kEnv.mpkKernel, 1,
+ NULL, global_work_size, NULL, 0, NULL, NULL);
+ CHECK_OPENCL(clStatus);
+
+ float * outPutMap = (float *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,outputCl,CL_TRUE,CL_MAP_READ,0,size*sizeof(float),0,NULL,NULL,NULL);
+ for(int i=0;i<size;i++)
+ output[i]=outPutMap[i];
+
+ clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,outputCl,outPutMap,0,NULL,NULL);
+ clStatus = clFinish(kEnv.mpkCmdQueue);
+ CHECK_OPENCL(clStatus);
+ clStatus = clReleaseKernel(kEnv.mpkKernel);
+ CHECK_OPENCL(clStatus);
+ clStatus = clReleaseMemObject(inputCl);
+ CHECK_OPENCL(clStatus);
+ clStatus = clReleaseMemObject(startCl);
+ CHECK_OPENCL(clStatus);
+ clStatus = clReleaseMemObject(endCl);
+ CHECK_OPENCL(clStatus);
+ clStatus = clReleaseMemObject(outputCl);
+ CHECK_OPENCL(clStatus);
+ return 0;
}
-int OclCalc::OclHostFormulaMin(double *srcData,int *start,int *end,double *output,int size) {
- KernelEnv env;
- const char *kernelName = "oclFormulaMin";
- CheckKernelName(&env,kernelName);
-
- cl_int clStatus;
- size_t global_work_size[1];
- int alignSize = size + end[0]-start[0];
-
- env.kernel = clCreateKernel(env.program,kernelName, &clStatus);
- cl_int ret=0;
- cl_mem inputCl = clCreateBuffer(env.context,(cl_mem_flags) (CL_MEM_READ_WRITE),
- alignSize * sizeof(float), NULL, &clStatus);
- cl_mem startCl = clCreateBuffer(env.context, (cl_mem_flags) (CL_MEM_READ_WRITE),
- size * sizeof(unsigned int), NULL, &ret);
- cl_mem endCl = clCreateBuffer(env.context, (cl_mem_flags) (CL_MEM_READ_WRITE),
- size * sizeof(unsigned int), NULL, &ret);
- cl_mem outputCl = clCreateBuffer(env.context, (cl_mem_flags) (CL_MEM_READ_WRITE),
- size* sizeof(float), NULL, &ret);
-
- float * hostMapSrc = (float *)clEnqueueMapBuffer(env.commandQueue,inputCl,CL_TRUE,CL_MAP_WRITE,0,alignSize * sizeof(float),0,NULL,NULL,NULL);
- int * hostMapStart = (int *)clEnqueueMapBuffer(env.commandQueue,startCl,CL_TRUE,CL_MAP_WRITE,0,size * sizeof(unsigned int),0,NULL,NULL,NULL);
- int * hostMapEnd = (int *)clEnqueueMapBuffer(env.commandQueue,endCl,CL_TRUE,CL_MAP_WRITE,0,size * sizeof(unsigned int),0,NULL,NULL,NULL);
- for(int i=0;i<size;i++)
- {
- hostMapStart[i] = start[i];
- hostMapEnd[i] = end[i];
- }
- for(int i=0;i<alignSize;i++)
- hostMapSrc[i] = (float)srcData[i];
- clEnqueueUnmapMemObject(env.commandQueue,inputCl,hostMapSrc,0,NULL,NULL);
- clEnqueueUnmapMemObject(env.commandQueue,startCl,hostMapStart,0,NULL,NULL);
- clEnqueueUnmapMemObject(env.commandQueue,endCl,hostMapEnd,0,NULL,NULL);
-
- clStatus = clSetKernelArg(env.kernel, 0, sizeof(cl_mem),
- (void *)&inputCl);
- clStatus = clSetKernelArg(env.kernel, 1, sizeof(cl_mem),
- (void *)&startCl);
- clStatus = clSetKernelArg(env.kernel, 2, sizeof(cl_mem),
- (void *)&endCl);
- clStatus = clSetKernelArg(env.kernel, 3, sizeof(cl_mem),
- (void *)&outputCl);
- CHECK_OPENCL(clStatus);
-
- global_work_size[0] = size;
- clStatus = clEnqueueNDRangeKernel(env.commandQueue, env.kernel, 1,
- NULL, global_work_size, NULL, 0, NULL, NULL);
- CHECK_OPENCL(clStatus)
-
- float * outPutMap = (float *)clEnqueueMapBuffer(env.commandQueue,outputCl,CL_TRUE,CL_MAP_READ,0,size*sizeof(float),0,NULL,NULL,NULL);
- for(int i=0;i<size;i++)
- output[i]=outPutMap[i];
-
- clEnqueueUnmapMemObject(env.commandQueue,outputCl,outPutMap,0,NULL,NULL);
- clStatus = clFinish(env.commandQueue);
-
- CHECK_OPENCL(clStatus);
- clStatus = clReleaseKernel(env.kernel);
- CHECK_OPENCL(clStatus);
- clStatus = clReleaseMemObject(inputCl);
- CHECK_OPENCL(clStatus);
- clStatus = clReleaseMemObject(startCl);
- CHECK_OPENCL(clStatus);
- clStatus = clReleaseMemObject(endCl);
- CHECK_OPENCL(clStatus);
- clStatus = clReleaseMemObject(outputCl);
- CHECK_OPENCL(clStatus);
- return 0;
+int OclCalc::OclHostFormulaMin(double *srcData,int *start,int *end,double *output,int size)
+{
+ KernelEnv kEnv;
+ const char *kernelName = "oclFormulaMin";
+ CheckKernelName(&kEnv,kernelName);
+ cl_int clStatus;
+ size_t global_work_size[1];
+ int alignSize = size + end[0]-start[0];
+
+ kEnv.mpkKernel = clCreateKernel(kEnv.mpkProgram,kernelName, &clStatus);
+ cl_int ret=0;
+ cl_mem inputCl = clCreateBuffer(kEnv.mpkContext,(cl_mem_flags) (CL_MEM_READ_WRITE),
+ alignSize * sizeof(float), NULL, &clStatus);
+ cl_mem startCl = clCreateBuffer(kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE),
+ size * sizeof(unsigned int), NULL, &ret);
+ cl_mem endCl = clCreateBuffer(kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE),
+ size * sizeof(unsigned int), NULL, &ret);
+ cl_mem outputCl = clCreateBuffer(kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE),
+ size* sizeof(float), NULL, &ret);
+
+ float * hostMapSrc = (float *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,inputCl,CL_TRUE,CL_MAP_WRITE,0,alignSize * sizeof(float),0,NULL,NULL,NULL);
+ int * hostMapStart = (int *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,startCl,CL_TRUE,CL_MAP_WRITE,0,size * sizeof(unsigned int),0,NULL,NULL,NULL);
+ int * hostMapEnd = (int *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,endCl,CL_TRUE,CL_MAP_WRITE,0,size * sizeof(unsigned int),0,NULL,NULL,NULL);
+ for(int i=0;i<size;i++)
+ {
+ hostMapStart[i] = start[i];
+ hostMapEnd[i] = end[i];
+ }
+ for(int i=0;i<alignSize;i++)
+ hostMapSrc[i] = (float)srcData[i];
+ clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,inputCl,hostMapSrc,0,NULL,NULL);
+ clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,startCl,hostMapStart,0,NULL,NULL);
+ clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,endCl,hostMapEnd,0,NULL,NULL);
+
+ clStatus = clSetKernelArg(kEnv.mpkKernel, 0, sizeof(cl_mem),
+ (void *)&inputCl);
+ clStatus = clSetKernelArg(kEnv.mpkKernel, 1, sizeof(cl_mem),
+ (void *)&startCl);
+ clStatus = clSetKernelArg(kEnv.mpkKernel, 2, sizeof(cl_mem),
+ (void *)&endCl);
+ clStatus = clSetKernelArg(kEnv.mpkKernel, 3, sizeof(cl_mem),
+ (void *)&outputCl);
+ CHECK_OPENCL(clStatus);
+
+ global_work_size[0] = size;
+ clStatus = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, kEnv.mpkKernel, 1,
+ NULL, global_work_size, NULL, 0, NULL, NULL);
+ CHECK_OPENCL(clStatus);
+
+ float * outPutMap = (float *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,outputCl,CL_TRUE,CL_MAP_READ,0,size*sizeof(float),0,NULL,NULL,NULL);
+ for(int i=0;i<size;i++)
+ output[i]=outPutMap[i];
+
+ clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,outputCl,outPutMap,0,NULL,NULL);
+ clStatus = clFinish(kEnv.mpkCmdQueue);
+ CHECK_OPENCL(clStatus);
+ clStatus = clReleaseKernel(kEnv.mpkKernel);
+ CHECK_OPENCL(clStatus);
+ clStatus = clReleaseMemObject(inputCl);
+ CHECK_OPENCL(clStatus);
+ clStatus = clReleaseMemObject(startCl);
+ CHECK_OPENCL(clStatus);
+ clStatus = clReleaseMemObject(endCl);
+ CHECK_OPENCL(clStatus);
+ clStatus = clReleaseMemObject(outputCl);
+ CHECK_OPENCL(clStatus);
+ return 0;
}
-int OclCalc::OclHostFormulaAverage(double *srcData,int *start,int *end,double *output,int size) {
- KernelEnv env;
- const char *kernelName = "oclFormulaAverage";
- CheckKernelName(&env,kernelName);
-
- cl_int clStatus;
- size_t global_work_size[1];
- int alignSize = size + end[0]-start[0];
-
- env.kernel = clCreateKernel(env.program, kernelName, &clStatus);
- cl_int ret=0;
- cl_mem inputCl = clCreateBuffer(env.context,(cl_mem_flags) (CL_MEM_READ_WRITE),
- alignSize * sizeof(float), NULL, &clStatus);
- cl_mem startCl = clCreateBuffer(env.context, (cl_mem_flags) (CL_MEM_READ_WRITE),
- size * sizeof(unsigned int), NULL, &ret);
- cl_mem endCl = clCreateBuffer(env.context, (cl_mem_flags) (CL_MEM_READ_WRITE),
- size * sizeof(unsigned int), NULL, &ret);
- cl_mem outputCl = clCreateBuffer(env.context, (cl_mem_flags) (CL_MEM_READ_WRITE),
- size* sizeof(float), NULL, &ret);
-
- float * hostMapSrc = (float *)clEnqueueMapBuffer(env.commandQueue,inputCl,CL_TRUE,CL_MAP_WRITE,0,alignSize * sizeof(float),0,NULL,NULL,NULL);
- int * hostMapStart = (int *)clEnqueueMapBuffer(env.commandQueue,startCl,CL_TRUE,CL_MAP_WRITE,0,size * sizeof(unsigned int),0,NULL,NULL,NULL);
- int * hostMapEnd = (int *)clEnqueueMapBuffer(env.commandQueue,endCl,CL_TRUE,CL_MAP_WRITE,0,size * sizeof(unsigned int),0,NULL,NULL,NULL);
- for(int i=0;i<size;i++)
- {
- hostMapStart[i] = start[i];
- hostMapEnd[i] = end[i];
- }
- for(int i=0;i<alignSize;i++)
- hostMapSrc[i] = (float)srcData[i];
- clEnqueueUnmapMemObject(env.commandQueue,inputCl,hostMapSrc,0,NULL,NULL);
- clEnqueueUnmapMemObject(env.commandQueue,startCl,hostMapStart,0,NULL,NULL);
- clEnqueueUnmapMemObject(env.commandQueue,endCl,hostMapEnd,0,NULL,NULL);
-
- clStatus = clSetKernelArg(env.kernel, 0, sizeof(cl_mem),
- (void *)&inputCl);
- clStatus = clSetKernelArg(env.kernel, 1, sizeof(cl_mem),
- (void *)&startCl);
- clStatus = clSetKernelArg(env.kernel, 2, sizeof(cl_mem),
- (void *)&endCl);
- clStatus = clSetKernelArg(env.kernel, 3, sizeof(cl_mem),
- (void *)&outputCl);
- CHECK_OPENCL(clStatus);
-
- global_work_size[0] = size;
- clStatus = clEnqueueNDRangeKernel(env.commandQueue, env.kernel, 1,
- NULL, global_work_size, NULL, 0, NULL, NULL);
- CHECK_OPENCL(clStatus)
-
- float * outPutMap = (float *)clEnqueueMapBuffer(env.commandQueue,outputCl,CL_TRUE,CL_MAP_READ,0,size*sizeof(float),0,NULL,NULL,NULL);
- for(int i=0;i<size;i++)
- output[i]=outPutMap[i];
-
- clEnqueueUnmapMemObject(env.commandQueue,outputCl,outPutMap,0,NULL,NULL);
- clStatus = clFinish(env.commandQueue);
-
- CHECK_OPENCL(clStatus);
- clStatus = clReleaseKernel(env.kernel);
- CHECK_OPENCL(clStatus);
- clStatus = clReleaseMemObject(inputCl);
- CHECK_OPENCL(clStatus);
- clStatus = clReleaseMemObject(startCl);
- CHECK_OPENCL(clStatus);
- clStatus = clReleaseMemObject(endCl);
- CHECK_OPENCL(clStatus);
- clStatus = clReleaseMemObject(outputCl);
- CHECK_OPENCL(clStatus);
- return 0;
+int OclCalc::OclHostFormulaAverage(double *srcData,int *start,int *end,double *output,int size)
+{
+ KernelEnv kEnv;
+ const char *kernelName = "oclFormulaAverage";
+ CheckKernelName(&kEnv,kernelName);
+ cl_int clStatus;
+ size_t global_work_size[1];
+ int alignSize = size + end[0]-start[0];
+
+ kEnv.mpkKernel = clCreateKernel(kEnv.mpkProgram,kernelName, &clStatus);
+ cl_int ret=0;
+ cl_mem inputCl = clCreateBuffer(kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR),
+ alignSize * sizeof(float), NULL, &clStatus);
+ cl_mem startCl = clCreateBuffer(kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR),
+ size * sizeof(unsigned int), NULL, &ret);
+ cl_mem endCl = clCreateBuffer(kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR),
+ size * sizeof(unsigned int), NULL, &ret);
+ cl_mem outputCl = clCreateBuffer(kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE),
+ size* sizeof(float), NULL, &ret);
+
+ float * hostMapSrc = (float *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,inputCl,CL_TRUE,CL_MAP_WRITE_INVALIDATE_REGION,0,alignSize * sizeof(float),0,NULL,NULL,NULL);
+ int * hostMapStart = (int *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,startCl,CL_TRUE,CL_MAP_WRITE_INVALIDATE_REGION,0,size * sizeof(unsigned int),0,NULL,NULL,NULL);
+ int * hostMapEnd = (int *)clEnqueueMapBuffer(kEnv.mpkCmdQueue,endCl,CL_TRUE,CL_MAP_WRITE_INVALIDATE_REGION,0,size * sizeof(unsigned int),0,NULL,NULL,NULL);
+#if 1
+ for(int i=0;i<size;i++)
+ {
+ hostMapStart[i] = start[i];
+ hostMapEnd[i] = end[i];
+ }
+ for(int i=0;i<alignSize;i++)
+ hostMapSrc[i] = (float)srcData[i];
+ //memcpy(hostMapSrc,srcData,alignSize * sizeof(float));
+#endif
+ for(sal_Int32 i = 0; i < alignSize; ++i){//dbg
+ fprintf(stderr,"In avg host,hostMapSrc[%d] is ...%f\n",i,hostMapSrc[i]);
+ }
-}
+ clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,inputCl,hostMapSrc, 0,NULL,NULL);
+ clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,startCl,hostMapStart,0,NULL,NULL);
+ clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,endCl, hostMapEnd, 0,NULL,NULL);
-int OclCalc::OclHostSignedAdd(double *lData,double *rData,double *rResult,int dSize) {
- KernelEnv env;
- int status;
- const char *kernelName = "oclSignedAdd";
- CheckKernelName(&env,kernelName);
-
-
- cl_int clStatus;
- size_t global_work_size[1];
-
- env.kernel = clCreateKernel(env.program,kernelName, &clStatus);
- cl_mem clLiftData = clCreateBuffer(env.context,
- (cl_mem_flags) (CL_MEM_READ_WRITE),
- dSize * sizeof(float), NULL, &clStatus);
- cl_mem clRightData = clCreateBuffer(env.context,
- (cl_mem_flags) (CL_MEM_READ_WRITE),
- dSize * sizeof(float), NULL, &clStatus);
- cl_mem clResult = clCreateBuffer(env.context,
- (cl_mem_flags) (CL_MEM_READ_WRITE),
- dSize * sizeof(float), NULL, &clStatus);
-
- float * hostMapLeftData = (float *)clEnqueueMapBuffer(env.commandQueue,clLiftData,CL_TRUE,CL_MAP_WRITE,0,dSize*sizeof(float),0,NULL,NULL,NULL);
- float * hostMapRightData = (float *)clEnqueueMapBuffer(env.commandQueue,clRightData,CL_TRUE,CL_MAP_WRITE,0,dSize*sizeof(float),0,NULL,NULL,NULL);
- for(int i=0;i<dSize;i++)
- {
- hostMapLeftData[i] = (float)lData[i];
- hostMapRightData[i] = (float)rData[i];
- }
- clEnqueueUnmapMemObject(env.commandQueue,clLiftData,hostMapLeftData,0,NULL,NULL);
- clEnqueueUnmapMemObject(env.commandQueue,clRightData,hostMapRightData,0,NULL,NULL);
-
- status = clSetKernelArg(env.kernel, 0, sizeof(cl_mem),
- (void *)&clLiftData);
- status = clSetKernelArg(env.kernel, 1, sizeof(cl_mem),
- (void *)&clRightData);
- status = clSetKernelArg(env.kernel, 2, sizeof(cl_mem),
- (void *)&clResult);
- CHECK_OPENCL(status)
... etc. - the rest is truncated
More information about the Libreoffice-commits
mailing list