[Libreoffice-commits] core.git: Branch 'private/kohei/xlsx-import-speedup' - 168 commits - autogen.sh basctl/source basebmp/source basebmp/test basegfx/source basegfx/test basic/qa basic/source bin/get-bugzilla-attachments-by-mimetype bridges/source canvas/source chart2/AllLangResTarget_chartcontroller.mk chart2/source chart2/uiconfig chart2/UIConfig_chart2.mk config_host/config_eot.h.in config_host.mk.in configure.ac connectivity/source cui/source dbaccess/source download.lst external/icu external/libeot external/liblangtag external/libxml2 external/libxslt external/Module_external.mk external/nss extras/source filter/AllLangResTarget_flash.mk filter/Module_filter.mk filter/source filter/uiconfig filter/UIConfig_xsltdlg.mk formula/source fpicker/source framework/source helpcontent2 i18npool/qa i18npool/source icon-themes/galaxy include/formula include/osl include/rtl include/sal include/sfx2 include/tools include/vcl include/xmloff Makefile.fetch offapi/com officecfg/registry oox/Library _oox.mk oox/source postprocess/CustomTarget_registry.mk postprocess/packimages RepositoryExternal.mk sal/rtl sc/inc sc/Library_sc.mk sc/Module_sc.mk scp2/source sc/qa sc/sdi sc/source sc/uiconfig sc/UIConfig_scalc.mk sd/AllLangResTarget_sd.mk sd/inc sd/qa sd/source sd/uiconfig sd/UIConfig_sdraw.mk sfx2/inc sfx2/qa sfx2/source sfx2/uiconfig sfx2/UIConfig_sfx.mk solenv/bin solenv/gbuild starmath/inc starmath/source svtools/source svx/source svx/uiconfig sw/CppunitTest_sw_htmlexport.mk sw/qa sw/source sysui/CustomTarget_share.mk sysui/desktop tools/inc tools/source vcl/aqua vcl/Library_vcl.mk vcl/qa vcl/source writerfilter/inc writerfilter/source xmlhelp/source xmlhelp/util xmloff/inc xmloff/source xmlreader/source xmlscript/source xmlscript/test
Tor Lillqvist
tml at collabora.com
Wed Nov 13 11:37:22 PST 2013
Rebased ref, commits from common ancestor:
commit e51ae1c2c69597d751cf184f69192cdcb57ebf48
Author: Tor Lillqvist <tml at collabora.com>
Date: Wed Nov 13 20:42:19 2013 +0200
s/printf/SAL_INFO
Change-Id: I2bced1740811d236e724eaf172249a9828575d7a
diff --git a/sc/source/core/opencl/openclwrapper.cxx b/sc/source/core/opencl/openclwrapper.cxx
index aa41112..70a9e00 100644
--- a/sc/source/core/opencl/openclwrapper.cxx
+++ b/sc/source/core/opencl/openclwrapper.cxx
@@ -207,11 +207,11 @@ std::vector<boost::shared_ptr<osl::File> > OpenclDevice::binaryGenerated( const
if(pNewFile->open(osl_File_OpenFlag_Read) == osl::FileBase::E_None)
{
aGeneratedFiles.push_back(boost::shared_ptr<osl::File>(pNewFile));
- printf("opencl-wrapper: opening binary for reading [%s] success\n", fileName.getStr());
+ SAL_INFO("sc.opencl", "Opening binary file '" << fileName << "' for reading: success");
}
else
{
- printf("opencl-wrapper: opening binary for reading [%s] fail\n", fileName.getStr());
+ SAL_INFO("sc.opencl", "Opening binary file '" << fileName << "' for reading: FAIL");
delete pNewFile;
break;
}
@@ -288,11 +288,9 @@ int OpenclDevice::generatBinFromKernelSource( cl_program program, const char * c
OString fileName = createFileName(mpArryDevsID[i], clFileName);
if ( !writeBinaryToFile( fileName,
binaries[i], binarySizes[i] ) )
- {
- printf("opencl-wrapper: write binary [%s] fail\n", fileName.getStr());
- }
+ SAL_INFO("sc.opencl", "Writing binary file '" << fileName << "': FAIL");
else
- printf("opencl-wrapper: write binary [%s] success\n", fileName.getStr());
+ SAL_INFO("sc.opencl", "Writing binary file '" << fileName << "': success");
}
}
@@ -351,7 +349,6 @@ bool buildProgram(const char* buildOption, GPUEnv* gpuInfo, int idx)
cl_int clStatus;
//char options[512];
// create a cl program executable for all the devices specified
- printf("BuildProgram.\n");
if (!gpuInfo->mnIsUserCreated)
{
clStatus = clBuildProgram(gpuInfo->mpArryPrograms[idx], 1, gpuInfo->mpArryDevsID,
@@ -366,7 +363,6 @@ bool buildProgram(const char* buildOption, GPUEnv* gpuInfo, int idx)
if ( clStatus != CL_SUCCESS )
{
size_t length;
- printf ("BuildProgram error!\n");
if ( !gpuInfo->mnIsUserCreated )
{
clStatus = clGetProgramBuildInfo( gpuInfo->mpArryPrograms[idx], gpuInfo->mpArryDevsID[0],
@@ -379,7 +375,6 @@ bool buildProgram(const char* buildOption, GPUEnv* gpuInfo, int idx)
}
if ( clStatus != CL_SUCCESS )
{
- printf("opencl create build log fail\n");
return 0;
}
@@ -396,7 +391,6 @@ bool buildProgram(const char* buildOption, GPUEnv* gpuInfo, int idx)
}
if ( clStatus != CL_SUCCESS )
{
- printf("opencl program build info fail\n");
return false;
}
@@ -465,7 +459,6 @@ bool OpenclDevice::buildProgramFromBinary(const char* buildOption, GPUEnv* gpuIn
cl_int binary_status;
- fprintf(stderr, "Create kernel from binary\n");
gpuInfo->mpArryPrograms[idx] = clCreateProgramWithBinary( gpuInfo->mpContext,numDevices,
mpArryDevsID.get(), length.get(), (const unsigned char**) pBinary.get(),
&binary_status, &clStatus );
@@ -503,10 +496,8 @@ int OpenclDevice::initOpenclRunEnv( int argc )
int status = initOpenclRunEnv( &gpuEnv );
if ( status )
{
- printf("init_opencl_env failed.\n");
return 1;
}
- printf("init_opencl_env successed.\n");
//initialize program, kernelName, kernelCount
if( getenv( "SC_FLOAT" ) )
{
@@ -515,15 +506,15 @@ int OpenclDevice::initOpenclRunEnv( int argc )
}
if( gpuEnv.mnKhrFp64Flag )
{
- printf("----use khr double type in kernel----\n");
+ SAL_INFO("sc.opencl", "Use Khr double");
}
else if( gpuEnv.mnAmdFp64Flag )
{
- printf("----use amd double type in kernel----\n");
+ SAL_INFO("sc.opencl", "Use AMD double type");
}
else
{
- printf("----use float type in kernel----\n");
+ SAL_INFO("sc.opencl", "USE float type");
}
isInited = 1;
}
@@ -710,7 +701,6 @@ int OpenclDevice::initOpenclRunEnv( GPUEnv *gpuInfo )
void OpenclDevice::setOpenclState( int state )
{
- //printf("OpenclDevice::setOpenclState...\n");
isInited = state;
}
commit 3b7c9033e8b0f28544468a15bb902c1029c47084
Author: Tor Lillqvist <tml at collabora.com>
Date: Wed Nov 6 21:21:56 2013 +0200
We don't use the static OpenCL kernels any more
So remove them and code related only to them.
Change-Id: Ibd09e7a801b9757443b6f87018570ec007e201d5
diff --git a/sc/inc/formulagroup.hxx b/sc/inc/formulagroup.hxx
index fb7692b..7c16364 100644
--- a/sc/inc/formulagroup.hxx
+++ b/sc/inc/formulagroup.hxx
@@ -89,7 +89,6 @@ class SC_DLLPUBLIC FormulaGroupInterpreter
static FormulaGroupInterpreter *getStatic();
static void fillOpenCLInfo(std::vector<OpenclPlatformInfo>& rPlatforms);
static bool switchOpenCLDevice(const OUString& rDeviceId, bool bAutoSelect);
- static void compileOpenCLKernels();
static void enableOpenCL(bool bEnable);
virtual ScMatrixRef inverseMatrix(const ScMatrix& rMat) = 0;
diff --git a/sc/source/core/opencl/formulagroupcl.cxx b/sc/source/core/opencl/formulagroupcl.cxx
index fbe43c8..81ac09d 100644
--- a/sc/source/core/opencl/formulagroupcl.cxx
+++ b/sc/source/core/opencl/formulagroupcl.cxx
@@ -72,7 +72,7 @@ size_t DynamicKernelArgument::Marshal(cl_kernel k, int argno, int)
}
// Obtain cl context
KernelEnv kEnv;
- OclCalc::setKernelEnv(&kEnv);
+ OpenclDevice::setKernelEnv(&kEnv);
cl_int err;
mpClmem = clCreateBuffer(kEnv.mpkContext,
(cl_mem_flags) CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR,
@@ -139,7 +139,7 @@ public:
// marshaling
// Obtain cl context
KernelEnv kEnv;
- OclCalc::setKernelEnv(&kEnv);
+ OpenclDevice::setKernelEnv(&kEnv);
// Pass the scalar result back to the rest of the formula kernel
cl_int err = clSetKernelArg(k, argno, sizeof(cl_uint), (void*)&hashCode);
if (CL_SUCCESS != err)
@@ -220,7 +220,7 @@ size_t DynamicKernelStringArgument::Marshal(cl_kernel k, int argno, int)
assert(mpClmem == NULL);
// Obtain cl context
KernelEnv kEnv;
- OclCalc::setKernelEnv(&kEnv);
+ OpenclDevice::setKernelEnv(&kEnv);
cl_int err;
formula::VectorRefArray vRef;
size_t nStrings = 0;
@@ -1367,7 +1367,7 @@ public:
{
// Obtain cl context
KernelEnv kEnv;
- OclCalc::setKernelEnv(&kEnv);
+ OpenclDevice::setKernelEnv(&kEnv);
cl_int err;
// The results
mpResClmem = clCreateBuffer(kEnv.mpkContext,
@@ -1424,7 +1424,7 @@ void DynamicKernel::CreateKernel(void)
// Compile kernel here!!!
// Obtain cl context
KernelEnv kEnv;
- OclCalc::setKernelEnv(&kEnv);
+ OpenclDevice::setKernelEnv(&kEnv);
const char *src = mFullProgramSrc.c_str();
if (OpenclDevice::buildProgramFromBinary("",
&OpenclDevice::gpuEnv,
@@ -1546,7 +1546,7 @@ bool FormulaGroupInterpreterOpenCL::interpret( ScDocument& rDoc,
mpKernel->CodeGen();
// Obtain cl context
KernelEnv kEnv;
- OclCalc::setKernelEnv(&kEnv);
+ OpenclDevice::setKernelEnv(&kEnv);
// Compile kernel here!!!
mpKernel->CreateKernel();
// Run the kernel.
@@ -1636,11 +1636,6 @@ SAL_DLLPUBLIC_EXPORT bool SAL_CALL switchOpenClDevice(
return sc::opencl::switchOpenclDevice(pDeviceId, bAutoSelect);
}
-SAL_DLLPUBLIC_EXPORT void compileOpenCLKernels(const OUString* pDeviceId)
-{
- sc::opencl::compileOpenCLKernels(pDeviceId);
-}
-
}
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sc/source/core/opencl/oclkernels.hxx b/sc/source/core/opencl/oclkernels.hxx
deleted file mode 100644
index 3e0af5b..0000000
--- a/sc/source/core/opencl/oclkernels.hxx
+++ /dev/null
@@ -1,390 +0,0 @@
-/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
-/*
- * This file is part of the LibreOffice project.
- *
- * This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/.
- */
-
-#ifndef SC_OCLKERNELS_HXX
-#define SC_OCLKERNELS_HXX
-
-#ifndef USE_EXTERNAL_KERNEL
-#define KERNEL( ... )# __VA_ARGS__
-
-namespace sc { namespace opencl {
-
-// Double precision is a default of spreadsheets
-// cl_khr_fp64: Khronos extension
-// cl_amd_fp64: AMD extension
-// use build option outside to define fp_t
-/////////////////////////////////////////////
-const char *kernel_src = KERNEL(
-\n#ifdef KHR_DP_EXTENSION\n
-\n#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n
-\n#elif AMD_DP_EXTENSION\n
-\n#pragma OPENCL EXTENSION cl_amd_fp64 : enable\n
-\n#else\n
-\n#endif\n
-inline fp_t oclAverage( const uint id,__global fp_t *values,__global uint *startArray,__global uint *endArray)
-{
- uint start = startArray[id];
- uint end = endArray[id];
- fp_t fSum = 0.0;
- fp_t zero[16] = {0.0f,0.0f,0.0f,0.0f,0.0f,0.0f,0.0f,0.0f,0.0f,0.0f,0.0f,0.0f,0.0f,0.0f,0.0f,0.0f};
- fp_t16 vSum=vload16(0,zero);
- fp_t16 ptr;
- __global fp_t *p = values;
- p+= start;
-
- for(int i = 0; i < (end - start + 1)/16; ++i)
- {
- ptr=vload16(0,p);
- vSum += ptr;
- p+=16;
- }
- int lastData = (end-start+1)%16;
- for(int i = 0; i <lastData; i++)
- {
- fSum += *p;
- p+=1;
- }
- vSum.s01234567 = vSum.s01234567+vSum.s89abcdef;
- vSum.s0123 = vSum.s0123+vSum.s4567;
- vSum.s01 = vSum.s01+vSum.s23;
- vSum.s0 = vSum.s0+vSum.s1;
- fSum = vSum.s0+fSum;
- fp_t fVal = fSum/(end-start+1);
- return fVal;
-}
-inline fp_t oclMax( const uint id,__global fp_t *values,__global uint *startArray,__global uint *endArray)
-{
- uint start = startArray[id];
- uint end = endArray[id];
- fp_t fMax = values[start];
- fp_t zero[16] = {fMax,fMax,fMax,fMax,fMax,fMax,fMax,fMax,fMax,fMax,fMax,fMax,fMax,fMax,fMax,fMax};
- fp_t16 vMax=vload16(0,zero);
- //Max
- fp_t16 ptr;
- __global fp_t *p = values;
- p+= start;
- for(int i = 0; i < (end - start + 1)/16; ++i)
- {
- ptr=vload16(0,p);
- vMax = fmax(vMax,ptr);
- p+=16;
- }
- int lastData = (end-start+1)%16;
- for(int i = 0; i <lastData; i++)
- {
- fMax = fmax(fMax,*p);
- p+=1;
- }
- vMax.s01234567 = fmax(vMax.s01234567, vMax.s89abcdef);
- vMax.s0123 = fmax(vMax.s0123, vMax.s4567);
- vMax.s01 = fmax(vMax.s01, vMax.s23);
- vMax.s0 = fmax(vMax.s0, vMax.s1);
- fMax = fmax(vMax.s0, fMax);
- return fMax;
-}
-inline fp_t oclMin( const uint id,__global fp_t *values,__global uint *startArray,__global uint *endArray)
-{
- uint start = startArray[id];
- uint end = endArray[id];
- fp_t fMin = values[start];
- fp_t zero[16] = {fMin,fMin,fMin,fMin,fMin,fMin,fMin,fMin,fMin,fMin,fMin,fMin,fMin,fMin,fMin,fMin};
- fp_t16 vMin=vload16(0,zero);
- //Min
- fp_t16 ptr;
- __global fp_t *p = values;
- p+= start;
- for(int i = 0; i < (end - start + 1)/16; ++i)
- {
- ptr=vload16(0,p);
- vMin = fmin(vMin,ptr);
- p+=16;
- }
- int lastData = (end-start+1)%16;
- for(int i = 0; i <lastData; i++)
- {
- fMin = fmin(fMin,*p);
- p+=1;
- }
- vMin.s01234567 = fmin(vMin.s01234567, vMin.s89abcdef);
- vMin.s0123 = fmin(vMin.s0123, vMin.s4567);
- vMin.s01 = fmin(vMin.s01, vMin.s23);
- vMin.s0 = fmin(vMin.s0, vMin.s1);
- fMin = fmin(vMin.s0, fMin);
- return fMin;
-}
-
-__kernel void oclSignedAdd(__global fp_t *ltData,__global fp_t *rtData,__global fp_t *otData)
-{
- const unsigned int id = get_global_id(0);
- otData[id] = ltData[id] + rtData[id];
-}
-
-__kernel void oclSignedSub(__global fp_t *ltData,__global fp_t *rtData,__global fp_t *otData)
-{
- const unsigned int id = get_global_id(0);
- otData[id] = ltData[id] - rtData[id];
-}
-
-__kernel void oclSignedMul(__global fp_t *ltData,__global fp_t *rtData,__global fp_t *otData)
-{
- int id = get_global_id(0);
- otData[id] = ltData[id] * rtData[id];
-}
-
-__kernel void oclSignedDiv(__global fp_t *ltData,__global fp_t *rtData,__global fp_t *otData)
-{
- const unsigned int id = get_global_id(0);
- fp_t divisor = rtData[id];
- if ( divisor != 0 )
- otData[id] = ltData[id] / divisor;
- else
- otData[id] = 0.0;
-}
-
-__kernel void oclFormulaMin(__global fp_t *input,__global uint *start,__global uint *end,__global fp_t *output)
-{
- const unsigned int id = get_global_id(0);
- fp_t fVal = oclMin(id,input,start,end);
- output[id] = fVal ;
-}
-
-__kernel void oclFormulaMax(__global fp_t *input,__global uint *start,__global uint *end,__global fp_t *output)
-{
- const unsigned int id = get_global_id(0);
- fp_t fVal = oclMax(id,input,start,end);
- output[id] = fVal ;
-}
-//Sum
-__kernel void oclFormulaSum(__global fp_t *input,__global uint *start,__global uint *end,__global fp_t *output)
-{
- const unsigned int nId = get_global_id(0);
- fp_t fSum = 0.0;
- for(int i = start[nId]; i<=end[nId]; i++)
- fSum += input[i];
- output[nId] = fSum ;
-}
-//Count
-__kernel void oclFormulaCount(__global uint *start,__global uint *end,__global fp_t *output)
-{
- const unsigned int nId = get_global_id(0);
- output[nId] = end[nId] - start[nId] + 1;
-}
-
-__kernel void oclFormulaAverage(__global fp_t *input,__global uint *start,__global uint *end,__global fp_t *output)
-{
- const unsigned int id = get_global_id(0);
- fp_t fVal = oclAverage(id,input,start,end);
- output[id] = fVal ;
-
-}
-//Sumproduct
-__kernel void oclFormulaSumproduct(__global fp_t *firstCol,__global uint* npSumSize,__global fp_t *output,uint nMatixSize)
-{
- const unsigned int id = get_global_id(0);
- unsigned int nSumSize = npSumSize[id];
- fp_t fSum = 0.0;
- for(int i=0;i<nSumSize;i++)
- fSum += firstCol[i + nMatixSize * id];
- output[id] = fSum;
-}
-
-__kernel void oclAverageDelta(__global fp_t *values, __global fp_t *subtract, uint start, uint end, __global fp_t *output)
-{
- const unsigned int id = get_global_id(0);
-
- // Average
- fp_t fSum = 0.0;
- for(int i = start; i < end; i++)
- fSum += values[i];
- fp_t fVal = fSum/(end-start);
-
- // Subtract & output
- output[id] = fVal - subtract[id];
-}
-
-__kernel void oclMaxDelta(__global fp_t *values, __global fp_t *subtract, uint start, uint end, __global fp_t *output)
-{
- const unsigned int id = get_global_id(0);
-
- // Max
- fp_t fMaxVal = values[start];
- for ( int i = start + 1; i < end; i++ )
- {
- if(values[i]>fMaxVal)
- fMaxVal = values[i];
- }
-
- // Subtract & output
- output[id] = fMaxVal - subtract[id];
-}
-
-__kernel void oclMinDelta(__global fp_t *values, __global fp_t *subtract, uint start, uint end, __global fp_t *output)
-{
- const unsigned int id = get_global_id(0);
-
- // Min
- fp_t fMinVal = values[start];
- for ( int i = start + 1; i < end; i++ )
- {
- if(values[i]<fMinVal)
- fMinVal = values[i];
- }
-
- // Subtract & output
- output[id] = fMinVal - subtract[id];
-}
-
-__kernel void oclSubDelta( fp_t ltData, __global fp_t *rtData, __global fp_t *outData )
-{
- const unsigned int id = get_global_id(0);
- outData[id] = ltData - rtData[id];
-}
-
-__kernel void oclFormulaMtxInv(__global fp_t * fpMatrixInput, __global fp_t * fpP, int nOffset, int nMax)
-{
- //get the global id of the workitem
- int nId = get_global_id(0);
- int nDimension = get_global_size(0);
- fp_t dMovebuffer;
- dMovebuffer = fpMatrixInput[nOffset*nDimension+nId];
- fpMatrixInput[nOffset*nDimension+nId] = fpMatrixInput[nMax*nDimension+nId];
- fpMatrixInput[nMax*nDimension+nId] = dMovebuffer;
-
- dMovebuffer = fpP[nOffset*nDimension+nId];
- fpP[nOffset*nDimension+nId] = fpP[nMax*nDimension+nId];
- fpP[nMax*nDimension+nId] = dMovebuffer;
-}
-__kernel void oclMatrixSolve(__global fp_t * fpMatrixInput,__global fp_t * fpMatrixOutput,__global fp_t * fpP,__global fp_t * fpY,__global uint* npDim)
-{
- int nId = get_global_id(0);
- int nDimension = npDim[nId];
- fp_t fsum = 0.0;
- for ( int yi=0; yi < nDimension; yi++ )
- {
- fsum = 0.0;
- for ( int yj=0; yj < nDimension; yj++ )
- {
- fsum += fpMatrixInput[yi*nDimension+yj] * fpY[nId+yj*nDimension];
- }
-
- fpY[nId+yi*nDimension] = fpP[yi*nDimension+nId] - fsum;
- }
- for ( int xi = nDimension - 1; xi >= 0; xi-- )
- {
- fsum = 0.0;
- for ( int xj = 0; xj < nDimension; xj++ )
- {
- fsum += fpMatrixInput[xi*nDimension+xj] * fpMatrixOutput[nId+nDimension*xj];
- }
- fpMatrixOutput[nId+xi*nDimension] = (fpY[xi*nDimension+nId] - fsum) / fpMatrixInput[xi*nDimension+xi];
- }
-}
-
-__kernel void oclAverageAdd(__global fp_t *values,__global fp_t *addend, __global uint *startArray, __global uint *endArray, __global fp_t *output)
-{
- const unsigned int id = get_global_id(0);
- fp_t fVal = oclAverage(id,values,startArray,endArray);
- output[id] = fVal + addend[id];
-}
-
-__kernel void oclAverageSub(__global fp_t *values,__global fp_t *subtract, __global uint *startArray, __global uint *endArray, __global fp_t *output)
-{
- const unsigned int id = get_global_id(0);
- fp_t fVal = oclAverage(id,values,startArray,endArray);
- output[id] = fVal - subtract[id];
-}
-
-__kernel void oclAverageMul(__global fp_t *values,__global fp_t *multiplier, __global uint *startArray, __global uint *endArray, __global fp_t *output)
-{
- const unsigned int id = get_global_id(0);
- fp_t fVal = oclAverage(id,values,startArray,endArray);
- output[id] = fVal * multiplier[id];
-}
-__kernel void oclAverageDiv(__global fp_t *values,__global fp_t *div, __global uint *startArray, __global uint *endArray, __global fp_t *output)
-{
- const unsigned int id = get_global_id(0);
- fp_t fVal = oclAverage(id,values,startArray,endArray);
- fp_t divisor = div[id];
- if ( divisor != 0 )
- output[id] = fVal / divisor;
- else
- output[id] = 0.0;
-}
-
-__kernel void oclMinAdd(__global fp_t *values, __global fp_t *addend, __global uint *startArray, __global uint *endArray, __global fp_t *output)
-{
- const unsigned int id = get_global_id(0);
- fp_t fMin = oclMin(id,values,startArray,endArray);
- output[id] = fMin + addend[id];
-}
-
-__kernel void oclMinSub(__global fp_t *values, __global fp_t *subtract, __global uint *startArray, __global uint *endArray, __global fp_t *output)
-{
- const unsigned int id = get_global_id(0);
- fp_t fMin = oclMin(id,values,startArray,endArray);
- output[id] = fMin - subtract[id];
-}
-__kernel void oclMinMul(__global fp_t *values, __global fp_t *multiplier, __global uint *startArray, __global uint *endArray, __global fp_t *output)
-{
- const unsigned int id = get_global_id(0);
- fp_t fMin = oclMin(id,values,startArray,endArray);
- output[id] = fMin * multiplier[id];
-}
-__kernel void oclMinDiv(__global fp_t *values, __global fp_t *div, __global uint *startArray, __global uint *endArray, __global fp_t *output)
-{
- const unsigned int id = get_global_id(0);
- fp_t fMin = oclMin(id,values,startArray,endArray);
- fp_t divisor = div[id];
- if ( divisor != 0 )
- output[id] = fMin / divisor;
- else
- output[id] = 0.0;
-}
-__kernel void oclMaxAdd(__global fp_t *values, __global fp_t *addend, __global uint *startArray, __global uint *endArray, __global fp_t *output)
-{
- const unsigned int id = get_global_id(0);
- fp_t fMax = oclMax(id,values,startArray,endArray);
- output[id] = fMax + addend[id];
-}
-
-__kernel void oclMaxSub(__global fp_t *values, __global fp_t *subtract, __global uint *startArray, __global uint *endArray, __global fp_t *output)
-{
- const unsigned int id = get_global_id(0);
- fp_t fMax = oclMax(id,values,startArray,endArray);
- output[id] = fMax - subtract[id];
-}
-__kernel void oclMaxMul(__global fp_t *values, __global fp_t *multiplier, __global uint *startArray, __global uint *endArray, __global fp_t *output)
-{
- const unsigned int id = get_global_id(0);
- fp_t fMax = oclMax(id,values,startArray,endArray);
- output[id] = fMax * multiplier[id];
-}
-__kernel void oclMaxDiv(__global fp_t *values, __global fp_t *div, __global uint *startArray, __global uint *endArray, __global fp_t *output)
-{
- const unsigned int id = get_global_id(0);
- fp_t fMax = oclMax(id,values,startArray,endArray);
- fp_t divisor = div[id];
- if ( divisor != 0 )
- output[id] = fMax / divisor;
- else
- output[id] = 0.0;
-}
-
-__kernel void oclSub( fp_t ltData, __global fp_t *rtData, __global fp_t *outData )
-{
- const unsigned int id = get_global_id(0);
- outData[id] = ltData - rtData[id];
-}
-);
-
-}}
-
-#endif // USE_EXTERNAL_KERNEL
-#endif //_OCL_KERNEL_H_
-/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sc/source/core/opencl/openclwrapper.cxx b/sc/source/core/opencl/openclwrapper.cxx
index 61eed49..aa41112 100644
--- a/sc/source/core/opencl/openclwrapper.cxx
+++ b/sc/source/core/opencl/openclwrapper.cxx
@@ -19,7 +19,6 @@
#include "sal/config.h"
#include <osl/file.hxx>
-#include "oclkernels.hxx"
#include <stdio.h>
#include <stdlib.h>
@@ -43,8 +42,6 @@ using namespace std;
namespace sc { namespace opencl {
-Kernel::Kernel( const char* pName ) : mpName(pName), mpKernel(NULL) {}
-
GPUEnv OpenclDevice::gpuEnv;
int OpenclDevice::isInited =0;
@@ -68,12 +65,6 @@ OString generateMD5(const void* pData, size_t length)
return aBuffer.makeStringAndClear();
}
-OString generateHashForSource()
-{
- size_t nLength = strlen(kernel_src);
- return generateMD5(kernel_src, nLength);
-}
-
OString getCacheFolder()
{
OUString url("${$BRAND_BASE_DIR/" LIBO_ETC_FOLDER "/" SAL_CONFIGFILE("bootstrap") ":UserInstallation}/cache/");
@@ -86,6 +77,15 @@ OString getCacheFolder()
void clearCache()
{
+#if 0
+ // We used to delete all files that did not end with the hash of
+ // the static kernel source string from oclkernels.hxx. But as
+ // those static kernels were not used for anything, it was
+ // pointless, that hash never changed. The static kernels are now
+ // removed, their hash is not part of the .bin file names any
+ // more. So there is little this function can do until we come up
+ // with some other way to figure out which cached .bin files are
+ // "current".
OUString aCacheDirURL(rtl::OStringToOUString(OpenclDevice::maCacheFolder, RTL_TEXTENCODING_UTF8));
osl::Directory aCacheDir(aCacheDirURL);
osl::FileBase::RC status = aCacheDir.open();
@@ -93,7 +93,6 @@ void clearCache()
return;
osl::DirectoryItem aItem;
- OUString aSourceString = rtl::OStringToOUString(OpenclDevice::maSourceHash + ".bin", RTL_TEXTENCODING_UTF8);
while(osl::FileBase::E_None == aCacheDir.getNextItem(aItem))
{
osl::FileStatus aFileStatus(osl_FileStatus_Mask_FileName|osl_FileStatus_Mask_FileURL);
@@ -104,7 +103,7 @@ void clearCache()
OUString aFileName = aFileStatus.getFileName();
if(aFileName.endsWith(".bin"))
{
- if(!aFileName.endsWith(aSourceString))
+ if ( file is in some way obsolete )
{
// delete the file
OUString aFileUrl = aFileStatus.getFileURL();
@@ -112,11 +111,11 @@ void clearCache()
}
}
}
+#endif
}
}
-OString OpenclDevice::maSourceHash = generateHashForSource();
OString OpenclDevice::maCacheFolder = getCacheFolder();
int OpenclDevice::releaseOpenclRunEnv()
@@ -126,64 +125,11 @@ int OpenclDevice::releaseOpenclRunEnv()
return 1;
}
-namespace {
-
-const char* pKernelNames[] = {
-
- "oclFormulaMin",
- "oclFormulaMax",
- "oclFormulaSum",
- "oclFormulaCount",
- "oclFormulaAverage",
- "oclFormulaSumproduct",
- "oclFormulaMtxInv",
-
- "oclSignedAdd",
- "oclSignedSub",
- "oclSignedMul",
- "oclSignedDiv",
- "oclAverageDelta",
- "oclMaxDelta",
- "oclMinDelta",
- "oclSubDelta",
- "oclLUDecomposition",
- "oclAverageDeltaRPN",
- "oclMaxDeltaRPN",
- "oclMinDeltaRPN",
- "oclMoreColArithmeticOperator",
- "oclColumnH",
- "oclColumnL",
- "oclColumnN",
- "oclColumnJ",
- "oclMaxSub",
- "oclAverageSub",
- "oclMinSub",
- "oclMaxAdd",
- "oclAverageAdd",
- "oclMinAdd",
- "oclMaxMul",
- "oclAverageMul"
- "oclMinMul",
- "oclMaxDiv",
- "oclAverageDiv"
- "oclMinDiv",
- "oclSub",
-
- "oclMatrixSolve"
-};
-
-}
-
int OpenclDevice::registOpenclKernel()
{
if ( !gpuEnv.mnIsUserCreated )
memset( &gpuEnv, 0, sizeof(gpuEnv) );
- gpuEnv.mnFileCount = 0; //argc;
-
- for (size_t i = 0, n = SAL_N_ELEMENTS(pKernelNames); i < n; ++i)
- gpuEnv.maKernels.push_back(Kernel(pKernelNames[i]));
-
return 0;
}
@@ -196,32 +142,6 @@ int OpenclDevice::setKernelEnv( KernelEnv *envInfo )
return 1;
}
-Kernel* OpenclDevice::fetchKernel( const char *kernelName )
-{
- cl_int nStatus;
- for (size_t i = 0, n = gpuEnv.maKernels.size(); i < n; ++i)
- {
- Kernel* pKernel = &gpuEnv.maKernels[i];
- if (!strcasecmp(kernelName, pKernel->mpName))
- {
- printf("found the kernel named %s.\n", kernelName);
- if (!pKernel->mpKernel && gpuEnv.mpArryPrograms[0])
- {
- pKernel->mpKernel = clCreateKernel(gpuEnv.mpArryPrograms[0], kernelName, &nStatus);
- if (nStatus != CL_SUCCESS)
- pKernel->mpKernel = NULL;
-
- printf("Kernel named '%s' has been compiled\n", kernelName);
- }
-
- return pKernel->mpKernel ? pKernel : NULL;
- }
- }
-
- printf("No kernel named %s found.\n", kernelName);
- return NULL;
-}
-
namespace {
OString createFileName(cl_device_id deviceId, const char* clFileName)
@@ -252,7 +172,7 @@ OString createFileName(cl_device_id deviceId, const char* clFileName)
OString aHash = generateMD5(aString.getStr(), aString.getLength());
return OpenclDevice::maCacheFolder + fileName + "-" +
- aHash + "-" + OpenclDevice::maSourceHash + ".bin";
+ aHash + ".bin";
}
}
@@ -402,27 +322,11 @@ int OpenclDevice::initOpenclAttr( OpenCLEnv * env )
int OpenclDevice::releaseOpenclEnv( GPUEnv *gpuInfo )
{
- int clStatus = 0;
-
if ( !isInited )
{
return 1;
}
- // Release all cached kernels.
- for (size_t i = 0, n = gpuInfo->maKernels.size(); i < n; ++i)
- clReleaseKernel(gpuInfo->maKernels[i].mpKernel);
- gpuInfo->maKernels.clear();
-
- for (int i = 0; i < gpuEnv.mnFileCount; i++)
- {
- if ( gpuEnv.mpArryPrograms[i] )
- {
- clStatus = clReleaseProgram( gpuEnv.mpArryPrograms[i] );
- CHECK_OPENCL( clStatus, "clReleaseProgram" );
- gpuEnv.mpArryPrograms[i] = NULL;
- }
- }
if ( gpuEnv.mpCmdQueue )
{
clReleaseCommandQueue( gpuEnv.mpCmdQueue );
@@ -440,23 +344,6 @@ int OpenclDevice::releaseOpenclEnv( GPUEnv *gpuInfo )
return 1;
}
-int OpenclDevice::cachedOfKernerPrg( const GPUEnv *gpuEnvCached, const char * clFileName )
-{
- int i;
- for ( i = 0; i < gpuEnvCached->mnFileCount; i++ )
- {
- if ( strcasecmp( gpuEnvCached->mArryKnelSrcFile[i], clFileName ) == 0 )
- {
- if ( gpuEnvCached->mpArryPrograms[i] != NULL )
- {
- return 1;
- }
- }
- }
-
- return 0;
-}
-
namespace {
bool buildProgram(const char* buildOption, GPUEnv* gpuInfo, int idx)
@@ -532,25 +419,6 @@ bool buildProgram(const char* buildOption, GPUEnv* gpuInfo, int idx)
}
-bool OpenclDevice::buildProgramFromSource(const char* buildOption, GPUEnv* gpuInfo, const char* filename, int idx)
-{
- cl_int clStatus = 0;
- // create a CL program using the kernel source
- fprintf(stderr, "Create kernel from source\n");
- size_t source_size[1];
-
- source_size[0] = strlen( kernel_src );
- gpuInfo->mpArryPrograms[idx] = clCreateProgramWithSource( gpuInfo->mpContext, 1, &kernel_src,
- source_size, &clStatus);
-
- if(clStatus != CL_SUCCESS)
- return false;
-
- bool bSuccess = buildProgram(buildOption, gpuInfo, idx);
- generatBinFromKernelSource( gpuInfo->mpArryPrograms[idx], filename );
- return bSuccess;
-}
-
bool OpenclDevice::buildProgramFromBinary(const char* buildOption, GPUEnv* gpuInfo, const char* filename, int idx)
{
size_t numDevices;
@@ -851,1706 +719,6 @@ int OpenclDevice::getOpenclState()
return isInited;
}
-OclCalc::OclCalc()
-{
- fprintf(stderr,"OclCalc:: init opencl ...\n");
- nFormulaColSize = 0;
- nFormulaRowSize = 0;
- nArithmeticLen = 0;
- nFormulaLen = 0;
- mpClmemSrcData = NULL;
- mpClmemStartPos = NULL;
- mpClmemEndPos = NULL;
- mpClmemLeftData = NULL;
- mpClmemRightData = NULL;
- mpClmemMergeLfData = NULL;
- mpClmemMergeRtData = NULL;
- mpClmemMatixSumSize = NULL;
- mpClmemeOp = NULL;
-}
-
-OclCalc::~OclCalc()
-{
- releaseOclBuffer();
-}
-
-void OclCalc::releaseOclBuffer()
-{
- cl_int clStatus = 0;
- CHECK_OPENCL_RELEASE( clStatus, mpClmemSrcData );
- CHECK_OPENCL_RELEASE( clStatus, mpClmemStartPos );
- CHECK_OPENCL_RELEASE( clStatus, mpClmemEndPos );
- CHECK_OPENCL_RELEASE( clStatus, mpClmemLeftData );
- CHECK_OPENCL_RELEASE( clStatus, mpClmemRightData );
- fprintf(stderr,"OclCalc:: opencl end ...\n");
-}
-
-/////////////////////////////////////////////////////////////////////////////
-
-bool OclCalc::createBuffer64Bits( double *&dpLeftData, double *&dpRightData, int nBufferSize )
-{
- cl_int clStatus = 0;
- setKernelEnv( &kEnv );
-
- mpClmemLeftData = clCreateBuffer( kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR ),
- nBufferSize * sizeof(double), NULL, &clStatus );
- CHECK_OPENCL( clStatus, "clCreateBuffer" );
- mpClmemRightData = clCreateBuffer( kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR ),
- nBufferSize * sizeof(double), NULL, &clStatus );
- CHECK_OPENCL( clStatus,"clCreateBuffer" );
- dpLeftData = (double *)clEnqueueMapBuffer( kEnv.mpkCmdQueue,mpClmemLeftData,CL_TRUE,CL_MAP_WRITE,0,
- nBufferSize * sizeof(double),0,NULL,NULL,&clStatus );
- CHECK_OPENCL( clStatus, "clCreateBuffer" );
- clFinish(kEnv.mpkCmdQueue);
- dpRightData = (double *)clEnqueueMapBuffer( kEnv.mpkCmdQueue,mpClmemRightData,CL_TRUE,CL_MAP_WRITE,0,
- nBufferSize * sizeof(double),0,NULL,NULL,&clStatus );
- CHECK_OPENCL( clStatus, "clCreateBuffer" );
- clFinish( kEnv.mpkCmdQueue );
- //printf("In CreateBuffer, pptrr is %d,%d,%d\n",fpSrcData,npStartPos,npEndPos);
- return true;
-}
-
-bool OclCalc::mapAndCopy64Bits(const double *dpTempSrcData,unsigned int *unStartPos,unsigned int *unEndPos,int nBufferSize ,int nRowsize)
-{
- cl_int clStatus = 0;
- double * dpSrcDataMap = (double *)clEnqueueMapBuffer( kEnv.mpkCmdQueue, mpClmemSrcData, CL_TRUE,CL_MAP_WRITE, 0,
- nBufferSize * sizeof(double), 0, NULL, NULL, &clStatus );
- CHECK_OPENCL( clStatus, "clEnqueueMapBuffer" );
- clFinish( kEnv.mpkCmdQueue );
- unsigned int *npStartPosMap = (uint *)clEnqueueMapBuffer( kEnv.mpkCmdQueue, mpClmemStartPos, CL_TRUE,CL_MAP_WRITE, 0,
- nRowsize * sizeof(uint), 0, NULL, NULL, &clStatus );
- CHECK_OPENCL( clStatus, "clEnqueueMapBuffer" );
- clFinish( kEnv.mpkCmdQueue );
- unsigned int *npEndPosMap = (uint *)clEnqueueMapBuffer( kEnv.mpkCmdQueue, mpClmemEndPos, CL_TRUE, CL_MAP_WRITE, 0,
- nRowsize * sizeof(uint), 0, NULL, NULL, &clStatus );
- CHECK_OPENCL( clStatus, "clEnqueueMapBuffer" );
- clFinish( kEnv.mpkCmdQueue );
- for(int i=0;i<nBufferSize;i++)
- dpSrcDataMap[i] = dpTempSrcData[i];
- for(int i=0;i<nRowsize;i++)
- {
- npStartPosMap[i] = unStartPos[i];
- npEndPosMap[i] = unEndPos[i];
- }
- clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, mpClmemSrcData, dpSrcDataMap, 0, NULL, NULL );
- CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" );
- clFinish( kEnv.mpkCmdQueue );
- clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, mpClmemStartPos, npStartPosMap, 0, NULL, NULL );
- CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" );
- clFinish( kEnv.mpkCmdQueue );
- clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, mpClmemEndPos, npEndPosMap, 0, NULL, NULL );
- CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" );
- clFinish( kEnv.mpkCmdQueue );
- return true;
-}
-
-bool OclCalc::mapAndCopy64Bits(const double *dpTempLeftData,const double *dpTempRightData,int nBufferSize )
-{
- cl_int clStatus = 0;
- double *dpLeftDataMap = (double *)clEnqueueMapBuffer( kEnv.mpkCmdQueue, mpClmemLeftData, CL_TRUE, CL_MAP_WRITE,
- 0, nBufferSize * sizeof(double), 0, NULL, NULL, &clStatus );
- CHECK_OPENCL( clStatus, "clEnqueueMapBuffer" );
- clFinish( kEnv.mpkCmdQueue );
- double *dpRightDataMap = (double *)clEnqueueMapBuffer( kEnv.mpkCmdQueue, mpClmemRightData, CL_TRUE, CL_MAP_WRITE,
- 0, nBufferSize * sizeof(double), 0, NULL, NULL, &clStatus );
- CHECK_OPENCL( clStatus, "clEnqueueMapBuffer" );
- clFinish( kEnv.mpkCmdQueue );
- for ( int i = 0; i < nBufferSize; i++ )
- {
- dpLeftDataMap[i] = dpTempLeftData[i];
- dpRightDataMap[i] = dpTempRightData[i];
- }
- clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, mpClmemLeftData, dpLeftDataMap, 0, NULL, NULL );
- CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" );
- clFinish( kEnv.mpkCmdQueue );
- clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, mpClmemRightData, dpRightDataMap, 0, NULL, NULL );
- CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" );
- clFinish( kEnv.mpkCmdQueue );
- return true;
-}
-
-bool OclCalc::mapAndCopyArithmetic64Bits( const double *dpMoreColArithmetic, int nBufferSize )
-{
- cl_int clStatus = 0;
- double *dpLeftDataMap = (double *)clEnqueueMapBuffer( kEnv.mpkCmdQueue, mpClmemLeftData, CL_TRUE, CL_MAP_WRITE,
- 0, nBufferSize * sizeof(double), 0, NULL, NULL, &clStatus );
- CHECK_OPENCL( clStatus, "clEnqueueMapBuffer" );
- clFinish( kEnv.mpkCmdQueue );
- CHECK_OPENCL( clStatus, "clEnqueueMapBuffer" );
- clFinish( kEnv.mpkCmdQueue );
- for ( int i = 0; i < nBufferSize; i++ )
- {
- dpLeftDataMap[i] = dpMoreColArithmetic[i];
- }
- clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, mpClmemLeftData, dpLeftDataMap, 0, NULL, NULL );
- CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" );
- clFinish( kEnv.mpkCmdQueue );
- return true;
-}
-
-bool OclCalc::mapAndCopyMoreColArithmetic64Bits( const double *dpMoreColArithmetic, int nBufferSize, uint *npeOp, uint neOpSize )
-{
- cl_int clStatus = 0;
- double *dpLeftDataMap = (double *)clEnqueueMapBuffer( kEnv.mpkCmdQueue, mpClmemLeftData, CL_TRUE, CL_MAP_WRITE,
- 0, nBufferSize * sizeof(double), 0, NULL, NULL, &clStatus );
- CHECK_OPENCL( clStatus, "clEnqueueMapBuffer" );
- clFinish( kEnv.mpkCmdQueue );
- uint *dpeOpMap = (uint *)clEnqueueMapBuffer( kEnv.mpkCmdQueue, mpClmemeOp, CL_TRUE, CL_MAP_WRITE,
- 0, neOpSize * sizeof(uint), 0, NULL, NULL, &clStatus );
- CHECK_OPENCL( clStatus, "clEnqueueMapBuffer" );
- clFinish( kEnv.mpkCmdQueue );
- for ( int i = 0; i < nBufferSize; i++ )
- {
- dpLeftDataMap[i] = dpMoreColArithmetic[i];
- }
- for( uint i = 0; i<neOpSize; i++)
- {
- dpeOpMap[i] = npeOp[i];
- }
- clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, mpClmemLeftData, dpLeftDataMap, 0, NULL, NULL );
- CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" );
- clFinish( kEnv.mpkCmdQueue );
- clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, mpClmemeOp, dpeOpMap, 0, NULL, NULL );
- CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" );
- clFinish( kEnv.mpkCmdQueue );
- return true;
-}
-
-bool OclCalc::createFormulaBuf64Bits( int nBufferSize, int rowSize )
-{
- cl_int clStatus = 0;
- setKernelEnv( &kEnv );
- mpClmemSrcData = clCreateBuffer( kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE),
- nBufferSize * sizeof(double), NULL, &clStatus );
- CHECK_OPENCL( clStatus, "clCreateBuffer" );
- nFormulaLen = nBufferSize;
- mpClmemStartPos = clCreateBuffer( kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE),
- rowSize * sizeof(unsigned int), NULL, &clStatus );
- CHECK_OPENCL( clStatus, "clCreateBuffer" );
- mpClmemEndPos = clCreateBuffer( kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE),
- rowSize * sizeof(unsigned int), NULL, &clStatus );
- CHECK_OPENCL( clStatus, "clCreateBuffer" );
- clFinish( kEnv.mpkCmdQueue );
- return true;
-}
-
-bool OclCalc::createArithmeticOptBuf64Bits( int nBufferSize )
-{
- cl_int clStatus = 0;
- nArithmeticLen = nBufferSize;
- setKernelEnv( &kEnv );
- mpClmemLeftData = clCreateBuffer( kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE),
- nBufferSize * sizeof(double), NULL, &clStatus);
- CHECK_OPENCL( clStatus, "clCreateBuffer" );
- mpClmemRightData = clCreateBuffer( kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE),
- nBufferSize * sizeof(double), NULL, &clStatus );
- CHECK_OPENCL( clStatus, "clCreateBuffer" );
- clFinish( kEnv.mpkCmdQueue );
- return true;
-}
-
-bool OclCalc::createMoreColArithmeticBuf64Bits( int nBufferSize, int neOpSize )
-{
- cl_int clStatus = 0;
- nArithmeticLen = nBufferSize;
- setKernelEnv( &kEnv );
- mpClmemLeftData = clCreateBuffer( kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE),
- nBufferSize * sizeof(double), NULL, &clStatus);
- CHECK_OPENCL( clStatus, "clCreateBuffer" );
- mpClmemeOp = clCreateBuffer( kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE),
- neOpSize * sizeof(uint), NULL, &clStatus );
- CHECK_OPENCL( clStatus, "clCreateBuffer" );
- clFinish( kEnv.mpkCmdQueue );
- return true;
-}
-
-bool OclCalc::oclHostArithmeticOperator64Bits( const char* aKernelName, double *&rResult,int nRowSize )
-{
- cl_int clStatus = 0;
- size_t global_work_size[1];
- Kernel* pKernel = fetchKernel(aKernelName);
- if (!pKernel)
- return false;
-
- clFinish( kEnv.mpkCmdQueue );
- cl_mem clResult = clCreateBuffer( kEnv.mpkContext, CL_MEM_READ_WRITE,
- nRowSize * sizeof(double), NULL, &clStatus);
- CHECK_OPENCL( clStatus, "clCreateBuffer" );
-
- clStatus = clSetKernelArg(pKernel->mpKernel, 0, sizeof(cl_mem), (void *)&mpClmemLeftData);
- CHECK_OPENCL( clStatus, "clSetKernelArg" );
- clStatus = clSetKernelArg(pKernel->mpKernel, 1, sizeof(cl_mem), (void *)&mpClmemRightData);
- CHECK_OPENCL( clStatus, "clSetKernelArg" );
- clStatus = clSetKernelArg(pKernel->mpKernel, 2, sizeof(cl_mem), (void *)&clResult);
- CHECK_OPENCL( clStatus, "clSetKernelArg" );
-
- global_work_size[0] = nRowSize;
- clStatus = clEnqueueNDRangeKernel(
- kEnv.mpkCmdQueue, pKernel->mpKernel, 1, NULL, global_work_size, NULL, 0, NULL, NULL);
- CHECK_OPENCL( clStatus, "clEnqueueNDRangeKernel" );
- clFinish( kEnv.mpkCmdQueue );
-
- double *dpOutPut = (double *) clEnqueueMapBuffer( kEnv.mpkCmdQueue, clResult, CL_TRUE,CL_MAP_READ,
- 0, nRowSize*sizeof(double), 0, NULL, NULL, &clStatus );
- CHECK_OPENCL( clStatus, "clEnqueueMapBuffer" );
- for ( int i = 0; i < nRowSize; i++ )
- rResult[i] = dpOutPut[i];
- clFinish( kEnv.mpkCmdQueue );
- clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, clResult, rResult, 0, NULL, NULL );
- CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" );
-
- clStatus = clFinish( kEnv.mpkCmdQueue );
- CHECK_OPENCL( clStatus, "clFinish" );
- clStatus = clReleaseMemObject( clResult );
- CHECK_OPENCL( clStatus, "clReleaseMemObject" );
- return true;
-}
-
-bool OclCalc::oclMoreColHostArithmeticOperator64Bits( int nDataSize,int neOpSize,double *rResult, int nRowSize )
-{
- cl_int clStatus = 0;
- size_t global_work_size[1];
- const char *aKernelName = "oclMoreColArithmeticOperator";
- Kernel* pKernel = fetchKernel(aKernelName);
- if (!pKernel)
- return false;
-
- cl_mem clResult = clCreateBuffer( kEnv.mpkContext, CL_MEM_READ_WRITE, nRowSize * sizeof(double), NULL, &clStatus );
- CHECK_OPENCL( clStatus, "clCreateBuffer" );
- clStatus = clSetKernelArg(pKernel->mpKernel, 0, sizeof(cl_mem), (void *)&mpClmemLeftData);
- CHECK_OPENCL( clStatus, "clSetKernelArg" );
- clStatus = clSetKernelArg(pKernel->mpKernel, 1, sizeof(cl_int), (void *)&nDataSize);
- CHECK_OPENCL( clStatus, "clSetKernelArg" );
- clStatus = clSetKernelArg(pKernel->mpKernel, 2, sizeof(cl_mem), (void *)&mpClmemeOp);
- CHECK_OPENCL( clStatus, "clSetKernelArg" );
- clStatus = clSetKernelArg(pKernel->mpKernel, 3, sizeof(cl_int), (void *)&neOpSize);
- CHECK_OPENCL( clStatus, "clSetKernelArg" );
- clStatus = clSetKernelArg(pKernel->mpKernel, 4, sizeof(cl_mem), (void *)&clResult);
- CHECK_OPENCL( clStatus, "clSetKernelArg" );
- global_work_size[0] = nRowSize;
- clStatus = clEnqueueNDRangeKernel(
- kEnv.mpkCmdQueue, pKernel->mpKernel, 1, NULL, global_work_size, NULL, 0, NULL, NULL);
- CHECK_OPENCL( clStatus, "clEnqueueNDRangeKernel" );
- clFinish( kEnv.mpkCmdQueue );
- double * hostMapResult = (double *)clEnqueueMapBuffer( kEnv.mpkCmdQueue, clResult, CL_TRUE, CL_MAP_READ, 0,
- nRowSize*sizeof(double), 0, NULL, NULL, &clStatus );
- CHECK_OPENCL( clStatus, "clEnqueueMapBuffer" );
- clFinish( kEnv.mpkCmdQueue );
- for ( int i = 0; i < nRowSize; i++)
- rResult[i] = hostMapResult[i]; // from gpu float type to cpu double type
- clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, clResult, hostMapResult, 0, NULL, NULL );
- CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" );
- clStatus = clFinish(kEnv.mpkCmdQueue );
- CHECK_OPENCL( clStatus, "clFinish" );
- clStatus = clReleaseMemObject( clResult );
- CHECK_OPENCL( clStatus, "clReleaseMemObject" );
- return true;
-}
-
-bool OclCalc::oclHostArithmeticStash64Bits( const char* aKernelName, const double *dpLeftData, const double *dpRightData, double *rResult,int nRowSize )
-{
- cl_int clStatus = 0;
- size_t global_work_size[1];
- setKernelEnv( &kEnv );
- Kernel* pKernel = fetchKernel(aKernelName);
- if (!pKernel)
- return false;
-
- clFinish( kEnv.mpkCmdQueue );
-
- cl_mem clLeftData = clCreateBuffer( kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR),
- nRowSize * sizeof(double), (void *)dpLeftData, &clStatus);
- CHECK_OPENCL( clStatus, "clCreateBuffer" );
- cl_mem clRightData = clCreateBuffer( kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR),
- nRowSize * sizeof(double), (void *)dpRightData, &clStatus );
- CHECK_OPENCL( clStatus, "clCreateBuffer" );
-
- cl_mem clResult = clCreateBuffer( kEnv.mpkContext, CL_MEM_READ_WRITE,
- nRowSize * sizeof(double), NULL, &clStatus);
- CHECK_OPENCL( clStatus, "clCreateBuffer" );
-
- clStatus = clSetKernelArg(pKernel->mpKernel, 0, sizeof(cl_mem), (void *)&clLeftData);
- CHECK_OPENCL( clStatus, "clSetKernelArg" );
- clStatus = clSetKernelArg(pKernel->mpKernel, 1, sizeof(cl_mem), (void *)&clRightData);
- CHECK_OPENCL( clStatus, "clSetKernelArg" );
- clStatus = clSetKernelArg(pKernel->mpKernel, 2, sizeof(cl_mem), (void *)&clResult);
- CHECK_OPENCL( clStatus, "clSetKernelArg" );
-
- global_work_size[0] = nRowSize;
- clStatus = clEnqueueNDRangeKernel(
- kEnv.mpkCmdQueue, pKernel->mpKernel, 1, NULL, global_work_size, NULL, 0, NULL, NULL);
- CHECK_OPENCL( clStatus, "clEnqueueNDRangeKernel" );
- clFinish( kEnv.mpkCmdQueue );
-
- clStatus = clEnqueueReadBuffer(kEnv.mpkCmdQueue, clResult, CL_TRUE, 0, nRowSize * sizeof(double), (double *)rResult, 0, NULL, NULL);
- CHECK_OPENCL( clStatus, "clEnqueueReadBuffer" );
-
- clStatus = clFinish( kEnv.mpkCmdQueue );
- CHECK_OPENCL( clStatus, "clFinish" );
- clStatus = clReleaseMemObject( clResult );
- CHECK_OPENCL( clStatus, "clReleaseMemObject" );
- clStatus = clReleaseMemObject( clLeftData );
- CHECK_OPENCL( clStatus, "clReleaseMemObject" );
- clStatus = clReleaseMemObject( clRightData );
- CHECK_OPENCL( clStatus, "clReleaseMemObject" );
- return true;
-}
-
-bool OclCalc::oclHostFormulaStash64Bits( const char* aKernelName, const double* dpSrcData, uint *nStartPos, uint *nEndPos, double *output, int nBufferSize, int size )
-{
- cl_int clStatus = 0;
- size_t global_work_size[1];
- setKernelEnv( &kEnv );
- Kernel* pKernel = fetchKernel(aKernelName);
- if (!pKernel)
- return false;
-
- cl_mem clSrcData = clCreateBuffer( kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR),
- nBufferSize * sizeof(double), (void *)dpSrcData, &clStatus );
- CHECK_OPENCL( clStatus, "clCreateBuffer" );
- cl_mem clStartPos = clCreateBuffer( kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR),
- size * sizeof(unsigned int), (void *)nStartPos, &clStatus );
- CHECK_OPENCL( clStatus, "clCreateBuffer" );
- cl_mem clEndPos = clCreateBuffer( kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR),
- size * sizeof(unsigned int), (void *)nEndPos, &clStatus );
- CHECK_OPENCL( clStatus, "clCreateBuffer" );
-
- cl_mem outputCl = clCreateBuffer( kEnv.mpkContext, CL_MEM_READ_WRITE, size * sizeof(double), NULL, &clStatus );
- CHECK_OPENCL( clStatus, "clCreateBuffer" );
- clStatus = clSetKernelArg(pKernel->mpKernel, 0, sizeof(cl_mem),(void *)&clSrcData);
- CHECK_OPENCL( clStatus, "clSetKernelArg" );
- clStatus = clSetKernelArg(pKernel->mpKernel, 1, sizeof(cl_mem), (void *)&clStartPos);
- CHECK_OPENCL( clStatus, "clSetKernelArg" );
- clStatus = clSetKernelArg(pKernel->mpKernel, 2, sizeof(cl_mem), (void *)&clEndPos);
- CHECK_OPENCL( clStatus, "clSetKernelArg" );
- clStatus = clSetKernelArg(pKernel->mpKernel, 3, sizeof(cl_mem), (void *)&outputCl);
- CHECK_OPENCL( clStatus, "clSetKernelArg" );
- global_work_size[0] = size;
- clStatus = clEnqueueNDRangeKernel(
- kEnv.mpkCmdQueue, pKernel->mpKernel, 1, NULL, global_work_size, NULL, 0, NULL, NULL);
- CHECK_OPENCL( clStatus, "clEnqueueNDRangeKernel" );
- clFinish( kEnv.mpkCmdQueue );
-
- clStatus = clEnqueueReadBuffer(kEnv.mpkCmdQueue, outputCl, CL_TRUE, 0, size * sizeof(double), (double *)output, 0, NULL, NULL);
- CHECK_OPENCL( clStatus, "clReadBuffer" );
- clFinish( kEnv.mpkCmdQueue );
- clStatus = clFinish(kEnv.mpkCmdQueue );
- CHECK_OPENCL( clStatus, "clFinish" );
- clStatus = clReleaseMemObject( outputCl );
- CHECK_OPENCL( clStatus, "clReleaseMemObject" );
- clStatus = clReleaseMemObject( clSrcData );
- CHECK_OPENCL( clStatus, "clReleaseMemObject" );
- clStatus = clReleaseMemObject( clStartPos );
- CHECK_OPENCL( clStatus, "clReleaseMemObject" );
- clStatus = clReleaseMemObject( clEndPos );
- CHECK_OPENCL( clStatus, "clReleaseMemObject" );
- return true;
-}
-
-bool OclCalc::oclHostFormulaStatistics64Bits( const char* aKernelName, double *&output, int size )
-{
- cl_int clStatus = 0;
- size_t global_work_size[1];
- Kernel* pKernel = fetchKernel(aKernelName);
- if (!pKernel)
- return false;
-
- cl_mem outputCl = clCreateBuffer( kEnv.mpkContext, CL_MEM_READ_WRITE, size * sizeof(double), NULL, &clStatus );
- CHECK_OPENCL( clStatus, "clCreateBuffer" );
- clStatus = clSetKernelArg(pKernel->mpKernel, 0, sizeof(cl_mem),(void *)&mpClmemSrcData);
- CHECK_OPENCL( clStatus, "clSetKernelArg" );
- clStatus = clSetKernelArg(pKernel->mpKernel, 1, sizeof(cl_mem), (void *)&mpClmemStartPos);
- CHECK_OPENCL( clStatus, "clSetKernelArg" );
- clStatus = clSetKernelArg(pKernel->mpKernel, 2, sizeof(cl_mem), (void *)&mpClmemEndPos);
- CHECK_OPENCL( clStatus, "clSetKernelArg" );
- clStatus = clSetKernelArg(pKernel->mpKernel, 3, sizeof(cl_mem), (void *)&outputCl);
- CHECK_OPENCL( clStatus, "clSetKernelArg" );
- global_work_size[0] = size;
- clStatus = clEnqueueNDRangeKernel(
- kEnv.mpkCmdQueue, pKernel->mpKernel, 1, NULL, global_work_size, NULL, 0, NULL, NULL);
- CHECK_OPENCL( clStatus, "clEnqueueNDRangeKernel" );
- clFinish( kEnv.mpkCmdQueue );
- double *dpOutPut = (double *) clEnqueueMapBuffer( kEnv.mpkCmdQueue, outputCl, CL_TRUE,CL_MAP_READ,
- 0, size*sizeof(double), 0, NULL, NULL, &clStatus );
- CHECK_OPENCL( clStatus, "clEnqueueMapBuffer" );
- for ( int i = 0; i < size; i++ )
- {
- output[i] = dpOutPut[i];
- }
-
- CHECK_OPENCL( clStatus, "clEnqueueMapBuffer" );
- clFinish( kEnv.mpkCmdQueue );
- clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, outputCl, output, 0, NULL, NULL );
- CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" );
- clStatus = clFinish(kEnv.mpkCmdQueue );
- CHECK_OPENCL( clStatus, "clFinish" );
- clStatus = clReleaseMemObject( outputCl );
- CHECK_OPENCL( clStatus, "clReleaseMemObject" );
- return true;
-}
-
-bool OclCalc::oclHostFormulaCount64Bits( uint *npStartPos, uint *npEndPos, double *&dpOutput, int nSize )
-{
- const char *cpKernelName = "oclFormulaCount";
- Kernel* pKernel = fetchKernel(cpKernelName);
- if (!pKernel)
- return false;
-
- cl_int clStatus;
-
- size_t global_work_size[1];
- clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, mpClmemStartPos, npStartPos, 0, NULL, NULL );
- CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" );
- clFinish( kEnv.mpkCmdQueue );
- clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, mpClmemEndPos, npEndPos, 0, NULL, NULL);
- CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" );
- clFinish( kEnv.mpkCmdQueue );
- cl_mem clpOutput = clCreateBuffer( kEnv.mpkContext,CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
- nSize* sizeof(double), NULL, &clStatus );
- CHECK_OPENCL( clStatus, "clCreateBuffer" );
- clStatus = clSetKernelArg(pKernel->mpKernel, 0, sizeof(cl_mem), (void *)&mpClmemStartPos);
- CHECK_OPENCL( clStatus,"clSetKernelArg");
- clStatus = clSetKernelArg(pKernel->mpKernel, 1, sizeof(cl_mem), (void *)&mpClmemEndPos);
- CHECK_OPENCL( clStatus,"clSetKernelArg");
- clStatus = clSetKernelArg(pKernel->mpKernel, 2, sizeof(cl_mem), (void *)&clpOutput);
- CHECK_OPENCL( clStatus, "clSetKernelArg" );
- global_work_size[0] = nSize;
- clStatus = clEnqueueNDRangeKernel(
- kEnv.mpkCmdQueue, pKernel->mpKernel, 1, NULL, global_work_size, NULL, 0, NULL, NULL);
- CHECK_OPENCL( clStatus, "clEnqueueNDRangeKernel" );
- clFinish( kEnv.mpkCmdQueue );
- dpOutput = (double *)clEnqueueMapBuffer( kEnv.mpkCmdQueue, clpOutput, CL_TRUE, CL_MAP_READ,
- 0, nSize*sizeof(double), 0, NULL, NULL, &clStatus );
- CHECK_OPENCL( clStatus, "clEnqueueMapBuffer" );
- clFinish( kEnv.mpkCmdQueue );
- clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, clpOutput, dpOutput, 0, NULL, NULL );
- CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" );
- clStatus = clFinish( kEnv.mpkCmdQueue );
- CHECK_OPENCL( clStatus, "clFinish" );
- clStatus = clReleaseMemObject( clpOutput );
- CHECK_OPENCL( clStatus, "clReleaseMemObject" );
- return true;
-}
-
-/*
- * The dpsrcData is double rows,one col is the first column data,second is the second column data.if get a cell data range,the range
- *save the npStart array eg:a4-a8;b10-b14,the npStart will store a4,b10,and the npEnd will store a8,b14 range.So it can if(i +1)%2 to judge
- * the a cloumn or b cloumn npStart range.so as b bolumn.
- */
-bool OclCalc::oclHostFormulaSumProduct64Bits( double *dpSumProMergeLfData, double *dpSumProMergeRrData, uint *npSumSize, double *&dpOutput, int nSize )
-{
- cl_int clStatus;
- size_t global_work_size[1];
- memset(dpOutput,0,nSize);
- const char *cpFirstKernelName = "oclSignedMul";
- const char *cpSecondKernelName = "oclFormulaSumproduct";
- Kernel* pKernel1 = fetchKernel(cpFirstKernelName);
- if (!pKernel1)
- return false;
-
- clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, mpClmemMergeLfData, dpSumProMergeLfData, 0, NULL, NULL );
- CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" );
- clFinish(kEnv.mpkCmdQueue);
- clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, mpClmemMergeRtData, dpSumProMergeRrData, 0, NULL, NULL );
- clFinish( kEnv.mpkCmdQueue );
- CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" );
- clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, mpClmemMatixSumSize, npSumSize, 0, NULL, NULL );
- CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" );
- clFinish( kEnv.mpkCmdQueue );
- unsigned int nMulResultSize = nFormulaRowSize + nFormulaRowSize * nSize * nFormulaColSize - 1;
- cl_mem clResult = clCreateBuffer( kEnv.mpkContext, CL_MEM_READ_WRITE, nMulResultSize * sizeof(double),
- NULL, &clStatus );
- CHECK_OPENCL( clStatus, "clCreateBuffer" );
- clStatus = clSetKernelArg(pKernel1->mpKernel, 0, sizeof(cl_mem),(void *)&mpClmemMergeLfData);
- CHECK_OPENCL( clStatus, "clSetKernelArg" );
- clStatus = clSetKernelArg(pKernel1->mpKernel, 1, sizeof(cl_mem), (void *)&mpClmemMergeRtData);
- CHECK_OPENCL( clStatus, "clSetKernelArg" );
- clStatus = clSetKernelArg(pKernel1->mpKernel, 2, sizeof(cl_mem), (void *)&clResult);
- CHECK_OPENCL( clStatus, "clSetKernelArg" );
- global_work_size[0] = nMulResultSize;
- clStatus = clEnqueueNDRangeKernel(
- kEnv.mpkCmdQueue, pKernel1->mpKernel, 1, NULL, global_work_size, NULL, 0, NULL, NULL);
- CHECK_OPENCL( clStatus, "clEnqueueNDRangeKernel" );
- clFinish( kEnv.mpkCmdQueue );
- clStatus = clReleaseMemObject( mpClmemMergeLfData );
- CHECK_OPENCL( clStatus, "clReleaseMemObject" );
- clStatus = clReleaseMemObject( mpClmemMergeRtData );
- CHECK_OPENCL( clStatus, "clReleaseMemObject" );
-
- Kernel* pKernel2 = fetchKernel(cpSecondKernelName);
- if (!pKernel2)
- return false;
-
- cl_mem clpOutput = clCreateBuffer( kEnv.mpkContext, CL_MEM_READ_WRITE, nSize* sizeof(double), NULL, &clStatus );
- CHECK_OPENCL( clStatus, "clCreateBuffer" );
- cl_uint nMatixSize = nFormulaColSize * nFormulaRowSize;
- clStatus = clSetKernelArg(pKernel2->mpKernel, 0, sizeof(cl_mem), (void *)&clResult);
- CHECK_OPENCL( clStatus, "clSetKernelArg" );
- clStatus = clSetKernelArg(pKernel2->mpKernel, 1, sizeof(cl_mem), (void *)&mpClmemMatixSumSize);
- CHECK_OPENCL( clStatus, "clSetKernelArg" );
- clStatus = clSetKernelArg(pKernel2->mpKernel, 2, sizeof(cl_mem), (void *)&clpOutput);
- CHECK_OPENCL( clStatus, "clSetKernelArg" );
- clStatus = clSetKernelArg(pKernel2->mpKernel, 3, sizeof(cl_uint), (void *)&nMatixSize);
- CHECK_OPENCL( clStatus, "clSetKernelArg" );
- global_work_size[0] = nSize;
- clStatus = clEnqueueNDRangeKernel(
- kEnv.mpkCmdQueue, pKernel2->mpKernel, 1, NULL, global_work_size, NULL, 0, NULL, NULL);
- CHECK_OPENCL( clStatus, "clEnqueueNDRangeKernel" );
- clFinish( kEnv.mpkCmdQueue );
- double * outputMap = (double *)clEnqueueMapBuffer( kEnv.mpkCmdQueue, clpOutput, CL_TRUE, CL_MAP_READ,
- 0, nSize*sizeof(double), 0, NULL, NULL, &clStatus );
- CHECK_OPENCL( clStatus, "clEnqueueMapBuffer" );
- clFinish( kEnv.mpkCmdQueue );
- for ( int i = 0; i < nSize; i++ )
- dpOutput[i] = outputMap[i];
- clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, clpOutput, outputMap, 0, NULL, NULL );
- CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" );
- clStatus = clFinish( kEnv.mpkCmdQueue );
- CHECK_OPENCL( clStatus, "clFinish" );
- clStatus = clReleaseMemObject( clResult );
- CHECK_OPENCL( clStatus, "clReleaseKernel" );
- clStatus = clReleaseMemObject( mpClmemMatixSumSize );
- CHECK_OPENCL( clStatus, "clReleaseKernel" );
- clStatus = clReleaseMemObject( clpOutput );
- CHECK_OPENCL( clStatus, "clReleaseMemObject" );
-
- return true;
-}
-
-bool OclCalc::createMoreColArithmeticBuf32Bits( int nBufferSize, int neOpSize )
-{
- cl_int clStatus = 0;
- nArithmeticLen = nBufferSize;
- setKernelEnv( &kEnv );
- mpClmemLeftData = clCreateBuffer( kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE),
- nBufferSize * sizeof(float), NULL, &clStatus);
- CHECK_OPENCL( clStatus, "clCreateBuffer" );
- mpClmemeOp = clCreateBuffer( kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE),
- neOpSize * sizeof(uint), NULL, &clStatus );
- CHECK_OPENCL( clStatus, "clCreateBuffer" );
- clFinish( kEnv.mpkCmdQueue );
- return true;
-}
-
-bool OclCalc::createArithmeticOptBuf32Bits( int nBufferSize )
-{
- cl_int clStatus = 0;
- setKernelEnv( &kEnv );
- nArithmeticLen = nBufferSize;
- mpClmemLeftData = clCreateBuffer( kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE),
- nBufferSize * sizeof(float), NULL, &clStatus);
- CHECK_OPENCL( clStatus, "clCreateBuffer" );
- mpClmemRightData = clCreateBuffer( kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE),
- nBufferSize * sizeof(float), NULL, &clStatus );
- CHECK_OPENCL( clStatus, "clCreateBuffer" );
- clFinish( kEnv.mpkCmdQueue );
- return true;
-}
-
-bool OclCalc::createFormulaBuf32Bits( int nBufferSize, int rowSize )
-{
- cl_int clStatus = 0;
- setKernelEnv( &kEnv );
- nFormulaLen = nBufferSize;
-
- mpClmemSrcData = clCreateBuffer( kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE),
- nBufferSize * sizeof(float), NULL, &clStatus );
- CHECK_OPENCL( clStatus, "clCreateBuffer" );
-
- mpClmemStartPos = clCreateBuffer( kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE),
- rowSize * sizeof(unsigned int), NULL, &clStatus );
- CHECK_OPENCL( clStatus, "clCreateBuffer" );
- mpClmemEndPos = clCreateBuffer( kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE),
- rowSize * sizeof(unsigned int), NULL, &clStatus );
- CHECK_OPENCL( clStatus, "clCreateBuffer" );
- clFinish( kEnv.mpkCmdQueue );
- return true;
-}
-
-bool OclCalc::createBuffer32Bits( float *&fpLeftData, float *&fpRightData, int nBufferSize )
-{
- cl_int clStatus = 0;
- setKernelEnv( &kEnv );
- mpClmemLeftData = clCreateBuffer( kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR),
- nBufferSize * sizeof(float), NULL, &clStatus);
- CHECK_OPENCL( clStatus, "clCreateBuffer" );
- mpClmemRightData = clCreateBuffer( kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR),
- nBufferSize * sizeof(unsigned int), NULL, &clStatus );
- CHECK_OPENCL( clStatus, "clCreateBuffer" );
- fpLeftData = (float *)clEnqueueMapBuffer( kEnv.mpkCmdQueue, mpClmemLeftData, CL_TRUE,CL_MAP_WRITE_INVALIDATE_REGION,
- 0, nBufferSize * sizeof(float), 0, NULL, NULL, &clStatus );
- CHECK_OPENCL( clStatus, "clEnqueueMapBuffer" );
- clFinish( kEnv.mpkCmdQueue );
- fpRightData = (float *)clEnqueueMapBuffer( kEnv.mpkCmdQueue, mpClmemRightData, CL_TRUE, CL_MAP_WRITE_INVALIDATE_REGION,
- 0, nBufferSize * sizeof(float), 0, NULL, NULL, &clStatus );
- CHECK_OPENCL( clStatus, "clEnqueueMapBuffer" );
- clFinish( kEnv.mpkCmdQueue );
- //printf("In CreateBuffer, pptrr is %d,%d,%d\n",fpSrcData,npStartPos,npEndPos);
- return true;
-}
-
-bool OclCalc::mapAndCopy32Bits(const double *dpTempSrcData,unsigned int *unStartPos,unsigned int *unEndPos,int nBufferSize ,int nRowsize)
-{
- cl_int clStatus = 0;
- float *fpSrcData = (float *)clEnqueueMapBuffer( kEnv.mpkCmdQueue, mpClmemSrcData, CL_TRUE, CL_MAP_WRITE, 0,
- nBufferSize * sizeof(float) , 0, NULL, NULL, &clStatus );
- CHECK_OPENCL( clStatus, "clEnqueueMapBuffer" );
- clFinish( kEnv.mpkCmdQueue );
- unsigned int *npStartPos = (uint *)clEnqueueMapBuffer( kEnv.mpkCmdQueue, mpClmemStartPos, CL_TRUE, CL_MAP_WRITE, 0,
- nRowsize * sizeof(uint), 0, NULL, NULL, &clStatus );
- CHECK_OPENCL( clStatus, "clEnqueueMapBuffer" );
- clFinish( kEnv.mpkCmdQueue );
- unsigned int *npEndPos = (uint *)clEnqueueMapBuffer( kEnv.mpkCmdQueue, mpClmemEndPos, CL_TRUE, CL_MAP_WRITE, 0,
- nRowsize * sizeof(uint), 0, NULL, NULL, &clStatus );
- CHECK_OPENCL( clStatus, "clEnqueueMapBuffer" );
- clFinish( kEnv.mpkCmdQueue );
- for(int i=0;i<nBufferSize;i++)
- {
- fpSrcData[i] = (float)dpTempSrcData[i];
- }
- for(int i=0;i<nRowsize;i++)
- {
- npStartPos[i] = unStartPos[i];
- npEndPos[i] = unEndPos[i];
- }
- clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, mpClmemSrcData, fpSrcData, 0, NULL, NULL );
- CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" );
- clFinish( kEnv.mpkCmdQueue );
- clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, mpClmemStartPos, npStartPos, 0, NULL, NULL );
- CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" );
- clFinish( kEnv.mpkCmdQueue );
- clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, mpClmemEndPos, npEndPos, 0, NULL, NULL );
- CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" );
- clFinish( kEnv.mpkCmdQueue );
- return true;
-}
-
-bool OclCalc::mapAndCopy32Bits(const double *dpTempLeftData,const double *dpTempRightData,int nBufferSize )
-{
- cl_int clStatus = 0;
- float *fpLeftData = (float *)clEnqueueMapBuffer( kEnv.mpkCmdQueue, mpClmemLeftData, CL_TRUE, CL_MAP_WRITE,
- 0, nBufferSize * sizeof(float), 0, NULL, NULL, &clStatus );
- CHECK_OPENCL( clStatus, "clEnqueueMapBuffer" );
- clFinish( kEnv.mpkCmdQueue );
- float *fpRightData = (float *)clEnqueueMapBuffer( kEnv.mpkCmdQueue, mpClmemRightData, CL_TRUE, CL_MAP_WRITE,
- 0, nBufferSize * sizeof(float), 0, NULL, NULL, &clStatus );
- CHECK_OPENCL( clStatus, "clEnqueueMapBuffer" );
- clFinish( kEnv.mpkCmdQueue );
- for(int i=0;i<nBufferSize;i++)
- {
- fpLeftData[i] = (float)dpTempLeftData[i];
- fpRightData[i] = (float)dpTempRightData[i];
- }
- clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, mpClmemLeftData, fpLeftData, 0, NULL, NULL );
- CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" );
- clFinish( kEnv.mpkCmdQueue );
- clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, mpClmemRightData, fpRightData, 0, NULL, NULL );
- CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" );
- clFinish( kEnv.mpkCmdQueue );
- return true;
-}
-
-bool OclCalc::mapAndCopyArithmetic32Bits( const double *dpMoreColArithmetic, int nBufferSize )
-{
- cl_int clStatus = 0;
- float *dpLeftDataMap = (float *)clEnqueueMapBuffer( kEnv.mpkCmdQueue, mpClmemLeftData, CL_TRUE, CL_MAP_WRITE,
- 0, nBufferSize * sizeof(float), 0, NULL, NULL, &clStatus );
- CHECK_OPENCL( clStatus, "clEnqueueMapBuffer" );
- clFinish( kEnv.mpkCmdQueue );
- for ( int i = 0; i < nBufferSize; i++ )
- {
- dpLeftDataMap[i] = (float)dpMoreColArithmetic[i];
- }
- clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, mpClmemLeftData, dpLeftDataMap, 0, NULL, NULL );
- CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" );
- clFinish( kEnv.mpkCmdQueue );
- return true;
-}
-
-bool OclCalc::mapAndCopyMoreColArithmetic32Bits( const double *dpMoreColArithmetic, int nBufferSize, uint *npeOp, uint neOpSize )
-{
- cl_int clStatus = 0;
- float *fpLeftDataMap = (float *)clEnqueueMapBuffer( kEnv.mpkCmdQueue, mpClmemLeftData, CL_TRUE, CL_MAP_WRITE,
- 0, nBufferSize * sizeof(float), 0, NULL, NULL, &clStatus );
- CHECK_OPENCL( clStatus, "clEnqueueMapBuffer" );
- clFinish( kEnv.mpkCmdQueue );
- uint *dpeOpMap = (uint *)clEnqueueMapBuffer( kEnv.mpkCmdQueue, mpClmemeOp, CL_TRUE, CL_MAP_WRITE,
- 0, neOpSize * sizeof(uint), 0, NULL, NULL, &clStatus );
- CHECK_OPENCL( clStatus, "clEnqueueMapBuffer" );
- clFinish( kEnv.mpkCmdQueue );
- for ( int i = 0; i < nBufferSize; i++ )
- {
- fpLeftDataMap[i] = (float)dpMoreColArithmetic[i];
- }
- for( uint i = 0; i<neOpSize; i++ )
- {
- dpeOpMap[i] = npeOp[i];
- }
- clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, mpClmemLeftData, fpLeftDataMap, 0, NULL, NULL );
- CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" );
- clFinish( kEnv.mpkCmdQueue );
- clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, mpClmemeOp, dpeOpMap, 0, NULL, NULL );
- CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" );
- clFinish( kEnv.mpkCmdQueue );
- return true;
-}
-
-bool OclCalc::oclHostArithmeticOperator32Bits( const char* aKernelName,double *rResult, int nRowSize )
-{
- cl_int clStatus = 0;
- size_t global_work_size[1];
-
- Kernel* pKernel = fetchKernel(aKernelName);
- if (!pKernel)
- return false;
-
- cl_mem clResult = clCreateBuffer( kEnv.mpkContext, CL_MEM_READ_WRITE, nRowSize * sizeof(float), NULL, &clStatus );
- CHECK_OPENCL( clStatus, "clCreateBuffer" );
- clStatus = clSetKernelArg(pKernel->mpKernel, 0, sizeof(cl_mem), (void *)&mpClmemLeftData);
- CHECK_OPENCL( clStatus, "clSetKernelArg" );
- clStatus = clSetKernelArg(pKernel->mpKernel, 1, sizeof(cl_mem), (void *)&mpClmemRightData);
- CHECK_OPENCL( clStatus, "clSetKernelArg" );
- clStatus = clSetKernelArg(pKernel->mpKernel, 2, sizeof(cl_mem), (void *)&clResult);
- CHECK_OPENCL( clStatus, "clSetKernelArg" );
- global_work_size[0] = nRowSize;
- clStatus = clEnqueueNDRangeKernel(
- kEnv.mpkCmdQueue, pKernel->mpKernel, 1, NULL, global_work_size, NULL, 0, NULL, NULL);
- CHECK_OPENCL( clStatus, "clEnqueueNDRangeKernel" );
- clFinish( kEnv.mpkCmdQueue );
- float * hostMapResult = (float *)clEnqueueMapBuffer(
- kEnv.mpkCmdQueue, clResult, CL_TRUE, CL_MAP_READ, 0, nRowSize*sizeof(float), 0, NULL, NULL, &clStatus);
- CHECK_OPENCL( clStatus, "clEnqueueMapBuffer" );
- clFinish( kEnv.mpkCmdQueue );
- for ( int i = 0; i < nRowSize; i++)
- rResult[i] = hostMapResult[i]; // from gpu float type to cpu double type
- clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, clResult, hostMapResult, 0, NULL, NULL );
- CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" );
- clStatus = clFinish(kEnv.mpkCmdQueue );
- CHECK_OPENCL( clStatus, "clFinish" );
- clStatus = clReleaseMemObject( clResult );
- CHECK_OPENCL( clStatus, "clReleaseMemObject" );
-
- return true;
-}
-
-bool OclCalc::oclMoreColHostArithmeticOperator32Bits( int nDataSize,int neOpSize,double *rResult, int nRowSize )
-{
- cl_int clStatus = 0;
- size_t global_work_size[1];
- const char *aKernelName = "oclMoreColArithmeticOperator";
- Kernel* pKernel = fetchKernel(aKernelName);
- if (!pKernel)
- return false;
-
- cl_mem clResult = clCreateBuffer( kEnv.mpkContext, CL_MEM_READ_WRITE, nRowSize * sizeof(float), NULL, &clStatus );
- CHECK_OPENCL( clStatus, "clCreateBuffer" );
- clStatus = clSetKernelArg(pKernel->mpKernel, 0, sizeof(cl_mem), (void *)&mpClmemLeftData);
- CHECK_OPENCL( clStatus, "clSetKernelArg" );
- clStatus = clSetKernelArg(pKernel->mpKernel, 1, sizeof(cl_int), (void *)&nDataSize);
- CHECK_OPENCL( clStatus, "clSetKernelArg" );
- clStatus = clSetKernelArg(pKernel->mpKernel, 2, sizeof(cl_mem), (void *)&mpClmemeOp);
- CHECK_OPENCL( clStatus, "clSetKernelArg" );
- clStatus = clSetKernelArg(pKernel->mpKernel, 3, sizeof(cl_int), (void *)&neOpSize);
- CHECK_OPENCL( clStatus, "clSetKernelArg" );
- clStatus = clSetKernelArg(pKernel->mpKernel, 4, sizeof(cl_mem), (void *)&clResult);
- CHECK_OPENCL( clStatus, "clSetKernelArg" );
- global_work_size[0] = nRowSize;
- clStatus = clEnqueueNDRangeKernel(
- kEnv.mpkCmdQueue, pKernel->mpKernel, 1, NULL, global_work_size, NULL, 0, NULL, NULL);
- CHECK_OPENCL( clStatus, "clEnqueueNDRangeKernel" );
- clFinish( kEnv.mpkCmdQueue );
- float * hostMapResult = (float *)clEnqueueMapBuffer( kEnv.mpkCmdQueue, clResult, CL_TRUE, CL_MAP_READ, 0,
- nRowSize*sizeof(float), 0, NULL, NULL, &clStatus );
- CHECK_OPENCL( clStatus, "clEnqueueMapBuffer" );
- clFinish( kEnv.mpkCmdQueue );
- for ( int i = 0; i < nRowSize; i++)
- rResult[i] = hostMapResult[i]; // from gpu float type to cpu double type
- clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, clResult, hostMapResult, 0, NULL, NULL );
- CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" );
- clStatus = clFinish(kEnv.mpkCmdQueue );
- CHECK_OPENCL( clStatus, "clFinish" );
- clStatus = clReleaseMemObject( clResult );
- CHECK_OPENCL( clStatus, "clReleaseMemObject" );
-
- return true;
-}
-
-bool OclCalc::oclHostFormulaStatistics32Bits(const char* aKernelName,double *output,int size)
-{
- Kernel* pKernel = fetchKernel(aKernelName);
- if (!pKernel)
- return false;
-
- cl_int clStatus = 0;
- size_t global_work_size[1];
-
- cl_mem outputCl = clCreateBuffer( kEnv.mpkContext, CL_MEM_READ_WRITE, size * sizeof(float), NULL, &clStatus );
- CHECK_OPENCL( clStatus, "clCreateBuffer" );
- clStatus = clSetKernelArg(pKernel->mpKernel, 0, sizeof(cl_mem), (void *)&mpClmemSrcData);
- CHECK_OPENCL( clStatus, "clSetKernelArg" );
- clStatus = clSetKernelArg(pKernel->mpKernel, 1, sizeof(cl_mem), (void *)&mpClmemStartPos);
- CHECK_OPENCL( clStatus, "clSetKernelArg" );
- clStatus = clSetKernelArg(pKernel->mpKernel, 2, sizeof(cl_mem), (void *)&mpClmemEndPos);
- CHECK_OPENCL( clStatus, "clSetKernelArg" );
- clStatus = clSetKernelArg(pKernel->mpKernel, 3, sizeof(cl_mem), (void *)&outputCl);
- CHECK_OPENCL( clStatus, "clSetKernelArg" );
- global_work_size[0] = size;
- clStatus = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, pKernel->mpKernel, 1, NULL, global_work_size, NULL, 0, NULL, NULL);
- CHECK_OPENCL( clStatus, "clEnqueueNDRangeKernel" );
- clFinish( kEnv.mpkCmdQueue );
- float * outputMap = (float *)clEnqueueMapBuffer( kEnv.mpkCmdQueue, outputCl, CL_TRUE, CL_MAP_READ,
- 0, size*sizeof(float), 0, NULL, NULL, &clStatus );
- CHECK_OPENCL( clStatus, "clEnqueueMapBuffer" );
- clFinish( kEnv.mpkCmdQueue );
- for ( int i = 0; i < size; i++ )
- output[i] = outputMap[i]; // from gpu float type to cpu double type
- clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, outputCl, outputMap, 0, NULL, NULL );
- CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" );
- clStatus = clFinish( kEnv.mpkCmdQueue );
- CHECK_OPENCL( clStatus, "clFinish" );
- clStatus = clReleaseMemObject( outputCl );
- CHECK_OPENCL( clStatus, "clReleaseMemObject" );
- return true;
-}
-
-bool OclCalc::oclHostArithmeticStash32Bits( const char* aKernelName, const double *dpLeftData, const double *dpRightData, double *rResult,int nRowSize )
-{
- cl_int clStatus = 0;
- size_t global_work_size[1];
- setKernelEnv( &kEnv );
- Kernel* pKernel = fetchKernel(aKernelName);
- if (!pKernel)
- return false;
-
- float *fpLeftData = (float *)malloc( sizeof(float) * nRowSize );
- float *fpRightData = (float *)malloc( sizeof(float) * nRowSize );
- float *fpResult = (float *)malloc( sizeof(float) * nRowSize );
- for(int i=0;i<nRowSize;i++)
- {
- fpLeftData[i] = (float)dpLeftData[i];
- fpRightData[i] = (float)dpRightData[i];
- }
- cl_mem clLeftData = clCreateBuffer( kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR),
- nRowSize * sizeof(float), (void *)fpLeftData, &clStatus);
- CHECK_OPENCL( clStatus, "clCreateBuffer" );
- cl_mem clRightData = clCreateBuffer( kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR),
- nRowSize * sizeof(float), (void *)fpRightData, &clStatus );
- CHECK_OPENCL( clStatus, "clCreateBuffer" );
-
- cl_mem clResult = clCreateBuffer( kEnv.mpkContext, CL_MEM_READ_WRITE,
- nRowSize * sizeof(float), NULL, &clStatus);
- CHECK_OPENCL( clStatus, "clCreateBuffer" );
-
- clFinish( kEnv.mpkCmdQueue );
-
- clStatus = clSetKernelArg(pKernel->mpKernel, 0, sizeof(cl_mem), (void *)&clLeftData);
- CHECK_OPENCL( clStatus, "clSetKernelArg" );
- clStatus = clSetKernelArg(pKernel->mpKernel, 1, sizeof(cl_mem), (void *)&clRightData);
- CHECK_OPENCL( clStatus, "clSetKernelArg" );
- clStatus = clSetKernelArg(pKernel->mpKernel, 2, sizeof(cl_mem), (void *)&clResult);
- CHECK_OPENCL( clStatus, "clSetKernelArg" );
-
- global_work_size[0] = nRowSize;
- clStatus = clEnqueueNDRangeKernel(
- kEnv.mpkCmdQueue, pKernel->mpKernel, 1, NULL, global_work_size, NULL, 0, NULL, NULL);
- CHECK_OPENCL( clStatus, "clEnqueueNDRangeKernel" );
- clFinish( kEnv.mpkCmdQueue );
-
- clStatus = clEnqueueReadBuffer(kEnv.mpkCmdQueue, clResult, CL_TRUE, 0, nRowSize * sizeof(float), (float *)fpResult, 0, NULL, NULL);
- CHECK_OPENCL( clStatus, "clEnqueueReadBuffer" );
- for(int i=0;i<nRowSize;i++)
- rResult[i] = (double)fpResult[i];
- if(fpResult)
- {
- free(fpResult);
- fpResult = NULL;
- }
- clStatus = clFinish( kEnv.mpkCmdQueue );
- CHECK_OPENCL( clStatus, "clFinish" );
- clStatus = clReleaseMemObject( clResult );
- CHECK_OPENCL( clStatus, "clReleaseMemObject" );
- clStatus = clReleaseMemObject( clLeftData );
- CHECK_OPENCL( clStatus, "clReleaseMemObject" );
- clStatus = clReleaseMemObject( clRightData );
- CHECK_OPENCL( clStatus, "clReleaseMemObject" );
- return true;
-}
-
-bool OclCalc::oclHostFormulaStash32Bits( const char* aKernelName, const double* dpSrcData, uint *nStartPos, uint *nEndPos, double *output, int nBufferSize, int size )
-{
- cl_int clStatus = 0;
- size_t global_work_size[1];
- setKernelEnv( &kEnv );
- Kernel* pKernel = fetchKernel(aKernelName);
- if (!pKernel)
- return false;
-
- float *fpSrcData = (float *)malloc( sizeof(float) * nBufferSize );
- float *fpResult = (float *)malloc( sizeof(float) * size );
- for(int i=0;i<nBufferSize;i++)
- fpSrcData[i] = (float)dpSrcData[i];
- cl_mem clSrcData = clCreateBuffer( kEnv.mpkContext, (cl_mem_flags) (CL_MEM_COPY_HOST_PTR),
- nBufferSize * sizeof(float), (void *)fpSrcData, &clStatus );
- CHECK_OPENCL( clStatus, "clCreateBuffer" );
- cl_mem clStartPos = clCreateBuffer( kEnv.mpkContext, (cl_mem_flags) (CL_MEM_COPY_HOST_PTR),
- size * sizeof(unsigned int), (void *)nStartPos, &clStatus );
- CHECK_OPENCL( clStatus, "clCreateBuffer" );
- cl_mem clEndPos = clCreateBuffer( kEnv.mpkContext, (cl_mem_flags) (CL_MEM_COPY_HOST_PTR),
- size * sizeof(unsigned int), (void *)nEndPos, &clStatus );
- CHECK_OPENCL( clStatus, "clCreateBuffer" );
-
- cl_mem outputCl = clCreateBuffer( kEnv.mpkContext, CL_MEM_READ_WRITE, size * sizeof(double), NULL, &clStatus );
- CHECK_OPENCL( clStatus, "clCreateBuffer" );
- clStatus = clSetKernelArg(pKernel->mpKernel, 0, sizeof(cl_mem),(void *)&clSrcData);
- CHECK_OPENCL( clStatus, "clSetKernelArg" );
- clStatus = clSetKernelArg(pKernel->mpKernel, 1, sizeof(cl_mem), (void *)&clStartPos);
- CHECK_OPENCL( clStatus, "clSetKernelArg" );
- clStatus = clSetKernelArg(pKernel->mpKernel, 2, sizeof(cl_mem), (void *)&clEndPos);
- CHECK_OPENCL( clStatus, "clSetKernelArg" );
- clStatus = clSetKernelArg(pKernel->mpKernel, 3, sizeof(cl_mem), (void *)&outputCl);
- CHECK_OPENCL( clStatus, "clSetKernelArg" );
- global_work_size[0] = size;
- clStatus = clEnqueueNDRangeKernel(
- kEnv.mpkCmdQueue, pKernel->mpKernel, 1, NULL, global_work_size, NULL, 0, NULL, NULL);
- CHECK_OPENCL( clStatus, "clEnqueueNDRangeKernel" );
- clFinish( kEnv.mpkCmdQueue );
-
- clStatus = clEnqueueReadBuffer(kEnv.mpkCmdQueue, outputCl, CL_TRUE, 0, size * sizeof(float), (double *)fpResult, 0, NULL, NULL);
- CHECK_OPENCL( clStatus, "clReadBuffer" );
- for(int i = 0;i<size;i++)
- output[i] = (float)fpResult[i];
- clFinish( kEnv.mpkCmdQueue );
- if(fpResult)
- {
- free(fpResult);
- fpResult = NULL;
- }
- clStatus = clFinish(kEnv.mpkCmdQueue );
- CHECK_OPENCL( clStatus, "clFinish" );
- clStatus = clReleaseMemObject( outputCl );
- CHECK_OPENCL( clStatus, "clReleaseMemObject" );
- clStatus = clReleaseMemObject( clSrcData );
- CHECK_OPENCL( clStatus, "clReleaseMemObject" );
- clStatus = clReleaseMemObject( clStartPos );
- CHECK_OPENCL( clStatus, "clReleaseMemObject" );
- clStatus = clReleaseMemObject( clEndPos );
- CHECK_OPENCL( clStatus, "clReleaseMemObject" );
- return true;
-}
-
-bool OclCalc::oclHostFormulaCount32Bits( uint *npStartPos, uint *npEndPos, double *dpOutput, int nSize )
-{
- const char *cpKernelName = "oclFormulaCount";
- Kernel* pKernel = fetchKernel(cpKernelName);
- if (!pKernel)
- return false;
-
- cl_int clStatus;
- size_t global_work_size[1];
-
- clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, mpClmemStartPos, npStartPos, 0, NULL, NULL );
- CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" );
- clFinish( kEnv.mpkCmdQueue );
- clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, mpClmemEndPos, npEndPos, 0, NULL, NULL );
- CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" );
- clFinish( kEnv.mpkCmdQueue );
- cl_mem clpOutput = clCreateBuffer( kEnv.mpkContext, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
- nSize* sizeof(float), NULL, &clStatus );
- CHECK_OPENCL( clStatus, "clCreateBuffer" );
- clStatus = clSetKernelArg(pKernel->mpKernel, 0, sizeof(cl_mem), (void *)&mpClmemStartPos);
- CHECK_OPENCL( clStatus, "clSetKernelArg" );
- clStatus = clSetKernelArg(pKernel->mpKernel, 1, sizeof(cl_mem), (void *)&mpClmemEndPos);
- CHECK_OPENCL( clStatus, "clSetKernelArg" );
- clStatus = clSetKernelArg(pKernel->mpKernel, 2, sizeof(cl_mem), (void *)&clpOutput);
- CHECK_OPENCL(clStatus, "clSetKernelArg");
- global_work_size[0] = nSize;
- clStatus = clEnqueueNDRangeKernel(
- kEnv.mpkCmdQueue, pKernel->mpKernel, 1, NULL, global_work_size, NULL, 0, NULL, NULL);
- CHECK_OPENCL( clStatus, "clEnqueueNDRangeKernel" );
- clFinish( kEnv.mpkCmdQueue );
- float * fpOutputMap = (float *)clEnqueueMapBuffer( kEnv.mpkCmdQueue, clpOutput, CL_TRUE,
- CL_MAP_READ, 0, nSize*sizeof(float), 0, NULL, NULL, &clStatus );
- CHECK_OPENCL( clStatus, "clEnqueueMapBuffer" );
- clFinish( kEnv.mpkCmdQueue );
- for (int i = 0; i < nSize; i++ )
- dpOutput[i] = fpOutputMap[i];// from gpu float type to cpu double type
- clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, clpOutput, fpOutputMap, 0, NULL, NULL );
- CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" );
- clStatus = clFinish(kEnv.mpkCmdQueue );
- CHECK_OPENCL( clStatus, "clFinish" );
- clStatus = clReleaseMemObject(mpClmemSrcData );
- CHECK_OPENCL( clStatus, "clReleaseMemObject" );
- clStatus = clReleaseMemObject( mpClmemStartPos );
- CHECK_OPENCL( clStatus, "clReleaseMemObject" );
- clStatus = clReleaseMemObject( mpClmemEndPos );
- CHECK_OPENCL( clStatus, "clReleaseMemObject" );
- clStatus = clReleaseMemObject( clpOutput );
- CHECK_OPENCL( clStatus, "clReleaseMemObject" );
- return true;
-}
-
-//sumproduct
-bool OclCalc::oclHostFormulaSumProduct32Bits( float *fpSumProMergeLfData, float *fpSumProMergeRrData, uint *npSumSize, double *dpOutput, int nSize )
-{
- cl_int clStatus;
- size_t global_work_size[1];
- memset(dpOutput,0,nSize);
- const char *cpFirstKernelName = "oclSignedMul";
- const char *cpSecondKernelName = "oclFormulaSumproduct";
- Kernel* pKernel1 = fetchKernel(cpFirstKernelName);
- if (!pKernel1)
- return false;
-
- clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, mpClmemMergeLfData, fpSumProMergeLfData, 0, NULL, NULL );
- CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" );
- clFinish( kEnv.mpkCmdQueue );
- clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, mpClmemMergeRtData, fpSumProMergeRrData, 0, NULL, NULL );
- clFinish( kEnv.mpkCmdQueue );
- CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" );
- clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, mpClmemMatixSumSize, npSumSize, 0, NULL, NULL );
- CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" );
- clFinish( kEnv.mpkCmdQueue );
- unsigned int nMulResultSize = nFormulaRowSize + nFormulaRowSize * nSize * nFormulaColSize - 1;
- cl_mem clResult = clCreateBuffer( kEnv.mpkContext, CL_MEM_READ_WRITE, nMulResultSize * sizeof(float), NULL, &clStatus );
- CHECK_OPENCL( clStatus, "clCreateBuffer" );
- clStatus = clSetKernelArg(pKernel1->mpKernel, 0, sizeof(cl_mem), (void *)&mpClmemMergeLfData);
- CHECK_OPENCL( clStatus, "clSetKernelArg" );
- clStatus = clSetKernelArg(pKernel1->mpKernel, 1, sizeof(cl_mem), (void *)&mpClmemMergeRtData);
- CHECK_OPENCL( clStatus, "clSetKernelArg" );
- clStatus = clSetKernelArg(pKernel1->mpKernel, 2, sizeof(cl_mem), (void *)&clResult);
- CHECK_OPENCL( clStatus, "clSetKernelArg" );
- global_work_size[0] = nMulResultSize;
- clStatus = clEnqueueNDRangeKernel(
- kEnv.mpkCmdQueue, pKernel1->mpKernel, 1, NULL, global_work_size, NULL, 0, NULL, NULL);
- CHECK_OPENCL( clStatus, "clEnqueueNDRangeKernel" );
- clFinish( kEnv.mpkCmdQueue );
- clStatus = clReleaseMemObject( mpClmemMergeLfData );
- CHECK_OPENCL( clStatus,"clReleaseMemObject" );
- clStatus = clReleaseMemObject( mpClmemMergeRtData );
- CHECK_OPENCL( clStatus, "clReleaseMemObject" );
-
- Kernel* pKernel2 = fetchKernel(cpSecondKernelName);
- if (!pKernel2)
- return false;
-
- cl_mem clpOutput = clCreateBuffer( kEnv.mpkContext, CL_MEM_READ_WRITE, nSize* sizeof(float), NULL, &clStatus );
- CHECK_OPENCL( clStatus, "clCreateBuffer" );
- cl_uint nMatixSize = nFormulaColSize * nFormulaRowSize;
- clStatus = clSetKernelArg(pKernel2->mpKernel, 0, sizeof(cl_mem), (void *)&clResult);
- CHECK_OPENCL( clStatus, "clSetKernelArg" );
- clStatus = clSetKernelArg(pKernel2->mpKernel, 1, sizeof(cl_mem), (void *)&mpClmemMatixSumSize);
- CHECK_OPENCL( clStatus, "clSetKernelArg" );
- clStatus = clSetKernelArg(pKernel2->mpKernel, 2, sizeof(cl_mem), (void *)&clpOutput);
- CHECK_OPENCL( clStatus, "clSetKernelArg" );
- clStatus = clSetKernelArg(pKernel2->mpKernel, 3, sizeof(cl_uint), (void *)&nMatixSize);
- CHECK_OPENCL( clStatus, "clSetKernelArg" );
- global_work_size[0] = nSize;
- clStatus = clEnqueueNDRangeKernel(
- kEnv.mpkCmdQueue, pKernel2->mpKernel, 1, NULL, global_work_size, NULL, 0, NULL, NULL);
- CHECK_OPENCL( clStatus, "clEnqueueNDRangeKernel" );
- clFinish( kEnv.mpkCmdQueue );
- float * fpOutputMap = (float *)clEnqueueMapBuffer( kEnv.mpkCmdQueue, clpOutput, CL_TRUE, CL_MAP_READ, 0,
- nSize*sizeof(float), 0, NULL, NULL, &clStatus );
- CHECK_OPENCL( clStatus, "clEnqueueMapBuffer" );
- clFinish( kEnv.mpkCmdQueue );
- for (int i = 0; i < nSize; i++ )
- {
- dpOutput[i] = fpOutputMap[i]; // from gpu float type to cpu double type
- }
- clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, clpOutput, fpOutputMap, 0, NULL, NULL);
- CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" );
-
- clStatus = clFinish( kEnv.mpkCmdQueue );
- CHECK_OPENCL( clStatus, "clFinish" );
- clStatus = clReleaseMemObject( clResult );
- CHECK_OPENCL( clStatus, "clReleaseKernel" );
- clStatus = clReleaseMemObject( mpClmemMatixSumSize );
- CHECK_OPENCL( clStatus, "clReleaseKernel" );
- clStatus = clReleaseMemObject( clpOutput );
- CHECK_OPENCL( clStatus, "clReleaseKernel" );
- return true;
-}
-
-
-// FIXME: should be templatised in <double> - double buffering [sic] rocks
-static cl_mem allocateDoubleBuffer( KernelEnv &rEnv, const double *_pValues, size_t nElements, cl_int *pStatus )
-{
- // Ugh - horrible redundant copying ...
- cl_mem xValues = clCreateBuffer( rEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR),
- nElements * sizeof(double), NULL, pStatus);
- double *pValues = (double *)clEnqueueMapBuffer( rEnv.mpkCmdQueue, xValues, CL_TRUE, CL_MAP_WRITE, 0,
- nElements * sizeof(double), 0, NULL, NULL, NULL);
- clFinish(rEnv.mpkCmdQueue);
- for ( int i = 0; i < (int)nElements; i++ )
- pValues[i] = _pValues[i];
- clEnqueueUnmapMemObject( rEnv.mpkCmdQueue, xValues, pValues, 0, NULL, NULL );
- clFinish( rEnv.mpkCmdQueue );
- return xValues;
-}
-
-static cl_mem allocateFloatBuffer( KernelEnv &rEnv, const double *_pValues, size_t nElements, cl_int *pStatus )
-{
- // Ugh - horrible redundant copying ...
- cl_mem xValues = clCreateBuffer( rEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR),
- nElements * sizeof(float), NULL, pStatus);
- float *pValues = (float *)clEnqueueMapBuffer( rEnv.mpkCmdQueue, xValues, CL_TRUE, CL_MAP_WRITE, 0,
- nElements * sizeof(float), 0, NULL, NULL, NULL );
- clFinish( rEnv.mpkCmdQueue );
- for ( int i = 0; i < (int)nElements; i++ )
- pValues[i] = (float)_pValues[i];
-
- clEnqueueUnmapMemObject(rEnv.mpkCmdQueue,xValues,pValues,0,NULL,NULL);
- clFinish( rEnv.mpkCmdQueue );
- return xValues;
-}
-
-bool OclCalc::oclGroundWaterGroup( uint *eOp, uint eOpNum, const double *pOpArray, const double *pSubtractSingle, size_t nSrcDataSize,size_t nElements, double del ,uint *nStartPos,uint *nEndPos,double *dpResult)
-{
- setKernelEnv( &kEnv );
-
- char kernelName[256] = "";
- double delta = del;
- bool subFlag = false;
- strcat(kernelName,"ocl");
- for ( size_t i = 0; i < eOpNum; i++ )
- {
- switch ( eOp[i] )
- {
- case ocAdd:
- strcat(kernelName,"Add");
- break;
- case ocSub:
- strcat(kernelName,"Sub");
- break;
- case ocMul:
- strcat(kernelName,"Mul");
- break;
- case ocDiv:
- strcat(kernelName,"Div");
- break;
- case ocMax:
- strcat(kernelName,"Max");
- break;
- case ocMin:
- strcat(kernelName,"Min");
- break;
- case ocAverage:
- strcat(kernelName,"Average");
- break;
- default:
- assert( false );
- break;
- }
- }
- Kernel* pKernel = fetchKernel(kernelName);
- if (!pKernel)
- return false;
-
- cl_int clStatus;
- size_t global_work_size[1];
- if ( ( eOpNum == 1 ) && ( eOp[0] == ocSub ) )
- subFlag = true;
-
- cl_mem valuesCl = NULL, subtractCl = NULL, outputCl = NULL, startPosCL = NULL, endPosCL = NULL;
-
- if(!subFlag)
- {
- startPosCL = clCreateBuffer( kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR),
- nElements * sizeof(unsigned int), NULL, &clStatus );
- CHECK_OPENCL( clStatus, "clCreateBuffer" );
- endPosCL = clCreateBuffer( kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR),
- nElements * sizeof(unsigned int), NULL, &clStatus );
- CHECK_OPENCL( clStatus, "clCreateBuffer" );
- unsigned int *npStartPosMap = (uint *)clEnqueueMapBuffer( kEnv.mpkCmdQueue, startPosCL, CL_TRUE, CL_MAP_WRITE, 0,
- nElements * sizeof(uint), 0, NULL, NULL, &clStatus );
- CHECK_OPENCL( clStatus, "clEnqueueMapBuffer" );
- clFinish( kEnv.mpkCmdQueue );
- unsigned int *npEndPosMap = (uint *)clEnqueueMapBuffer( kEnv.mpkCmdQueue, endPosCL, CL_TRUE, CL_MAP_WRITE, 0,
- nElements * sizeof(uint), 0, NULL, NULL, &clStatus );
- CHECK_OPENCL( clStatus, "clEnqueueMapBuffer" );
- clFinish( kEnv.mpkCmdQueue );
-
- for(uint i=0;i<nElements;i++)
- {
- npStartPosMap[i]=nStartPos[i];
- npEndPosMap[i]=nEndPos[i];
- }
- clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, startPosCL, npStartPosMap, 0, NULL, NULL );
- CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" );
- clFinish( kEnv.mpkCmdQueue );
- clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, endPosCL, npEndPosMap, 0, NULL, NULL );
- CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" );
-
- if ( gpuEnv.mnKhrFp64Flag || gpuEnv.mnAmdFp64Flag )
- {
- valuesCl = allocateDoubleBuffer( kEnv, pOpArray, nSrcDataSize, &clStatus );
- subtractCl = allocateDoubleBuffer( kEnv, pSubtractSingle, nElements, &clStatus );
- outputCl = clCreateBuffer( kEnv.mpkContext, CL_MEM_READ_WRITE, nElements * sizeof(double), NULL, &clStatus );
- }
- else
- {
- valuesCl = allocateFloatBuffer( kEnv, pOpArray, nSrcDataSize, &clStatus );
- subtractCl = allocateFloatBuffer( kEnv, pSubtractSingle, nElements, &clStatus );
- outputCl = clCreateBuffer( kEnv.mpkContext, CL_MEM_READ_WRITE , nElements * sizeof(float), NULL, &clStatus);
- }
- CHECK_OPENCL( clStatus, "clCreateBuffer" );
-
- clStatus = clSetKernelArg(pKernel->mpKernel, 0, sizeof(cl_mem), (void *)&valuesCl);
- CHECK_OPENCL( clStatus, "clSetKernelArg");
- clStatus = clSetKernelArg(pKernel->mpKernel, 1, sizeof(cl_mem), (void *)&subtractCl);
- CHECK_OPENCL( clStatus, "clSetKernelArg");
- clStatus = clSetKernelArg(pKernel->mpKernel, 2, sizeof(cl_mem), (void *)&startPosCL);
- CHECK_OPENCL( clStatus, "clSetKernelArg" );
- clStatus = clSetKernelArg(pKernel->mpKernel, 3, sizeof(cl_mem), (void *)&endPosCL);
- CHECK_OPENCL( clStatus, "clSetKernelArg" );
- clStatus = clSetKernelArg(pKernel->mpKernel, 4, sizeof(cl_mem), (void *)&outputCl);
- CHECK_OPENCL( clStatus, "clSetKernelArg" );
-
- fprintf( stderr, "prior to enqueue range kernel\n" );
- }
- else
- {
- if ( gpuEnv.mnKhrFp64Flag || gpuEnv.mnAmdFp64Flag )
- {
- subtractCl = allocateDoubleBuffer( kEnv, pSubtractSingle, nElements, &clStatus );
- outputCl = clCreateBuffer( kEnv.mpkContext, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, nElements * sizeof(double), NULL, &clStatus );
- clStatus = clSetKernelArg(pKernel->mpKernel, 0, sizeof(cl_double), (void *)&delta);
- CHECK_OPENCL( clStatus, "clSetKernelArg");
- }
- else
- {
- float fTmp = (float)delta;
- subtractCl = allocateFloatBuffer( kEnv, pSubtractSingle, nElements, &clStatus );
- outputCl = clCreateBuffer( kEnv.mpkContext, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, nElements * sizeof(float), NULL, &clStatus );
- clStatus = clSetKernelArg(pKernel->mpKernel, 0, sizeof(cl_float), (void *)&fTmp);
- CHECK_OPENCL( clStatus, "clSetKernelArg");
- }
- clStatus = clSetKernelArg(pKernel->mpKernel, 1, sizeof(cl_mem), (void *)&subtractCl);
- CHECK_OPENCL( clStatus, "clSetKernelArg");
- clStatus = clSetKernelArg(pKernel->mpKernel, 2, sizeof(cl_mem), (void *)&outputCl);
- CHECK_OPENCL( clStatus, "clSetKernelArg" );
- }
- global_work_size[0] = nElements;
- clStatus = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, pKernel->mpKernel, 1, NULL, global_work_size, NULL, 0, NULL, NULL);
- CHECK_OPENCL( clStatus, "clEnqueueNDRangeKernel" );
- clFinish( kEnv.mpkCmdQueue );
-
- if ( gpuEnv.mnKhrFp64Flag || gpuEnv.mnAmdFp64Flag )
- {
- clStatus = clEnqueueReadBuffer(kEnv.mpkCmdQueue,
- outputCl,
- CL_TRUE,0,
- nElements * sizeof(double),
- (void *)dpResult,0,NULL,NULL);
- CHECK_OPENCL( clStatus, "clEnqueueReadBuffer" );
- clFinish( kEnv.mpkCmdQueue );
- }
- else
- {
- float *afBuffer = new float[nElements];
- if ( !afBuffer )
- return false;
- clStatus = clEnqueueReadBuffer(kEnv.mpkCmdQueue,
- outputCl,
- CL_TRUE,0,
- nElements * sizeof(float),
- (void *)afBuffer,0,NULL,NULL);
- CHECK_OPENCL( clStatus, "clEnqueueReadBuffer" );
- clFinish( kEnv.mpkCmdQueue );
- for ( size_t i = 0; i < nElements; i++ )
- {
- dpResult[i] = (double)afBuffer[i];
- }
- delete [] afBuffer;
- }
-
- clStatus = clFinish( kEnv.mpkCmdQueue );
- CHECK_OPENCL( clStatus, "clFinish" );
-
- CHECK_OPENCL_RELEASE( clStatus, valuesCl );
- CHECK_OPENCL_RELEASE( clStatus, subtractCl );
- CHECK_OPENCL_RELEASE( clStatus, outputCl );
- CHECK_OPENCL_RELEASE( clStatus, startPosCL );
- CHECK_OPENCL_RELEASE( clStatus, endPosCL );
-
- fprintf( stderr, "completed opencl operation\n" );
-
- return true;
-}
-double *OclCalc::oclSimpleDeltaOperation( OpCode eOp, const double *pOpArray, const double *pSubtractSingle, size_t nElements, double del )
-{
- setKernelEnv( &kEnv );
-
- // select a kernel: cut & paste coding is utterly evil.
- const char *kernelName = NULL;
- double delta = del;
- bool subFlag = false;
- switch ( eOp ) {
- case ocAdd:
- fprintf( stderr, "ocSub is %d\n", ocSub );
- case ocMul:
- case ocDiv:
- ; // FIXME: fallthrough for now
- case ocMax:
- kernelName = "oclMaxDelta";
- break;
- case ocMin:
- kernelName = "oclMinDelta";
- break;
- case ocAverage:
- kernelName = "oclAverageDelta";
- break;
- case ocSub:
- kernelName = "oclSubDelta";
- subFlag = true;
- break;
- default:
- assert( false );
- }
-
- Kernel* pKernel = fetchKernel(kernelName);
- if (!pKernel)
- return NULL;
-
- cl_int clStatus;
- size_t global_work_size[1];
-
- // Ugh - horrible redundant copying ...
-
- cl_mem valuesCl = NULL, subtractCl = NULL, outputCl = NULL;
- if(!subFlag)
- {
- if ( gpuEnv.mnKhrFp64Flag || gpuEnv.mnAmdFp64Flag )
- {
- valuesCl = allocateDoubleBuffer( kEnv, pOpArray, nElements, &clStatus );
- subtractCl = allocateDoubleBuffer( kEnv, pSubtractSingle, nElements, &clStatus );
- outputCl = clCreateBuffer( kEnv.mpkContext, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, nElements * sizeof(double), NULL, &clStatus );
- }
- else
- {
- valuesCl = allocateFloatBuffer( kEnv, pOpArray, nElements, &clStatus );
- subtractCl = allocateFloatBuffer( kEnv, pSubtractSingle, nElements, &clStatus );
- outputCl = clCreateBuffer( kEnv.mpkContext, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, nElements * sizeof(float), NULL, &clStatus);
- }
- CHECK_OPENCL_PTR( clStatus, "clCreateBuffer" );
-
- cl_uint start = 0;
- cl_uint end = (cl_uint)nElements;
-
- clStatus = clSetKernelArg(pKernel->mpKernel, 0, sizeof(cl_mem), (void *)&valuesCl);
- CHECK_OPENCL_PTR( clStatus, "clSetKernelArg");
- clStatus = clSetKernelArg(pKernel->mpKernel, 1, sizeof(cl_mem), (void *)&subtractCl);
- CHECK_OPENCL_PTR( clStatus, "clSetKernelArg");
- clStatus = clSetKernelArg(pKernel->mpKernel, 2, sizeof(cl_uint), (void *)&start);
- CHECK_OPENCL_PTR( clStatus, "clSetKernelArg" );
- clStatus = clSetKernelArg(pKernel->mpKernel, 3, sizeof(cl_uint), (void *)&end);
- CHECK_OPENCL_PTR( clStatus, "clSetKernelArg" );
- clStatus = clSetKernelArg(pKernel->mpKernel, 4, sizeof(cl_mem), (void *)&outputCl);
- CHECK_OPENCL_PTR( clStatus, "clSetKernelArg" );
-
- fprintf( stderr, "prior to enqueue range kernel\n" );
- }
- else
- {
- if ( gpuEnv.mnKhrFp64Flag || gpuEnv.mnAmdFp64Flag )
- {
- subtractCl = allocateDoubleBuffer( kEnv, pSubtractSingle, nElements, &clStatus );
- outputCl = clCreateBuffer( kEnv.mpkContext, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, nElements * sizeof(double), NULL, &clStatus );
- clStatus = clSetKernelArg(pKernel->mpKernel, 0, sizeof(cl_double), (void *)&delta);
- CHECK_OPENCL_PTR( clStatus, "clSetKernelArg");
- }
- else
- {
- float fTmp = (float)delta;
- subtractCl = allocateFloatBuffer( kEnv, pSubtractSingle, nElements, &clStatus );
- outputCl = clCreateBuffer( kEnv.mpkContext, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, nElements * sizeof(float), NULL, &clStatus );
- clStatus = clSetKernelArg(pKernel->mpKernel, 0, sizeof(cl_float), (void *)&fTmp);
- CHECK_OPENCL_PTR( clStatus, "clSetKernelArg");
- }
- clStatus = clSetKernelArg(pKernel->mpKernel, 1, sizeof(cl_mem), (void *)&subtractCl);
- CHECK_OPENCL_PTR( clStatus, "clSetKernelArg");
- clStatus = clSetKernelArg(pKernel->mpKernel, 2, sizeof(cl_mem), (void *)&outputCl);
- CHECK_OPENCL_PTR( clStatus, "clSetKernelArg" );
- }
- global_work_size[0] = nElements;
- clStatus = clEnqueueNDRangeKernel(
- kEnv.mpkCmdQueue, pKernel->mpKernel, 1, NULL, global_work_size, NULL, 0, NULL, NULL);
- CHECK_OPENCL_PTR( clStatus, "clEnqueueNDRangeKernel" );
- clFinish( kEnv.mpkCmdQueue );
-
- double *pResult = new double[nElements];
- if ( !pResult )
- return NULL; // leak.
- if ( gpuEnv.mnKhrFp64Flag || gpuEnv.mnAmdFp64Flag )
- {
- clStatus = clEnqueueReadBuffer(kEnv.mpkCmdQueue,
- outputCl,
- CL_TRUE,0,
- nElements * sizeof(double),
- (void *)pResult,0,NULL,NULL);
- }
- else
- {
- float *afBuffer = new float[nElements];
- if ( !afBuffer )
- return NULL;
- clStatus = clEnqueueReadBuffer(kEnv.mpkCmdQueue,
- outputCl,
- CL_TRUE,0,
- nElements * sizeof(float),
- (void *)afBuffer,0,NULL,NULL);
- for ( int i = 0; i < (int)nElements; i++ )
- pResult[i] = (double)afBuffer[i];
- if ( !afBuffer )
- delete [] afBuffer;
- }
- CHECK_OPENCL_PTR( clStatus, "clEnqueueReadBuffer" );
-
- clStatus = clFinish( kEnv.mpkCmdQueue );
- CHECK_OPENCL_PTR( clStatus, "clFinish" );
-
- if ( valuesCl != NULL )
- {
- clStatus = clReleaseMemObject( valuesCl );
- CHECK_OPENCL_PTR( clStatus, "clReleaseMemObject" );
- }
- if ( subtractCl != NULL )
- {
... etc. - the rest is truncated
More information about the Libreoffice-commits
mailing list