[Libreoffice-commits] core.git: Branch 'private/kohei/xlsx-import-speedup' - 168 commits - autogen.sh basctl/source basebmp/source basebmp/test basegfx/source basegfx/test basic/qa basic/source bin/get-bugzilla-attachments-by-mimetype bridges/source canvas/source chart2/AllLangResTarget_chartcontroller.mk chart2/source chart2/uiconfig chart2/UIConfig_chart2.mk config_host/config_eot.h.in config_host.mk.in configure.ac connectivity/source cui/source dbaccess/source download.lst external/icu external/libeot external/liblangtag external/libxml2 external/libxslt external/Module_external.mk external/nss extras/source filter/AllLangResTarget_flash.mk filter/Module_filter.mk filter/source filter/uiconfig filter/UIConfig_xsltdlg.mk formula/source fpicker/source framework/source helpcontent2 i18npool/qa i18npool/source icon-themes/galaxy include/formula include/osl include/rtl include/sal include/sfx2 include/tools include/vcl include/xmloff Makefile.fetch offapi/com officecfg/registry oox/Library _oox.mk oox/source postprocess/CustomTarget_registry.mk postprocess/packimages RepositoryExternal.mk sal/rtl sc/inc sc/Library_sc.mk sc/Module_sc.mk scp2/source sc/qa sc/sdi sc/source sc/uiconfig sc/UIConfig_scalc.mk sd/AllLangResTarget_sd.mk sd/inc sd/qa sd/source sd/uiconfig sd/UIConfig_sdraw.mk sfx2/inc sfx2/qa sfx2/source sfx2/uiconfig sfx2/UIConfig_sfx.mk solenv/bin solenv/gbuild starmath/inc starmath/source svtools/source svx/source svx/uiconfig sw/CppunitTest_sw_htmlexport.mk sw/qa sw/source sysui/CustomTarget_share.mk sysui/desktop tools/inc tools/source vcl/aqua vcl/Library_vcl.mk vcl/qa vcl/source writerfilter/inc writerfilter/source xmlhelp/source xmlhelp/util xmloff/inc xmloff/source xmlreader/source xmlscript/source xmlscript/test

Tor Lillqvist tml at collabora.com
Wed Nov 13 11:37:22 PST 2013


Rebased ref, commits from common ancestor:
commit e51ae1c2c69597d751cf184f69192cdcb57ebf48
Author: Tor Lillqvist <tml at collabora.com>
Date:   Wed Nov 13 20:42:19 2013 +0200

    s/printf/SAL_INFO
    
    Change-Id: I2bced1740811d236e724eaf172249a9828575d7a

diff --git a/sc/source/core/opencl/openclwrapper.cxx b/sc/source/core/opencl/openclwrapper.cxx
index aa41112..70a9e00 100644
--- a/sc/source/core/opencl/openclwrapper.cxx
+++ b/sc/source/core/opencl/openclwrapper.cxx
@@ -207,11 +207,11 @@ std::vector<boost::shared_ptr<osl::File> > OpenclDevice::binaryGenerated( const
             if(pNewFile->open(osl_File_OpenFlag_Read) == osl::FileBase::E_None)
             {
                 aGeneratedFiles.push_back(boost::shared_ptr<osl::File>(pNewFile));
-                printf("opencl-wrapper: opening binary for reading [%s] success\n", fileName.getStr());
+                SAL_INFO("sc.opencl", "Opening binary file '" << fileName << "' for reading: success");
             }
             else
             {
-                printf("opencl-wrapper: opening binary for reading [%s] fail\n", fileName.getStr());
+                SAL_INFO("sc.opencl", "Opening binary file '" << fileName << "' for reading: FAIL");
                 delete pNewFile;
                 break;
             }
@@ -288,11 +288,9 @@ int OpenclDevice::generatBinFromKernelSource( cl_program program, const char * c
             OString fileName = createFileName(mpArryDevsID[i], clFileName);
             if ( !writeBinaryToFile( fileName,
                         binaries[i], binarySizes[i] ) )
-            {
-                printf("opencl-wrapper: write binary [%s] fail\n", fileName.getStr());
-            }
+                SAL_INFO("sc.opencl", "Writing binary file '" << fileName << "': FAIL");
             else
-                printf("opencl-wrapper: write binary [%s] success\n", fileName.getStr());
+                SAL_INFO("sc.opencl", "Writing binary file '" << fileName << "': success");
         }
     }
 
@@ -351,7 +349,6 @@ bool buildProgram(const char* buildOption, GPUEnv* gpuInfo, int idx)
     cl_int clStatus;
     //char options[512];
     // create a cl program executable for all the devices specified
-    printf("BuildProgram.\n");
     if (!gpuInfo->mnIsUserCreated)
     {
         clStatus = clBuildProgram(gpuInfo->mpArryPrograms[idx], 1, gpuInfo->mpArryDevsID,
@@ -366,7 +363,6 @@ bool buildProgram(const char* buildOption, GPUEnv* gpuInfo, int idx)
     if ( clStatus != CL_SUCCESS )
     {
         size_t length;
-        printf ("BuildProgram error!\n");
         if ( !gpuInfo->mnIsUserCreated )
         {
             clStatus = clGetProgramBuildInfo( gpuInfo->mpArryPrograms[idx], gpuInfo->mpArryDevsID[0],
@@ -379,7 +375,6 @@ bool buildProgram(const char* buildOption, GPUEnv* gpuInfo, int idx)
         }
         if ( clStatus != CL_SUCCESS )
         {
-            printf("opencl create build log fail\n");
             return 0;
         }
 
@@ -396,7 +391,6 @@ bool buildProgram(const char* buildOption, GPUEnv* gpuInfo, int idx)
         }
         if ( clStatus != CL_SUCCESS )
         {
-            printf("opencl program build info fail\n");
             return false;
         }
 
@@ -465,7 +459,6 @@ bool OpenclDevice::buildProgramFromBinary(const char* buildOption, GPUEnv* gpuIn
 
         cl_int binary_status;
 
-        fprintf(stderr, "Create kernel from binary\n");
         gpuInfo->mpArryPrograms[idx] = clCreateProgramWithBinary( gpuInfo->mpContext,numDevices,
                                            mpArryDevsID.get(), length.get(), (const unsigned char**) pBinary.get(),
                                            &binary_status, &clStatus );
@@ -503,10 +496,8 @@ int OpenclDevice::initOpenclRunEnv( int argc )
         int status = initOpenclRunEnv( &gpuEnv );
         if ( status )
         {
-            printf("init_opencl_env failed.\n");
             return 1;
         }
-        printf("init_opencl_env successed.\n");
         //initialize program, kernelName, kernelCount
         if( getenv( "SC_FLOAT" ) )
         {
@@ -515,15 +506,15 @@ int OpenclDevice::initOpenclRunEnv( int argc )
         }
         if( gpuEnv.mnKhrFp64Flag )
         {
-            printf("----use khr double type in kernel----\n");
+            SAL_INFO("sc.opencl", "Use Khr double");
         }
         else if( gpuEnv.mnAmdFp64Flag )
         {
-            printf("----use amd double type in kernel----\n");
+            SAL_INFO("sc.opencl", "Use AMD double type");
         }
         else
         {
-            printf("----use float type in kernel----\n");
+            SAL_INFO("sc.opencl", "USE float type");
         }
         isInited = 1;
     }
@@ -710,7 +701,6 @@ int OpenclDevice::initOpenclRunEnv( GPUEnv *gpuInfo )
 
 void OpenclDevice::setOpenclState( int state )
 {
-    //printf("OpenclDevice::setOpenclState...\n");
     isInited = state;
 }
 
commit 3b7c9033e8b0f28544468a15bb902c1029c47084
Author: Tor Lillqvist <tml at collabora.com>
Date:   Wed Nov 6 21:21:56 2013 +0200

    We don't use the static OpenCL kernels any more
    
    So remove them and code related only to them.
    
    Change-Id: Ibd09e7a801b9757443b6f87018570ec007e201d5

diff --git a/sc/inc/formulagroup.hxx b/sc/inc/formulagroup.hxx
index fb7692b..7c16364 100644
--- a/sc/inc/formulagroup.hxx
+++ b/sc/inc/formulagroup.hxx
@@ -89,7 +89,6 @@ class SC_DLLPUBLIC FormulaGroupInterpreter
     static FormulaGroupInterpreter *getStatic();
     static void fillOpenCLInfo(std::vector<OpenclPlatformInfo>& rPlatforms);
     static bool switchOpenCLDevice(const OUString& rDeviceId, bool bAutoSelect);
-    static void compileOpenCLKernels();
     static void enableOpenCL(bool bEnable);
 
     virtual ScMatrixRef inverseMatrix(const ScMatrix& rMat) = 0;
diff --git a/sc/source/core/opencl/formulagroupcl.cxx b/sc/source/core/opencl/formulagroupcl.cxx
index fbe43c8..81ac09d 100644
--- a/sc/source/core/opencl/formulagroupcl.cxx
+++ b/sc/source/core/opencl/formulagroupcl.cxx
@@ -72,7 +72,7 @@ size_t DynamicKernelArgument::Marshal(cl_kernel k, int argno, int)
     }
     // Obtain cl context
     KernelEnv kEnv;
-    OclCalc::setKernelEnv(&kEnv);
+    OpenclDevice::setKernelEnv(&kEnv);
     cl_int err;
     mpClmem = clCreateBuffer(kEnv.mpkContext,
         (cl_mem_flags) CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR,
@@ -139,7 +139,7 @@ public:
         // marshaling
         // Obtain cl context
         KernelEnv kEnv;
-        OclCalc::setKernelEnv(&kEnv);
+        OpenclDevice::setKernelEnv(&kEnv);
         // Pass the scalar result back to the rest of the formula kernel
         cl_int err = clSetKernelArg(k, argno, sizeof(cl_uint), (void*)&hashCode);
         if (CL_SUCCESS != err)
@@ -220,7 +220,7 @@ size_t DynamicKernelStringArgument::Marshal(cl_kernel k, int argno, int)
     assert(mpClmem == NULL);
     // Obtain cl context
     KernelEnv kEnv;
-    OclCalc::setKernelEnv(&kEnv);
+    OpenclDevice::setKernelEnv(&kEnv);
     cl_int err;
     formula::VectorRefArray vRef;
     size_t nStrings = 0;
@@ -1367,7 +1367,7 @@ public:
     {
         // Obtain cl context
         KernelEnv kEnv;
-        OclCalc::setKernelEnv(&kEnv);
+        OpenclDevice::setKernelEnv(&kEnv);
         cl_int err;
         // The results
         mpResClmem = clCreateBuffer(kEnv.mpkContext,
@@ -1424,7 +1424,7 @@ void DynamicKernel::CreateKernel(void)
     // Compile kernel here!!!
     // Obtain cl context
     KernelEnv kEnv;
-    OclCalc::setKernelEnv(&kEnv);
+    OpenclDevice::setKernelEnv(&kEnv);
     const char *src = mFullProgramSrc.c_str();
     if (OpenclDevice::buildProgramFromBinary("",
         &OpenclDevice::gpuEnv,
@@ -1546,7 +1546,7 @@ bool FormulaGroupInterpreterOpenCL::interpret( ScDocument& rDoc,
         mpKernel->CodeGen();
         // Obtain cl context
         KernelEnv kEnv;
-        OclCalc::setKernelEnv(&kEnv);
+        OpenclDevice::setKernelEnv(&kEnv);
         // Compile kernel here!!!
         mpKernel->CreateKernel();
         // Run the kernel.
@@ -1636,11 +1636,6 @@ SAL_DLLPUBLIC_EXPORT bool SAL_CALL switchOpenClDevice(
     return sc::opencl::switchOpenclDevice(pDeviceId, bAutoSelect);
 }
 
-SAL_DLLPUBLIC_EXPORT void compileOpenCLKernels(const OUString* pDeviceId)
-{
-    sc::opencl::compileOpenCLKernels(pDeviceId);
-}
-
 }
 
 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sc/source/core/opencl/oclkernels.hxx b/sc/source/core/opencl/oclkernels.hxx
deleted file mode 100644
index 3e0af5b..0000000
--- a/sc/source/core/opencl/oclkernels.hxx
+++ /dev/null
@@ -1,390 +0,0 @@
-/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
-/*
- * This file is part of the LibreOffice project.
- *
- * This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/.
- */
-
-#ifndef SC_OCLKERNELS_HXX
-#define SC_OCLKERNELS_HXX
-
-#ifndef USE_EXTERNAL_KERNEL
-#define KERNEL( ... )# __VA_ARGS__
-
-namespace sc { namespace opencl {
-
-// Double precision is a default of spreadsheets
-// cl_khr_fp64: Khronos extension
-// cl_amd_fp64: AMD extension
-// use build option outside to define fp_t
-/////////////////////////////////////////////
-const char *kernel_src = KERNEL(
-\n#ifdef KHR_DP_EXTENSION\n
-\n#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n
-\n#elif AMD_DP_EXTENSION\n
-\n#pragma OPENCL EXTENSION cl_amd_fp64 : enable\n
-\n#else\n
-\n#endif\n
-inline fp_t oclAverage( const uint id,__global fp_t *values,__global uint *startArray,__global uint *endArray)
-{
-    uint start = startArray[id];
-    uint end = endArray[id];
-    fp_t fSum = 0.0;
-    fp_t zero[16] = {0.0f,0.0f,0.0f,0.0f,0.0f,0.0f,0.0f,0.0f,0.0f,0.0f,0.0f,0.0f,0.0f,0.0f,0.0f,0.0f};
-    fp_t16 vSum=vload16(0,zero);
-    fp_t16 ptr;
-    __global fp_t *p = values;
-    p+= start;
-
-    for(int i = 0; i < (end - start + 1)/16; ++i)
-    {
-        ptr=vload16(0,p);
-        vSum += ptr;
-        p+=16;
-    }
-    int lastData = (end-start+1)%16;
-    for(int i = 0; i <lastData; i++)
-    {
-        fSum += *p;
-        p+=1;
-    }
-    vSum.s01234567 = vSum.s01234567+vSum.s89abcdef;
-    vSum.s0123 = vSum.s0123+vSum.s4567;
-    vSum.s01 = vSum.s01+vSum.s23;
-    vSum.s0 = vSum.s0+vSum.s1;
-    fSum = vSum.s0+fSum;
-    fp_t fVal = fSum/(end-start+1);
-    return fVal;
-}
-inline fp_t oclMax( const uint id,__global fp_t *values,__global uint *startArray,__global uint *endArray)
-{
-    uint start = startArray[id];
-    uint end = endArray[id];
-    fp_t fMax = values[start];
-    fp_t zero[16] = {fMax,fMax,fMax,fMax,fMax,fMax,fMax,fMax,fMax,fMax,fMax,fMax,fMax,fMax,fMax,fMax};
-    fp_t16 vMax=vload16(0,zero);
-    //Max
-    fp_t16 ptr;
-    __global fp_t *p = values;
-    p+= start;
-    for(int i = 0; i < (end - start + 1)/16; ++i)
-    {
-        ptr=vload16(0,p);
-        vMax = fmax(vMax,ptr);
-        p+=16;
-    }
-    int lastData = (end-start+1)%16;
-    for(int i = 0; i <lastData; i++)
-    {
-        fMax = fmax(fMax,*p);
-        p+=1;
-    }
-    vMax.s01234567 = fmax(vMax.s01234567, vMax.s89abcdef);
-    vMax.s0123 = fmax(vMax.s0123, vMax.s4567);
-    vMax.s01 = fmax(vMax.s01, vMax.s23);
-    vMax.s0 = fmax(vMax.s0, vMax.s1);
-    fMax = fmax(vMax.s0, fMax);
-    return fMax;
-}
-inline fp_t oclMin( const uint id,__global fp_t *values,__global uint *startArray,__global uint *endArray)
-{
-    uint start = startArray[id];
-    uint end = endArray[id];
-    fp_t fMin = values[start];
-    fp_t zero[16] = {fMin,fMin,fMin,fMin,fMin,fMin,fMin,fMin,fMin,fMin,fMin,fMin,fMin,fMin,fMin,fMin};
-    fp_t16 vMin=vload16(0,zero);
-    //Min
-    fp_t16 ptr;
-    __global fp_t *p = values;
-    p+= start;
-    for(int i = 0; i < (end - start + 1)/16; ++i)
-    {
-        ptr=vload16(0,p);
-        vMin = fmin(vMin,ptr);
-        p+=16;
-    }
-    int lastData = (end-start+1)%16;
-    for(int i = 0; i <lastData; i++)
-    {
-        fMin = fmin(fMin,*p);
-        p+=1;
-    }
-    vMin.s01234567 = fmin(vMin.s01234567, vMin.s89abcdef);
-    vMin.s0123 = fmin(vMin.s0123, vMin.s4567);
-    vMin.s01 = fmin(vMin.s01, vMin.s23);
-    vMin.s0 = fmin(vMin.s0, vMin.s1);
-    fMin = fmin(vMin.s0, fMin);
-    return fMin;
-}
-
-__kernel void oclSignedAdd(__global fp_t *ltData,__global fp_t *rtData,__global fp_t *otData)
-{
-    const unsigned int id = get_global_id(0);
-    otData[id] = ltData[id] + rtData[id];
-}
-
-__kernel void oclSignedSub(__global fp_t *ltData,__global fp_t *rtData,__global fp_t *otData)
-{
-    const unsigned int id = get_global_id(0);
-    otData[id] = ltData[id] - rtData[id];
-}
-
-__kernel void oclSignedMul(__global fp_t *ltData,__global fp_t *rtData,__global fp_t *otData)
-{
-    int id = get_global_id(0);
-    otData[id] = ltData[id] * rtData[id];
-}
-
-__kernel void oclSignedDiv(__global fp_t *ltData,__global fp_t *rtData,__global fp_t *otData)
-{
-    const unsigned int id = get_global_id(0);
-    fp_t divisor = rtData[id];
-    if ( divisor != 0 )
-        otData[id] = ltData[id] / divisor;
-    else
-        otData[id] = 0.0;
-}
-
-__kernel void oclFormulaMin(__global fp_t *input,__global uint *start,__global uint *end,__global fp_t *output)
-{
-    const unsigned int id = get_global_id(0);
-    fp_t fVal = oclMin(id,input,start,end);
-    output[id] = fVal ;
-}
-
-__kernel void oclFormulaMax(__global fp_t *input,__global uint *start,__global uint *end,__global fp_t *output)
-{
-    const unsigned int id = get_global_id(0);
-    fp_t fVal = oclMax(id,input,start,end);
-    output[id] = fVal ;
-}
-//Sum
-__kernel void oclFormulaSum(__global fp_t *input,__global uint *start,__global uint *end,__global fp_t *output)
-{
-    const unsigned int nId = get_global_id(0);
-    fp_t fSum = 0.0;
-    for(int i = start[nId]; i<=end[nId]; i++)
-        fSum += input[i];
-    output[nId] = fSum ;
-}
-//Count
-__kernel void oclFormulaCount(__global uint *start,__global uint *end,__global fp_t *output)
-{
-    const unsigned int nId = get_global_id(0);
-    output[nId] = end[nId] - start[nId] + 1;
-}
-
-__kernel void oclFormulaAverage(__global fp_t *input,__global uint *start,__global uint *end,__global fp_t *output)
-{
-    const unsigned int id = get_global_id(0);
-    fp_t fVal = oclAverage(id,input,start,end);
-    output[id] = fVal ;
-
-}
-//Sumproduct
-__kernel void oclFormulaSumproduct(__global fp_t *firstCol,__global uint* npSumSize,__global fp_t *output,uint nMatixSize)
-{
-    const unsigned int id = get_global_id(0);
-    unsigned int nSumSize = npSumSize[id];
-    fp_t fSum = 0.0;
-    for(int i=0;i<nSumSize;i++)
-        fSum += firstCol[i + nMatixSize * id];
-    output[id] = fSum;
-}
-
-__kernel void oclAverageDelta(__global fp_t *values, __global fp_t *subtract, uint start, uint end, __global fp_t *output)
-{
-    const unsigned int id = get_global_id(0);
-
-    // Average
-    fp_t fSum = 0.0;
-    for(int i = start; i < end; i++)
-        fSum += values[i];
-    fp_t fVal = fSum/(end-start);
-
-    // Subtract & output
-    output[id] = fVal - subtract[id];
-}
-
-__kernel void oclMaxDelta(__global fp_t *values, __global fp_t *subtract, uint start, uint end, __global fp_t *output)
-{
-    const unsigned int id = get_global_id(0);
-
-    // Max
-    fp_t fMaxVal = values[start];
-    for ( int i = start + 1; i < end; i++ )
-    {
-        if(values[i]>fMaxVal)
-            fMaxVal = values[i];
-    }
-
-    // Subtract & output
-    output[id] = fMaxVal - subtract[id];
-}
-
-__kernel void oclMinDelta(__global fp_t *values, __global fp_t *subtract, uint start, uint end, __global fp_t *output)
-{
-    const unsigned int id = get_global_id(0);
-
-    // Min
-    fp_t fMinVal = values[start];
-    for ( int i = start + 1; i < end; i++ )
-    {
-        if(values[i]<fMinVal)
-            fMinVal = values[i];
-    }
-
-    // Subtract & output
-    output[id] = fMinVal - subtract[id];
-}
-
-__kernel void oclSubDelta( fp_t ltData, __global fp_t *rtData, __global fp_t *outData )
-{
-    const unsigned int id = get_global_id(0);
-    outData[id] = ltData - rtData[id];
-}
-
-__kernel void oclFormulaMtxInv(__global fp_t * fpMatrixInput, __global fp_t * fpP, int nOffset, int nMax)
-{
-    //get the global id of the workitem
-    int nId = get_global_id(0);
-    int nDimension = get_global_size(0);
-    fp_t dMovebuffer;
-    dMovebuffer = fpMatrixInput[nOffset*nDimension+nId];
-    fpMatrixInput[nOffset*nDimension+nId] = fpMatrixInput[nMax*nDimension+nId];
-    fpMatrixInput[nMax*nDimension+nId] = dMovebuffer;
-
-    dMovebuffer = fpP[nOffset*nDimension+nId];
-    fpP[nOffset*nDimension+nId] = fpP[nMax*nDimension+nId];
-    fpP[nMax*nDimension+nId] = dMovebuffer;
-}
-__kernel void oclMatrixSolve(__global fp_t * fpMatrixInput,__global fp_t * fpMatrixOutput,__global fp_t * fpP,__global fp_t * fpY,__global uint* npDim)
-{
-    int nId = get_global_id(0);
-    int nDimension = npDim[nId];
-    fp_t fsum = 0.0;
-    for ( int yi=0; yi < nDimension; yi++ )
-    {
-        fsum = 0.0;
-        for ( int yj=0; yj < nDimension; yj++ )
-        {
-            fsum += fpMatrixInput[yi*nDimension+yj] * fpY[nId+yj*nDimension];
-        }
-
-        fpY[nId+yi*nDimension] = fpP[yi*nDimension+nId] - fsum;
-    }
-    for ( int xi = nDimension - 1; xi >= 0; xi-- )
-    {
-        fsum = 0.0;
-        for ( int xj = 0; xj < nDimension; xj++ )
-        {
-            fsum += fpMatrixInput[xi*nDimension+xj] * fpMatrixOutput[nId+nDimension*xj];
-        }
-        fpMatrixOutput[nId+xi*nDimension] = (fpY[xi*nDimension+nId] - fsum) / fpMatrixInput[xi*nDimension+xi];
-    }
-}
-
-__kernel void oclAverageAdd(__global fp_t *values,__global fp_t *addend, __global uint *startArray, __global uint *endArray, __global fp_t *output)
-{
-    const unsigned int id = get_global_id(0);
-    fp_t fVal = oclAverage(id,values,startArray,endArray);
-    output[id] = fVal + addend[id];
-}
-
-__kernel void oclAverageSub(__global fp_t *values,__global fp_t *subtract, __global uint *startArray, __global uint *endArray, __global fp_t *output)
-{
-    const unsigned int id = get_global_id(0);
-    fp_t fVal = oclAverage(id,values,startArray,endArray);
-    output[id] = fVal - subtract[id];
-}
-
-__kernel void oclAverageMul(__global fp_t *values,__global fp_t *multiplier, __global uint *startArray, __global uint *endArray, __global fp_t *output)
-{
-    const unsigned int id = get_global_id(0);
-    fp_t fVal = oclAverage(id,values,startArray,endArray);
-    output[id] = fVal * multiplier[id];
-}
-__kernel void oclAverageDiv(__global fp_t *values,__global fp_t *div, __global uint *startArray, __global uint *endArray, __global fp_t *output)
-{
-    const unsigned int id = get_global_id(0);
-    fp_t fVal = oclAverage(id,values,startArray,endArray);
-    fp_t divisor = div[id];
-    if ( divisor != 0 )
-        output[id] = fVal / divisor;
-    else
-        output[id] = 0.0;
-}
-
-__kernel void oclMinAdd(__global fp_t *values, __global fp_t *addend, __global uint *startArray, __global uint *endArray, __global fp_t *output)
-{
-    const unsigned int id = get_global_id(0);
-    fp_t fMin = oclMin(id,values,startArray,endArray);
-    output[id] = fMin + addend[id];
-}
-
-__kernel void oclMinSub(__global fp_t *values, __global fp_t *subtract, __global uint *startArray, __global uint *endArray, __global fp_t *output)
-{
-    const unsigned int id = get_global_id(0);
-    fp_t fMin = oclMin(id,values,startArray,endArray);
-    output[id] = fMin - subtract[id];
-}
-__kernel void oclMinMul(__global fp_t *values, __global fp_t *multiplier, __global uint *startArray, __global uint *endArray, __global fp_t *output)
-{
-    const unsigned int id = get_global_id(0);
-    fp_t fMin = oclMin(id,values,startArray,endArray);
-    output[id] = fMin * multiplier[id];
-}
-__kernel void oclMinDiv(__global fp_t *values, __global fp_t *div, __global uint *startArray, __global uint *endArray, __global fp_t *output)
-{
-    const unsigned int id = get_global_id(0);
-    fp_t fMin = oclMin(id,values,startArray,endArray);
-    fp_t divisor = div[id];
-    if ( divisor != 0 )
-        output[id] = fMin / divisor;
-    else
-        output[id] = 0.0;
-}
-__kernel void oclMaxAdd(__global fp_t *values, __global fp_t *addend, __global uint *startArray, __global uint *endArray, __global fp_t *output)
-{
-    const unsigned int id = get_global_id(0);
-    fp_t fMax = oclMax(id,values,startArray,endArray);
-    output[id] = fMax + addend[id];
-}
-
-__kernel void oclMaxSub(__global fp_t *values, __global fp_t *subtract, __global uint *startArray, __global uint *endArray, __global fp_t *output)
-{
-    const unsigned int id = get_global_id(0);
-    fp_t fMax = oclMax(id,values,startArray,endArray);
-    output[id] = fMax - subtract[id];
-}
-__kernel void oclMaxMul(__global fp_t *values, __global fp_t *multiplier, __global uint *startArray, __global uint *endArray, __global fp_t *output)
-{
-    const unsigned int id = get_global_id(0);
-    fp_t fMax = oclMax(id,values,startArray,endArray);
-    output[id] = fMax * multiplier[id];
-}
-__kernel void oclMaxDiv(__global fp_t *values, __global fp_t *div, __global uint *startArray, __global uint *endArray, __global fp_t *output)
-{
-    const unsigned int id = get_global_id(0);
-    fp_t fMax = oclMax(id,values,startArray,endArray);
-    fp_t divisor = div[id];
-    if ( divisor != 0 )
-        output[id] = fMax / divisor;
-    else
-        output[id] = 0.0;
-}
-
-__kernel void oclSub( fp_t ltData, __global fp_t *rtData, __global fp_t *outData )
-{
-    const unsigned int id = get_global_id(0);
-    outData[id] = ltData - rtData[id];
-}
-);
-
-}}
-
-#endif // USE_EXTERNAL_KERNEL
-#endif //_OCL_KERNEL_H_
-/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sc/source/core/opencl/openclwrapper.cxx b/sc/source/core/opencl/openclwrapper.cxx
index 61eed49..aa41112 100644
--- a/sc/source/core/opencl/openclwrapper.cxx
+++ b/sc/source/core/opencl/openclwrapper.cxx
@@ -19,7 +19,6 @@
 
 #include "sal/config.h"
 #include <osl/file.hxx>
-#include "oclkernels.hxx"
 
 #include <stdio.h>
 #include <stdlib.h>
@@ -43,8 +42,6 @@ using namespace std;
 
 namespace sc { namespace opencl {
 
-Kernel::Kernel( const char* pName ) : mpName(pName), mpKernel(NULL) {}
-
 GPUEnv OpenclDevice::gpuEnv;
 int OpenclDevice::isInited =0;
 
@@ -68,12 +65,6 @@ OString generateMD5(const void* pData, size_t length)
     return aBuffer.makeStringAndClear();
 }
 
-OString generateHashForSource()
-{
-    size_t nLength = strlen(kernel_src);
-    return generateMD5(kernel_src, nLength);
-}
-
 OString getCacheFolder()
 {
     OUString url("${$BRAND_BASE_DIR/" LIBO_ETC_FOLDER "/" SAL_CONFIGFILE("bootstrap") ":UserInstallation}/cache/");
@@ -86,6 +77,15 @@ OString getCacheFolder()
 
 void clearCache()
 {
+#if 0
+    // We used to delete all files that did not end with the hash of
+    // the static kernel source string from oclkernels.hxx. But as
+    // those static kernels were not used for anything, it was
+    // pointless, that hash never changed. The static kernels are now
+    // removed, their hash is not part of the .bin file names any
+    // more.  So there is little this function can do until we come up
+    // with some other way to figure out which cached .bin files are
+    // "current".
     OUString aCacheDirURL(rtl::OStringToOUString(OpenclDevice::maCacheFolder, RTL_TEXTENCODING_UTF8));
     osl::Directory aCacheDir(aCacheDirURL);
     osl::FileBase::RC status = aCacheDir.open();
@@ -93,7 +93,6 @@ void clearCache()
         return;
 
     osl::DirectoryItem aItem;
-    OUString aSourceString = rtl::OStringToOUString(OpenclDevice::maSourceHash + ".bin", RTL_TEXTENCODING_UTF8);
     while(osl::FileBase::E_None == aCacheDir.getNextItem(aItem))
     {
         osl::FileStatus aFileStatus(osl_FileStatus_Mask_FileName|osl_FileStatus_Mask_FileURL);
@@ -104,7 +103,7 @@ void clearCache()
         OUString aFileName = aFileStatus.getFileName();
         if(aFileName.endsWith(".bin"))
         {
-            if(!aFileName.endsWith(aSourceString))
+            if ( file is in some way obsolete )
             {
                 // delete the file
                 OUString aFileUrl = aFileStatus.getFileURL();
@@ -112,11 +111,11 @@ void clearCache()
             }
         }
     }
+#endif
 }
 
 }
 
-OString OpenclDevice::maSourceHash = generateHashForSource();
 OString OpenclDevice::maCacheFolder = getCacheFolder();
 
 int OpenclDevice::releaseOpenclRunEnv()
@@ -126,64 +125,11 @@ int OpenclDevice::releaseOpenclRunEnv()
     return 1;
 }
 
-namespace {
-
-const char* pKernelNames[] = {
-
-    "oclFormulaMin",
-    "oclFormulaMax",
-    "oclFormulaSum",
-    "oclFormulaCount",
-    "oclFormulaAverage",
-    "oclFormulaSumproduct",
-    "oclFormulaMtxInv",
-
-    "oclSignedAdd",
-    "oclSignedSub",
-    "oclSignedMul",
-    "oclSignedDiv",
-    "oclAverageDelta",
-    "oclMaxDelta",
-    "oclMinDelta",
-    "oclSubDelta",
-    "oclLUDecomposition",
-    "oclAverageDeltaRPN",
-    "oclMaxDeltaRPN",
-    "oclMinDeltaRPN",
-    "oclMoreColArithmeticOperator",
-    "oclColumnH",
-    "oclColumnL",
-    "oclColumnN",
-    "oclColumnJ",
-    "oclMaxSub",
-    "oclAverageSub",
-    "oclMinSub",
-    "oclMaxAdd",
-    "oclAverageAdd",
-    "oclMinAdd",
-    "oclMaxMul",
-    "oclAverageMul"
-    "oclMinMul",
-    "oclMaxDiv",
-    "oclAverageDiv"
-    "oclMinDiv",
-    "oclSub",
-
-    "oclMatrixSolve"
-};
-
-}
-
 int OpenclDevice::registOpenclKernel()
 {
     if ( !gpuEnv.mnIsUserCreated )
         memset( &gpuEnv, 0, sizeof(gpuEnv) );
 
-    gpuEnv.mnFileCount = 0; //argc;
-
-    for (size_t i = 0, n = SAL_N_ELEMENTS(pKernelNames); i < n; ++i)
-        gpuEnv.maKernels.push_back(Kernel(pKernelNames[i]));
-
     return 0;
 }
 
@@ -196,32 +142,6 @@ int OpenclDevice::setKernelEnv( KernelEnv *envInfo )
     return 1;
 }
 
-Kernel* OpenclDevice::fetchKernel( const char *kernelName )
-{
-    cl_int nStatus;
-    for (size_t i = 0, n = gpuEnv.maKernels.size(); i < n; ++i)
-    {
-        Kernel* pKernel = &gpuEnv.maKernels[i];
-        if (!strcasecmp(kernelName, pKernel->mpName))
-        {
-            printf("found the kernel named %s.\n", kernelName);
-            if (!pKernel->mpKernel && gpuEnv.mpArryPrograms[0])
-            {
-                pKernel->mpKernel = clCreateKernel(gpuEnv.mpArryPrograms[0], kernelName, &nStatus);
-                if (nStatus != CL_SUCCESS)
-                    pKernel->mpKernel = NULL;
-
-                printf("Kernel named '%s' has been compiled\n", kernelName);
-            }
-
-            return pKernel->mpKernel ?  pKernel : NULL;
-        }
-    }
-
-    printf("No kernel named %s found.\n", kernelName);
-    return NULL;
-}
-
 namespace {
 
 OString createFileName(cl_device_id deviceId, const char* clFileName)
@@ -252,7 +172,7 @@ OString createFileName(cl_device_id deviceId, const char* clFileName)
     OString aHash = generateMD5(aString.getStr(), aString.getLength());
 
     return OpenclDevice::maCacheFolder + fileName + "-" +
-        aHash + "-" + OpenclDevice::maSourceHash + ".bin";
+        aHash + ".bin";
 }
 
 }
@@ -402,27 +322,11 @@ int OpenclDevice::initOpenclAttr( OpenCLEnv * env )
 
 int OpenclDevice::releaseOpenclEnv( GPUEnv *gpuInfo )
 {
-    int clStatus = 0;
-
     if ( !isInited )
     {
         return 1;
     }
 
-    // Release all cached kernels.
-    for (size_t i = 0, n = gpuInfo->maKernels.size(); i < n; ++i)
-        clReleaseKernel(gpuInfo->maKernels[i].mpKernel);
-    gpuInfo->maKernels.clear();
-
-    for (int i = 0; i < gpuEnv.mnFileCount; i++)
-    {
-        if ( gpuEnv.mpArryPrograms[i] )
-        {
-            clStatus = clReleaseProgram( gpuEnv.mpArryPrograms[i] );
-            CHECK_OPENCL( clStatus, "clReleaseProgram" );
-            gpuEnv.mpArryPrograms[i] = NULL;
-        }
-    }
     if ( gpuEnv.mpCmdQueue )
     {
         clReleaseCommandQueue( gpuEnv.mpCmdQueue );
@@ -440,23 +344,6 @@ int OpenclDevice::releaseOpenclEnv( GPUEnv *gpuInfo )
     return 1;
 }
 
-int OpenclDevice::cachedOfKernerPrg( const GPUEnv *gpuEnvCached, const char * clFileName )
-{
-    int i;
-    for ( i = 0; i < gpuEnvCached->mnFileCount; i++ )
-    {
-        if ( strcasecmp( gpuEnvCached->mArryKnelSrcFile[i], clFileName ) == 0 )
-        {
-            if ( gpuEnvCached->mpArryPrograms[i] != NULL )
-            {
-                return 1;
-            }
-        }
-    }
-
-    return 0;
-}
-
 namespace {
 
 bool buildProgram(const char* buildOption, GPUEnv* gpuInfo, int idx)
@@ -532,25 +419,6 @@ bool buildProgram(const char* buildOption, GPUEnv* gpuInfo, int idx)
 
 }
 
-bool OpenclDevice::buildProgramFromSource(const char* buildOption, GPUEnv* gpuInfo, const char* filename, int idx)
-{
-    cl_int clStatus = 0;
-    // create a CL program using the kernel source
-    fprintf(stderr, "Create kernel from source\n");
-    size_t source_size[1];
-
-    source_size[0] = strlen( kernel_src );
-    gpuInfo->mpArryPrograms[idx] = clCreateProgramWithSource( gpuInfo->mpContext, 1, &kernel_src,
-            source_size, &clStatus);
-
-    if(clStatus != CL_SUCCESS)
-        return false;
-
-    bool bSuccess = buildProgram(buildOption, gpuInfo, idx);
-    generatBinFromKernelSource( gpuInfo->mpArryPrograms[idx], filename );
-    return bSuccess;
-}
-
 bool OpenclDevice::buildProgramFromBinary(const char* buildOption, GPUEnv* gpuInfo, const char* filename, int idx)
 {
     size_t numDevices;
@@ -851,1706 +719,6 @@ int OpenclDevice::getOpenclState()
     return isInited;
 }
 
-OclCalc::OclCalc()
-{
-    fprintf(stderr,"OclCalc:: init opencl ...\n");
-    nFormulaColSize = 0;
-    nFormulaRowSize = 0;
-    nArithmeticLen = 0;
-    nFormulaLen = 0;
-    mpClmemSrcData = NULL;
-    mpClmemStartPos = NULL;
-    mpClmemEndPos = NULL;
-    mpClmemLeftData = NULL;
-    mpClmemRightData = NULL;
-    mpClmemMergeLfData = NULL;
-    mpClmemMergeRtData = NULL;
-    mpClmemMatixSumSize = NULL;
-    mpClmemeOp = NULL;
-}
-
-OclCalc::~OclCalc()
-{
-    releaseOclBuffer();
-}
-
-void OclCalc::releaseOclBuffer()
-{
-    cl_int clStatus = 0;
-    CHECK_OPENCL_RELEASE( clStatus, mpClmemSrcData );
-    CHECK_OPENCL_RELEASE( clStatus, mpClmemStartPos );
-    CHECK_OPENCL_RELEASE( clStatus, mpClmemEndPos );
-    CHECK_OPENCL_RELEASE( clStatus, mpClmemLeftData );
-    CHECK_OPENCL_RELEASE( clStatus, mpClmemRightData );
-    fprintf(stderr,"OclCalc:: opencl end ...\n");
-}
-
-/////////////////////////////////////////////////////////////////////////////
-
-bool OclCalc::createBuffer64Bits( double *&dpLeftData, double *&dpRightData, int nBufferSize )
-{
-    cl_int clStatus = 0;
-    setKernelEnv( &kEnv );
-
-    mpClmemLeftData = clCreateBuffer( kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR ),
-                          nBufferSize * sizeof(double), NULL, &clStatus );
-    CHECK_OPENCL( clStatus, "clCreateBuffer" );
-    mpClmemRightData = clCreateBuffer( kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR ),
-                           nBufferSize * sizeof(double), NULL, &clStatus );
-    CHECK_OPENCL( clStatus,"clCreateBuffer" );
-    dpLeftData = (double *)clEnqueueMapBuffer( kEnv.mpkCmdQueue,mpClmemLeftData,CL_TRUE,CL_MAP_WRITE,0,
-                               nBufferSize * sizeof(double),0,NULL,NULL,&clStatus );
-    CHECK_OPENCL( clStatus, "clCreateBuffer" );
-    clFinish(kEnv.mpkCmdQueue);
-    dpRightData = (double *)clEnqueueMapBuffer( kEnv.mpkCmdQueue,mpClmemRightData,CL_TRUE,CL_MAP_WRITE,0,
-                                nBufferSize * sizeof(double),0,NULL,NULL,&clStatus );
-    CHECK_OPENCL( clStatus, "clCreateBuffer" );
-    clFinish( kEnv.mpkCmdQueue );
-    //printf("In CreateBuffer, pptrr is %d,%d,%d\n",fpSrcData,npStartPos,npEndPos);
-    return true;
-}
-
-bool OclCalc::mapAndCopy64Bits(const double *dpTempSrcData,unsigned int *unStartPos,unsigned int *unEndPos,int nBufferSize ,int nRowsize)
-{
-    cl_int clStatus = 0;
-    double * dpSrcDataMap = (double *)clEnqueueMapBuffer( kEnv.mpkCmdQueue, mpClmemSrcData, CL_TRUE,CL_MAP_WRITE, 0,
-                                          nBufferSize * sizeof(double), 0, NULL, NULL, &clStatus );
-    CHECK_OPENCL( clStatus, "clEnqueueMapBuffer" );
-    clFinish( kEnv.mpkCmdQueue );
-    unsigned int *npStartPosMap = (uint *)clEnqueueMapBuffer( kEnv.mpkCmdQueue, mpClmemStartPos, CL_TRUE,CL_MAP_WRITE, 0,
-                                              nRowsize * sizeof(uint), 0, NULL, NULL, &clStatus );
-    CHECK_OPENCL( clStatus, "clEnqueueMapBuffer" );
-    clFinish( kEnv.mpkCmdQueue );
-    unsigned int *npEndPosMap = (uint *)clEnqueueMapBuffer( kEnv.mpkCmdQueue, mpClmemEndPos, CL_TRUE, CL_MAP_WRITE, 0,
-                                            nRowsize * sizeof(uint), 0, NULL, NULL, &clStatus );
-    CHECK_OPENCL( clStatus, "clEnqueueMapBuffer" );
-    clFinish( kEnv.mpkCmdQueue );
-    for(int i=0;i<nBufferSize;i++)
-        dpSrcDataMap[i] = dpTempSrcData[i];
-    for(int i=0;i<nRowsize;i++)
-    {
-        npStartPosMap[i] = unStartPos[i];
-        npEndPosMap[i] = unEndPos[i];
-    }
-    clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, mpClmemSrcData, dpSrcDataMap, 0, NULL, NULL );
-    CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" );
-    clFinish( kEnv.mpkCmdQueue );
-    clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, mpClmemStartPos, npStartPosMap, 0, NULL, NULL );
-    CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" );
-    clFinish( kEnv.mpkCmdQueue );
-    clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, mpClmemEndPos, npEndPosMap, 0, NULL, NULL );
-    CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" );
-    clFinish( kEnv.mpkCmdQueue );
-    return true;
-}
-
-bool OclCalc::mapAndCopy64Bits(const double *dpTempLeftData,const double *dpTempRightData,int nBufferSize )
-{
-    cl_int clStatus = 0;
-    double *dpLeftDataMap = (double *)clEnqueueMapBuffer( kEnv.mpkCmdQueue, mpClmemLeftData, CL_TRUE, CL_MAP_WRITE,
-        0, nBufferSize * sizeof(double), 0, NULL, NULL, &clStatus );
-    CHECK_OPENCL( clStatus, "clEnqueueMapBuffer" );
-    clFinish( kEnv.mpkCmdQueue );
-    double *dpRightDataMap = (double *)clEnqueueMapBuffer( kEnv.mpkCmdQueue, mpClmemRightData, CL_TRUE, CL_MAP_WRITE,
-        0, nBufferSize * sizeof(double), 0, NULL, NULL, &clStatus );
-    CHECK_OPENCL( clStatus, "clEnqueueMapBuffer" );
-    clFinish( kEnv.mpkCmdQueue );
-    for ( int i = 0; i < nBufferSize; i++ )
-    {
-        dpLeftDataMap[i] = dpTempLeftData[i];
-        dpRightDataMap[i] = dpTempRightData[i];
-    }
-    clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, mpClmemLeftData, dpLeftDataMap, 0, NULL, NULL );
-    CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" );
-    clFinish( kEnv.mpkCmdQueue );
-    clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, mpClmemRightData, dpRightDataMap, 0, NULL, NULL );
-    CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" );
-    clFinish( kEnv.mpkCmdQueue );
-    return true;
-}
-
-bool OclCalc::mapAndCopyArithmetic64Bits( const double *dpMoreColArithmetic, int nBufferSize )
-{
-    cl_int clStatus = 0;
-    double *dpLeftDataMap = (double *)clEnqueueMapBuffer( kEnv.mpkCmdQueue, mpClmemLeftData, CL_TRUE, CL_MAP_WRITE,
-        0, nBufferSize * sizeof(double), 0, NULL, NULL, &clStatus );
-    CHECK_OPENCL( clStatus, "clEnqueueMapBuffer" );
-    clFinish( kEnv.mpkCmdQueue );
-    CHECK_OPENCL( clStatus, "clEnqueueMapBuffer" );
-    clFinish( kEnv.mpkCmdQueue );
-    for ( int i = 0; i < nBufferSize; i++ )
-    {
-        dpLeftDataMap[i] = dpMoreColArithmetic[i];
-    }
-    clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, mpClmemLeftData, dpLeftDataMap, 0, NULL, NULL );
-    CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" );
-    clFinish( kEnv.mpkCmdQueue );
-    return true;
-}
-
-bool OclCalc::mapAndCopyMoreColArithmetic64Bits( const double *dpMoreColArithmetic, int nBufferSize, uint *npeOp, uint neOpSize )
-{
-    cl_int clStatus = 0;
-    double *dpLeftDataMap = (double *)clEnqueueMapBuffer( kEnv.mpkCmdQueue, mpClmemLeftData, CL_TRUE, CL_MAP_WRITE,
-        0, nBufferSize * sizeof(double), 0, NULL, NULL, &clStatus );
-    CHECK_OPENCL( clStatus, "clEnqueueMapBuffer" );
-    clFinish( kEnv.mpkCmdQueue );
-    uint *dpeOpMap = (uint *)clEnqueueMapBuffer( kEnv.mpkCmdQueue, mpClmemeOp, CL_TRUE, CL_MAP_WRITE,
-        0, neOpSize * sizeof(uint), 0, NULL, NULL, &clStatus );
-    CHECK_OPENCL( clStatus, "clEnqueueMapBuffer" );
-    clFinish( kEnv.mpkCmdQueue );
-    for ( int i = 0; i < nBufferSize; i++ )
-    {
-        dpLeftDataMap[i] = dpMoreColArithmetic[i];
-    }
-    for( uint i = 0; i<neOpSize; i++)
-    {
-        dpeOpMap[i] = npeOp[i];
-    }
-    clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, mpClmemLeftData, dpLeftDataMap, 0, NULL, NULL );
-    CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" );
-    clFinish( kEnv.mpkCmdQueue );
-    clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, mpClmemeOp, dpeOpMap, 0, NULL, NULL );
-    CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" );
-    clFinish( kEnv.mpkCmdQueue );
-    return true;
-}
-
-bool OclCalc::createFormulaBuf64Bits( int nBufferSize, int rowSize )
-{
-    cl_int clStatus = 0;
-    setKernelEnv( &kEnv );
-    mpClmemSrcData   = clCreateBuffer( kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE),
-        nBufferSize * sizeof(double), NULL, &clStatus );
-    CHECK_OPENCL( clStatus, "clCreateBuffer" );
-    nFormulaLen = nBufferSize;
-    mpClmemStartPos   = clCreateBuffer( kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE),
-        rowSize * sizeof(unsigned int), NULL, &clStatus );
-    CHECK_OPENCL( clStatus, "clCreateBuffer" );
-    mpClmemEndPos   = clCreateBuffer( kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE),
-        rowSize * sizeof(unsigned int), NULL, &clStatus );
-    CHECK_OPENCL( clStatus, "clCreateBuffer" );
-    clFinish( kEnv.mpkCmdQueue );
-    return true;
-}
-
-bool OclCalc::createArithmeticOptBuf64Bits( int nBufferSize )
-{
-    cl_int clStatus = 0;
-    nArithmeticLen = nBufferSize;
-    setKernelEnv( &kEnv );
-    mpClmemLeftData = clCreateBuffer( kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE),
-        nBufferSize * sizeof(double), NULL, &clStatus);
-    CHECK_OPENCL( clStatus, "clCreateBuffer" );
-    mpClmemRightData = clCreateBuffer( kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE),
-        nBufferSize * sizeof(double), NULL, &clStatus );
-    CHECK_OPENCL( clStatus, "clCreateBuffer" );
-    clFinish( kEnv.mpkCmdQueue );
-    return true;
-}
-
-bool OclCalc::createMoreColArithmeticBuf64Bits( int nBufferSize, int neOpSize )
-{
-    cl_int clStatus = 0;
-    nArithmeticLen = nBufferSize;
-    setKernelEnv( &kEnv );
-    mpClmemLeftData = clCreateBuffer( kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE),
-        nBufferSize * sizeof(double), NULL, &clStatus);
-    CHECK_OPENCL( clStatus, "clCreateBuffer" );
-    mpClmemeOp = clCreateBuffer( kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE),
-        neOpSize * sizeof(uint), NULL, &clStatus );
-    CHECK_OPENCL( clStatus, "clCreateBuffer" );
-    clFinish( kEnv.mpkCmdQueue );
-    return true;
-}
-
-bool OclCalc::oclHostArithmeticOperator64Bits( const char* aKernelName, double *&rResult,int nRowSize )
-{
-    cl_int clStatus = 0;
-    size_t global_work_size[1];
-    Kernel* pKernel = fetchKernel(aKernelName);
-    if (!pKernel)
-        return false;
-
-    clFinish( kEnv.mpkCmdQueue );
-    cl_mem clResult = clCreateBuffer( kEnv.mpkContext, CL_MEM_READ_WRITE,
-                          nRowSize * sizeof(double), NULL, &clStatus);
-    CHECK_OPENCL( clStatus, "clCreateBuffer" );
-
-    clStatus = clSetKernelArg(pKernel->mpKernel, 0, sizeof(cl_mem), (void *)&mpClmemLeftData);
-    CHECK_OPENCL( clStatus, "clSetKernelArg" );
-    clStatus = clSetKernelArg(pKernel->mpKernel, 1, sizeof(cl_mem), (void *)&mpClmemRightData);
-    CHECK_OPENCL( clStatus, "clSetKernelArg" );
-    clStatus = clSetKernelArg(pKernel->mpKernel, 2, sizeof(cl_mem), (void *)&clResult);
-    CHECK_OPENCL( clStatus, "clSetKernelArg" );
-
-    global_work_size[0] = nRowSize;
-    clStatus = clEnqueueNDRangeKernel(
-        kEnv.mpkCmdQueue, pKernel->mpKernel, 1, NULL, global_work_size, NULL, 0, NULL, NULL);
-    CHECK_OPENCL( clStatus, "clEnqueueNDRangeKernel" );
-    clFinish( kEnv.mpkCmdQueue );
-
-    double *dpOutPut = (double *) clEnqueueMapBuffer( kEnv.mpkCmdQueue, clResult, CL_TRUE,CL_MAP_READ,
-                            0, nRowSize*sizeof(double), 0, NULL, NULL, &clStatus );
-    CHECK_OPENCL( clStatus, "clEnqueueMapBuffer" );
-    for ( int i = 0; i < nRowSize; i++ )
-        rResult[i] = dpOutPut[i];
-    clFinish( kEnv.mpkCmdQueue );
-    clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, clResult, rResult, 0, NULL, NULL );
-    CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" );
-
-    clStatus = clFinish( kEnv.mpkCmdQueue );
-    CHECK_OPENCL( clStatus, "clFinish" );
-    clStatus = clReleaseMemObject( clResult );
-    CHECK_OPENCL( clStatus, "clReleaseMemObject" );
-    return true;
-}
-
-bool OclCalc::oclMoreColHostArithmeticOperator64Bits( int nDataSize,int neOpSize,double *rResult, int nRowSize )
-{
-    cl_int clStatus = 0;
-    size_t global_work_size[1];
-    const char *aKernelName = "oclMoreColArithmeticOperator";
-    Kernel* pKernel = fetchKernel(aKernelName);
-    if (!pKernel)
-        return false;
-
-    cl_mem clResult = clCreateBuffer( kEnv.mpkContext, CL_MEM_READ_WRITE, nRowSize * sizeof(double), NULL, &clStatus );
-    CHECK_OPENCL( clStatus, "clCreateBuffer" );
-    clStatus = clSetKernelArg(pKernel->mpKernel, 0, sizeof(cl_mem), (void *)&mpClmemLeftData);
-    CHECK_OPENCL( clStatus, "clSetKernelArg" );
-    clStatus = clSetKernelArg(pKernel->mpKernel, 1, sizeof(cl_int), (void *)&nDataSize);
-    CHECK_OPENCL( clStatus, "clSetKernelArg" );
-    clStatus = clSetKernelArg(pKernel->mpKernel, 2, sizeof(cl_mem), (void *)&mpClmemeOp);
-    CHECK_OPENCL( clStatus, "clSetKernelArg" );
-    clStatus = clSetKernelArg(pKernel->mpKernel, 3, sizeof(cl_int), (void *)&neOpSize);
-    CHECK_OPENCL( clStatus, "clSetKernelArg" );
-    clStatus = clSetKernelArg(pKernel->mpKernel, 4, sizeof(cl_mem), (void *)&clResult);
-    CHECK_OPENCL( clStatus, "clSetKernelArg" );
-    global_work_size[0] = nRowSize;
-    clStatus = clEnqueueNDRangeKernel(
-        kEnv.mpkCmdQueue, pKernel->mpKernel, 1, NULL, global_work_size, NULL, 0, NULL, NULL);
-    CHECK_OPENCL( clStatus, "clEnqueueNDRangeKernel" );
-    clFinish( kEnv.mpkCmdQueue );
-    double * hostMapResult = (double *)clEnqueueMapBuffer( kEnv.mpkCmdQueue, clResult, CL_TRUE, CL_MAP_READ, 0,
-        nRowSize*sizeof(double), 0, NULL, NULL, &clStatus );
-    CHECK_OPENCL( clStatus, "clEnqueueMapBuffer" );
-    clFinish( kEnv.mpkCmdQueue );
-    for ( int i = 0; i < nRowSize; i++)
-        rResult[i] = hostMapResult[i]; // from gpu float type to cpu double type
-    clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, clResult, hostMapResult, 0, NULL, NULL );
-    CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" );
-    clStatus = clFinish(kEnv.mpkCmdQueue );
-    CHECK_OPENCL( clStatus, "clFinish" );
-    clStatus = clReleaseMemObject( clResult );
-    CHECK_OPENCL( clStatus, "clReleaseMemObject" );
-    return true;
-}
-
-bool OclCalc::oclHostArithmeticStash64Bits( const char* aKernelName, const double *dpLeftData, const double *dpRightData, double *rResult,int nRowSize )
-{
-    cl_int clStatus = 0;
-    size_t global_work_size[1];
-    setKernelEnv( &kEnv );
-    Kernel* pKernel = fetchKernel(aKernelName);
-    if (!pKernel)
-        return false;
-
-    clFinish( kEnv.mpkCmdQueue );
-
-    cl_mem clLeftData = clCreateBuffer( kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR),
-    nRowSize * sizeof(double), (void *)dpLeftData, &clStatus);
-    CHECK_OPENCL( clStatus, "clCreateBuffer" );
-    cl_mem clRightData = clCreateBuffer( kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR),
-        nRowSize * sizeof(double), (void *)dpRightData, &clStatus );
-    CHECK_OPENCL( clStatus, "clCreateBuffer" );
-
-    cl_mem clResult = clCreateBuffer( kEnv.mpkContext, CL_MEM_READ_WRITE,
-                          nRowSize * sizeof(double), NULL, &clStatus);
-    CHECK_OPENCL( clStatus, "clCreateBuffer" );
-
-    clStatus = clSetKernelArg(pKernel->mpKernel, 0, sizeof(cl_mem), (void *)&clLeftData);
-    CHECK_OPENCL( clStatus, "clSetKernelArg" );
-    clStatus = clSetKernelArg(pKernel->mpKernel, 1, sizeof(cl_mem), (void *)&clRightData);
-    CHECK_OPENCL( clStatus, "clSetKernelArg" );
-    clStatus = clSetKernelArg(pKernel->mpKernel, 2, sizeof(cl_mem), (void *)&clResult);
-    CHECK_OPENCL( clStatus, "clSetKernelArg" );
-
-    global_work_size[0] = nRowSize;
-    clStatus = clEnqueueNDRangeKernel(
-        kEnv.mpkCmdQueue, pKernel->mpKernel, 1, NULL, global_work_size, NULL, 0, NULL, NULL);
-    CHECK_OPENCL( clStatus, "clEnqueueNDRangeKernel" );
-    clFinish( kEnv.mpkCmdQueue );
-
-    clStatus = clEnqueueReadBuffer(kEnv.mpkCmdQueue, clResult, CL_TRUE, 0, nRowSize * sizeof(double), (double *)rResult, 0, NULL, NULL);
-    CHECK_OPENCL( clStatus, "clEnqueueReadBuffer" );
-
-    clStatus = clFinish( kEnv.mpkCmdQueue );
-    CHECK_OPENCL( clStatus, "clFinish" );
-    clStatus = clReleaseMemObject( clResult );
-    CHECK_OPENCL( clStatus, "clReleaseMemObject" );
-    clStatus = clReleaseMemObject( clLeftData );
-    CHECK_OPENCL( clStatus, "clReleaseMemObject" );
-    clStatus = clReleaseMemObject( clRightData );
-    CHECK_OPENCL( clStatus, "clReleaseMemObject" );
-    return true;
-}
-
-bool OclCalc::oclHostFormulaStash64Bits( const char* aKernelName, const double* dpSrcData, uint *nStartPos, uint *nEndPos, double *output, int nBufferSize, int size )
-{
-    cl_int clStatus = 0;
-    size_t global_work_size[1];
-    setKernelEnv( &kEnv );
-    Kernel* pKernel = fetchKernel(aKernelName);
-    if (!pKernel)
-        return false;
-
-    cl_mem clSrcData   = clCreateBuffer( kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR),
-        nBufferSize * sizeof(double), (void *)dpSrcData, &clStatus );
-    CHECK_OPENCL( clStatus, "clCreateBuffer" );
-     cl_mem clStartPos   = clCreateBuffer( kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR),
-        size * sizeof(unsigned int), (void *)nStartPos, &clStatus );
-    CHECK_OPENCL( clStatus, "clCreateBuffer" );
-    cl_mem clEndPos   = clCreateBuffer( kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR),
-        size * sizeof(unsigned int), (void *)nEndPos, &clStatus );
-    CHECK_OPENCL( clStatus, "clCreateBuffer" );
-
-    cl_mem outputCl = clCreateBuffer( kEnv.mpkContext, CL_MEM_READ_WRITE, size * sizeof(double), NULL, &clStatus );
-    CHECK_OPENCL( clStatus, "clCreateBuffer" );
-    clStatus = clSetKernelArg(pKernel->mpKernel, 0, sizeof(cl_mem),(void *)&clSrcData);
-    CHECK_OPENCL( clStatus, "clSetKernelArg" );
-    clStatus = clSetKernelArg(pKernel->mpKernel, 1, sizeof(cl_mem), (void *)&clStartPos);
-    CHECK_OPENCL( clStatus, "clSetKernelArg" );
-    clStatus = clSetKernelArg(pKernel->mpKernel, 2, sizeof(cl_mem), (void *)&clEndPos);
-    CHECK_OPENCL( clStatus, "clSetKernelArg" );
-    clStatus = clSetKernelArg(pKernel->mpKernel, 3, sizeof(cl_mem), (void *)&outputCl);
-    CHECK_OPENCL( clStatus, "clSetKernelArg" );
-    global_work_size[0] = size;
-    clStatus = clEnqueueNDRangeKernel(
-        kEnv.mpkCmdQueue, pKernel->mpKernel, 1, NULL, global_work_size, NULL, 0, NULL, NULL);
-    CHECK_OPENCL( clStatus, "clEnqueueNDRangeKernel" );
-    clFinish( kEnv.mpkCmdQueue );
-
-    clStatus = clEnqueueReadBuffer(kEnv.mpkCmdQueue, outputCl, CL_TRUE, 0, size * sizeof(double), (double *)output, 0, NULL, NULL);
-    CHECK_OPENCL( clStatus, "clReadBuffer" );
-    clFinish( kEnv.mpkCmdQueue );
-    clStatus = clFinish(kEnv.mpkCmdQueue );
-    CHECK_OPENCL( clStatus, "clFinish" );
-    clStatus = clReleaseMemObject( outputCl );
-    CHECK_OPENCL( clStatus, "clReleaseMemObject" );
-    clStatus = clReleaseMemObject( clSrcData );
-    CHECK_OPENCL( clStatus, "clReleaseMemObject" );
-    clStatus = clReleaseMemObject( clStartPos );
-    CHECK_OPENCL( clStatus, "clReleaseMemObject" );
-    clStatus = clReleaseMemObject( clEndPos );
-    CHECK_OPENCL( clStatus, "clReleaseMemObject" );
-    return true;
-}
-
-bool OclCalc::oclHostFormulaStatistics64Bits( const char* aKernelName, double *&output, int size )
-{
-    cl_int clStatus = 0;
-    size_t global_work_size[1];
-    Kernel* pKernel = fetchKernel(aKernelName);
-    if (!pKernel)
-        return false;
-
-    cl_mem outputCl = clCreateBuffer( kEnv.mpkContext, CL_MEM_READ_WRITE, size * sizeof(double), NULL, &clStatus );
-    CHECK_OPENCL( clStatus, "clCreateBuffer" );
-    clStatus = clSetKernelArg(pKernel->mpKernel, 0, sizeof(cl_mem),(void *)&mpClmemSrcData);
-    CHECK_OPENCL( clStatus, "clSetKernelArg" );
-    clStatus = clSetKernelArg(pKernel->mpKernel, 1, sizeof(cl_mem), (void *)&mpClmemStartPos);
-    CHECK_OPENCL( clStatus, "clSetKernelArg" );
-    clStatus = clSetKernelArg(pKernel->mpKernel, 2, sizeof(cl_mem), (void *)&mpClmemEndPos);
-    CHECK_OPENCL( clStatus, "clSetKernelArg" );
-    clStatus = clSetKernelArg(pKernel->mpKernel, 3, sizeof(cl_mem), (void *)&outputCl);
-    CHECK_OPENCL( clStatus, "clSetKernelArg" );
-    global_work_size[0] = size;
-    clStatus = clEnqueueNDRangeKernel(
-        kEnv.mpkCmdQueue, pKernel->mpKernel, 1, NULL, global_work_size, NULL, 0, NULL, NULL);
-    CHECK_OPENCL( clStatus, "clEnqueueNDRangeKernel" );
-    clFinish( kEnv.mpkCmdQueue );
-    double *dpOutPut = (double *) clEnqueueMapBuffer( kEnv.mpkCmdQueue, outputCl, CL_TRUE,CL_MAP_READ,
-                            0, size*sizeof(double), 0, NULL, NULL, &clStatus );
-    CHECK_OPENCL( clStatus, "clEnqueueMapBuffer" );
-    for ( int i = 0; i < size; i++ )
-    {
-        output[i] = dpOutPut[i];
-    }
-
-    CHECK_OPENCL( clStatus, "clEnqueueMapBuffer" );
-    clFinish( kEnv.mpkCmdQueue );
-    clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, outputCl, output, 0, NULL, NULL );
-    CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" );
-    clStatus = clFinish(kEnv.mpkCmdQueue );
-    CHECK_OPENCL( clStatus, "clFinish" );
-    clStatus = clReleaseMemObject( outputCl );
-    CHECK_OPENCL( clStatus, "clReleaseMemObject" );
-    return true;
-}
-
-bool OclCalc::oclHostFormulaCount64Bits( uint *npStartPos, uint *npEndPos, double *&dpOutput, int nSize )
-{
-    const char *cpKernelName = "oclFormulaCount";
-    Kernel* pKernel = fetchKernel(cpKernelName);
-    if (!pKernel)
-        return false;
-
-    cl_int clStatus;
-
-    size_t global_work_size[1];
-    clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, mpClmemStartPos, npStartPos, 0, NULL, NULL );
-    CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" );
-    clFinish( kEnv.mpkCmdQueue );
-    clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, mpClmemEndPos, npEndPos, 0, NULL, NULL);
-    CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" );
-    clFinish( kEnv.mpkCmdQueue );
-    cl_mem clpOutput = clCreateBuffer( kEnv.mpkContext,CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
-        nSize* sizeof(double), NULL, &clStatus );
-    CHECK_OPENCL( clStatus, "clCreateBuffer" );
-    clStatus = clSetKernelArg(pKernel->mpKernel, 0, sizeof(cl_mem), (void *)&mpClmemStartPos);
-    CHECK_OPENCL( clStatus,"clSetKernelArg");
-    clStatus = clSetKernelArg(pKernel->mpKernel, 1, sizeof(cl_mem), (void *)&mpClmemEndPos);
-    CHECK_OPENCL( clStatus,"clSetKernelArg");
-    clStatus = clSetKernelArg(pKernel->mpKernel, 2, sizeof(cl_mem), (void *)&clpOutput);
-    CHECK_OPENCL( clStatus, "clSetKernelArg" );
-    global_work_size[0] = nSize;
-    clStatus = clEnqueueNDRangeKernel(
-        kEnv.mpkCmdQueue, pKernel->mpKernel, 1, NULL, global_work_size, NULL, 0, NULL, NULL);
-    CHECK_OPENCL( clStatus, "clEnqueueNDRangeKernel" );
-    clFinish( kEnv.mpkCmdQueue );
-    dpOutput = (double *)clEnqueueMapBuffer( kEnv.mpkCmdQueue, clpOutput, CL_TRUE, CL_MAP_READ,
-        0, nSize*sizeof(double), 0, NULL, NULL, &clStatus );
-    CHECK_OPENCL( clStatus, "clEnqueueMapBuffer" );
-    clFinish( kEnv.mpkCmdQueue );
-    clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, clpOutput, dpOutput, 0, NULL, NULL );
-    CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" );
-    clStatus = clFinish( kEnv.mpkCmdQueue );
-    CHECK_OPENCL( clStatus, "clFinish" );
-    clStatus = clReleaseMemObject( clpOutput );
-    CHECK_OPENCL( clStatus, "clReleaseMemObject" );
-    return true;
-}
-
-/*
- * The dpsrcData is double rows,one col is the first column data,second is the second column data.if get a cell data range,the range
- *save the npStart array eg:a4-a8;b10-b14,the npStart will store a4,b10,and the npEnd will store a8,b14 range.So it can if(i +1)%2 to judge
- * the a cloumn or b cloumn npStart range.so as b bolumn.
- */
-bool OclCalc::oclHostFormulaSumProduct64Bits( double *dpSumProMergeLfData, double *dpSumProMergeRrData, uint *npSumSize, double *&dpOutput, int nSize )
-{
-    cl_int clStatus;
-    size_t global_work_size[1];
-    memset(dpOutput,0,nSize);
-    const char *cpFirstKernelName = "oclSignedMul";
-    const char *cpSecondKernelName = "oclFormulaSumproduct";
-    Kernel* pKernel1 = fetchKernel(cpFirstKernelName);
-    if (!pKernel1)
-        return false;
-
-    clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, mpClmemMergeLfData, dpSumProMergeLfData, 0, NULL, NULL );
-    CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" );
-    clFinish(kEnv.mpkCmdQueue);
-    clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, mpClmemMergeRtData, dpSumProMergeRrData, 0, NULL, NULL );
-    clFinish( kEnv.mpkCmdQueue );
-    CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" );
-    clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, mpClmemMatixSumSize, npSumSize, 0, NULL, NULL );
-    CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" );
-    clFinish( kEnv.mpkCmdQueue );
-    unsigned int nMulResultSize = nFormulaRowSize + nFormulaRowSize * nSize * nFormulaColSize - 1;
-    cl_mem clResult = clCreateBuffer( kEnv.mpkContext, CL_MEM_READ_WRITE, nMulResultSize * sizeof(double),
-        NULL, &clStatus );
-    CHECK_OPENCL( clStatus, "clCreateBuffer" );
-    clStatus = clSetKernelArg(pKernel1->mpKernel, 0, sizeof(cl_mem),(void *)&mpClmemMergeLfData);
-    CHECK_OPENCL( clStatus, "clSetKernelArg" );
-    clStatus = clSetKernelArg(pKernel1->mpKernel, 1, sizeof(cl_mem), (void *)&mpClmemMergeRtData);
-    CHECK_OPENCL( clStatus, "clSetKernelArg" );
-    clStatus = clSetKernelArg(pKernel1->mpKernel, 2, sizeof(cl_mem), (void *)&clResult);
-    CHECK_OPENCL( clStatus, "clSetKernelArg" );
-    global_work_size[0] = nMulResultSize;
-    clStatus = clEnqueueNDRangeKernel(
-        kEnv.mpkCmdQueue, pKernel1->mpKernel, 1, NULL, global_work_size, NULL, 0, NULL, NULL);
-    CHECK_OPENCL( clStatus, "clEnqueueNDRangeKernel" );
-    clFinish( kEnv.mpkCmdQueue );
-    clStatus = clReleaseMemObject( mpClmemMergeLfData );
-    CHECK_OPENCL( clStatus, "clReleaseMemObject" );
-    clStatus = clReleaseMemObject( mpClmemMergeRtData );
-    CHECK_OPENCL( clStatus, "clReleaseMemObject" );
-
-    Kernel* pKernel2 = fetchKernel(cpSecondKernelName);
-    if (!pKernel2)
-        return false;
-
-    cl_mem clpOutput = clCreateBuffer( kEnv.mpkContext, CL_MEM_READ_WRITE, nSize* sizeof(double), NULL, &clStatus );
-    CHECK_OPENCL( clStatus, "clCreateBuffer" );
-    cl_uint nMatixSize = nFormulaColSize * nFormulaRowSize;
-    clStatus = clSetKernelArg(pKernel2->mpKernel, 0, sizeof(cl_mem), (void *)&clResult);
-    CHECK_OPENCL( clStatus, "clSetKernelArg" );
-    clStatus = clSetKernelArg(pKernel2->mpKernel, 1, sizeof(cl_mem), (void *)&mpClmemMatixSumSize);
-    CHECK_OPENCL( clStatus, "clSetKernelArg" );
-    clStatus = clSetKernelArg(pKernel2->mpKernel, 2, sizeof(cl_mem), (void *)&clpOutput);
-    CHECK_OPENCL( clStatus, "clSetKernelArg" );
-    clStatus = clSetKernelArg(pKernel2->mpKernel, 3, sizeof(cl_uint), (void *)&nMatixSize);
-    CHECK_OPENCL( clStatus, "clSetKernelArg" );
-    global_work_size[0] = nSize;
-    clStatus = clEnqueueNDRangeKernel(
-        kEnv.mpkCmdQueue, pKernel2->mpKernel, 1, NULL, global_work_size, NULL, 0, NULL, NULL);
-    CHECK_OPENCL( clStatus, "clEnqueueNDRangeKernel" );
-    clFinish( kEnv.mpkCmdQueue );
-    double * outputMap = (double *)clEnqueueMapBuffer( kEnv.mpkCmdQueue, clpOutput, CL_TRUE, CL_MAP_READ,
-        0, nSize*sizeof(double), 0, NULL, NULL, &clStatus );
-    CHECK_OPENCL( clStatus, "clEnqueueMapBuffer" );
-    clFinish( kEnv.mpkCmdQueue );
-    for ( int i = 0; i < nSize; i++ )
-        dpOutput[i] = outputMap[i];
-    clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, clpOutput, outputMap, 0, NULL, NULL );
-    CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" );
-    clStatus = clFinish( kEnv.mpkCmdQueue );
-    CHECK_OPENCL( clStatus, "clFinish" );
-    clStatus = clReleaseMemObject( clResult );
-    CHECK_OPENCL( clStatus, "clReleaseKernel" );
-    clStatus = clReleaseMemObject( mpClmemMatixSumSize );
-    CHECK_OPENCL( clStatus, "clReleaseKernel" );
-    clStatus = clReleaseMemObject( clpOutput );
-    CHECK_OPENCL( clStatus, "clReleaseMemObject" );
-
-    return true;
-}
-
-bool OclCalc::createMoreColArithmeticBuf32Bits( int nBufferSize, int neOpSize )
-{
-    cl_int clStatus = 0;
-    nArithmeticLen = nBufferSize;
-    setKernelEnv( &kEnv );
-    mpClmemLeftData = clCreateBuffer( kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE),
-        nBufferSize * sizeof(float), NULL, &clStatus);
-    CHECK_OPENCL( clStatus, "clCreateBuffer" );
-    mpClmemeOp = clCreateBuffer( kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE),
-        neOpSize * sizeof(uint), NULL, &clStatus );
-    CHECK_OPENCL( clStatus, "clCreateBuffer" );
-    clFinish( kEnv.mpkCmdQueue );
-    return true;
-}
-
-bool OclCalc::createArithmeticOptBuf32Bits( int nBufferSize )
-{
-    cl_int clStatus = 0;
-    setKernelEnv( &kEnv );
-    nArithmeticLen = nBufferSize;
-    mpClmemLeftData = clCreateBuffer( kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE),
-        nBufferSize * sizeof(float), NULL, &clStatus);
-    CHECK_OPENCL( clStatus, "clCreateBuffer" );
-    mpClmemRightData = clCreateBuffer( kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE),
-        nBufferSize * sizeof(float), NULL, &clStatus );
-    CHECK_OPENCL( clStatus, "clCreateBuffer" );
-    clFinish( kEnv.mpkCmdQueue );
-    return true;
-}
-
-bool OclCalc::createFormulaBuf32Bits( int nBufferSize, int rowSize )
-{
-    cl_int clStatus = 0;
-    setKernelEnv( &kEnv );
-    nFormulaLen = nBufferSize;
-
-    mpClmemSrcData    = clCreateBuffer( kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE),
-        nBufferSize * sizeof(float), NULL, &clStatus );
-    CHECK_OPENCL( clStatus, "clCreateBuffer" );
-
-    mpClmemStartPos    = clCreateBuffer( kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE),
-        rowSize * sizeof(unsigned int), NULL, &clStatus );
-    CHECK_OPENCL( clStatus, "clCreateBuffer" );
-    mpClmemEndPos    = clCreateBuffer( kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE),
-        rowSize * sizeof(unsigned int), NULL, &clStatus );
-    CHECK_OPENCL( clStatus, "clCreateBuffer" );
-    clFinish( kEnv.mpkCmdQueue );
-    return true;
-}
-
-bool OclCalc::createBuffer32Bits( float *&fpLeftData, float *&fpRightData, int nBufferSize )
-{
-    cl_int clStatus = 0;
-    setKernelEnv( &kEnv );
-    mpClmemLeftData = clCreateBuffer( kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR),
-            nBufferSize * sizeof(float), NULL, &clStatus);
-    CHECK_OPENCL( clStatus, "clCreateBuffer" );
-    mpClmemRightData = clCreateBuffer( kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR),
-        nBufferSize * sizeof(unsigned int), NULL, &clStatus );
-    CHECK_OPENCL( clStatus, "clCreateBuffer" );
-    fpLeftData = (float *)clEnqueueMapBuffer( kEnv.mpkCmdQueue, mpClmemLeftData, CL_TRUE,CL_MAP_WRITE_INVALIDATE_REGION,
-        0, nBufferSize * sizeof(float), 0, NULL, NULL, &clStatus );
-    CHECK_OPENCL( clStatus, "clEnqueueMapBuffer" );
-    clFinish( kEnv.mpkCmdQueue );
-    fpRightData = (float *)clEnqueueMapBuffer( kEnv.mpkCmdQueue, mpClmemRightData, CL_TRUE, CL_MAP_WRITE_INVALIDATE_REGION,
-        0, nBufferSize * sizeof(float), 0, NULL, NULL, &clStatus );
-    CHECK_OPENCL( clStatus, "clEnqueueMapBuffer" );
-    clFinish( kEnv.mpkCmdQueue );
-    //printf("In CreateBuffer, pptrr is %d,%d,%d\n",fpSrcData,npStartPos,npEndPos);
-    return true;
-}
-
-bool OclCalc::mapAndCopy32Bits(const double *dpTempSrcData,unsigned int *unStartPos,unsigned int *unEndPos,int nBufferSize ,int nRowsize)
-{
-    cl_int clStatus = 0;
-    float *fpSrcData = (float *)clEnqueueMapBuffer( kEnv.mpkCmdQueue, mpClmemSrcData, CL_TRUE, CL_MAP_WRITE, 0,
-            nBufferSize * sizeof(float) , 0, NULL, NULL, &clStatus );
-    CHECK_OPENCL( clStatus, "clEnqueueMapBuffer" );
-    clFinish( kEnv.mpkCmdQueue );
-    unsigned int *npStartPos = (uint *)clEnqueueMapBuffer( kEnv.mpkCmdQueue, mpClmemStartPos, CL_TRUE, CL_MAP_WRITE, 0,
-            nRowsize * sizeof(uint), 0, NULL, NULL, &clStatus );
-    CHECK_OPENCL( clStatus, "clEnqueueMapBuffer" );
-    clFinish( kEnv.mpkCmdQueue );
-    unsigned int *npEndPos = (uint *)clEnqueueMapBuffer( kEnv.mpkCmdQueue, mpClmemEndPos, CL_TRUE, CL_MAP_WRITE, 0,
-            nRowsize * sizeof(uint), 0, NULL, NULL, &clStatus );
-    CHECK_OPENCL( clStatus, "clEnqueueMapBuffer" );
-    clFinish( kEnv.mpkCmdQueue );
-    for(int i=0;i<nBufferSize;i++)
-    {
-        fpSrcData[i] = (float)dpTempSrcData[i];
-    }
-    for(int i=0;i<nRowsize;i++)
-    {
-        npStartPos[i] = unStartPos[i];
-        npEndPos[i] = unEndPos[i];
-    }
-    clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, mpClmemSrcData, fpSrcData, 0, NULL, NULL );
-    CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" );
-    clFinish( kEnv.mpkCmdQueue );
-    clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, mpClmemStartPos, npStartPos, 0, NULL, NULL );
-    CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" );
-    clFinish( kEnv.mpkCmdQueue );
-    clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, mpClmemEndPos, npEndPos, 0, NULL, NULL );
-    CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" );
-    clFinish( kEnv.mpkCmdQueue );
-    return true;
-}
-
-bool OclCalc::mapAndCopy32Bits(const double *dpTempLeftData,const double *dpTempRightData,int nBufferSize )
-{
-    cl_int clStatus = 0;
-    float *fpLeftData = (float *)clEnqueueMapBuffer( kEnv.mpkCmdQueue, mpClmemLeftData, CL_TRUE, CL_MAP_WRITE,
-        0, nBufferSize * sizeof(float), 0, NULL, NULL, &clStatus );
-    CHECK_OPENCL( clStatus, "clEnqueueMapBuffer" );
-    clFinish( kEnv.mpkCmdQueue );
-    float *fpRightData = (float *)clEnqueueMapBuffer( kEnv.mpkCmdQueue, mpClmemRightData, CL_TRUE, CL_MAP_WRITE,
-        0, nBufferSize * sizeof(float), 0, NULL, NULL, &clStatus );
-    CHECK_OPENCL( clStatus, "clEnqueueMapBuffer" );
-    clFinish( kEnv.mpkCmdQueue );
-    for(int i=0;i<nBufferSize;i++)
-    {
-        fpLeftData[i] = (float)dpTempLeftData[i];
-        fpRightData[i] = (float)dpTempRightData[i];
-    }
-    clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, mpClmemLeftData, fpLeftData, 0, NULL, NULL );
-    CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" );
-    clFinish( kEnv.mpkCmdQueue );
-    clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, mpClmemRightData, fpRightData, 0, NULL, NULL );
-    CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" );
-    clFinish( kEnv.mpkCmdQueue );
-    return true;
-}
-
-bool OclCalc::mapAndCopyArithmetic32Bits( const double *dpMoreColArithmetic, int nBufferSize )
-{
-    cl_int clStatus = 0;
-    float *dpLeftDataMap = (float *)clEnqueueMapBuffer( kEnv.mpkCmdQueue, mpClmemLeftData, CL_TRUE, CL_MAP_WRITE,
-        0, nBufferSize * sizeof(float), 0, NULL, NULL, &clStatus );
-    CHECK_OPENCL( clStatus, "clEnqueueMapBuffer" );
-    clFinish( kEnv.mpkCmdQueue );
-    for ( int i = 0; i < nBufferSize; i++ )
-    {
-        dpLeftDataMap[i] = (float)dpMoreColArithmetic[i];
-    }
-    clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, mpClmemLeftData, dpLeftDataMap, 0, NULL, NULL );
-    CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" );
-    clFinish( kEnv.mpkCmdQueue );
-    return true;
-}
-
-bool OclCalc::mapAndCopyMoreColArithmetic32Bits( const double *dpMoreColArithmetic, int nBufferSize, uint *npeOp, uint neOpSize )
-{
-    cl_int clStatus = 0;
-    float *fpLeftDataMap = (float *)clEnqueueMapBuffer( kEnv.mpkCmdQueue, mpClmemLeftData, CL_TRUE, CL_MAP_WRITE,
-        0, nBufferSize * sizeof(float), 0, NULL, NULL, &clStatus );
-    CHECK_OPENCL( clStatus, "clEnqueueMapBuffer" );
-    clFinish( kEnv.mpkCmdQueue );
-    uint *dpeOpMap = (uint *)clEnqueueMapBuffer( kEnv.mpkCmdQueue, mpClmemeOp, CL_TRUE, CL_MAP_WRITE,
-        0, neOpSize * sizeof(uint), 0, NULL, NULL, &clStatus );
-    CHECK_OPENCL( clStatus, "clEnqueueMapBuffer" );
-    clFinish( kEnv.mpkCmdQueue );
-    for ( int i = 0; i < nBufferSize; i++ )
-    {
-        fpLeftDataMap[i] = (float)dpMoreColArithmetic[i];
-    }
-    for( uint i = 0; i<neOpSize; i++ )
-    {
-        dpeOpMap[i] = npeOp[i];
-    }
-    clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, mpClmemLeftData, fpLeftDataMap, 0, NULL, NULL );
-    CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" );
-    clFinish( kEnv.mpkCmdQueue );
-    clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, mpClmemeOp, dpeOpMap, 0, NULL, NULL );
-    CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" );
-    clFinish( kEnv.mpkCmdQueue );
-    return true;
-}
-
-bool OclCalc::oclHostArithmeticOperator32Bits( const char* aKernelName,double *rResult, int nRowSize )
-{
-    cl_int clStatus = 0;
-    size_t global_work_size[1];
-
-    Kernel* pKernel = fetchKernel(aKernelName);
-    if (!pKernel)
-        return false;
-
-    cl_mem clResult = clCreateBuffer( kEnv.mpkContext, CL_MEM_READ_WRITE, nRowSize * sizeof(float), NULL, &clStatus );
-    CHECK_OPENCL( clStatus, "clCreateBuffer" );
-    clStatus = clSetKernelArg(pKernel->mpKernel, 0, sizeof(cl_mem), (void *)&mpClmemLeftData);
-    CHECK_OPENCL( clStatus, "clSetKernelArg" );
-    clStatus = clSetKernelArg(pKernel->mpKernel, 1, sizeof(cl_mem), (void *)&mpClmemRightData);
-    CHECK_OPENCL( clStatus, "clSetKernelArg" );
-    clStatus = clSetKernelArg(pKernel->mpKernel, 2, sizeof(cl_mem), (void *)&clResult);
-    CHECK_OPENCL( clStatus, "clSetKernelArg" );
-    global_work_size[0] = nRowSize;
-    clStatus = clEnqueueNDRangeKernel(
-        kEnv.mpkCmdQueue, pKernel->mpKernel, 1, NULL, global_work_size, NULL, 0, NULL, NULL);
-    CHECK_OPENCL( clStatus, "clEnqueueNDRangeKernel" );
-    clFinish( kEnv.mpkCmdQueue );
-    float * hostMapResult = (float *)clEnqueueMapBuffer(
-        kEnv.mpkCmdQueue, clResult, CL_TRUE, CL_MAP_READ, 0, nRowSize*sizeof(float), 0, NULL, NULL, &clStatus);
-    CHECK_OPENCL( clStatus, "clEnqueueMapBuffer" );
-    clFinish( kEnv.mpkCmdQueue );
-    for ( int i = 0; i < nRowSize; i++)
-        rResult[i] = hostMapResult[i]; // from gpu float type to cpu double type
-    clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, clResult, hostMapResult, 0, NULL, NULL );
-    CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" );
-    clStatus = clFinish(kEnv.mpkCmdQueue );
-    CHECK_OPENCL( clStatus, "clFinish" );
-    clStatus = clReleaseMemObject( clResult );
-    CHECK_OPENCL( clStatus, "clReleaseMemObject" );
-
-    return true;
-}
-
-bool OclCalc::oclMoreColHostArithmeticOperator32Bits( int nDataSize,int neOpSize,double *rResult, int nRowSize )
-{
-    cl_int clStatus = 0;
-    size_t global_work_size[1];
-    const char *aKernelName = "oclMoreColArithmeticOperator";
-    Kernel* pKernel = fetchKernel(aKernelName);
-    if (!pKernel)
-        return false;
-
-    cl_mem clResult = clCreateBuffer( kEnv.mpkContext, CL_MEM_READ_WRITE, nRowSize * sizeof(float), NULL, &clStatus );
-    CHECK_OPENCL( clStatus, "clCreateBuffer" );
-    clStatus = clSetKernelArg(pKernel->mpKernel, 0, sizeof(cl_mem), (void *)&mpClmemLeftData);
-    CHECK_OPENCL( clStatus, "clSetKernelArg" );
-    clStatus = clSetKernelArg(pKernel->mpKernel, 1, sizeof(cl_int), (void *)&nDataSize);
-    CHECK_OPENCL( clStatus, "clSetKernelArg" );
-    clStatus = clSetKernelArg(pKernel->mpKernel, 2, sizeof(cl_mem), (void *)&mpClmemeOp);
-    CHECK_OPENCL( clStatus, "clSetKernelArg" );
-    clStatus = clSetKernelArg(pKernel->mpKernel, 3, sizeof(cl_int), (void *)&neOpSize);
-    CHECK_OPENCL( clStatus, "clSetKernelArg" );
-    clStatus = clSetKernelArg(pKernel->mpKernel, 4, sizeof(cl_mem), (void *)&clResult);
-    CHECK_OPENCL( clStatus, "clSetKernelArg" );
-    global_work_size[0] = nRowSize;
-    clStatus = clEnqueueNDRangeKernel(
-        kEnv.mpkCmdQueue, pKernel->mpKernel, 1, NULL, global_work_size, NULL, 0, NULL, NULL);
-    CHECK_OPENCL( clStatus, "clEnqueueNDRangeKernel" );
-    clFinish( kEnv.mpkCmdQueue );
-    float * hostMapResult = (float *)clEnqueueMapBuffer( kEnv.mpkCmdQueue, clResult, CL_TRUE, CL_MAP_READ, 0,
-        nRowSize*sizeof(float), 0, NULL, NULL, &clStatus );
-    CHECK_OPENCL( clStatus, "clEnqueueMapBuffer" );
-    clFinish( kEnv.mpkCmdQueue );
-    for ( int i = 0; i < nRowSize; i++)
-        rResult[i] = hostMapResult[i]; // from gpu float type to cpu double type
-    clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, clResult, hostMapResult, 0, NULL, NULL );
-    CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" );
-    clStatus = clFinish(kEnv.mpkCmdQueue );
-    CHECK_OPENCL( clStatus, "clFinish" );
-    clStatus = clReleaseMemObject( clResult );
-    CHECK_OPENCL( clStatus, "clReleaseMemObject" );
-
-    return true;
-}
-
-bool OclCalc::oclHostFormulaStatistics32Bits(const char* aKernelName,double *output,int size)
-{
-    Kernel* pKernel = fetchKernel(aKernelName);
-    if (!pKernel)
-        return false;
-
-    cl_int clStatus = 0;
-    size_t global_work_size[1];
-
-    cl_mem outputCl = clCreateBuffer( kEnv.mpkContext, CL_MEM_READ_WRITE, size * sizeof(float), NULL, &clStatus );
-    CHECK_OPENCL( clStatus, "clCreateBuffer" );
-    clStatus = clSetKernelArg(pKernel->mpKernel, 0, sizeof(cl_mem), (void *)&mpClmemSrcData);
-    CHECK_OPENCL( clStatus, "clSetKernelArg" );
-    clStatus = clSetKernelArg(pKernel->mpKernel, 1, sizeof(cl_mem), (void *)&mpClmemStartPos);
-    CHECK_OPENCL( clStatus, "clSetKernelArg" );
-    clStatus = clSetKernelArg(pKernel->mpKernel, 2, sizeof(cl_mem), (void *)&mpClmemEndPos);
-    CHECK_OPENCL( clStatus, "clSetKernelArg" );
-    clStatus = clSetKernelArg(pKernel->mpKernel, 3, sizeof(cl_mem), (void *)&outputCl);
-    CHECK_OPENCL( clStatus, "clSetKernelArg" );
-    global_work_size[0] = size;
-    clStatus = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, pKernel->mpKernel, 1, NULL, global_work_size, NULL, 0, NULL, NULL);
-    CHECK_OPENCL( clStatus, "clEnqueueNDRangeKernel" );
-    clFinish( kEnv.mpkCmdQueue );
-    float * outputMap = (float *)clEnqueueMapBuffer( kEnv.mpkCmdQueue, outputCl, CL_TRUE, CL_MAP_READ,
-        0, size*sizeof(float), 0, NULL, NULL, &clStatus );
-    CHECK_OPENCL( clStatus, "clEnqueueMapBuffer" );
-    clFinish( kEnv.mpkCmdQueue );
-    for ( int i = 0; i < size; i++ )
-        output[i] = outputMap[i]; // from gpu float type to cpu double type
-    clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, outputCl, outputMap, 0, NULL, NULL );
-    CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" );
-    clStatus = clFinish( kEnv.mpkCmdQueue );
-    CHECK_OPENCL( clStatus, "clFinish" );
-    clStatus = clReleaseMemObject( outputCl );
-    CHECK_OPENCL( clStatus, "clReleaseMemObject" );
-    return true;
-}
-
-bool OclCalc::oclHostArithmeticStash32Bits( const char* aKernelName, const double *dpLeftData, const double *dpRightData, double *rResult,int nRowSize )
-{
-    cl_int clStatus = 0;
-    size_t global_work_size[1];
-    setKernelEnv( &kEnv );
-    Kernel* pKernel = fetchKernel(aKernelName);
-    if (!pKernel)
-        return false;
-
-    float *fpLeftData = (float *)malloc( sizeof(float) * nRowSize );
-    float *fpRightData = (float *)malloc( sizeof(float) * nRowSize );
-    float *fpResult = (float *)malloc( sizeof(float) * nRowSize );
-    for(int i=0;i<nRowSize;i++)
-    {
-        fpLeftData[i] = (float)dpLeftData[i];
-        fpRightData[i] = (float)dpRightData[i];
-    }
-    cl_mem clLeftData = clCreateBuffer( kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR),
-        nRowSize * sizeof(float), (void *)fpLeftData, &clStatus);
-    CHECK_OPENCL( clStatus, "clCreateBuffer" );
-    cl_mem clRightData = clCreateBuffer( kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR),
-        nRowSize * sizeof(float), (void *)fpRightData, &clStatus );
-    CHECK_OPENCL( clStatus, "clCreateBuffer" );
-
-    cl_mem clResult = clCreateBuffer( kEnv.mpkContext, CL_MEM_READ_WRITE,
-                          nRowSize * sizeof(float), NULL, &clStatus);
-    CHECK_OPENCL( clStatus, "clCreateBuffer" );
-
-    clFinish( kEnv.mpkCmdQueue );
-
-    clStatus = clSetKernelArg(pKernel->mpKernel, 0, sizeof(cl_mem), (void *)&clLeftData);
-    CHECK_OPENCL( clStatus, "clSetKernelArg" );
-    clStatus = clSetKernelArg(pKernel->mpKernel, 1, sizeof(cl_mem), (void *)&clRightData);
-    CHECK_OPENCL( clStatus, "clSetKernelArg" );
-    clStatus = clSetKernelArg(pKernel->mpKernel, 2, sizeof(cl_mem), (void *)&clResult);
-    CHECK_OPENCL( clStatus, "clSetKernelArg" );
-
-    global_work_size[0] = nRowSize;
-    clStatus = clEnqueueNDRangeKernel(
-        kEnv.mpkCmdQueue, pKernel->mpKernel, 1, NULL, global_work_size, NULL, 0, NULL, NULL);
-    CHECK_OPENCL( clStatus, "clEnqueueNDRangeKernel" );
-    clFinish( kEnv.mpkCmdQueue );
-
-    clStatus = clEnqueueReadBuffer(kEnv.mpkCmdQueue, clResult, CL_TRUE, 0, nRowSize * sizeof(float), (float *)fpResult, 0, NULL, NULL);
-    CHECK_OPENCL( clStatus, "clEnqueueReadBuffer" );
-    for(int i=0;i<nRowSize;i++)
-        rResult[i] = (double)fpResult[i];
-    if(fpResult)
-    {
-        free(fpResult);
-        fpResult = NULL;
-    }
-    clStatus = clFinish( kEnv.mpkCmdQueue );
-    CHECK_OPENCL( clStatus, "clFinish" );
-    clStatus = clReleaseMemObject( clResult );
-    CHECK_OPENCL( clStatus, "clReleaseMemObject" );
-    clStatus = clReleaseMemObject( clLeftData );
-    CHECK_OPENCL( clStatus, "clReleaseMemObject" );
-    clStatus = clReleaseMemObject( clRightData );
-    CHECK_OPENCL( clStatus, "clReleaseMemObject" );
-    return true;
-}
-
-bool OclCalc::oclHostFormulaStash32Bits( const char* aKernelName, const double* dpSrcData, uint *nStartPos, uint *nEndPos, double *output, int nBufferSize, int size )
-{
-    cl_int clStatus = 0;
-    size_t global_work_size[1];
-    setKernelEnv( &kEnv );
-    Kernel* pKernel = fetchKernel(aKernelName);
-    if (!pKernel)
-        return false;
-
-    float *fpSrcData = (float *)malloc( sizeof(float) * nBufferSize );
-    float *fpResult = (float *)malloc( sizeof(float) * size );
-    for(int i=0;i<nBufferSize;i++)
-        fpSrcData[i] = (float)dpSrcData[i];
-    cl_mem clSrcData   = clCreateBuffer( kEnv.mpkContext, (cl_mem_flags) (CL_MEM_COPY_HOST_PTR),
-        nBufferSize * sizeof(float), (void *)fpSrcData, &clStatus );
-    CHECK_OPENCL( clStatus, "clCreateBuffer" );
-     cl_mem clStartPos   = clCreateBuffer( kEnv.mpkContext, (cl_mem_flags) (CL_MEM_COPY_HOST_PTR),
-        size * sizeof(unsigned int), (void *)nStartPos, &clStatus );
-    CHECK_OPENCL( clStatus, "clCreateBuffer" );
-    cl_mem clEndPos   = clCreateBuffer( kEnv.mpkContext, (cl_mem_flags) (CL_MEM_COPY_HOST_PTR),
-        size * sizeof(unsigned int), (void *)nEndPos, &clStatus );
-    CHECK_OPENCL( clStatus, "clCreateBuffer" );
-
-    cl_mem outputCl = clCreateBuffer( kEnv.mpkContext, CL_MEM_READ_WRITE, size * sizeof(double), NULL, &clStatus );
-    CHECK_OPENCL( clStatus, "clCreateBuffer" );
-    clStatus = clSetKernelArg(pKernel->mpKernel, 0, sizeof(cl_mem),(void *)&clSrcData);
-    CHECK_OPENCL( clStatus, "clSetKernelArg" );
-    clStatus = clSetKernelArg(pKernel->mpKernel, 1, sizeof(cl_mem), (void *)&clStartPos);
-    CHECK_OPENCL( clStatus, "clSetKernelArg" );
-    clStatus = clSetKernelArg(pKernel->mpKernel, 2, sizeof(cl_mem), (void *)&clEndPos);
-    CHECK_OPENCL( clStatus, "clSetKernelArg" );
-    clStatus = clSetKernelArg(pKernel->mpKernel, 3, sizeof(cl_mem), (void *)&outputCl);
-    CHECK_OPENCL( clStatus, "clSetKernelArg" );
-    global_work_size[0] = size;
-    clStatus = clEnqueueNDRangeKernel(
-        kEnv.mpkCmdQueue, pKernel->mpKernel, 1, NULL, global_work_size, NULL, 0, NULL, NULL);
-    CHECK_OPENCL( clStatus, "clEnqueueNDRangeKernel" );
-    clFinish( kEnv.mpkCmdQueue );
-
-    clStatus = clEnqueueReadBuffer(kEnv.mpkCmdQueue, outputCl, CL_TRUE, 0, size * sizeof(float), (double *)fpResult, 0, NULL, NULL);
-    CHECK_OPENCL( clStatus, "clReadBuffer" );
-    for(int i = 0;i<size;i++)
-        output[i] = (float)fpResult[i];
-    clFinish( kEnv.mpkCmdQueue );
-    if(fpResult)
-    {
-        free(fpResult);
-        fpResult = NULL;
-    }
-    clStatus = clFinish(kEnv.mpkCmdQueue );
-    CHECK_OPENCL( clStatus, "clFinish" );
-    clStatus = clReleaseMemObject( outputCl );
-    CHECK_OPENCL( clStatus, "clReleaseMemObject" );
-    clStatus = clReleaseMemObject( clSrcData );
-    CHECK_OPENCL( clStatus, "clReleaseMemObject" );
-    clStatus = clReleaseMemObject( clStartPos );
-    CHECK_OPENCL( clStatus, "clReleaseMemObject" );
-    clStatus = clReleaseMemObject( clEndPos );
-    CHECK_OPENCL( clStatus, "clReleaseMemObject" );
-    return true;
-}
-
-bool OclCalc::oclHostFormulaCount32Bits( uint *npStartPos, uint *npEndPos, double *dpOutput, int nSize )
-{
-    const char *cpKernelName = "oclFormulaCount";
-    Kernel* pKernel = fetchKernel(cpKernelName);
-    if (!pKernel)
-        return false;
-
-    cl_int clStatus;
-    size_t global_work_size[1];
-
-    clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, mpClmemStartPos, npStartPos, 0, NULL, NULL );
-    CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" );
-    clFinish( kEnv.mpkCmdQueue );
-    clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, mpClmemEndPos, npEndPos, 0, NULL, NULL );
-    CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" );
-    clFinish( kEnv.mpkCmdQueue );
-    cl_mem clpOutput = clCreateBuffer( kEnv.mpkContext, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
-        nSize* sizeof(float), NULL, &clStatus );
-    CHECK_OPENCL( clStatus, "clCreateBuffer" );
-    clStatus = clSetKernelArg(pKernel->mpKernel, 0, sizeof(cl_mem), (void *)&mpClmemStartPos);
-    CHECK_OPENCL( clStatus, "clSetKernelArg" );
-    clStatus = clSetKernelArg(pKernel->mpKernel, 1, sizeof(cl_mem), (void *)&mpClmemEndPos);
-    CHECK_OPENCL( clStatus, "clSetKernelArg" );
-    clStatus = clSetKernelArg(pKernel->mpKernel, 2, sizeof(cl_mem), (void *)&clpOutput);
-    CHECK_OPENCL(clStatus, "clSetKernelArg");
-    global_work_size[0] = nSize;
-    clStatus = clEnqueueNDRangeKernel(
-        kEnv.mpkCmdQueue, pKernel->mpKernel, 1, NULL, global_work_size, NULL, 0, NULL, NULL);
-    CHECK_OPENCL( clStatus, "clEnqueueNDRangeKernel" );
-    clFinish( kEnv.mpkCmdQueue );
-    float * fpOutputMap = (float *)clEnqueueMapBuffer( kEnv.mpkCmdQueue, clpOutput, CL_TRUE,
-        CL_MAP_READ, 0, nSize*sizeof(float), 0, NULL, NULL, &clStatus );
-    CHECK_OPENCL( clStatus, "clEnqueueMapBuffer" );
-    clFinish( kEnv.mpkCmdQueue );
-    for (int i = 0; i < nSize; i++ )
-        dpOutput[i] = fpOutputMap[i];// from gpu float type to cpu double type
-    clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, clpOutput, fpOutputMap, 0, NULL, NULL );
-    CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" );
-    clStatus = clFinish(kEnv.mpkCmdQueue );
-    CHECK_OPENCL( clStatus, "clFinish" );
-    clStatus = clReleaseMemObject(mpClmemSrcData );
-    CHECK_OPENCL( clStatus, "clReleaseMemObject" );
-    clStatus = clReleaseMemObject( mpClmemStartPos );
-    CHECK_OPENCL( clStatus, "clReleaseMemObject" );
-    clStatus = clReleaseMemObject( mpClmemEndPos );
-    CHECK_OPENCL( clStatus, "clReleaseMemObject" );
-    clStatus = clReleaseMemObject( clpOutput );
-    CHECK_OPENCL( clStatus, "clReleaseMemObject" );
-    return true;
-}
-
-//sumproduct
-bool OclCalc::oclHostFormulaSumProduct32Bits( float *fpSumProMergeLfData, float *fpSumProMergeRrData, uint *npSumSize, double *dpOutput, int nSize )
-{
-    cl_int clStatus;
-    size_t global_work_size[1];
-    memset(dpOutput,0,nSize);
-    const char *cpFirstKernelName = "oclSignedMul";
-    const char *cpSecondKernelName = "oclFormulaSumproduct";
-    Kernel* pKernel1 = fetchKernel(cpFirstKernelName);
-    if (!pKernel1)
-        return false;
-
-    clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, mpClmemMergeLfData, fpSumProMergeLfData, 0, NULL, NULL );
-    CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" );
-    clFinish( kEnv.mpkCmdQueue );
-    clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, mpClmemMergeRtData, fpSumProMergeRrData, 0, NULL, NULL );
-    clFinish( kEnv.mpkCmdQueue );
-    CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" );
-    clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, mpClmemMatixSumSize, npSumSize, 0, NULL, NULL );
-    CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" );
-    clFinish( kEnv.mpkCmdQueue );
-    unsigned int nMulResultSize = nFormulaRowSize +  nFormulaRowSize * nSize * nFormulaColSize - 1;
-    cl_mem clResult = clCreateBuffer( kEnv.mpkContext, CL_MEM_READ_WRITE, nMulResultSize * sizeof(float), NULL, &clStatus );
-    CHECK_OPENCL( clStatus, "clCreateBuffer" );
-    clStatus = clSetKernelArg(pKernel1->mpKernel, 0, sizeof(cl_mem), (void *)&mpClmemMergeLfData);
-    CHECK_OPENCL( clStatus, "clSetKernelArg" );
-    clStatus = clSetKernelArg(pKernel1->mpKernel, 1, sizeof(cl_mem), (void *)&mpClmemMergeRtData);
-    CHECK_OPENCL( clStatus, "clSetKernelArg" );
-    clStatus = clSetKernelArg(pKernel1->mpKernel, 2, sizeof(cl_mem), (void *)&clResult);
-    CHECK_OPENCL( clStatus, "clSetKernelArg" );
-    global_work_size[0] = nMulResultSize;
-    clStatus = clEnqueueNDRangeKernel(
-        kEnv.mpkCmdQueue, pKernel1->mpKernel, 1, NULL, global_work_size, NULL, 0, NULL, NULL);
-    CHECK_OPENCL( clStatus, "clEnqueueNDRangeKernel" );
-    clFinish( kEnv.mpkCmdQueue );
-    clStatus = clReleaseMemObject( mpClmemMergeLfData );
-    CHECK_OPENCL( clStatus,"clReleaseMemObject" );
-    clStatus = clReleaseMemObject( mpClmemMergeRtData );
-    CHECK_OPENCL( clStatus, "clReleaseMemObject" );
-
-    Kernel* pKernel2 = fetchKernel(cpSecondKernelName);
-    if (!pKernel2)
-        return false;
-
-    cl_mem clpOutput = clCreateBuffer( kEnv.mpkContext, CL_MEM_READ_WRITE, nSize* sizeof(float), NULL, &clStatus );
-    CHECK_OPENCL( clStatus, "clCreateBuffer" );
-    cl_uint nMatixSize = nFormulaColSize * nFormulaRowSize;
-    clStatus = clSetKernelArg(pKernel2->mpKernel, 0, sizeof(cl_mem), (void *)&clResult);
-    CHECK_OPENCL( clStatus, "clSetKernelArg" );
-    clStatus = clSetKernelArg(pKernel2->mpKernel, 1, sizeof(cl_mem), (void *)&mpClmemMatixSumSize);
-    CHECK_OPENCL( clStatus, "clSetKernelArg" );
-    clStatus = clSetKernelArg(pKernel2->mpKernel, 2, sizeof(cl_mem), (void *)&clpOutput);
-    CHECK_OPENCL( clStatus, "clSetKernelArg" );
-    clStatus = clSetKernelArg(pKernel2->mpKernel, 3, sizeof(cl_uint), (void *)&nMatixSize);
-    CHECK_OPENCL( clStatus, "clSetKernelArg" );
-    global_work_size[0] = nSize;
-    clStatus = clEnqueueNDRangeKernel(
-        kEnv.mpkCmdQueue, pKernel2->mpKernel, 1, NULL, global_work_size, NULL, 0, NULL, NULL);
-    CHECK_OPENCL( clStatus, "clEnqueueNDRangeKernel" );
-    clFinish( kEnv.mpkCmdQueue );
-    float * fpOutputMap = (float *)clEnqueueMapBuffer( kEnv.mpkCmdQueue, clpOutput, CL_TRUE, CL_MAP_READ, 0,
-        nSize*sizeof(float), 0, NULL, NULL, &clStatus );
-    CHECK_OPENCL( clStatus, "clEnqueueMapBuffer" );
-    clFinish( kEnv.mpkCmdQueue );
-    for (int i = 0; i < nSize; i++ )
-    {
-        dpOutput[i] = fpOutputMap[i]; // from gpu float type to cpu double type
-    }
-    clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, clpOutput, fpOutputMap, 0, NULL, NULL);
-    CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" );
-
-    clStatus = clFinish( kEnv.mpkCmdQueue );
-    CHECK_OPENCL( clStatus, "clFinish" );
-    clStatus = clReleaseMemObject( clResult );
-    CHECK_OPENCL( clStatus, "clReleaseKernel" );
-    clStatus = clReleaseMemObject( mpClmemMatixSumSize );
-    CHECK_OPENCL( clStatus, "clReleaseKernel" );
-    clStatus = clReleaseMemObject( clpOutput );
-    CHECK_OPENCL( clStatus, "clReleaseKernel" );
-    return true;
-}
-
-
-// FIXME: should be templatised in <double> - double buffering [sic] rocks
-static cl_mem allocateDoubleBuffer( KernelEnv &rEnv, const double *_pValues, size_t nElements, cl_int *pStatus )
-{
-    // Ugh - horrible redundant copying ...
-    cl_mem xValues = clCreateBuffer( rEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR),
-        nElements * sizeof(double), NULL, pStatus);
-    double *pValues = (double *)clEnqueueMapBuffer( rEnv.mpkCmdQueue, xValues, CL_TRUE, CL_MAP_WRITE, 0,
-        nElements * sizeof(double), 0, NULL, NULL, NULL);
-    clFinish(rEnv.mpkCmdQueue);
-    for ( int i = 0; i < (int)nElements; i++ )
-        pValues[i] = _pValues[i];
-    clEnqueueUnmapMemObject( rEnv.mpkCmdQueue, xValues, pValues, 0, NULL, NULL );
-    clFinish( rEnv.mpkCmdQueue );
-    return xValues;
-}
-
-static cl_mem allocateFloatBuffer( KernelEnv &rEnv, const double *_pValues, size_t nElements, cl_int *pStatus )
-{
-    // Ugh - horrible redundant copying ...
-    cl_mem xValues = clCreateBuffer( rEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR),
-        nElements * sizeof(float), NULL, pStatus);
-    float *pValues = (float *)clEnqueueMapBuffer( rEnv.mpkCmdQueue, xValues, CL_TRUE, CL_MAP_WRITE, 0,
-        nElements * sizeof(float), 0, NULL, NULL, NULL );
-    clFinish( rEnv.mpkCmdQueue );
-    for ( int i = 0; i < (int)nElements; i++ )
-        pValues[i] = (float)_pValues[i];
-
-    clEnqueueUnmapMemObject(rEnv.mpkCmdQueue,xValues,pValues,0,NULL,NULL);
-    clFinish( rEnv.mpkCmdQueue );
-    return xValues;
-}
-
-bool OclCalc::oclGroundWaterGroup( uint *eOp, uint eOpNum, const double *pOpArray, const double *pSubtractSingle, size_t nSrcDataSize,size_t nElements, double del ,uint *nStartPos,uint *nEndPos,double *dpResult)
-{
-    setKernelEnv( &kEnv );
-
-    char kernelName[256] = "";
-    double delta = del;
-    bool subFlag = false;
-    strcat(kernelName,"ocl");
-    for ( size_t i = 0; i < eOpNum; i++ )
-    {
-        switch ( eOp[i] )
-        {
-        case ocAdd:
-            strcat(kernelName,"Add");
-            break;
-        case ocSub:
-            strcat(kernelName,"Sub");
-            break;
-        case ocMul:
-            strcat(kernelName,"Mul");
-            break;
-        case ocDiv:
-            strcat(kernelName,"Div");
-            break;
-        case ocMax:
-            strcat(kernelName,"Max");
-            break;
-        case ocMin:
-            strcat(kernelName,"Min");
-            break;
-        case ocAverage:
-            strcat(kernelName,"Average");
-            break;
-        default:
-            assert( false );
-            break;
-        }
-    }
-    Kernel* pKernel = fetchKernel(kernelName);
-    if (!pKernel)
-        return false;
-
-    cl_int clStatus;
-    size_t global_work_size[1];
-    if ( ( eOpNum == 1 ) && ( eOp[0] == ocSub ) )
-        subFlag = true;
-
-    cl_mem valuesCl = NULL, subtractCl = NULL, outputCl = NULL, startPosCL = NULL, endPosCL = NULL;
-
-    if(!subFlag)
-    {
-        startPosCL = clCreateBuffer( kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR),
-            nElements * sizeof(unsigned int), NULL, &clStatus );
-        CHECK_OPENCL( clStatus, "clCreateBuffer" );
-        endPosCL = clCreateBuffer( kEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR),
-            nElements * sizeof(unsigned int), NULL, &clStatus );
-        CHECK_OPENCL( clStatus, "clCreateBuffer" );
-        unsigned int *npStartPosMap = (uint *)clEnqueueMapBuffer( kEnv.mpkCmdQueue, startPosCL, CL_TRUE, CL_MAP_WRITE, 0,
-            nElements * sizeof(uint), 0, NULL, NULL, &clStatus );
-        CHECK_OPENCL( clStatus, "clEnqueueMapBuffer" );
-        clFinish( kEnv.mpkCmdQueue );
-        unsigned int *npEndPosMap = (uint *)clEnqueueMapBuffer( kEnv.mpkCmdQueue, endPosCL, CL_TRUE, CL_MAP_WRITE, 0,
-            nElements * sizeof(uint), 0, NULL, NULL, &clStatus );
-        CHECK_OPENCL( clStatus, "clEnqueueMapBuffer" );
-        clFinish( kEnv.mpkCmdQueue );
-
-        for(uint i=0;i<nElements;i++)
-        {
-            npStartPosMap[i]=nStartPos[i];
-            npEndPosMap[i]=nEndPos[i];
-        }
-        clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, startPosCL, npStartPosMap, 0, NULL, NULL );
-        CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" );
-        clFinish( kEnv.mpkCmdQueue );
-        clStatus = clEnqueueUnmapMemObject( kEnv.mpkCmdQueue, endPosCL, npEndPosMap, 0, NULL, NULL );
-        CHECK_OPENCL( clStatus, "clEnqueueUnmapMemObject" );
-
-        if ( gpuEnv.mnKhrFp64Flag || gpuEnv.mnAmdFp64Flag )
-        {
-            valuesCl = allocateDoubleBuffer( kEnv, pOpArray, nSrcDataSize, &clStatus );
-            subtractCl = allocateDoubleBuffer( kEnv, pSubtractSingle, nElements, &clStatus );
-            outputCl = clCreateBuffer( kEnv.mpkContext, CL_MEM_READ_WRITE, nElements * sizeof(double), NULL, &clStatus );
-        }
-        else
-        {
-            valuesCl = allocateFloatBuffer( kEnv, pOpArray, nSrcDataSize, &clStatus );
-            subtractCl = allocateFloatBuffer( kEnv, pSubtractSingle, nElements, &clStatus );
-            outputCl = clCreateBuffer( kEnv.mpkContext, CL_MEM_READ_WRITE , nElements * sizeof(float), NULL, &clStatus);
-        }
-        CHECK_OPENCL( clStatus, "clCreateBuffer" );
-
-        clStatus = clSetKernelArg(pKernel->mpKernel, 0, sizeof(cl_mem), (void *)&valuesCl);
-        CHECK_OPENCL( clStatus, "clSetKernelArg");
-        clStatus = clSetKernelArg(pKernel->mpKernel, 1, sizeof(cl_mem), (void *)&subtractCl);
-        CHECK_OPENCL( clStatus, "clSetKernelArg");
-        clStatus = clSetKernelArg(pKernel->mpKernel, 2, sizeof(cl_mem), (void *)&startPosCL);
-        CHECK_OPENCL( clStatus, "clSetKernelArg" );
-        clStatus = clSetKernelArg(pKernel->mpKernel, 3, sizeof(cl_mem), (void *)&endPosCL);
-        CHECK_OPENCL( clStatus, "clSetKernelArg" );
-        clStatus = clSetKernelArg(pKernel->mpKernel, 4, sizeof(cl_mem), (void *)&outputCl);
-        CHECK_OPENCL( clStatus, "clSetKernelArg" );
-
-        fprintf( stderr, "prior to enqueue range kernel\n" );
-    }
-    else
-    {
-        if ( gpuEnv.mnKhrFp64Flag || gpuEnv.mnAmdFp64Flag )
-        {
-             subtractCl = allocateDoubleBuffer( kEnv, pSubtractSingle, nElements, &clStatus );
-             outputCl = clCreateBuffer( kEnv.mpkContext, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, nElements * sizeof(double), NULL, &clStatus );
-             clStatus = clSetKernelArg(pKernel->mpKernel, 0, sizeof(cl_double), (void *)&delta);
-             CHECK_OPENCL( clStatus, "clSetKernelArg");
-        }
-        else
-        {
-             float fTmp = (float)delta;
-             subtractCl = allocateFloatBuffer( kEnv, pSubtractSingle, nElements, &clStatus );
-             outputCl = clCreateBuffer( kEnv.mpkContext, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, nElements * sizeof(float), NULL, &clStatus );
-             clStatus = clSetKernelArg(pKernel->mpKernel, 0, sizeof(cl_float), (void *)&fTmp);
-             CHECK_OPENCL( clStatus, "clSetKernelArg");
-        }
-        clStatus = clSetKernelArg(pKernel->mpKernel, 1, sizeof(cl_mem), (void *)&subtractCl);
-        CHECK_OPENCL( clStatus, "clSetKernelArg");
-        clStatus = clSetKernelArg(pKernel->mpKernel, 2, sizeof(cl_mem), (void *)&outputCl);
-        CHECK_OPENCL( clStatus, "clSetKernelArg" );
-    }
-    global_work_size[0] = nElements;
-    clStatus = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, pKernel->mpKernel, 1, NULL, global_work_size, NULL, 0, NULL, NULL);
-    CHECK_OPENCL( clStatus, "clEnqueueNDRangeKernel" );
-    clFinish( kEnv.mpkCmdQueue );
-
-    if ( gpuEnv.mnKhrFp64Flag || gpuEnv.mnAmdFp64Flag )
-    {
-        clStatus = clEnqueueReadBuffer(kEnv.mpkCmdQueue,
-                                       outputCl,
-                                       CL_TRUE,0,
-                                       nElements * sizeof(double),
-                                       (void *)dpResult,0,NULL,NULL);
-        CHECK_OPENCL( clStatus, "clEnqueueReadBuffer" );
-        clFinish( kEnv.mpkCmdQueue );
-    }
-    else
-    {
-        float *afBuffer = new float[nElements];
-        if ( !afBuffer )
-            return false;
-        clStatus = clEnqueueReadBuffer(kEnv.mpkCmdQueue,
-                                       outputCl,
-                                       CL_TRUE,0,
-                                       nElements * sizeof(float),
-                                       (void *)afBuffer,0,NULL,NULL);
-        CHECK_OPENCL( clStatus, "clEnqueueReadBuffer" );
-        clFinish( kEnv.mpkCmdQueue );
-        for ( size_t i = 0; i < nElements; i++ )
-        {
-            dpResult[i] = (double)afBuffer[i];
-        }
-        delete [] afBuffer;
-    }
-
-    clStatus = clFinish( kEnv.mpkCmdQueue );
-    CHECK_OPENCL( clStatus, "clFinish" );
-
-    CHECK_OPENCL_RELEASE( clStatus, valuesCl );
-    CHECK_OPENCL_RELEASE( clStatus, subtractCl );
-    CHECK_OPENCL_RELEASE( clStatus, outputCl );
-    CHECK_OPENCL_RELEASE( clStatus, startPosCL );
-    CHECK_OPENCL_RELEASE( clStatus, endPosCL );
-
-    fprintf( stderr, "completed opencl operation\n" );
-
-    return true;
-}
-double *OclCalc::oclSimpleDeltaOperation( OpCode eOp, const double *pOpArray, const double *pSubtractSingle, size_t nElements, double del )
-{
-    setKernelEnv( &kEnv );
-
-    // select a kernel: cut & paste coding is utterly evil.
-    const char *kernelName = NULL;
-    double delta = del;
-    bool subFlag = false;
-    switch ( eOp ) {
-    case ocAdd:
-       fprintf( stderr, "ocSub is %d\n", ocSub );
-    case ocMul:
-    case ocDiv:
-        ; // FIXME: fallthrough for now
-    case ocMax:
-        kernelName = "oclMaxDelta";
-        break;
-    case ocMin:
-        kernelName = "oclMinDelta";
-        break;
-    case ocAverage:
-        kernelName = "oclAverageDelta";
-        break;
-    case ocSub:
-        kernelName = "oclSubDelta";
-        subFlag = true;
-        break;
-    default:
-        assert( false );
-    }
-
-    Kernel* pKernel = fetchKernel(kernelName);
-    if (!pKernel)
-        return NULL;
-
-    cl_int clStatus;
-    size_t global_work_size[1];
-
-    // Ugh - horrible redundant copying ...
-
-    cl_mem valuesCl = NULL, subtractCl = NULL, outputCl = NULL;
-    if(!subFlag)
-    {
-        if ( gpuEnv.mnKhrFp64Flag || gpuEnv.mnAmdFp64Flag )
-        {
-            valuesCl = allocateDoubleBuffer( kEnv, pOpArray, nElements, &clStatus );
-            subtractCl = allocateDoubleBuffer( kEnv, pSubtractSingle, nElements, &clStatus );
-            outputCl = clCreateBuffer( kEnv.mpkContext, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, nElements * sizeof(double), NULL, &clStatus );
-        }
-        else
-        {
-            valuesCl = allocateFloatBuffer( kEnv, pOpArray, nElements, &clStatus );
-            subtractCl = allocateFloatBuffer( kEnv, pSubtractSingle, nElements, &clStatus );
-            outputCl = clCreateBuffer( kEnv.mpkContext, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, nElements * sizeof(float), NULL, &clStatus);
-        }
-        CHECK_OPENCL_PTR( clStatus, "clCreateBuffer" );
-
-        cl_uint start = 0;
-        cl_uint end = (cl_uint)nElements;
-
-        clStatus = clSetKernelArg(pKernel->mpKernel, 0, sizeof(cl_mem), (void *)&valuesCl);
-        CHECK_OPENCL_PTR( clStatus, "clSetKernelArg");
-        clStatus = clSetKernelArg(pKernel->mpKernel, 1, sizeof(cl_mem), (void *)&subtractCl);
-        CHECK_OPENCL_PTR( clStatus, "clSetKernelArg");
-        clStatus = clSetKernelArg(pKernel->mpKernel, 2, sizeof(cl_uint), (void *)&start);
-        CHECK_OPENCL_PTR( clStatus, "clSetKernelArg" );
-        clStatus = clSetKernelArg(pKernel->mpKernel, 3, sizeof(cl_uint), (void *)&end);
-        CHECK_OPENCL_PTR( clStatus, "clSetKernelArg" );
-        clStatus = clSetKernelArg(pKernel->mpKernel, 4, sizeof(cl_mem), (void *)&outputCl);
-        CHECK_OPENCL_PTR( clStatus, "clSetKernelArg" );
-
-        fprintf( stderr, "prior to enqueue range kernel\n" );
-    }
-    else
-    {
-        if ( gpuEnv.mnKhrFp64Flag || gpuEnv.mnAmdFp64Flag )
-        {
-             subtractCl = allocateDoubleBuffer( kEnv, pSubtractSingle, nElements, &clStatus );
-             outputCl = clCreateBuffer( kEnv.mpkContext, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, nElements * sizeof(double), NULL, &clStatus );
-             clStatus = clSetKernelArg(pKernel->mpKernel, 0, sizeof(cl_double), (void *)&delta);
-             CHECK_OPENCL_PTR( clStatus, "clSetKernelArg");
-        }
-        else
-       {
-             float fTmp = (float)delta;
-             subtractCl = allocateFloatBuffer( kEnv, pSubtractSingle, nElements, &clStatus );
-             outputCl = clCreateBuffer( kEnv.mpkContext, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, nElements * sizeof(float), NULL, &clStatus );
-             clStatus = clSetKernelArg(pKernel->mpKernel, 0, sizeof(cl_float), (void *)&fTmp);
-             CHECK_OPENCL_PTR( clStatus, "clSetKernelArg");
-        }
-        clStatus = clSetKernelArg(pKernel->mpKernel, 1, sizeof(cl_mem), (void *)&subtractCl);
-        CHECK_OPENCL_PTR( clStatus, "clSetKernelArg");
-        clStatus = clSetKernelArg(pKernel->mpKernel, 2, sizeof(cl_mem), (void *)&outputCl);
-        CHECK_OPENCL_PTR( clStatus, "clSetKernelArg" );
-    }
-    global_work_size[0] = nElements;
-    clStatus = clEnqueueNDRangeKernel(
-        kEnv.mpkCmdQueue, pKernel->mpKernel, 1, NULL, global_work_size, NULL, 0, NULL, NULL);
-    CHECK_OPENCL_PTR( clStatus, "clEnqueueNDRangeKernel" );
-    clFinish( kEnv.mpkCmdQueue );
-
-    double *pResult = new double[nElements];
-    if ( !pResult )
-        return NULL; // leak.
-    if ( gpuEnv.mnKhrFp64Flag || gpuEnv.mnAmdFp64Flag )
-    {
-        clStatus = clEnqueueReadBuffer(kEnv.mpkCmdQueue,
-                                       outputCl,
-                                       CL_TRUE,0,
-                                       nElements * sizeof(double),
-                                       (void *)pResult,0,NULL,NULL);
-    }
-    else
-    {
-        float *afBuffer = new float[nElements];
-        if ( !afBuffer )
-            return NULL;
-        clStatus = clEnqueueReadBuffer(kEnv.mpkCmdQueue,
-                                       outputCl,
-                                       CL_TRUE,0,
-                                       nElements * sizeof(float),
-                                       (void *)afBuffer,0,NULL,NULL);
-        for ( int i = 0; i < (int)nElements; i++ )
-            pResult[i] = (double)afBuffer[i];
-        if ( !afBuffer )
-            delete [] afBuffer;
-    }
-    CHECK_OPENCL_PTR( clStatus, "clEnqueueReadBuffer" );
-
-    clStatus = clFinish( kEnv.mpkCmdQueue );
-    CHECK_OPENCL_PTR( clStatus, "clFinish" );
-
-    if ( valuesCl != NULL )
-    {
-        clStatus = clReleaseMemObject( valuesCl );
-        CHECK_OPENCL_PTR( clStatus, "clReleaseMemObject" );
-    }
-    if ( subtractCl != NULL )
-    {

... etc. - the rest is truncated


More information about the Libreoffice-commits mailing list