[Libreoffice-commits] core.git: Branch 'feature/calc-group-interpreter' - 2 commits - sc/source

Wed Jun 26 04:52:19 PDT 2013

 sc/source/core/opencl/oclkernels.hxx    |  144 ++-
 sc/source/core/opencl/openclwrapper.cxx | 1469 +++++++++++++++++++-------------
 sc/source/core/opencl/openclwrapper.hxx |  160 +--
 sc/source/core/tool/formulagroup.cxx    |  198 ++++
 sc/source/core/tool/interpr1.cxx        |    4 
 sc/source/ui/app/scmod.cxx              |   10 
 6 files changed, 1290 insertions(+), 695 deletions(-)

New commits:
commit e984d8f9e3dbfa39c1e2a21f9e5d81d97d556fba
Author: Michael Meeks <michael.meeks at suse.com>
Date:   Wed Jun 26 12:52:53 2013 +0100

    avoid srand / time / rand calls.

diff --git a/sc/source/core/opencl/openclwrapper.cxx b/sc/source/core/opencl/openclwrapper.cxx
index bcb9cd8..b06af59 100644
--- a/sc/source/core/opencl/openclwrapper.cxx
+++ b/sc/source/core/opencl/openclwrapper.cxx
@@ -900,8 +900,6 @@ double OclCalc::OclProcess(cl_kernel_function function, double *data, formulax t
 double OclCalc::OclTest() {
     double data[NUM];
 
-    srand((unsigned int) time(NULL));
-
     for (int i = 0; i < NUM; i++) {
         data[i] = sc::rng::uniform();
         fprintf(stderr, "%f\t", data[i]);
@@ -912,10 +910,9 @@ double OclCalc::OclTest() {
 
 double OclCalc::OclTestDll() {
     double data[NUM];
-    srand((unsigned int) time(NULL));
 
     for (int i = 0; i < NUM; i++) {
-        data[i] = rand() / (RAND_MAX + 1.0);
+        data[i] = sc::rng::uniform();
         fprintf(stderr, "%f\t", data[i]);
     }
     OclProcess(&OclFormulaxDll, data, AVG);
commit 0b9381812bab1cd85925cbc8a707185fd0a13e2c
Author: Jing Xian <jingxian at multicorewareinc.com>
Date:   Wed Jun 26 12:19:51 2013 +0100

    more work on formula interpretation.

diff --git a/sc/source/core/opencl/oclkernels.hxx b/sc/source/core/opencl/oclkernels.hxx
index f9db447..3269f3a 100644
--- a/sc/source/core/opencl/oclkernels.hxx
+++ b/sc/source/core/opencl/oclkernels.hxx
@@ -6,17 +6,15 @@
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
  */
-
 #ifndef _OCL_KERNEL_H_
 #define _OCL_KERNEL_H_
 
 #ifndef USE_EXTERNAL_KERNEL
 #define KERNEL( ... )# __VA_ARGS__
 
+
 /////////////////////////////////////////////
 const char *kernel_src = KERNEL(
-//Add kernel here
-//use \n ... \n to define macro
 __kernel void hello(__global uint *buffer)
 
 {
@@ -27,83 +25,134 @@ buffer[idx]=idx;
 }
 
 __kernel void oclformula(__global float *data,
-                       const uint type)
+					   const uint type)
 {
-    const unsigned int i = get_global_id(0);
-
-    switch (type)
-    {
-        case 0:          //MAX
-        {
-            //printf("%i ?%f>%f\n",i,data[2*i],data[2*i+1]);
-            if(data[2*i]>data[2*i+1])
-                data[i] = data[2*i];
-            else
-                data[i] = data[2*i+1];
-            break;
-        }
-        case 1:          //MIN
-        {
-            //printf("%d ?%d<%d\n",i,data[2*i],data[2*i+1]);
-            if(data[2*i]<data[2*i+1])
-                data[i] = data[2*i];
-            else
-                data[i] = data[2*i+1];
-            break;
-        }
-        case 2:          //SUM
-        case 3:          //AVG
-        {
-            //printf("%d %d+%d\n",i,data[2*i],data[2*i+1]);
-            data[i] = data[2*i] + data[2*i+1];
-            break;
-        }
-        default:
-            break;
-
-    }
+	const unsigned int i = get_global_id(0);
+
+	switch (type)
+	{
+		case 0:          //MAX
+		{
+			//printf("%i ?%f>%f\n",i,data[2*i],data[2*i+1]);
+			if(data[2*i]>data[2*i+1])
+				data[i] = data[2*i];
+			else
+				data[i] = data[2*i+1];
+			break;
+		}
+		case 1:          //MIN
+		{
+			//printf("%d ?%d<%d\n",i,data[2*i],data[2*i+1]);
+			if(data[2*i]<data[2*i+1])
+				data[i] = data[2*i];
+			else
+				data[i] = data[2*i+1];
+			break;
+		}
+		case 2:          //SUM
+		case 3:          //AVG
+		{
+			//printf("%d %d+%d\n",i,data[2*i],data[2*i+1]);
+			data[i] = data[2*i] + data[2*i+1];
+			break;
+		}
+		default:
+			break;
+
+	}
+}
 
+
+__kernel void oclSignedAdd(__global float *ltData,__global float *rtData,__global float *otData)
+{
+	const unsigned int id = get_global_id(0);
+	otData[id] = ltData[id] + rtData[id];
 }
 
-__kernel void oclFormulaMin(__global float *data,
-                            const uint type)
+
+__kernel void oclSignedSub(__global float *ltData,__global float *rtData,__global float *otData)
 {
+	const unsigned int id = get_global_id(0);
+	otData[id] = ltData[id] - rtData[id];
 
 }
 
-__kernel void oclFormulaMax(__global float *data,
-                            const uint type)
+__kernel void oclSignedMul(__global float *ltData,__global float *rtData,__global float *otData)
 {
+	int id = get_global_id(0);
+	otData[id] =ltData[id] * rtData[id];
+}
+
+__kernel void oclSignedDiv(__global float *ltData,__global float *rtData,__global float *otData)
+{
+	const unsigned int id = get_global_id(0);
+	otData[id] = ltData[id] / rtData[id];
+}
+
+__kernel void oclFormulaMin(__global float *input,__global int *start,__global int *end,__global float *output)
+{
+	const unsigned int id = get_global_id(0);
+	int i=0;
+	unsigned int startFlag = start[id];
+	unsigned int endFlag = end[id];
+	float min = input[startFlag];
+	for(i=startFlag;i<=endFlag;i++)
+	{
+		if(input[i]<min)
+			min = input[i];
+	}
+	output[id] = min;
+
+}
+
+__kernel void oclFormulaMax(__global float *input,__global int *start,__global int *end,__global float *output)
+{
+	const unsigned int id = get_global_id(0);
+	int i=0;
+	unsigned int startFlag = start[id];
+	unsigned int endFlag = end[id];
+	float max = input[startFlag];
+	for(i=startFlag;i<=endFlag;i++)
+	{
+		if(input[i]>max)
+			max = input[i];
+	}
+	output[id] = max;
 
 }
 
 __kernel void oclFormulaSum(__global float *data,
-                            const uint type)
+					   const uint type)
 {
 
 }
 
 __kernel void oclFormulaCount(__global float *data,
-                              const uint type)
+					   const uint type)
 {
 
 }
 
-__kernel void oclFormulaAverage(__global float *data,
-                                const uint type)
+__kernel void oclFormulaAverage(__global float *input,__global int *start,__global int *end,__global float *output)
 {
+	const unsigned int id = get_global_id(0);
+	int i=0;
+	float sum=0;
+	for(i = start[id];i<=end[id];i++)
+		sum += input[i];
+	output[id] = sum / (end[id]-start[id]+1);
 
 }
 
 
 __kernel void oclFormulaSumproduct(__global float *data,
-                                   const uint type)
+					   const uint type)
 {
 
 }
 
 __kernel void oclFormulaMinverse(__global float *data,
-                                 const uint type)
+					   const uint type)
 {
 
 }
@@ -112,5 +161,4 @@ __kernel void oclFormulaMinverse(__global float *data,
 
 #endif // USE_EXTERNAL_KERNEL
 #endif //_OCL_KERNEL_H_
-
 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sc/source/core/opencl/openclwrapper.cxx b/sc/source/core/opencl/openclwrapper.cxx
index 6c3935e..bcb9cd8 100644
--- a/sc/source/core/opencl/openclwrapper.cxx
+++ b/sc/source/core/opencl/openclwrapper.cxx
@@ -10,57 +10,140 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
-
 #include "random.hxx"
 #include "openclwrapper.hxx"
 #include "oclkernels.hxx"
+#ifdef WIN32
+#include <Windows.h>
+#endif
+//#define USE_KERNEL_FILE
+using namespace std;
+GPUEnv OpenclDevice::gpuEnv;
+int OpenclDevice::isInited =0;
 
+#ifdef WIN32
 
-inline int OpenclDevice::add_kernel_cfg(int kCount, const char *kName) {
-    strcpy(gpu_env.kernel_names[kCount], kName);
-    gpu_env.kernel_count++;
-    return 0;
+#define OPENCL_DLL_NAME "opencllo.dll"
+#define OCLERR -1
+#define OCLSUCCESS 1
+HINSTANCE HOpenclDll = NULL;
+    void *OpenclDll = NULL;
+
+int OpenclDevice::LoadOpencl()
+{
+	//fprintf(stderr, " LoadOpenclDllxx... \n");
+	OpenclDll = static_cast<HINSTANCE>(HOpenclDll);
+	OpenclDll = LoadLibrary(OPENCL_DLL_NAME);
+	if (!static_cast<HINSTANCE>(OpenclDll))
+	{
+		fprintf(stderr, " Load opencllo.dll failed! \n");
+		FreeLibrary(static_cast<HINSTANCE>(OpenclDll));
+		return OCLERR;
+	}
+	fprintf(stderr, " Load opencllo.dll successfully!\n");
+	return OCLSUCCESS;
 }
 
-int OpenclDevice::regist_opencl_kernel() {
-    if (!gpu_env.isUserCreated) {
-        memset(&gpu_env, 0, sizeof(gpu_env));
-    }
+void OpenclDevice::FreeOpenclDll()
+{
+	fprintf(stderr, " Free opencllo.dll ... \n");
+	if(!static_cast<HINSTANCE>(OpenclDll))
+		FreeLibrary(static_cast<HINSTANCE>(OpenclDll));
+}
+#endif
 
-    gpu_env.file_count = 0; //argc;
-    gpu_env.kernel_count = 0UL;
-
-    add_kernel_cfg(0, (const char*) "hello");
-    add_kernel_cfg(1, (const char*) "oclformula");
-    add_kernel_cfg(2, (const char*) "oclFormulaMin");
-    add_kernel_cfg(3, (const char*) "oclFormulaMax");
-    add_kernel_cfg(4, (const char*) "oclFormulaSum");
-    add_kernel_cfg(5, (const char*) "oclFormulaCount");
-    add_kernel_cfg(6, (const char*) "oclFormulaAverage");
-    add_kernel_cfg(7, (const char*) "oclFormulaSumproduct");
-    add_kernel_cfg(8, (const char*) "oclFormulaMinverse");
+int OpenclDevice::InitEnv()
+{
+#ifdef WIN32
+	while(1){
+	    if(1==LoadOpencl())
+			break;
+	}
+#endif
+	InitOpenclRunEnv(0,NULL);
+	return 1;
+}
+
+int OpenclDevice::ReleaseOpenclRunEnv() {
+	ReleaseOpenclEnv(&gpuEnv);
+#ifdef WIN32
+	FreeOpenclDll();
+#endif
+    return 1;
+}
+///////////////////////////////////////////////////////
+///////////////////////////////////////////////////////
+inline int OpenclDevice::AddKernelConfig(int kCount, const char *kName) {
+    strcpy(gpuEnv.kernelNames[kCount], kName);
+    gpuEnv.kernelCount++;
     return 0;
 }
-OpenclDevice::OpenclDevice() :
-        isInited(0) {
 
+int OpenclDevice::RegistOpenclKernel() {
+    if (!gpuEnv.isUserCreated) {
+        memset(&gpuEnv, 0, sizeof(gpuEnv));
+    }
+
+    gpuEnv.fileCount = 0; //argc;
+    gpuEnv.kernelCount = 0UL;
+
+    AddKernelConfig(0, (const char*) "hello");
+    AddKernelConfig(1, (const char*) "oclformula");
+    AddKernelConfig(2, (const char*) "oclFormulaMin");
+    AddKernelConfig(3, (const char*) "oclFormulaMax");
+    AddKernelConfig(4, (const char*) "oclFormulaSum");
+    AddKernelConfig(5, (const char*) "oclFormulaCount");
+    AddKernelConfig(6, (const char*) "oclFormulaAverage");
+    AddKernelConfig(7, (const char*) "oclFormulaSumproduct");
+    AddKernelConfig(8, (const char*) "oclFormulaMinverse");
+
+    AddKernelConfig(9,  (const char*) "oclSignedAdd");
+    AddKernelConfig(10, (const char*) "oclSignedSub");
+    AddKernelConfig(11, (const char*) "oclSignedMul");
+    AddKernelConfig(12, (const char*) "oclSignedDiv");
+	return 0;
+}
+OpenclDevice::OpenclDevice(){
+	//InitEnv();
 }
 
 OpenclDevice::~OpenclDevice() {
+	//ReleaseOpenclRunEnv();
+}
 
+int OpenclDevice::CheckKernelName(KernelEnv *envInfo,const char *kernelName){
+    //printf("CheckKernelName,total count of kernels...%d\n", gpuEnv.kernelCount);
+    int kCount;
+    for(kCount=0; kCount < gpuEnv.kernelCount; kCount++) {
+        if(strcasecmp(kernelName, gpuEnv.kernelNames[kCount]) == 0) {
+	    printf("match  %s kernel right\n",kernelName);
+	    break;
+        }
+    }
+    envInfo->context      = gpuEnv.context;
+    envInfo->commandQueue = gpuEnv.commandQueue;
+    envInfo->program      = gpuEnv.programs[0];
+    envInfo->kernel       = gpuEnv.kernels[kCount];
+    strcpy(envInfo->kernelName, kernelName);
+    if (envInfo == (KernelEnv *) NULL)
+    {
+        printf("get err func and env\n");
+        return 0;
+    }
+    return 1;
 }
-#ifdef USE_KERNEL_FILE
-int OpenclDevice::convert_to_string(const char *filename, char **source) {
+
+int OpenclDevice::ConvertToString(const char *filename, char **source) {
     int file_size;
     size_t result;
     FILE *file = NULL;
-
     file_size = 0;
     result = 0;
     file = fopen(filename, "rb+");
-    printf("open kernel file %s.\n", filename);
+    printf("open kernel file %s.\n",filename);
 
     if (file != NULL) {
+		printf("Open ok!\n");
         fseek(file, 0, SEEK_END);
 
         file_size = ftell(file);
@@ -82,68 +165,41 @@ int OpenclDevice::convert_to_string(const char *filename, char **source) {
     printf("open kernel file failed.\n");
     return (0);
 }
-#endif
-int OpenclDevice::binary_generated(cl_context context,
-        const char * cl_file_name, FILE ** fhandle) {
-    unsigned int i = 0;
-    cl_int status;
-
-    size_t numDevices;
-
-    cl_device_id *devices;
-
-    char *str = NULL;
-
-    FILE *fd = NULL;
-
-    status = clGetContextInfo(context, CL_CONTEXT_NUM_DEVICES,
-            sizeof(numDevices), &numDevices, NULL);
-
-    CHECK_OPENCL(status)
-
-    devices = (cl_device_id*) malloc(sizeof(cl_device_id) * numDevices);
-
-    if (devices == NULL) {
-        return 0;
-    }
-
-    /* grab the handles to all of the devices in the context. */
-    status = clGetContextInfo(context, CL_CONTEXT_DEVICES,
-            sizeof(cl_device_id) * numDevices, devices, NULL);
-
-    status = 0;
-    /* dump out each binary into its own separate file. */
-    for (i = 0; i < numDevices; i++) {
-        char fileName[256] = { 0 }, cl_name[128] = { 0 };
-
-        if (devices[i] != 0) {
-            char deviceName[1024];
-            status = clGetDeviceInfo(devices[i], CL_DEVICE_NAME,
-                    sizeof(deviceName), deviceName, NULL);
-            CHECK_OPENCL(status)
-            str = (char*) strstr(cl_file_name, (char*) ".cl");
-            memcpy(cl_name, cl_file_name, str - cl_file_name);
-            cl_name[str - cl_file_name] = '\0';
-            sprintf(fileName, "./%s-%s.bin", cl_name, deviceName);
-            fd = fopen(fileName, "rb");
-            status = (fd != NULL) ? 1 : 0;
-        }
 
-    }
-
-    if (devices != NULL) {
-        free(devices);
-        devices = NULL;
-    }
-
-    if (fd != NULL) {
-        *fhandle = fd;
-    }
+int OpenclDevice::BinaryGenerated(const char * clFileName, FILE ** fhandle) {
+    unsigned int i = 0;
+	cl_int status;
+	char *str = NULL;
+	FILE *fd = NULL;
+	cl_uint numDevices=0;
+	status = clGetDeviceIDs(gpuEnv.platform, // platform
+							CL_DEVICE_TYPE_GPU, // device_type
+							0, // num_entries
+							NULL, // devices
+							&numDevices);
+	for (i = 0; i <numDevices; i++) {
+		char fileName[256] = { 0 }, cl_name[128] = { 0 };
+		if (gpuEnv.devices[i] != 0) {
+			char deviceName[1024];
+			status = clGetDeviceInfo(gpuEnv.devices[i], CL_DEVICE_NAME,sizeof(deviceName), deviceName, NULL);
+			CHECK_OPENCL(status);
+			str = (char*) strstr(clFileName, (char*) ".cl");
+			memcpy(cl_name, clFileName, str - clFileName);
+			cl_name[str - clFileName] = '\0';
+			sprintf(fileName, "./%s-%s.bin", cl_name, deviceName);
+			fd = fopen(fileName, "rb");
+			status = (fd != NULL) ? 1 : 0;
+			}
+		}
+		if (fd != NULL) {
+			*fhandle = fd;
+			}
+
+		return status;
 
-    return status;
 }
 
-int OpenclDevice::write_binary_to_file(const char* fileName, const char* birary,
+int OpenclDevice::WriteBinaryToFile(const char* fileName, const char* birary,
         size_t numBytes) {
     FILE *output = NULL;
     output = fopen(fileName, "wb");
@@ -155,11 +211,12 @@ int OpenclDevice::write_binary_to_file(const char* fileName, const char* birary,
     fclose(output);
 
     return 1;
+
 }
 
-int OpenclDevice::generat_bin_from_kernel_source(cl_program program,
-        const char * cl_file_name) {
-    unsigned int i = 0;
+int OpenclDevice::GeneratBinFromKernelSource(cl_program program,
+        const char * clFileName) {
+     unsigned int i = 0;
     cl_int status;
     size_t *binarySizes, numDevices;
     cl_device_id *devices;
@@ -216,12 +273,12 @@ int OpenclDevice::generat_bin_from_kernel_source(cl_program program,
                     sizeof(deviceName), deviceName, NULL);
             CHECK_OPENCL(status)
 
-            str = (char*) strstr(cl_file_name, (char*) ".cl");
-            memcpy(cl_name, cl_file_name, str - cl_file_name);
-            cl_name[str - cl_file_name] = '\0';
+            str = (char*) strstr(clFileName, (char*) ".cl");
+            memcpy(cl_name, clFileName, str - clFileName);
+            cl_name[str - clFileName] = '\0';
             sprintf(fileName, "./%s-%s.bin", cl_name, deviceName);
 
-            if (!write_binary_to_file(fileName, binaries[i], binarySizes[i])) {
+            if (!WriteBinaryToFile(fileName, binaries[i], binarySizes[i])) {
                 printf("opencl-wrapper: write binary[%s] failds\n", fileName);
                 return 0;
             } //else
@@ -254,164 +311,36 @@ int OpenclDevice::generat_bin_from_kernel_source(cl_program program,
     return 1;
 }
 
-int OpenclDevice::init_opencl_attr(OpenCLEnv * env) {
-    if (gpu_env.isUserCreated) {
+int OpenclDevice::InitOpenclAttr(OpenCLEnv * env) {
+    if (gpuEnv.isUserCreated) {
         return 1;
     }
 
-    gpu_env.context = env->context;
-    gpu_env.platform = env->platform;
-    gpu_env.dev = env->devices;
-    gpu_env.command_queue = env->command_queue;
+    gpuEnv.context = env->context;
+    gpuEnv.platform = env->platform;
+    gpuEnv.dev = env->devices;
+    gpuEnv.commandQueue = env->commandQueue;
 
-    gpu_env.isUserCreated = 1;
+    gpuEnv.isUserCreated = 1;
 
     return 0;
 }
 
-int OpenclDevice::create_kernel(char * kernelname, KernelEnv * env) {
+int OpenclDevice::CreateKernel(char * kernelname, KernelEnv * env) {
     int status;
 
-    env->kernel = clCreateKernel(gpu_env.programs[0], kernelname, &status);
-    env->context = gpu_env.context;
-    env->command_queue = gpu_env.command_queue;
+    env->kernel = clCreateKernel(gpuEnv.programs[0], kernelname, &status);
+    env->context = gpuEnv.context;
+    env->commandQueue = gpuEnv.commandQueue;
     return status != CL_SUCCESS ? 1 : 0;
 }
 
-int OpenclDevice::release_kernel(KernelEnv * env) {
+int OpenclDevice::ReleaseKernel(KernelEnv * env) {
     int status = clReleaseKernel(env->kernel);
     return status != CL_SUCCESS ? 1 : 0;
 }
 
-int OpenclDevice::init_opencl_env(GPUEnv *gpu_info) {
-    size_t length;
-    cl_int status;
-    cl_uint numPlatforms, numDevices;
-    cl_platform_id *platforms;
-    cl_context_properties cps[3];
-    char platformName[100];
-    unsigned int i;
-
-    /*
-     * Have a look at the available platforms.
-     */
-    if (!gpu_info->isUserCreated) {
-        status = clGetPlatformIDs(0, NULL, &numPlatforms);
-        if (status != CL_SUCCESS) {
-            return (1);
-        }
-        gpu_info->platform = NULL;
-        ;
-        if (0 < numPlatforms) {
-            platforms = (cl_platform_id*) malloc(
-                    numPlatforms * sizeof(cl_platform_id));
-            if (platforms == (cl_platform_id*) NULL) {
-                return (1);
-            }
-            status = clGetPlatformIDs(numPlatforms, platforms, NULL);
-
-            if (status != CL_SUCCESS) {
-                return (1);
-            }
-
-            for (i = 0; i < numPlatforms; i++) {
-                status = clGetPlatformInfo(platforms[i], CL_PLATFORM_VENDOR,
-                        sizeof(platformName), platformName, NULL);
-
-                if (status != CL_SUCCESS) {
-                    return (1);
-                }
-                gpu_info->platform = platforms[i];
-
-                //if (!strcmp(platformName, "Intel(R) Coporation"))
-                //if( !strcmp( platformName, "Advanced Micro Devices, Inc." ))
-                {
-                    gpu_info->platform = platforms[i];
-
-                    status = clGetDeviceIDs(gpu_info->platform /* platform */,
-                            CL_DEVICE_TYPE_GPU /* device_type */,
-                            0 /* num_entries */, NULL /* devices */,
-                            &numDevices);
-
-                    if (status != CL_SUCCESS) {
-                        return (1);
-                    }
-
-                    if (numDevices) {
-                        break;
-                    }
-                }
-            }
-            free(platforms);
-        }
-        if (NULL == gpu_info->platform) {
-            return (1);
-        }
-
-        /*
-         * Use available platform.
-         */
-        cps[0] = CL_CONTEXT_PLATFORM;
-        cps[1] = (cl_context_properties) gpu_info->platform;
-        cps[2] = 0;
-        /* Check for GPU. */
-        gpu_info->dType = CL_DEVICE_TYPE_GPU;
-        gpu_info->context = clCreateContextFromType(cps, gpu_info->dType, NULL,
-                NULL, &status);
-
-        if ((gpu_info->context == (cl_context) NULL)
-                || (status != CL_SUCCESS)) {
-            gpu_info->dType = CL_DEVICE_TYPE_CPU;
-            gpu_info->context = clCreateContextFromType(cps, gpu_info->dType,
-                    NULL, NULL, &status);
-        }
-        if ((gpu_info->context == (cl_context) NULL)
-                || (status != CL_SUCCESS)) {
-            gpu_info->dType = CL_DEVICE_TYPE_DEFAULT;
-            gpu_info->context = clCreateContextFromType(cps, gpu_info->dType,
-                    NULL, NULL, &status);
-        }
-        if ((gpu_info->context == (cl_context) NULL)
-                || (status != CL_SUCCESS)) {
-            return (1);
-        }
-        /* Detect OpenCL devices. */
-        /* First, get the size of device list data */
-        status = clGetContextInfo(gpu_info->context, CL_CONTEXT_DEVICES, 0,
-                NULL, &length);
-
-        if ((status != CL_SUCCESS) || (length == 0)) {
-            return (1);
-        }
-        /* Now allocate memory for device list based on the size we got earlier */
-        gpu_info->devices = (cl_device_id*) malloc(length);
-        if (gpu_info->devices == (cl_device_id*) NULL) {
-            return (1);
-        }
-        /* Now, get the device list data */
-        status = clGetContextInfo(gpu_info->context, CL_CONTEXT_DEVICES, length,
-                gpu_info->devices, NULL);
-
-        if (status != CL_SUCCESS) {
-            return (1);
-        }
-
-        /* Create OpenCL command queue. */
-        gpu_info->command_queue = clCreateCommandQueue(gpu_info->context,
-                gpu_info->devices[0], 0, &status);
-
-        if (status != CL_SUCCESS) {
-            return (1);
-        }
-    }
-
-    status = clGetCommandQueueInfo(gpu_info->command_queue,
-            CL_QUEUE_THREAD_HANDLE_AMD, 0, NULL, NULL);
-
-    return 0;
-}
-
-int OpenclDevice::release_opencl_env(GPUEnv *gpu_info) {
+int OpenclDevice::ReleaseOpenclEnv(GPUEnv *gpuInfo) {
     int i = 0;
     int status = 0;
 
@@ -419,60 +348,44 @@ int OpenclDevice::release_opencl_env(GPUEnv *gpu_info) {
         return 1;
     }
 
-    for (i = 0; i < gpu_env.file_count; i++) {
-        if (gpu_env.programs[i]) {
-            status = clReleaseProgram(gpu_env.programs[i]);
+    for (i = 0; i < gpuEnv.fileCount; i++) {
+        if (gpuEnv.programs[i]) {
+            status = clReleaseProgram(gpuEnv.programs[i]);
             CHECK_OPENCL(status)
-            gpu_env.programs[i] = NULL;
+            gpuEnv.programs[i] = NULL;
         }
     }
-    if (gpu_env.command_queue) {
-        clReleaseCommandQueue(gpu_env.command_queue);
-        gpu_env.command_queue = NULL;
+    if (gpuEnv.commandQueue) {
+        clReleaseCommandQueue(gpuEnv.commandQueue);
+        gpuEnv.commandQueue = NULL;
     }
-    if (gpu_env.context) {
-        clReleaseContext(gpu_env.context);
-        gpu_env.context = NULL;
+    if (gpuEnv.context) {
+        clReleaseContext(gpuEnv.context);
+        gpuEnv.context = NULL;
     }
     isInited = 0;
-    gpu_info->isUserCreated = 0;
-    free(gpu_info->devices);
+    gpuInfo->isUserCreated = 0;
+    free(gpuInfo->devices);
     return 1;
 }
 
-int OpenclDevice::run_kernel_wrapper(cl_kernel_function function,
-        char * kernel_name, void **usrdata) {
-    printf("oclwrapper:run_kernel_wrapper...\n");
-    if (register_kernel_wrapper(kernel_name, function) != 1) {
+int OpenclDevice::RunKernelWrapper(cl_kernel_function function,
+        const char * kernelName, void **usrdata) {
+    printf("oclwrapper:RunKernel_wrapper...\n");
+    if (RegisterKernelWrapper(kernelName, function) != 1) {
         fprintf(stderr,
-                "Error:run_kernel_wrapper:register_kernel_wrapper fail!\n");
+                "Error:RunKernel_wrapper:RegisterKernelWrapper fail!\n");
         return -1;
     }
-    return (run_kernel(kernel_name, usrdata));
-}
-
-int OpenclDevice::register_kernel_wrapper(const char *kernel_name,
-        cl_kernel_function function) {
-    int i;
-    printf("oclwrapper:register_kernel_wrapper...%d\n", gpu_env.kernel_count);
-    for (i = 0; i < gpu_env.kernel_count; i++) {
-        //printf("oclwrapper:register_kernel_wrapper kname...%s\n", kernel_name);
-        //printf("oclwrapper:register_kernel_wrapper kname...%s\n", gpu_env.kernel_names[i]);
-        if (strcasecmp(kernel_name, gpu_env.kernel_names[i]) == 0) {
-            //printf("oclwrapper:register_kernel_wrapper if()...\n");
-            gpu_env.kernel_functions[i] = function;
-            return (1);
-        }
-    }
-    return (0);
+    return (RunKernel(kernelName, usrdata));
 }
 
-int OpenclDevice::cached_of_kerner_prg(const GPUEnv *gpu_env_cached,
-        const char * cl_file_name) {
-    int i;
-    for (i = 0; i < gpu_env_cached->file_count; i++) {
-        if (strcasecmp(gpu_env_cached->kernelSrcFile[i], cl_file_name) == 0) {
-            if (gpu_env_cached->programs[i] != NULL) {
+int OpenclDevice::CachedOfKernerPrg(const GPUEnv *gpuEnvCached,
+        const char * clFileName) {
+  int i;
+    for (i = 0; i < gpuEnvCached->fileCount; i++) {
+        if (strcasecmp(gpuEnvCached->kernelSrcFile[i], clFileName) == 0) {
+            if (gpuEnvCached->programs[i] != NULL) {
                 return (1);
             }
         }
@@ -481,37 +394,30 @@ int OpenclDevice::cached_of_kerner_prg(const GPUEnv *gpu_env_cached,
     return (0);
 }
 
-int OpenclDevice::compile_kernel_file(GPUEnv *gpu_info, const char *build_option) {
+int OpenclDevice::CompileKernelFile(GPUEnv *gpuInfo, const char *buildOption) {
     cl_int status;
-
     size_t length;
-
     char *buildLog = NULL, *binary;
-
     const char *source;
     size_t source_size[1];
-
     int b_error, binary_status, binaryExisted, idx;
-
     size_t numDevices;
-
     cl_device_id *devices;
-
     FILE *fd, *fd1;
     const char* filename = "kernel.cl";
-    if (cached_of_kerner_prg(gpu_info, filename) == 1) {
+	fprintf(stderr, "CompileKernelFile ... \n");
+    if (CachedOfKernerPrg(gpuInfo, filename) == 1) {
         return (1);
     }
 
-    idx = gpu_info->file_count;
+    idx = gpuInfo->fileCount;
 
     source = kernel_src;
 
     source_size[0] = strlen(source);
-
     binaryExisted = 0;
-    if ((binaryExisted = binary_generated(gpu_info->context, filename, &fd)) == 1) {
-        status = clGetContextInfo(gpu_info->context, CL_CONTEXT_NUM_DEVICES,
+    if ((binaryExisted = BinaryGenerated(filename, &fd)) == 1) {
+        status = clGetContextInfo(gpuInfo->context, CL_CONTEXT_NUM_DEVICES,
                 sizeof(numDevices), &numDevices, NULL);
         CHECK_OPENCL(status)
 
@@ -543,11 +449,11 @@ int OpenclDevice::compile_kernel_file(GPUEnv *gpu_info, const char *build_option
         fclose(fd);
         fd = NULL;
         // grab the handles to all of the devices in the context.
-        status = clGetContextInfo(gpu_info->context, CL_CONTEXT_DEVICES,
+        status = clGetContextInfo(gpuInfo->context, CL_CONTEXT_DEVICES,
                 sizeof(cl_device_id) * numDevices, devices, NULL);
         CHECK_OPENCL(status)
 
-        gpu_info->programs[idx] = clCreateProgramWithBinary(gpu_info->context,
+        gpuInfo->programs[idx] = clCreateProgramWithBinary(gpuInfo->context,
                 numDevices, devices, &length, (const unsigned char**) &binary,
                 &binary_status, &status);
         CHECK_OPENCL(status)
@@ -556,40 +462,37 @@ int OpenclDevice::compile_kernel_file(GPUEnv *gpu_info, const char *build_option
         free(devices);
         devices = NULL;
     } else {
-
         // create a CL program using the kernel source
-        gpu_info->programs[idx] = clCreateProgramWithSource(gpu_info->context,
+        gpuEnv.programs[idx] = clCreateProgramWithSource(gpuEnv.context,
                 1, &source, source_size, &status);
-        CHECK_OPENCL(status)
-
-        printf("clCreateProgramWithSource.\n");
+        CHECK_OPENCL(status);
     }
 
-    if (gpu_info->programs[idx] == (cl_program) NULL) {
+    if (gpuInfo->programs[idx] == (cl_program) NULL) {
         return (0);
     }
 
     //char options[512];
     // create a cl program executable for all the devices specified
-    if (!gpu_info->isUserCreated) {
-        status = clBuildProgram(gpu_info->programs[idx], 1, gpu_info->devices,
-                build_option, NULL, NULL);
+    if (!gpuInfo->isUserCreated) {
+        status = clBuildProgram(gpuInfo->programs[idx], 1, gpuInfo->devices,
+                buildOption, NULL, NULL);
         CHECK_OPENCL(status)
     } else {
-        status = clBuildProgram(gpu_info->programs[idx], 1, &(gpu_info->dev),
-                build_option, NULL, NULL);
+        status = clBuildProgram(gpuInfo->programs[idx], 1, &(gpuInfo->dev),
+                buildOption, NULL, NULL);
         CHECK_OPENCL(status)
     }
     printf("BuildProgram.\n");
 
     if (status != CL_SUCCESS) {
-        if (!gpu_info->isUserCreated) {
-            status = clGetProgramBuildInfo(gpu_info->programs[idx],
-                    gpu_info->devices[0], CL_PROGRAM_BUILD_LOG, 0, NULL,
+        if (!gpuInfo->isUserCreated) {
+            status = clGetProgramBuildInfo(gpuInfo->programs[idx],
+                    gpuInfo->devices[0], CL_PROGRAM_BUILD_LOG, 0, NULL,
                     &length);
         } else {
-            status = clGetProgramBuildInfo(gpu_info->programs[idx],
-                    gpu_info->dev, CL_PROGRAM_BUILD_LOG, 0, NULL, &length);
+            status = clGetProgramBuildInfo(gpuInfo->programs[idx],
+                    gpuInfo->dev, CL_PROGRAM_BUILD_LOG, 0, NULL, &length);
         }
         if (status != CL_SUCCESS) {
             printf("opencl create build log fail\n");
@@ -599,13 +502,13 @@ int OpenclDevice::compile_kernel_file(GPUEnv *gpu_info, const char *build_option
         if (buildLog == (char*) NULL) {
             return (0);
         }
-        if (!gpu_info->isUserCreated) {
-            status = clGetProgramBuildInfo(gpu_info->programs[idx],
-                    gpu_info->devices[0], CL_PROGRAM_BUILD_LOG, length,
+        if (!gpuInfo->isUserCreated) {
+            status = clGetProgramBuildInfo(gpuInfo->programs[idx],
+                    gpuInfo->devices[0], CL_PROGRAM_BUILD_LOG, length,
                     buildLog, &length);
         } else {
-            status = clGetProgramBuildInfo(gpu_info->programs[idx],
-                    gpu_info->dev, CL_PROGRAM_BUILD_LOG, length, buildLog,
+            status = clGetProgramBuildInfo(gpuInfo->programs[idx],
+                    gpuInfo->dev, CL_PROGRAM_BUILD_LOG, length, buildLog,
                     &length);
         }
 
@@ -619,199 +522,35 @@ int OpenclDevice::compile_kernel_file(GPUEnv *gpu_info, const char *build_option
         return (0);
     }
 
-    strcpy(gpu_env.kernelSrcFile[idx], filename);
+    strcpy(gpuEnv.kernelSrcFile[idx], filename);
 
     if (binaryExisted == 0)
-        generat_bin_from_kernel_source(gpu_env.programs[idx], filename);
+        GeneratBinFromKernelSource(gpuEnv.programs[idx], filename);
 
-    gpu_info->file_count += 1;
+    gpuInfo->fileCount += 1;
 
     return (1);
-}
-
-int OpenclDevice::compile_kernel_file(const char *filename, GPUEnv *gpu_info,
-        const char *build_option) {
-    cl_int status;
-
-    size_t length;
-
-#ifdef USE_KERNEL_FILE
-    char
-    *source_str;
-#endif
-    char *buildLog = NULL, *binary;
-
-    const char *source;
-    size_t source_size[1];
-
-    int b_error, binary_status, binaryExisted, idx;
-
-    size_t numDevices;
-
-    cl_device_id *devices;
-
-    FILE *fd, *fd1;
-
-    if (cached_of_kerner_prg(gpu_info, filename) == 1) {
-        return (1);
-    }
-
-    idx = gpu_info->file_count;
-#ifdef USE_KERNEL_FILE
-    status = convert_to_string( filename, &source_str, gpu_info, idx );
-
-    if( status == 0 )
-    {
-        printf("convert_to_string failed.\n");
-        return(0);
-    }
-    source = source_str;
-#else
-
-    source = kernel_src;
-#endif
-    source_size[0] = strlen(source);
-
-    binaryExisted = 0;
-    if ((binaryExisted = binary_generated(gpu_info->context, filename, &fd))
-            == 1) {
-        status = clGetContextInfo(gpu_info->context, CL_CONTEXT_NUM_DEVICES,
-                sizeof(numDevices), &numDevices, NULL);
-        CHECK_OPENCL(status)
-
-        devices = (cl_device_id*) malloc(sizeof(cl_device_id) * numDevices);
-        if (devices == NULL) {
-            return 0;
-        }
-
-        b_error = 0;
-        length = 0;
-        b_error |= fseek(fd, 0, SEEK_END) < 0;
-        b_error |= (length = ftell(fd)) <= 0;
-        b_error |= fseek(fd, 0, SEEK_SET) < 0;
-        if (b_error) {
-            return 0;
-        }
-
-        binary = (char*) malloc(length + 2);
-        if (!binary) {
-            return 0;
-        }
-
-        memset(binary, 0, length + 2);
-        b_error |= fread(binary, 1, length, fd) != length;
-        if (binary[length - 1] != '\n') {
-            binary[length++] = '\n';
-        }
-
-        fclose(fd);
-        fd = NULL;
-        /* grab the handles to all of the devices in the context. */
-        status = clGetContextInfo(gpu_info->context, CL_CONTEXT_DEVICES,
-                sizeof(cl_device_id) * numDevices, devices, NULL);
-        CHECK_OPENCL(status)
-
-        gpu_info->programs[idx] = clCreateProgramWithBinary(gpu_info->context,
-                numDevices, devices, &length, (const unsigned char**) &binary,
-                &binary_status, &status);
-        CHECK_OPENCL(status)
-
-        free(binary);
-        free(devices);
-        devices = NULL;
-    } else {
-
-        // create a CL program using the kernel source
-        gpu_info->programs[idx] = clCreateProgramWithSource(gpu_info->context,
-                1, &source, source_size, &status);
-        CHECK_OPENCL(status)
-#ifdef USE_KERNEL_FILE
-        free((char*)source);
-#endif
-        printf("clCreateProgramWithSource.\n");
-    }
-
-    if (gpu_info->programs[idx] == (cl_program) NULL) {
-        return (0);
-    }
-
-    //char options[512];
-    // create a cl program executable for all the devices specified
-    if (!gpu_info->isUserCreated) {
-        status = clBuildProgram(gpu_info->programs[idx], 1, gpu_info->devices,
-                build_option, NULL, NULL);
-        CHECK_OPENCL(status)
-    } else {
-        status = clBuildProgram(gpu_info->programs[idx], 1, &(gpu_info->dev),
-                build_option, NULL, NULL);
-        CHECK_OPENCL(status)
-    }
-    printf("BuildProgram.\n");
-
-    if (status != CL_SUCCESS) {
-        if (!gpu_info->isUserCreated) {
-            status = clGetProgramBuildInfo(gpu_info->programs[idx],
-                    gpu_info->devices[0], CL_PROGRAM_BUILD_LOG, 0, NULL,
-                    &length);
-        } else {
-            status = clGetProgramBuildInfo(gpu_info->programs[idx],
-                    gpu_info->dev, CL_PROGRAM_BUILD_LOG, 0, NULL, &length);
-        }
-        if (status != CL_SUCCESS) {
-            printf("opencl create build log fail\n");
-            return (0);
-        }
-        buildLog = (char*) malloc(length);
-        if (buildLog == (char*) NULL) {
-            return (0);
-        }
-        if (!gpu_info->isUserCreated) {
-            status = clGetProgramBuildInfo(gpu_info->programs[idx],
-                    gpu_info->devices[0], CL_PROGRAM_BUILD_LOG, length,
-                    buildLog, &length);
-        } else {
-            status = clGetProgramBuildInfo(gpu_info->programs[idx],
-                    gpu_info->dev, CL_PROGRAM_BUILD_LOG, length, buildLog,
-                    &length);
-        }
-
-        fd1 = fopen("kernel-build.log", "w+");
-        if (fd1 != NULL) {
-            fwrite(buildLog, sizeof(char), length, fd1);
-            fclose(fd1);
-        }
 
-        free(buildLog);
-        return (0);
-    }
 
-    strcpy(gpu_env.kernelSrcFile[idx], filename);
-
-    if (binaryExisted == 0)
-        generat_bin_from_kernel_source(gpu_env.programs[idx], filename);
-
-    gpu_info->file_count += 1;
-
-    return (1);
 }
-
-int OpenclDevice::get_kernel_env_and_func(const char *kernel_name,
+int OpenclDevice::GetKernelEnvAndFunc(const char *kernelName, // looks kernelName up in gpuEnv; returns 1 and fills *env / *function on a match, 0 otherwise
         KernelEnv *env, cl_kernel_function *function) {
     int i; //,program_idx ;
-    for (i = 0; i < gpu_env.kernel_count; i++) {
-        if (strcasecmp(kernel_name, gpu_env.kernel_names[i]) == 0) {
-            env->context = gpu_env.context;
-            env->command_queue = gpu_env.command_queue;
-            env->program = gpu_env.programs[0];
-            env->kernel = gpu_env.kernels[i];
-            *function = gpu_env.kernel_functions[i];
+    printf("----------------OpenclDevice::GetKernelEnvAndFunc\n");
+    for (i = 0; i < gpuEnv.kernelCount; i++) { // linear scan over the registered kernel names
+        if (strcasecmp(kernelName, gpuEnv.kernelNames[i]) == 0) { // comparison ignores case
+            env->context = gpuEnv.context;
+            env->commandQueue = gpuEnv.commandQueue;
+            env->program = gpuEnv.programs[0]; // always the first program, whichever index matched
+            env->kernel = gpuEnv.kernels[i];
+            *function = gpuEnv.kernelFunctions[i];
             return (1);
         }
     }
     return (0);
 }
 
-int OpenclDevice::run_kernel(const char *kernel_name, void **userdata) {
+int OpenclDevice::RunKernel(const char *kernelName, void **userdata) {
     KernelEnv env;
 
     cl_kernel_function function;
@@ -819,8 +558,8 @@ int OpenclDevice::run_kernel(const char *kernel_name, void **userdata) {
     int status;
 
     memset(&env, 0, sizeof(KernelEnv));
-    status = get_kernel_env_and_func(kernel_name, &env, &function);
-    strcpy(env.kernel_name, kernel_name);
+    status = GetKernelEnvAndFunc(kernelName, &env, &function);
+    strcpy(env.kernelName, kernelName);
     if (status == 1) {
         if (&env == (KernelEnv *) NULL
                 || &function == (cl_kernel_function *) NULL) {
@@ -830,11 +569,9 @@ int OpenclDevice::run_kernel(const char *kernel_name, void **userdata) {
     }
     return (0);
 }
-
-int OpenclDevice::init_opencl_run_env(int argc, const char *build_option_kernelfiles)
+int OpenclDevice::InitOpenclRunEnv(int argc, const char *buildOptionKernelfiles)
 {
     int status = 0;
-
     if (MAX_CLKERNEL_NUM <= 0) {
         return 1;
     }
@@ -843,82 +580,177 @@ int OpenclDevice::init_opencl_run_env(int argc, const char *build_option_kernelf
     }
 
     if (!isInited) {
-        printf("regist_opencl_kernel start.\n");
-        regist_opencl_kernel();
+        RegistOpenclKernel();
         //initialize devices, context, comand_queue
-        status = init_opencl_env(&gpu_env);
+        status = InitOpenclRunEnv(&gpuEnv);
         if (status) {
             printf("init_opencl_env failed.\n");
             return (1);
         }
         printf("init_opencl_env successed.\n");
-        //initialize program, kernel_name, kernel_count
-        status = compile_kernel_file( &gpu_env, build_option_kernelfiles);
-        if (status == 0 || gpu_env.kernel_count == 0) {
-            printf("compile_kernel_file failed.\n");
+        //initialize program, kernelName, kernelCount
+        status = CompileKernelFile( &gpuEnv, buildOptionKernelfiles);
+        if (status == 0 || gpuEnv.kernelCount == 0) {
+            printf("CompileKernelFile failed.\n");
             return (1);
         }
-        printf("compile_kernel_file successed.\n");
+        printf("CompileKernelFile successed.\n");
         isInited = 1;
     }
-
     return (0);
 }
 
-int OpenclDevice::init_opencl_run_env(int argc, const char *argv_kernelfiles[],
-        const char *build_option_kernelfiles) {
-    int status = 0;
+int OpenclDevice::InitOpenclRunEnv(GPUEnv *gpuInfo) // fills *gpuInfo with platform, context, device list and command queue; returns 0 on success, 1 on any failure
+{
+    size_t length;
+    cl_int status;
+    cl_uint numPlatforms, numDevices;
+    cl_platform_id *platforms;
+    cl_context_properties cps[3];
+    char platformName[100];
+    unsigned int i;
 
-    if (MAX_CLKERNEL_NUM <= 0) {
-        return 1;
-    }
-    if ((argc > MAX_CLFILE_NUM) || (argc < 0)) {
-        return 1;
-    }
+    // Have a look at the available platforms.
 
-    if (!isInited) {
-        printf("regist_opencl_kernel start.\n");
-        regist_opencl_kernel();
-        //initialize devices, context, comand_queue
-        status = init_opencl_env(&gpu_env);
-        if (status) {
-            printf("init_opencl_env failed.\n");
+    if (!gpuInfo->isUserCreated) { // all discovery/creation below is skipped when isUserCreated is set
+        status = clGetPlatformIDs(0, NULL, &numPlatforms); // first call only counts the platforms
+        if (status != CL_SUCCESS) {
             return (1);
         }
-        printf("init_opencl_env successed.\n");
-        //initialize program, kernel_name, kernel_count
-        status = compile_kernel_file(argv_kernelfiles[0], &gpu_env,
-                build_option_kernelfiles);
-        if (status == 0 || gpu_env.kernel_count == 0) {
-            printf("compile_kernel_file failed.\n");
+        gpuInfo->platform = NULL;
+
+        if (0 < numPlatforms) {
+            platforms = (cl_platform_id*) malloc(
+                    numPlatforms * sizeof(cl_platform_id));
+            if (platforms == (cl_platform_id*) NULL) {
+                return (1);
+            }
+            status = clGetPlatformIDs(numPlatforms, platforms, NULL); // second call fetches the ids
+
+            if (status != CL_SUCCESS) {
+                return (1); // NOTE(review): platforms is not freed on this path
+            }
+
+            for (i = 0; i < numPlatforms; i++) {
+                status = clGetPlatformInfo(platforms[i], CL_PLATFORM_VENDOR,
+                        sizeof(platformName), platformName, NULL);
+                // platformName is only consulted by the commented-out vendor filters below.
+                if (status != CL_SUCCESS) {
+                    return (1); // NOTE(review): platforms is not freed on this path
+                }
+                gpuInfo->platform = platforms[i]; // assigned again a few lines below
+
+                //if (!strcmp(platformName, "Intel(R) Coporation"))
+                //if( !strcmp( platformName, "Advanced Micro Devices, Inc." ))
+                {
+                    gpuInfo->platform = platforms[i];
+
+                    status = clGetDeviceIDs(gpuInfo->platform, // platform
+												CL_DEVICE_TYPE_GPU, // device_type
+												0, // num_entries
+												NULL, // devices
+												&numDevices);
+                    // NOTE(review): per the OpenCL spec a platform without a GPU reports CL_DEVICE_NOT_FOUND, so the check below exits instead of trying the next platform - confirm intended.
+                    if (status != CL_SUCCESS) {
+                        return (1); // NOTE(review): platforms is not freed on this path
+                    }
+
+                    if (numDevices) { // keep the first platform that reports at least one GPU device
+                        break;
+                    }
+                }
+            }
+            free(platforms);
+        }
+        if (NULL == gpuInfo->platform) { // still NULL only when numPlatforms was 0
+            return (1);
+        }
+
+        // Use available platform.
+
+        cps[0] = CL_CONTEXT_PLATFORM;
+        cps[1] = (cl_context_properties) gpuInfo->platform;
+        cps[2] = 0;
+        // Check for GPU.
+        gpuInfo->dType = CL_DEVICE_TYPE_GPU;
+        gpuInfo->context = clCreateContextFromType(cps, gpuInfo->dType, NULL,
+                NULL, &status);
+
+        if ((gpuInfo->context == (cl_context) NULL)
+                || (status != CL_SUCCESS)) {
+            gpuInfo->dType = CL_DEVICE_TYPE_CPU; // first fallback: CPU context
+            gpuInfo->context = clCreateContextFromType(cps, gpuInfo->dType,
+                    NULL, NULL, &status);
+        }
+        if ((gpuInfo->context == (cl_context) NULL)
+                || (status != CL_SUCCESS)) {
+            gpuInfo->dType = CL_DEVICE_TYPE_DEFAULT; // second fallback: default device type
+            gpuInfo->context = clCreateContextFromType(cps, gpuInfo->dType,
+                    NULL, NULL, &status);
+        }
+        if ((gpuInfo->context == (cl_context) NULL)
+                || (status != CL_SUCCESS)) {
+            return (1);
+        }
+        // Detect OpenCL devices.
+        // First, get the size of device list data
+        status = clGetContextInfo(gpuInfo->context, CL_CONTEXT_DEVICES, 0,
+                NULL, &length);
+        if ((status != CL_SUCCESS) || (length == 0)) {
+            return (1);
+        }
+        // Now allocate memory for device list based on the size we got earlier
+        gpuInfo->devices = (cl_device_id*) malloc(length);
+        if (gpuInfo->devices == (cl_device_id*) NULL) {
+            return (1);
+        }
+        // Now, get the device list data
+        status = clGetContextInfo(gpuInfo->context, CL_CONTEXT_DEVICES, length,
+                gpuInfo->devices, NULL);
+        if (status != CL_SUCCESS) {
+            return (1); // gpuInfo->context and gpuInfo->devices are not released here
+        }
+
+        // Create OpenCL command queue.
+        gpuInfo->commandQueue = clCreateCommandQueue(gpuInfo->context,
+                gpuInfo->devices[0], 0, &status); // queue is bound to the first device of the context
+
+        if (status != CL_SUCCESS) {
             return (1);
         }
-        printf("compile_kernel_file successed.\n");
-        isInited = 1;
     }
 
-    return (0);
-}
+    status = clGetCommandQueueInfo(gpuInfo->commandQueue,
+            CL_QUEUE_THREAD_HANDLE_AMD, 0, NULL, NULL);
+    // NOTE(review): the status of the query above is never inspected; 0 is returned regardless.
+    return 0;
 
-int OpenclDevice::release_opencl_run_env() {
-    return release_opencl_env(&gpu_env);
 }
+int OpenclDevice::RegisterKernelWrapper(const char *kernelName,cl_kernel_function function) // binds function to an existing entry named kernelName; 1 if found, 0 if not
+{
+	int i;
+	printf("oclwrapper:RegisterKernelWrapper...%d\n", gpuEnv.kernelCount);
+	for (i = 0; i < gpuEnv.kernelCount; i++) // only names already present in gpuEnv.kernelNames can match
+	{
+		if (strcasecmp(kernelName, gpuEnv.kernelNames[i]) == 0) // comparison ignores case
+		{
+			gpuEnv.kernelFunctions[i] = function; // replaces whatever was stored in this slot
+			return (1);
+		}
+	}
+		return (0); // no entry with that name; nothing was stored
+}
+
 
-void OpenclDevice::setOpenclState(int state) {
+void OpenclDevice::SetOpenclState(int state) {
+     // Stores state in isInited; nothing else is touched here.
      isInited = state;
 }
 
-int OpenclDevice::getOpenclState() {
+int OpenclDevice::GetOpenclState() { // reports the current value of isInited
     return isInited;
 }
 //ocldbg
-int OclFormulaMin(void ** usrdata, KernelEnv *env) { return 0; }
-int OclFormulaMax(void ** usrdata, KernelEnv *env) { return 0; }
-int OclFormulaSum(void ** usrdata, KernelEnv *env) { return 0; }
-int OclFormulaCount(void ** usrdata, KernelEnv *env) { return 0; }
-int OclFormulaAverage(void ** usrdata, KernelEnv *env) { return 0; }
-int OclFormulaSumproduct(void ** usrdata, KernelEnv *env) { return 0; }
-int OclFormulaMinverse(void ** usrdata, KernelEnv *env) { return 0; }
 
 int OclFormulax(void ** usrdata, KernelEnv *env) {
     fprintf(stderr, "In OpenclDevice,...Formula_proc\n");
@@ -958,16 +790,16 @@ int OclFormulax(void ** usrdata, KernelEnv *env) {
 
     while (global_work_size[0] != 1) {
         global_work_size[0] = global_work_size[0] / 2;
-        status = clEnqueueNDRangeKernel(env->command_queue, env->kernel, 1,
+        status = clEnqueueNDRangeKernel(env->commandQueue, env->kernel, 1,
                 NULL, global_work_size, NULL, 0, NULL, NULL);
         CHECK_OPENCL(status)
 
     }
     //fprintf(stderr, "\nIn OpenclDevice,...before clEnqueueReadBuffer\n");
-    status = clEnqueueReadBuffer(env->command_queue, formula_data, CL_FALSE, 0,
+    status = clEnqueueReadBuffer(env->commandQueue, formula_data, CL_FALSE, 0,
             sizeof(float), (void *) &tdata, 0, NULL, NULL);
     CHECK_OPENCL(status)
-    status = clFinish(env->command_queue);
+    status = clFinish(env->commandQueue);
     CHECK_OPENCL(status)
 
     //PPAStopCpuEvent(ppa_proc);
@@ -986,46 +818,572 @@ int OclFormulax(void ** usrdata, KernelEnv *env) {
 
     return 0;
 }
-double OclCalc::OclProcess(cl_kernel_function function, double *data,
-        formulax type) {
-    fprintf(stderr, "\In OpenclDevice, proc...begin\n");
-    double ret = 0;
 
-    void *usrdata[2];
+int OclFormulaxDll(void ** usrdata, KernelEnv *env) {
+    // Runs the "oclformula" kernel on NUM values from usrdata[0] and prints the first value read back; always ends with return 0.
+    fprintf(stderr, "In OclFormulaxDll...\n");
+    cl_int clStatus;
+    int status;
+    size_t global_work_size[1];
+    float tdata[NUM]; // single-precision working copy handed to the device
+
+    double *data = (double *) usrdata[0]; // usrdata[0]: input values
+    const formulax type = *((const formulax *) usrdata[1]); // usrdata[1]: pointer to the requested formula
+    double ret = 0.0;
 
-    usrdata[0] = (void *) data;
-    usrdata[1] = (void *) &type;
+    for (int i = 0; i < NUM; i++) {
+        tdata[i] = (float) data[i]; // narrowing double -> float
+    }
 
-    run_kernel_wrapper(function, "oclformula", usrdata);
-    //fprintf(stderr, "\In OpenclDevice, proc...after run_kernel_wrapper\n");
-    return ret;
+    env->kernel = clCreateKernel(env->program, "oclformula", &clStatus);
+    // NOTE(review): clStatus from clCreateKernel (and from clCreateBuffer below) is never checked.
+    //fprintf(stderr, "\nIn OpenclDevice,...after clCreateKernel\n");
+    int size = NUM;
+
+    cl_mem formula_data = clCreateBuffer(env->context,
+            (cl_mem_flags) (CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR),
+            size * sizeof(float), (void *) tdata, &clStatus); // buffer is backed by the local tdata array
+    //fprintf(stderr, "\nIn OpenclDevice,...after clCreateBuffer\n");
+
+    status = clSetKernelArg(env->kernel, 0, sizeof(cl_mem),
+            (void *) &formula_data);
+    CHECK_OPENCL(status)
+    status = clSetKernelArg(env->kernel, 1, sizeof(unsigned int),
+            (void *) &type); // NOTE(review): assumes sizeof(formulax) == sizeof(unsigned int) - confirm
+    CHECK_OPENCL(status)
+
+    global_work_size[0] = size;
+    //fprintf(stderr, "\nIn OpenclDevice,...after global_work_size\n");
+    //PPAStartCpuEvent(ppa_proc);
+    // Each pass halves the work size and enqueues the kernel again until it reaches 1 (never terminates for size 0; assumes NUM >= 1).
+    while (global_work_size[0] != 1) {
+        global_work_size[0] = global_work_size[0] / 2;
+        status = clEnqueueNDRangeKernel(env->commandQueue, env->kernel, 1,
+                NULL, global_work_size, NULL, 0, NULL, NULL);
+        CHECK_OPENCL(status)
+
+    }
+    //fprintf(stderr, "\nIn OpenclDevice,...before clEnqueueReadBuffer\n");
+    status = clEnqueueReadBuffer(env->commandQueue, formula_data, CL_FALSE, 0,
+            sizeof(float), (void *) &tdata, 0, NULL, NULL); // non-blocking read of one float into tdata[0]
+    CHECK_OPENCL(status)
+    status = clFinish(env->commandQueue); // waits for the queued kernels and the read
+    CHECK_OPENCL(status)
+
+    //PPAStopCpuEvent(ppa_proc);
+    //fprintf(stderr, "\nIn OpenclDevice,...before clReleaseKernel\n");
+    status = clReleaseKernel(env->kernel);
+    CHECK_OPENCL(status)
+    status = clReleaseMemObject(formula_data);
+    CHECK_OPENCL(status)
+
+    if (type == AVG)
+        ret = (double) tdata[0] / NUM; // AVG divides the value read back by the element count
+    else
+        ret = (double) tdata[0];
+
+    printf("OclFormulaxDllxx:size = %d ret = %f.\n\n", NUM, ret);
+    // ret is only printed above; it is not handed back to the caller.
+    return 0;
+}
+double OclCalc::OclProcess(cl_kernel_function function, double *data, formulax type) // packs data/type and runs function via RunKernelWrapper under the name "oclformula"
+{
+	fprintf(stderr, "\n OpenclDevice, proc...begin\n");
+	double ret = 0; // never updated below, so the function always returns 0
+	void *usrdata[2];
+	usrdata[0] = (void *) data; // slot 0: input values
+	usrdata[1] = (void *) &type; // slot 1: address of the by-value type parameter
+	RunKernelWrapper(function, "oclformula", usrdata); // return value is ignored
+	return ret;
 }
 
 double OclCalc::OclTest() {
     double data[NUM];
 
+    srand((unsigned int) time(NULL)); // NOTE(review): the loop below draws from sc::rng::uniform(), not rand() - confirm this seeding is needed
+
     for (int i = 0; i < NUM; i++) {
         data[i] = sc::rng::uniform();
         fprintf(stderr, "%f\t", data[i]);
     }
     OclProcess(&OclFormulax, data, AVG);
-    //fprintf(stderr, "\nIn OpenclDevice,OclTest() after proc,data0...%f\n", data[0]);
+    return 0.0; // the value returned by OclProcess is discarded
+}
+
+double OclCalc::OclTestDll() { // feeds NUM pseudo-random values through OclFormulaxDll with AVG; always returns 0.0
+    double data[NUM];
+    srand((unsigned int) time(NULL)); // seeds rand() from the current time
 
+    for (int i = 0; i < NUM; i++) {
+        data[i] = rand() / (RAND_MAX + 1.0); // scales rand() into [0, 1)
+        fprintf(stderr, "%f\t", data[i]);
+    }
+    OclProcess(&OclFormulaxDll, data, AVG); // return value is discarded
     return 0.0;
 }
 
 OclCalc::OclCalc()
 {
-    OpenclDevice::init_opencl_run_env(0, NULL);
-    OpenclDevice::setOpenclState(1);
-    fprintf(stderr,"OclCalc:: init opencl.\n");
+    OpenclDevice::SetOpenclState(1); // only the state flag is set; this constructor no longer runs the OpenCL environment init itself
+    fprintf(stderr,"OclCalc:: init opencl ok.\n");
 }
 
 OclCalc::~OclCalc()
 {
-    OpenclDevice::release_opencl_run_env();
-    OpenclDevice::setOpenclState(0);
-    fprintf(stderr,"OclCalc:: opencl end.\n");
+    OpenclDevice::SetOpenclState(0); // only the state flag is cleared; this destructor no longer releases the OpenCL environment itself
+    fprintf(stderr,"OclCalc:: opencl end ok.\n");
+}
+/////////////////////////////////////////////////////////////////////////////
+int OclCalc::OclHostFormulaMax(double *srcData,int *start,int *end,double *output,int size) { // host launcher for kernel "oclFormulaMax": uploads srcData/start/end, enqueues size work-items, copies size floats into output[]
+	KernelEnv env;
+	const char *kernelName = "oclFormulaMax";
+	CheckKernelName(&env,kernelName); // return value ignored; env is presumably filled in by this call - TODO confirm
+	cl_int clStatus;
+	size_t global_work_size[1];
+	int alignSize = size + end[0]-start[0]; // number of srcData elements uploaded; derived from the first start/end pair only
+	// NOTE(review): results of clCreateKernel / clCreateBuffer below are never checked.
+	env.kernel = clCreateKernel(env.program,kernelName, &clStatus);
+	cl_int ret=0; // receives buffer-creation error codes but is never read
+	cl_mem inputCl = clCreateBuffer(env.context,(cl_mem_flags) (CL_MEM_READ_WRITE),
+		alignSize * sizeof(float), NULL, &clStatus);
+	cl_mem startCl = clCreateBuffer(env.context, (cl_mem_flags) (CL_MEM_READ_WRITE),
+		size * sizeof(unsigned int), NULL, &ret);
+	cl_mem endCl = clCreateBuffer(env.context, (cl_mem_flags) (CL_MEM_READ_WRITE),
+		size * sizeof(unsigned int), NULL, &ret);
+	cl_mem outputCl = clCreateBuffer(env.context, (cl_mem_flags) (CL_MEM_READ_WRITE),
+	size* sizeof(float), NULL, &ret);
+	// Blocking maps (CL_TRUE) yield host pointers for filling the buffers; errcode_ret is NULL, so a failed map is not detected.
+	float * hostMapSrc = (float *)clEnqueueMapBuffer(env.commandQueue,inputCl,CL_TRUE,CL_MAP_WRITE,0,alignSize * sizeof(float),0,NULL,NULL,NULL);
+	int * hostMapStart = (int *)clEnqueueMapBuffer(env.commandQueue,startCl,CL_TRUE,CL_MAP_WRITE,0,size * sizeof(unsigned int),0,NULL,NULL,NULL);
+	int * hostMapEnd   = (int *)clEnqueueMapBuffer(env.commandQueue,endCl,CL_TRUE,CL_MAP_WRITE,0,size * sizeof(unsigned int),0,NULL,NULL,NULL);
+	for(int i=0;i<size;i++)
+	{
+		hostMapStart[i] = start[i];
+		hostMapEnd[i]	= end[i];
+	}
+	for(int i=0;i<alignSize;i++)
+		hostMapSrc[i] = (float)srcData[i]; // narrowing double -> float
+	clEnqueueUnmapMemObject(env.commandQueue,inputCl,hostMapSrc,0,NULL,NULL);
+	clEnqueueUnmapMemObject(env.commandQueue,startCl,hostMapStart,0,NULL,NULL);
+	clEnqueueUnmapMemObject(env.commandQueue,endCl,hostMapEnd,0,NULL,NULL);
+	// clStatus is overwritten by each clSetKernelArg; only the last call's status reaches CHECK_OPENCL.
+	clStatus = clSetKernelArg(env.kernel, 0, sizeof(cl_mem),
+		(void *)&inputCl);
+	clStatus = clSetKernelArg(env.kernel, 1, sizeof(cl_mem),
+		(void *)&startCl);
+	clStatus = clSetKernelArg(env.kernel, 2, sizeof(cl_mem),
+		(void *)&endCl);
+	clStatus = clSetKernelArg(env.kernel, 3, sizeof(cl_mem),
+		(void *)&outputCl);
+	CHECK_OPENCL(clStatus);
+	// One work-item per output element.
+	global_work_size[0] = size;
+	clStatus = clEnqueueNDRangeKernel(env.commandQueue, env.kernel, 1,
+		NULL, global_work_size, NULL, 0, NULL, NULL);
+	CHECK_OPENCL(clStatus)
+
+	float * outPutMap = (float *)clEnqueueMapBuffer(env.commandQueue,outputCl,CL_TRUE,CL_MAP_READ,0,size*sizeof(float),0,NULL,NULL,NULL);
+	for(int i=0;i<size;i++)
+		output[i]=outPutMap[i]; // widening float -> double
+
+	clEnqueueUnmapMemObject(env.commandQueue,outputCl,outPutMap,0,NULL,NULL);
+	clStatus = clFinish(env.commandQueue);
+	// NOTE(review): CHECK_OPENCL is defined outside this view; if it returns early, the remaining releases below are skipped - confirm.
+	CHECK_OPENCL(clStatus);
+	clStatus = clReleaseKernel(env.kernel);
+	CHECK_OPENCL(clStatus);
+	clStatus = clReleaseMemObject(inputCl);
+	CHECK_OPENCL(clStatus);
+	clStatus = clReleaseMemObject(startCl);
+	CHECK_OPENCL(clStatus);
+	clStatus = clReleaseMemObject(endCl);
+	CHECK_OPENCL(clStatus);
+	clStatus = clReleaseMemObject(outputCl);
+	CHECK_OPENCL(clStatus);
+	return 0;
+}
+int OclCalc::OclHostFormulaMin(double *srcData,int *start,int *end,double *output,int size) { // host launcher for kernel "oclFormulaMin": uploads srcData/start/end, enqueues size work-items, copies size floats into output[]
+	KernelEnv env;
+	const char *kernelName = "oclFormulaMin";
+	CheckKernelName(&env,kernelName); // return value ignored; env is presumably filled in by this call - TODO confirm
+
+	cl_int clStatus;
+	size_t global_work_size[1];
+	int alignSize = size + end[0]-start[0]; // number of srcData elements uploaded; derived from the first start/end pair only
+	// NOTE(review): results of clCreateKernel / clCreateBuffer below are never checked.
+	env.kernel = clCreateKernel(env.program,kernelName, &clStatus);
+	cl_int ret=0; // receives buffer-creation error codes but is never read
+	cl_mem inputCl = clCreateBuffer(env.context,(cl_mem_flags) (CL_MEM_READ_WRITE),
+		alignSize * sizeof(float), NULL, &clStatus);
+	cl_mem startCl = clCreateBuffer(env.context, (cl_mem_flags) (CL_MEM_READ_WRITE),
+		size * sizeof(unsigned int), NULL, &ret);
+	cl_mem endCl = clCreateBuffer(env.context, (cl_mem_flags) (CL_MEM_READ_WRITE),
+		size * sizeof(unsigned int), NULL, &ret);
+	cl_mem outputCl = clCreateBuffer(env.context, (cl_mem_flags) (CL_MEM_READ_WRITE),
+	size* sizeof(float), NULL, &ret);
+	// Blocking maps (CL_TRUE) yield host pointers for filling the buffers; errcode_ret is NULL, so a failed map is not detected.
+	float * hostMapSrc = (float *)clEnqueueMapBuffer(env.commandQueue,inputCl,CL_TRUE,CL_MAP_WRITE,0,alignSize * sizeof(float),0,NULL,NULL,NULL);
+	int * hostMapStart = (int *)clEnqueueMapBuffer(env.commandQueue,startCl,CL_TRUE,CL_MAP_WRITE,0,size * sizeof(unsigned int),0,NULL,NULL,NULL);
+	int * hostMapEnd   = (int *)clEnqueueMapBuffer(env.commandQueue,endCl,CL_TRUE,CL_MAP_WRITE,0,size * sizeof(unsigned int),0,NULL,NULL,NULL);
+	for(int i=0;i<size;i++)
+	{
+		hostMapStart[i] = start[i];
+		hostMapEnd[i]	= end[i];
+	}
+	for(int i=0;i<alignSize;i++)
+		hostMapSrc[i] = (float)srcData[i]; // narrowing double -> float
+	clEnqueueUnmapMemObject(env.commandQueue,inputCl,hostMapSrc,0,NULL,NULL);
+	clEnqueueUnmapMemObject(env.commandQueue,startCl,hostMapStart,0,NULL,NULL);
+	clEnqueueUnmapMemObject(env.commandQueue,endCl,hostMapEnd,0,NULL,NULL);
+	// clStatus is overwritten by each clSetKernelArg; only the last call's status reaches CHECK_OPENCL.
+	clStatus = clSetKernelArg(env.kernel, 0, sizeof(cl_mem),
+		(void *)&inputCl);
+	clStatus = clSetKernelArg(env.kernel, 1, sizeof(cl_mem),
+		(void *)&startCl);
+	clStatus = clSetKernelArg(env.kernel, 2, sizeof(cl_mem),
+		(void *)&endCl);
+	clStatus = clSetKernelArg(env.kernel, 3, sizeof(cl_mem),
+		(void *)&outputCl);
+	CHECK_OPENCL(clStatus);
+	// One work-item per output element.
+	global_work_size[0] = size;
+	clStatus = clEnqueueNDRangeKernel(env.commandQueue, env.kernel, 1,
+		NULL, global_work_size, NULL, 0, NULL, NULL);
+	CHECK_OPENCL(clStatus)
+
+	float * outPutMap = (float *)clEnqueueMapBuffer(env.commandQueue,outputCl,CL_TRUE,CL_MAP_READ,0,size*sizeof(float),0,NULL,NULL,NULL);
+	for(int i=0;i<size;i++)
+		output[i]=outPutMap[i]; // widening float -> double
+
+	clEnqueueUnmapMemObject(env.commandQueue,outputCl,outPutMap,0,NULL,NULL);
+	clStatus = clFinish(env.commandQueue);
+	// NOTE(review): CHECK_OPENCL is defined outside this view; if it returns early, the remaining releases below are skipped - confirm.
+	CHECK_OPENCL(clStatus);
+	clStatus = clReleaseKernel(env.kernel);
+	CHECK_OPENCL(clStatus);
+	clStatus = clReleaseMemObject(inputCl);
+	CHECK_OPENCL(clStatus);
+	clStatus = clReleaseMemObject(startCl);
+	CHECK_OPENCL(clStatus);
+	clStatus = clReleaseMemObject(endCl);
+	CHECK_OPENCL(clStatus);
+	clStatus = clReleaseMemObject(outputCl);
+	CHECK_OPENCL(clStatus);
+	return 0;
+}
+int OclCalc::OclHostFormulaAverage(double *srcData,int *start,int *end,double *output,int size) { // host launcher for kernel "oclFormulaAverage": uploads srcData/start/end, enqueues size work-items, copies size floats into output[]
+	KernelEnv env;
+	const char *kernelName = "oclFormulaAverage";
+	CheckKernelName(&env,kernelName); // return value ignored; env is presumably filled in by this call - TODO confirm
+
+	cl_int clStatus;
+	size_t global_work_size[1];
+	int alignSize = size + end[0]-start[0]; // number of srcData elements uploaded; derived from the first start/end pair only
+	// NOTE(review): results of clCreateKernel / clCreateBuffer below are never checked.
+	env.kernel = clCreateKernel(env.program, kernelName, &clStatus);
+	cl_int ret=0; // receives buffer-creation error codes but is never read
+	cl_mem inputCl = clCreateBuffer(env.context,(cl_mem_flags) (CL_MEM_READ_WRITE),
+		alignSize * sizeof(float), NULL, &clStatus);
+	cl_mem startCl = clCreateBuffer(env.context, (cl_mem_flags) (CL_MEM_READ_WRITE),
+		size * sizeof(unsigned int), NULL, &ret);
+	cl_mem endCl = clCreateBuffer(env.context, (cl_mem_flags) (CL_MEM_READ_WRITE),
+		size * sizeof(unsigned int), NULL, &ret);
+	cl_mem outputCl = clCreateBuffer(env.context, (cl_mem_flags) (CL_MEM_READ_WRITE),
+	size* sizeof(float), NULL, &ret);
+	// Blocking maps (CL_TRUE) yield host pointers for filling the buffers; errcode_ret is NULL, so a failed map is not detected.
+	float * hostMapSrc = (float *)clEnqueueMapBuffer(env.commandQueue,inputCl,CL_TRUE,CL_MAP_WRITE,0,alignSize * sizeof(float),0,NULL,NULL,NULL);
+	int * hostMapStart = (int *)clEnqueueMapBuffer(env.commandQueue,startCl,CL_TRUE,CL_MAP_WRITE,0,size * sizeof(unsigned int),0,NULL,NULL,NULL);
+	int * hostMapEnd   = (int *)clEnqueueMapBuffer(env.commandQueue,endCl,CL_TRUE,CL_MAP_WRITE,0,size * sizeof(unsigned int),0,NULL,NULL,NULL);
+	for(int i=0;i<size;i++)
+	{
+		hostMapStart[i] = start[i];
+		hostMapEnd[i]	= end[i];
+	}
+	for(int i=0;i<alignSize;i++)
+		hostMapSrc[i] = (float)srcData[i]; // narrowing double -> float
+	clEnqueueUnmapMemObject(env.commandQueue,inputCl,hostMapSrc,0,NULL,NULL);
+	clEnqueueUnmapMemObject(env.commandQueue,startCl,hostMapStart,0,NULL,NULL);
+	clEnqueueUnmapMemObject(env.commandQueue,endCl,hostMapEnd,0,NULL,NULL);
+	// clStatus is overwritten by each clSetKernelArg; only the last call's status reaches CHECK_OPENCL.
+	clStatus = clSetKernelArg(env.kernel, 0, sizeof(cl_mem),
+		(void *)&inputCl);
+	clStatus = clSetKernelArg(env.kernel, 1, sizeof(cl_mem),
+		(void *)&startCl);
+	clStatus = clSetKernelArg(env.kernel, 2, sizeof(cl_mem),
+		(void *)&endCl);
+	clStatus = clSetKernelArg(env.kernel, 3, sizeof(cl_mem),
+		(void *)&outputCl);
+	CHECK_OPENCL(clStatus);
+	// One work-item per output element.
+	global_work_size[0] = size;
+	clStatus = clEnqueueNDRangeKernel(env.commandQueue, env.kernel, 1,
+		NULL, global_work_size, NULL, 0, NULL, NULL);
+	CHECK_OPENCL(clStatus)
+
+	float * outPutMap = (float *)clEnqueueMapBuffer(env.commandQueue,outputCl,CL_TRUE,CL_MAP_READ,0,size*sizeof(float),0,NULL,NULL,NULL);
+	for(int i=0;i<size;i++)
+		output[i]=outPutMap[i]; // widening float -> double
+
+	clEnqueueUnmapMemObject(env.commandQueue,outputCl,outPutMap,0,NULL,NULL);
+	clStatus = clFinish(env.commandQueue);
+	// NOTE(review): CHECK_OPENCL is defined outside this view; if it returns early, the remaining releases below are skipped - confirm.
+	CHECK_OPENCL(clStatus);
+	clStatus = clReleaseKernel(env.kernel);
+	CHECK_OPENCL(clStatus);
+	clStatus = clReleaseMemObject(inputCl);
+	CHECK_OPENCL(clStatus);
+	clStatus = clReleaseMemObject(startCl);
+	CHECK_OPENCL(clStatus);
+	clStatus = clReleaseMemObject(endCl);
+	CHECK_OPENCL(clStatus);
+	clStatus = clReleaseMemObject(outputCl);
+	CHECK_OPENCL(clStatus);
+	return 0;
+
+
+}
+
+
+int OclCalc::OclHostSignedAdd(double *lData,double *rData,double *rResult,int dSize) {
+	// Host launcher for kernel "oclSignedAdd": uploads lData/rData as floats, enqueues dSize work-items, copies dSize floats into rResult[].
+	KernelEnv env;
+	int status;
+	const char *kernelName = "oclSignedAdd";
+	CheckKernelName(&env,kernelName); // return value ignored; env is presumably filled in by this call - TODO confirm
+
+	// NOTE(review): clStatus from clCreateKernel / clCreateBuffer below is never checked.
+	cl_int clStatus;
+	size_t global_work_size[1];
+
+	env.kernel = clCreateKernel(env.program,kernelName, &clStatus);
+	cl_mem clLiftData = clCreateBuffer(env.context,
+		(cl_mem_flags) (CL_MEM_READ_WRITE),
+		dSize * sizeof(float), NULL, &clStatus); // device copy of lData (filled through hostMapLeftData)
+	cl_mem clRightData = clCreateBuffer(env.context,
+		(cl_mem_flags) (CL_MEM_READ_WRITE),
+		dSize * sizeof(float), NULL, &clStatus);
+	cl_mem clResult = clCreateBuffer(env.context,
+		(cl_mem_flags) (CL_MEM_READ_WRITE),
+		dSize * sizeof(float), NULL, &clStatus);
+	// Blocking maps (CL_TRUE) yield host pointers for filling the inputs; errcode_ret is NULL, so a failed map is not detected.
+	float * hostMapLeftData 	= (float *)clEnqueueMapBuffer(env.commandQueue,clLiftData,CL_TRUE,CL_MAP_WRITE,0,dSize*sizeof(float),0,NULL,NULL,NULL);
+	float * hostMapRightData 	= (float *)clEnqueueMapBuffer(env.commandQueue,clRightData,CL_TRUE,CL_MAP_WRITE,0,dSize*sizeof(float),0,NULL,NULL,NULL);
+	for(int i=0;i<dSize;i++)
+	{
+		hostMapLeftData[i] 	= (float)lData[i]; // narrowing double -> float
+		hostMapRightData[i] = (float)rData[i];
+	}
+	clEnqueueUnmapMemObject(env.commandQueue,clLiftData,hostMapLeftData,0,NULL,NULL);
+	clEnqueueUnmapMemObject(env.commandQueue,clRightData,hostMapRightData,0,NULL,NULL);
+	// status is overwritten by each clSetKernelArg; only the last call's status reaches CHECK_OPENCL.
+	status = clSetKernelArg(env.kernel, 0, sizeof(cl_mem),
+		(void *)&clLiftData);
+	status = clSetKernelArg(env.kernel, 1, sizeof(cl_mem),
+		(void *)&clRightData);
+	status = clSetKernelArg(env.kernel, 2, sizeof(cl_mem),
+		(void *)&clResult);
+	CHECK_OPENCL(status)
+	global_work_size[0] = dSize; // one work-item per element
+	status = clEnqueueNDRangeKernel(env.commandQueue, env.kernel, 1,
+		NULL, global_work_size, NULL, 0, NULL, NULL);
+	CHECK_OPENCL(status);
+
+	float * hostMapResult = (float *)clEnqueueMapBuffer(env.commandQueue,clResult,CL_TRUE,CL_MAP_READ,0,dSize*sizeof(float),0,NULL,NULL,NULL);
+	for(int i=0;i<dSize;i++)
+		rResult[i]=hostMapResult[i]; // widening float -> double
+	clEnqueueUnmapMemObject(env.commandQueue,clResult,hostMapResult,0,NULL,NULL);
+
+	CHECK_OPENCL(status); // status still holds the clEnqueueNDRangeKernel result already checked above
+	status = clFinish(env.commandQueue);
+	CHECK_OPENCL(status);
+	status = clReleaseKernel(env.kernel);
+	CHECK_OPENCL(status);
+	status = clReleaseMemObject(clLiftData);
+	CHECK_OPENCL(status);
+	status = clReleaseMemObject(clRightData);
+	CHECK_OPENCL(status);
+	status = clReleaseMemObject(clResult);
+	CHECK_OPENCL(status);
+	return 0;
+}
+/**
+ * Runs the "oclSignedMul" OpenCL kernel over two operand arrays
+ * (presumably an element-wise multiplication; the kernel source is not part
+ * of this file).
+ *
+ * Operands are narrowed to float for the device buffers and the float
+ * results are widened back to double, so results carry single precision.
+ *
+ * Every OpenCL call is checked; on failure the error code is printed and all
+ * objects created so far are released before returning.
+ *
+ * @param lData   left operands, dSize elements
+ * @param rData   right operands, dSize elements
+ * @param rResult output array, receives dSize elements on success
+ * @param dSize   number of elements to process
+ * @return always 0 (unchanged: the previous code also returned 0 for both
+ *         success and failure)
+ */
+int OclCalc::OclHostSignedMul(double *lData,double *rData,double *rResult,int dSize) {
+	KernelEnv env;
+	const char *kernelName = "oclSignedMul";
+	CheckKernelName(&env,kernelName);
+
+	// A zero-sized buffer cannot be created, and there is nothing to compute.
+	if (dSize <= 0)
+		return 0;
+
+	const size_t bufferBytes = dSize * sizeof(float);
+	size_t global_work_size[1];
+	cl_int clStatus = CL_SUCCESS;
+	cl_mem clLiftData = NULL;
+	cl_mem clRightData = NULL;
+	cl_mem clResult = NULL;
+	env.kernel = NULL;
+
+	// Single-pass block: any failure breaks out to the shared cleanup below.
+	do {
+		env.kernel = clCreateKernel(env.program, kernelName, &clStatus);
+		if (clStatus != CL_SUCCESS)
+			break;
+
+		clLiftData = clCreateBuffer(env.context,
+			(cl_mem_flags) (CL_MEM_READ_WRITE),
+			bufferBytes, NULL, &clStatus);
+		if (clStatus != CL_SUCCESS)
+			break;
+		clRightData = clCreateBuffer(env.context,
+			(cl_mem_flags) (CL_MEM_READ_WRITE),
+			bufferBytes, NULL, &clStatus);
+		if (clStatus != CL_SUCCESS)
+			break;
+		clResult = clCreateBuffer(env.context,
+			(cl_mem_flags) (CL_MEM_READ_WRITE),
+			bufferBytes, NULL, &clStatus);
+		if (clStatus != CL_SUCCESS)
+			break;
+
+		// Upload the operands, narrowing double to float.
+		float * hostMapLeftData = (float *)clEnqueueMapBuffer(env.commandQueue,clLiftData,CL_TRUE,CL_MAP_WRITE,0,bufferBytes,0,NULL,NULL,&clStatus);
+		if (clStatus != CL_SUCCESS)
+			break;
+		float * hostMapRightData = (float *)clEnqueueMapBuffer(env.commandQueue,clRightData,CL_TRUE,CL_MAP_WRITE,0,bufferBytes,0,NULL,NULL,&clStatus);
+		if (clStatus != CL_SUCCESS)
+		{
+			// Do not leave the left buffer mapped behind us.
+			clEnqueueUnmapMemObject(env.commandQueue,clLiftData,hostMapLeftData,0,NULL,NULL);
+			break;
+		}
+		for(int i=0;i<dSize;i++)
+		{
+			hostMapLeftData[i] 	= (float)lData[i];
+			hostMapRightData[i] = (float)rData[i];
+		}
+		// Always attempt both unmaps; report the first failure.
+		clStatus = clEnqueueUnmapMemObject(env.commandQueue,clLiftData,hostMapLeftData,0,NULL,NULL);
+		const cl_int unmapRightStatus = clEnqueueUnmapMemObject(env.commandQueue,clRightData,hostMapRightData,0,NULL,NULL);
+		if (clStatus == CL_SUCCESS)
+			clStatus = unmapRightStatus;
+		if (clStatus != CL_SUCCESS)
+			break;
+
+		// Kernel arguments in order: left operands, right operands, result.
+		cl_mem *kernelArgs[] = { &clLiftData, &clRightData, &clResult };
+		for (cl_uint argIndex = 0; argIndex < 3 && clStatus == CL_SUCCESS; argIndex++)
+			clStatus = clSetKernelArg(env.kernel, argIndex, sizeof(cl_mem),
+				(void *)kernelArgs[argIndex]);
+		if (clStatus != CL_SUCCESS)
+			break;
+
+		global_work_size[0] = dSize;
+		clStatus = clEnqueueNDRangeKernel(env.commandQueue, env.kernel, 1,
+			NULL, global_work_size, NULL, 0, NULL, NULL);
+		if (clStatus != CL_SUCCESS)
+			break;
+
+		// Blocking read-map of the result; like the original code this
+		// relies on the queue executing commands in order.
+		float * hostMapResult = (float *)clEnqueueMapBuffer(env.commandQueue,clResult,CL_TRUE,CL_MAP_READ,0,bufferBytes,0,NULL,NULL,&clStatus);
+		if (clStatus != CL_SUCCESS)
+			break;
+		for(int i=0;i<dSize;i++)
+			rResult[i]=hostMapResult[i];
+		clStatus = clEnqueueUnmapMemObject(env.commandQueue,clResult,hostMapResult,0,NULL,NULL);
+		if (clStatus != CL_SUCCESS)
+			break;
+
+		clStatus = clFinish(env.commandQueue);
+	} while (false);
+
+	if (clStatus != CL_SUCCESS)
+		printf ("error code is %d.\n",clStatus);
+
+	// Shared cleanup for the success and the failure path.
+	if (env.kernel)
+		clReleaseKernel(env.kernel);
+	if (clLiftData)
+		clReleaseMemObject(clLiftData);
+	if (clRightData)
+		clReleaseMemObject(clRightData);
+	if (clResult)
+		clReleaseMemObject(clResult);
+	return 0;
+}
+/**
+ * Runs the "oclSignedSub" OpenCL kernel over two operand arrays
+ * (presumably an element-wise subtraction; the kernel source is not part
+ * of this file).
+ *
+ * Operands are narrowed to float for the device buffers and the float
+ * results are widened back to double, so results carry single precision.
+ *
+ * Every OpenCL call is checked; on failure the error code is printed and all
+ * objects created so far are released before returning.
+ *
+ * @param lData   left operands, dSize elements
+ * @param rData   right operands, dSize elements
+ * @param rResult output array, receives dSize elements on success
+ * @param dSize   number of elements to process
+ * @return always 0 (unchanged: the previous code also returned 0 for both
+ *         success and failure)
+ */
+int OclCalc::OclHostSignedSub(double *lData,double *rData,double *rResult,int dSize) {
+	KernelEnv env;
+	const char *kernelName = "oclSignedSub";
+	CheckKernelName(&env,kernelName);
+
+	// A zero-sized buffer cannot be created, and there is nothing to compute.
+	if (dSize <= 0)
+		return 0;
+
+	const size_t bufferBytes = dSize * sizeof(float);
+	size_t global_work_size[1];
+	cl_int clStatus = CL_SUCCESS;
+	cl_mem clLiftData = NULL;
+	cl_mem clRightData = NULL;
+	cl_mem clResult = NULL;
+	env.kernel = NULL;
+
+	// Single-pass block: any failure breaks out to the shared cleanup below.
+	do {
+		env.kernel = clCreateKernel(env.program,kernelName, &clStatus);
+		if (clStatus != CL_SUCCESS)
+			break;
+
+		clLiftData = clCreateBuffer(env.context,
+			(cl_mem_flags) (CL_MEM_READ_WRITE),
+			bufferBytes, NULL, &clStatus);
+		if (clStatus != CL_SUCCESS)
+			break;
+		clRightData = clCreateBuffer(env.context,
+			(cl_mem_flags) (CL_MEM_READ_WRITE),
+			bufferBytes, NULL, &clStatus);
+		if (clStatus != CL_SUCCESS)
+			break;
+		clResult = clCreateBuffer(env.context,
+			(cl_mem_flags) (CL_MEM_READ_WRITE),
+			bufferBytes, NULL, &clStatus);
+		if (clStatus != CL_SUCCESS)
+			break;
+
+		// Upload the operands, narrowing double to float.
+		float * hostMapLeftData = (float *)clEnqueueMapBuffer(env.commandQueue,clLiftData,CL_TRUE,CL_MAP_WRITE,0,bufferBytes,0,NULL,NULL,&clStatus);
+		if (clStatus != CL_SUCCESS)
+			break;
+		float * hostMapRightData = (float *)clEnqueueMapBuffer(env.commandQueue,clRightData,CL_TRUE,CL_MAP_WRITE,0,bufferBytes,0,NULL,NULL,&clStatus);
+		if (clStatus != CL_SUCCESS)
+		{
+			// Do not leave the left buffer mapped behind us.
+			clEnqueueUnmapMemObject(env.commandQueue,clLiftData,hostMapLeftData,0,NULL,NULL);
+			break;
+		}
+		for(int i=0;i<dSize;i++)
+		{
+			hostMapLeftData[i] 	= (float)lData[i];
+			hostMapRightData[i] = (float)rData[i];
+		}
+		// Always attempt both unmaps; report the first failure.
+		clStatus = clEnqueueUnmapMemObject(env.commandQueue,clLiftData,hostMapLeftData,0,NULL,NULL);
+		const cl_int unmapRightStatus = clEnqueueUnmapMemObject(env.commandQueue,clRightData,hostMapRightData,0,NULL,NULL);
+		if (clStatus == CL_SUCCESS)
+			clStatus = unmapRightStatus;
+		if (clStatus != CL_SUCCESS)
+			break;
+
+		// Kernel arguments in order: left operands, right operands, result.
+		cl_mem *kernelArgs[] = { &clLiftData, &clRightData, &clResult };
+		for (cl_uint argIndex = 0; argIndex < 3 && clStatus == CL_SUCCESS; argIndex++)
+			clStatus = clSetKernelArg(env.kernel, argIndex, sizeof(cl_mem),
+				(void *)kernelArgs[argIndex]);
+		if (clStatus != CL_SUCCESS)
+			break;
+
+		global_work_size[0] = dSize;
+		clStatus = clEnqueueNDRangeKernel(env.commandQueue, env.kernel, 1,
+			NULL, global_work_size, NULL, 0, NULL, NULL);
+		if (clStatus != CL_SUCCESS)
+			break;
+
+		// Blocking read-map of the result; like the original code this
+		// relies on the queue executing commands in order.
+		float * hostMapResult = (float *)clEnqueueMapBuffer(env.commandQueue,clResult,CL_TRUE,CL_MAP_READ,0,bufferBytes,0,NULL,NULL,&clStatus);
+		if (clStatus != CL_SUCCESS)
+			break;
+		for(int i=0;i<dSize;i++)
+			rResult[i]=hostMapResult[i];
+		clStatus = clEnqueueUnmapMemObject(env.commandQueue,clResult,hostMapResult,0,NULL,NULL);
+		if (clStatus != CL_SUCCESS)
+			break;
+
+		clStatus = clFinish(env.commandQueue);
+	} while (false);
+
+	if (clStatus != CL_SUCCESS)
+		printf ("error code is %d.\n",clStatus);
+
+	// Shared cleanup for the success and the failure path.
+	if (env.kernel)
+		clReleaseKernel(env.kernel);
+	if (clLiftData)
+		clReleaseMemObject(clLiftData);
+	if (clRightData)
+		clReleaseMemObject(clRightData);
+	if (clResult)
+		clReleaseMemObject(clResult);
+	return 0;
+}
+/**
+ * Runs the "oclSignedDiv" OpenCL kernel over two operand arrays
+ * (presumably an element-wise division; the kernel source is not part
+ * of this file).
+ *
+ * Operands are narrowed to float for the device buffers and the float
+ * results are widened back to double, so results carry single precision.
+ * NOTE(review): zero divisors in rData are passed to the kernel unchanged;
+ * confirm how the kernel is expected to handle them.
+ *
+ * Every OpenCL call is checked; on failure the error code is printed and all
+ * objects created so far are released before returning.
+ *
+ * @param lData   left operands (dividends), dSize elements
+ * @param rData   right operands (divisors), dSize elements
+ * @param rResult output array, receives dSize elements on success
+ * @param dSize   number of elements to process
+ * @return always 0 (unchanged: the previous code also returned 0 for both
+ *         success and failure)
+ */
+int OclCalc::OclHostSignedDiv(double *lData,double *rData,double *rResult,int dSize) {
+	KernelEnv env;
+	const char *kernelName = "oclSignedDiv";
+	CheckKernelName(&env,kernelName);
+
+	// A zero-sized buffer cannot be created, and there is nothing to compute.
+	if (dSize <= 0)
+		return 0;
+
+	const size_t bufferBytes = dSize * sizeof(float);
+	size_t global_work_size[1];
+	cl_int clStatus = CL_SUCCESS;
+	cl_mem clLiftData = NULL;
+	cl_mem clRightData = NULL;
+	cl_mem clResult = NULL;
+	env.kernel = NULL;
+
+	// Single-pass block: any failure breaks out to the shared cleanup below.
+	do {
+		env.kernel = clCreateKernel(env.program,kernelName, &clStatus);
+		if (clStatus != CL_SUCCESS)
+			break;
+
+		clLiftData = clCreateBuffer(env.context,
+			(cl_mem_flags) (CL_MEM_READ_WRITE),
+			bufferBytes, NULL, &clStatus);
+		if (clStatus != CL_SUCCESS)
+			break;
+		clRightData = clCreateBuffer(env.context,
+			(cl_mem_flags) (CL_MEM_READ_WRITE),
+			bufferBytes, NULL, &clStatus);
+		if (clStatus != CL_SUCCESS)
+			break;
+		clResult = clCreateBuffer(env.context,
+			(cl_mem_flags) (CL_MEM_READ_WRITE),
+			bufferBytes, NULL, &clStatus);
+		if (clStatus != CL_SUCCESS)
+			break;
+
+		// Upload the operands, narrowing double to float.
+		float * hostMapLeftData = (float *)clEnqueueMapBuffer(env.commandQueue,clLiftData,CL_TRUE,CL_MAP_WRITE,0,bufferBytes,0,NULL,NULL,&clStatus);
+		if (clStatus != CL_SUCCESS)
+			break;
+		float * hostMapRightData = (float *)clEnqueueMapBuffer(env.commandQueue,clRightData,CL_TRUE,CL_MAP_WRITE,0,bufferBytes,0,NULL,NULL,&clStatus);
+		if (clStatus != CL_SUCCESS)
+		{
+			// Do not leave the left buffer mapped behind us.
+			clEnqueueUnmapMemObject(env.commandQueue,clLiftData,hostMapLeftData,0,NULL,NULL);
+			break;
+		}
+		for(int i=0;i<dSize;i++)
+		{
+			hostMapLeftData[i] 	= (float)lData[i];
+			hostMapRightData[i] = (float)rData[i];
+		}
+		// Always attempt both unmaps; report the first failure.
+		clStatus = clEnqueueUnmapMemObject(env.commandQueue,clLiftData,hostMapLeftData,0,NULL,NULL);
+		const cl_int unmapRightStatus = clEnqueueUnmapMemObject(env.commandQueue,clRightData,hostMapRightData,0,NULL,NULL);
+		if (clStatus == CL_SUCCESS)
+			clStatus = unmapRightStatus;
+		if (clStatus != CL_SUCCESS)
+			break;
+
+		// Kernel arguments in order: left operands, right operands, result.
+		cl_mem *kernelArgs[] = { &clLiftData, &clRightData, &clResult };
+		for (cl_uint argIndex = 0; argIndex < 3 && clStatus == CL_SUCCESS; argIndex++)
+			clStatus = clSetKernelArg(env.kernel, argIndex, sizeof(cl_mem),
+				(void *)kernelArgs[argIndex]);
+		if (clStatus != CL_SUCCESS)
+			break;
+
+		global_work_size[0] = dSize;
+		clStatus = clEnqueueNDRangeKernel(env.commandQueue, env.kernel, 1,
+			NULL, global_work_size, NULL, 0, NULL, NULL);
+		if (clStatus != CL_SUCCESS)
+			break;
+
+		// Blocking read-map of the result; like the original code this
+		// relies on the queue executing commands in order.
+		float * hostMapResult = (float *)clEnqueueMapBuffer(env.commandQueue,clResult,CL_TRUE,CL_MAP_READ,0,bufferBytes,0,NULL,NULL,&clStatus);
+		if (clStatus != CL_SUCCESS)
+			break;
+		for(int i=0;i<dSize;i++)
+			rResult[i]=hostMapResult[i];
+		clStatus = clEnqueueUnmapMemObject(env.commandQueue,clResult,hostMapResult,0,NULL,NULL);
+		if (clStatus != CL_SUCCESS)
+			break;
+
+		clStatus = clFinish(env.commandQueue);
+	} while (false);
+
+	if (clStatus != CL_SUCCESS)
+		printf ("error code is %d.\n",clStatus);
+
+	// Shared cleanup for the success and the failure path.
+	if (env.kernel)
+		clReleaseKernel(env.kernel);
+	if (clLiftData)
+		clReleaseMemObject(clLiftData);
+	if (clRightData)
+		clReleaseMemObject(clRightData);
+	if (clResult)
+		clReleaseMemObject(clResult);
+	return 0;
 }
 
 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sc/source/core/opencl/openclwrapper.hxx b/sc/source/core/opencl/openclwrapper.hxx
index 4646954..d3b5354 100644
--- a/sc/source/core/opencl/openclwrapper.hxx
+++ b/sc/source/core/opencl/openclwrapper.hxx
@@ -5,16 +5,6 @@
  * This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
- *
- * This file incorporates work covered by the following license notice:
- *
- *   Licensed to the Apache Software Foundation (ASF) under one or more
- *   contributor license agreements. See the NOTICE file distributed
- *   with this work for additional information regarding copyright
- *   ownership. The ASF licenses this file to you under the Apache
- *   License, Version 2.0 (the "License"); you may not use this file
- *   except in compliance with the License. You may obtain a copy of
- *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
  */
 
 #ifndef _OPENCL_WRAPPER_H_
@@ -32,20 +22,20 @@
 #define strcasecmp strcmp
 #endif
 #endif
-
+#define ENABLE_OPENCL //dbg
 typedef struct _KernelEnv {
     cl_context context;
-    cl_command_queue command_queue;
+    cl_command_queue commandQueue;
     cl_program program;
     cl_kernel kernel;
-    char kernel_name[150];
+    char kernelName[150];
 } KernelEnv;
 
 typedef struct _OpenCLEnv {
     cl_platform_id platform;
     cl_context context;
     cl_device_id devices;
-    cl_command_queue command_queue;
+    cl_command_queue commandQueue;
 } OpenCLEnv;
 
 #if defined __cplusplus
@@ -64,10 +54,10 @@ typedef int (*cl_kernel_function)(void **userdata, KernelEnv *kenv);
 #define CHECK_OPENCL(status)              \
 if(status != CL_SUCCESS)                  \
 {                                         \
-    printf ("error code is %d.",status);  \
+    printf ("error code is %d.\n",status);  \
     return (0);                           \
 }
-#endif
+
 
 #define MAX_KERNEL_STRING_LEN   64
 #define MAX_CLFILE_NUM 50
@@ -77,27 +67,17 @@ if(status != CL_SUCCESS)                  \
 typedef struct _GPUEnv {
     //share vb in all modules in hb library
     cl_platform_id platform;
-
     cl_device_type dType;
-
     cl_context context;
-
     cl_device_id *devices;
-
     cl_device_id dev;
-
-    cl_command_queue command_queue;
-
+    cl_command_queue commandQueue;
     cl_kernel kernels[MAX_CLFILE_NUM];
-
     cl_program programs[MAX_CLFILE_NUM]; //one program object maps one kernel source file
-
     char kernelSrcFile[MAX_CLFILE_NUM][256], //the max len of kernel file name is 256
-         kernel_names[MAX_CLKERNEL_NUM][MAX_KERNEL_STRING_LEN + 1];
-
-    cl_kernel_function kernel_functions[MAX_CLKERNEL_NUM];
-
-    int kernel_count, file_count, // only one kernel file
+		 kernelNames[MAX_CLKERNEL_NUM][MAX_KERNEL_STRING_LEN + 1];
+		 cl_kernel_function kernelFunctions[MAX_CLKERNEL_NUM];
+    int kernelCount, fileCount, // only one kernel file
         isUserCreated; // 1: created , 0:no create and needed to create by opencl wrapper
 
 } GPUEnv;
@@ -107,66 +87,96 @@ typedef struct {
     char *kernelStr;
 } kernel_node;
 
+// Abstract interface for the host-side OpenCL calculation entry points.
+// OclCalc derives from it and declares the same set of methods.
+class OpenclCalcBase{
+public:
+    OpenclCalcBase(){};
+    virtual ~OpenclCalcBase(){};
+    // Binary operators: take rowSize left operands (lData) and right operands
+    // (rData) and write rowSize results to rResult.
+    virtual int OclHostSignedAdd(double *lData,double *rData,double *rResult,int rowSize)=0;
+    virtual int OclHostSignedSub(double *lData,double *rData,double *rResult,int rowSize)=0;
+    virtual int OclHostSignedMul(double *lData,double *rData,double *rResult,int rowSize)=0;
+    virtual int OclHostSignedDiv(double *lData,double *rData,double *rResult,int rowSize)=0;
+    // Range formulas.
+    // NOTE(review): judging by the call site in FormulaGroupInterpreter::interpret,
+    // startPos/endPos appear to hold one row range per output element and
+    // outputSize is the number of results - confirm against the implementation.
+    virtual int OclHostFormulaMax(double *srcData,int *startPos,int *endPos,double *output,int outputSize)=0;
+    virtual int OclHostFormulaMin(double *srcData,int *startPos,int *endPos,double *output,int outputSize)=0;
+    virtual int OclHostFormulaAverage(double *srcData,int *startPos,int *endPos,double *output,int outputSize)=0;
+
+};
+
+
 class OpenclDevice {
-private:
-    GPUEnv gpu_env;
-    int isInited;
 
 public:
+    static GPUEnv gpuEnv;
+    static int isInited;
     OpenclDevice();
     ~OpenclDevice();
-    int regist_opencl_kernel();
-    int convert_to_string(const char *filename, char **source);
-    int binary_generated(cl_context context, const char * cl_file_name,
-            FILE ** fhandle);
-    int write_binary_to_file(const char* fileName, const char* birary,
-            size_t numBytes);
-    int generat_bin_from_kernel_source(cl_program program,
-            const char * cl_file_name);
-    int init_opencl_attr(OpenCLEnv * env);
-    int create_kernel(char * kernelname, KernelEnv * env);
-    int release_kernel(KernelEnv * env);
-    int init_opencl_env(GPUEnv *gpu_info);
-    int release_opencl_env(GPUEnv *gpu_info);
-    int run_kernel_wrapper(cl_kernel_function function, char * kernel_name,
-            void **usrdata);
-    int register_kernel_wrapper(const char *kernel_name,
-            cl_kernel_function function);
-    int cached_of_kerner_prg(const GPUEnv *gpu_env_cached,
-            const char * cl_file_name);
-    int compile_kernel_file(GPUEnv *gpu_info, const char *build_option);
-    int compile_kernel_file(const char *filename, GPUEnv *gpu_info,
-            const char *build_option);
-    int get_kernel_env_and_func(const char *kernel_name, KernelEnv *env,
-            cl_kernel_function *function);
-    int run_kernel(const char *kernel_name, void **userdata);
-    int init_opencl_run_env(int argc, const char *build_option_kernelfiles);
-    int init_opencl_run_env(int argc, const char *argv_kernelfiles[],
-            const char *build_option_kernelfiles);
-    int release_opencl_run_env();
-    void setOpenclState(int state);
-    int getOpenclState();
-    inline int add_kernel_cfg(int kCount, const char *kName);
+    static int InitEnv();
+    static int RegistOpenclKernel();
+    static int ReleaseOpenclRunEnv();
+    static int InitOpenclRunEnv(GPUEnv *gpu);
+    static int ReleaseOpenclEnv(GPUEnv *gpuInfo);
+    static int CompileKernelFile(GPUEnv *gpuInfo, const char *buildOption);
+    static int InitOpenclRunEnv(int argc, const char *buildOptionKernelfiles);
+    static int CachedOfKernerPrg(const GPUEnv *gpuEnvCached, const char * clFileName);
+    static int GeneratBinFromKernelSource(cl_program program, const char * clFileName);
+    static int WriteBinaryToFile(const char* fileName, const char* birary, size_t numBytes);
+    static int BinaryGenerated(const char * clFileName, FILE ** fhandle);
+    static int CompileKernelFile(const char *filename, GPUEnv *gpuInfo, const char *buildOption);
+
+    int ReleaseKernel(KernelEnv * env);
+    int InitOpenclAttr(OpenCLEnv * env);
+    int CreateKernel(char * kernelname, KernelEnv * env);
+    int RunKernel(const char *kernelName, void **userdata);
+    int ConvertToString(const char *filename, char **source);
+    int CheckKernelName(KernelEnv *envInfo,const char *kernelName);
+    int RegisterKernelWrapper(const char *kernelName,cl_kernel_function function);
+    int RunKernelWrapper(cl_kernel_function function, const char * kernelName, void **usrdata);
+    int GetKernelEnvAndFunc(const char *kernelName, KernelEnv *env,cl_kernel_function *function);
+
+
+#ifdef WIN32
+    static int LoadOpencl();
+    static int OpenclInite();
+    static void FreeOpenclDll();
+#endif
+
+    int GetOpenclState();
+    void SetOpenclState(int state);
+    inline static int AddKernelConfig(int kCount, const char *kName);
 
 };
 
 #define NUM 4//(16*16*16)
 typedef enum _formulax_ {
-    MIN, MAX, SUM, AVG, COUNT, SUMPRODUCT, MINVERSE
+	// Operation selectors, passed as the 'type' argument of OclCalc::OclProcess.
+	MIN,
+	MAX,
+	SUM,
+	AVG,
+	COUNT,
+	SUMPRODUCT,
+	MINVERSE,
+	SIGNEDADD,
+	SIGNEDNUL, // NOTE(review): presumably meant SIGNEDMUL (cf. OclHostSignedMul) - confirm before renaming
+	SIGNEDDIV,
+	SIGNEDSUB
 } formulax;
-class OclCalc: public OpenclDevice {
+
+// Calculation front end: combines the OpenCL device handling inherited from
+// OpenclDevice with the OpenclCalcBase calculation interface.
+// Both bases are inherited publicly; with the class default (private) an
+// OclCalc could not be passed around as an OpenclCalcBase.
+class OclCalc: public OpenclDevice, public OpenclCalcBase {
+
 public:
     OclCalc();
     ~OclCalc();
-    double OclProcess(cl_kernel_function function, double *data, formulax type);
     double OclTest();
+	double OclTestDll();
     double OclMin();
-    double OclMax();
-    double OclSum();
-    double OclCount();
-    double OclAverage();
-    double OclSumproduct();
-    double OclMinverse();
-
+	double OclProcess(cl_kernel_function function, double *data, formulax type);
+	// Implementations of the OpenclCalcBase interface.
+	int OclHostSignedAdd(double *lData,double *rData,double *rResult,int rowSize);
+	int OclHostSignedSub(double *lData,double *rData,double *rResult,int rowSize);
+	int OclHostSignedMul(double *lData,double *rData,double *rResult,int rowSize);
+	int OclHostSignedDiv(double *lData,double *rData,double *rResult,int rowSize);
+	int OclHostFormulaMax(double *srcData,int *startPos,int *endPos,double *output,int outputSize);
+	int OclHostFormulaMin(double *srcData,int *startPos,int *endPos,double *output,int outputSize);
+	int OclHostFormulaAverage(double *srcData,int *startPos,int *endPos,double *output,int outputSize);
 };
 
+#endif
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sc/source/core/tool/formulagroup.cxx b/sc/source/core/tool/formulagroup.cxx
index 1ee57b5..88c33cf 100644
--- a/sc/source/core/tool/formulagroup.cxx
+++ b/sc/source/core/tool/formulagroup.cxx
@@ -13,17 +13,64 @@
 #include "tokenarray.hxx"
 #include "compiler.hxx"
 #include "interpre.hxx"
-
 #include "formula/vectortoken.hxx"
 
+#ifdef ENABLE_OPENCL
+#include "openclwrapper.hxx"
+#endif
+
 namespace sc {
 
 FormulaGroupInterpreter::FormulaGroupInterpreter(
     ScDocument& rDoc, const ScAddress& rTopPos, const ScFormulaCellGroupRef& xGroup, ScTokenArray& rCode) :
     mrDoc(rDoc), maTopPos(rTopPos), mxGroup(xGroup), mrCode(rCode) {}
 
+/////time test dbg
+// Returns t1 - t2 in seconds, combining the Seconds and Nanosec fields of
+// each time value into one double.
+double getTimeDiff(const TimeValue& t1, const TimeValue& t2)
+{
+    const double fFirstSecs  = t1.Seconds + t1.Nanosec / 1000000000.0;
+    const double fSecondSecs = t2.Seconds + t2.Nanosec / 1000000000.0;
+    return fFirstSecs - fSecondSecs;
+}//dbg-t
+TimeValue aTimeBefore, aTimeAfter;
+///////////////////////////////////////
+
 bool FormulaGroupInterpreter::interpret()
 {
+#ifdef ENABLE_OPENCL //dbg
+    // Scratch buffers for the OpenCL path, sized from the group length.
+    size_t rowSize = mxGroup->mnLength, srcSize = 0;
+    // %ld expects a long, and size_t is not long on every platform.
+    fprintf(stderr,"rowSize at begin is ...%ld.\n",static_cast<long>(rowSize));
+    int *rangeStart =NULL; // The first position for calculation,for example,the A1 in (=MAX(A1:A100))
+    int *rangeEnd = NULL; // The last position for calculation,for example, the A100 in (=MAX(A1:A100))
+    // The row quantity can be gotten from p2->GetArrayLength()
+    int count1 =0,count2 =0,count3=0;
+    int oclOp=0;
+    double *srcData = NULL; // Point to the input data from CPU
+    double *rResult=NULL; // Point to the output data from GPU
+    double *leftData=NULL; // Left input for binary operator(+,-,*,/),for example,(=leftData+rightData)
+    double *rightData=NULL; // Right input for binary operator(+,-,*,/),for example,(=leftData/rightData)
+                            // The rightData can't be zero for "/"
+
+    leftData  = (double *)malloc(sizeof(double) * rowSize);
+    rightData = (double *)malloc(sizeof(double) * rowSize);
+    // calloc zeroes the whole result buffer; the former memset cleared only
+    // rowSize bytes and ran before the allocation had been checked.
+    rResult   = (double *)calloc(rowSize*2,sizeof(double));// For 2 columns(B,C)
+    srcData = (double *)calloc(rowSize,sizeof(double));
+
+    rangeStart =(int *)malloc(sizeof(int) * rowSize);
+    rangeEnd   =(int *)malloc(sizeof(int) * rowSize);
+
+    if(NULL==leftData||NULL==rightData||NULL==rResult||
+           NULL==srcData||NULL==rangeStart||NULL==rangeEnd)
+    {
+        printf("malloc err\n");
+        // Release whatever did get allocated; free(NULL) is a no-op.
+        free(leftData);
+        free(rightData);
+        free(rResult);
+        free(srcData);
+        free(rangeStart);
+        free(rangeEnd);
+        return false;
+    }
+    // printf("rowSize is %d.\n",rowsize);
+#endif
     // Until we implement group calculation for real, decompose the group into
     // individual formula token arrays for individual calculation.
     ScAddress aTmpPos = maTopPos;
@@ -51,12 +98,31 @@ bool FormulaGroupInterpreter::interpret()
                     size_t nRowEnd = p2->GetRefRowSize() - 1;
                     if (!p2->IsEndFixed())
                         nRowEnd += i;
-
                     size_t nRowSize = nRowEnd - nRowStart + 1;
                     ScMatrixRef pMat(new ScMatrix(nColSize, nRowSize, 0.0));
+#ifdef ENABLE_OPENCL
+                    //srcSize = rowSize+nRowSize-rowSize%nRowSize;//align as nRowSize
+                    //srcData = (double *)calloc(srcSize,sizeof(double));
+                    rangeStart[i] = nRowStart;//record the start position
+                    rangeEnd[i] = nRowEnd;//record the end position
+#endif
                     for (size_t nCol = 0; nCol < nColSize; ++nCol)
                     {
                         const double* pArray = rArrays[nCol];
+#ifdef ENABLE_OPENCL
+                        //printf("pArray is %p.\n",pArray);
+                        if( NULL==pArray )
+                        {
+                            fprintf(stderr,"Error: pArray is NULL!\n");
+                            return false;
+                        }
+                        //fprintf(stderr,"(rowSize+nRowSize-1) is %d.\n",rowSize+nRowSize-1);
+                        for( size_t u=0; u<rowSize; u++ )
+                        {
+                            srcData[u] = pArray[u];// note:rowSize<=srcSize
+                            //fprintf(stderr,"srcData[%d] is %f.\n",u,srcData[u]);
+                        }
+#endif
                         for (size_t nRow = 0; nRow < nRowSize; ++nRow)
                         {
                             if (nRowStart + nRow < p2->GetArrayLength())
@@ -80,16 +146,126 @@ bool FormulaGroupInterpreter::interpret()
         if (!pDest)
             return false;
 
-        ScCompiler aComp(&mrDoc, aTmpPos, aCode2);
-        aComp.SetGrammar(mrDoc.GetGrammar());
-        aComp.CompileTokenArray(); // Create RPN token array.
-        ScInterpreter aInterpreter(pDest, &mrDoc, aTmpPos, aCode2);
-        aInterpreter.Interpret();
+#ifdef ENABLE_OPENCL
+        const formula::FormulaToken *pCur = aCode2.First();
+        aCode2.Reset();
+        while( ( pCur = aCode2.Next() ) != NULL )
+        {
+            OpCode eOp = pCur->GetOpCode();
+            if(eOp==0)
+            {
+                  if(count3%2==0)
+                    leftData[count1++] = pCur->GetDouble();
+                   else
+                    rightData[count2++] = pCur->GetDouble();
+                count3++;
+               }
+               else if( eOp!=ocOpen && eOp!=ocClose )
+                oclOp = eOp;
 
-        pDest->SetResultToken(aInterpreter.GetResultToken().get());
-        pDest->ResetDirty();
-        pDest->SetChanged(true);
-    }
+//            if(count1>0){//dbg
+//                fprintf(stderr,"leftData is %f.\n",leftData[count1-1]);
+//                count1--;
+//            }
+//            if(count2>0){//dbg
+//                fprintf(stderr,"rightData is %f.\n",rightData[count2-1]);
+//                count2--;
+//            }
+        }
+#endif
+        if(getenv("SC_FORMULAGROUP")&&(!getenv("SC_GPU"))){
+            fprintf(stderr,"ccCPU flow...\n\n");
+            ScCompiler aComp(&mrDoc, aTmpPos, aCode2);
+            aComp.SetGrammar(mrDoc.GetGrammar());
+            aComp.CompileTokenArray(); // Create RPN token array.
+            ScInterpreter aInterpreter(pDest, &mrDoc, aTmpPos, aCode2);
+            aInterpreter.Interpret();
+            pDest->SetResultToken(aInterpreter.GetResultToken().get());
+            pDest->ResetDirty();
+            pDest->SetChanged(true);
+        }
+    } // for loop end (mxGroup->mnLength)
+    // For GPU calculation
+#ifdef ENABLE_OPENCL //dbg: Using "export SC_FORMULAGROUP=1;export SC_GPU=1" to open if{} in terminal
+    // GPU path: run the operator collected while walking the group over the
+    // gathered operands in one call, then store the results in the cells.
+    bool bResultsStored = true;
+    if(getenv("SC_FORMULAGROUP")&&(getenv("SC_GPU"))){
+        fprintf(stderr,"ggGPU flow...\n\n");
+        printf(" oclOp is... %d\n",oclOp);
+        osl_getSystemTime(&aTimeBefore);//timer
+        static OclCalc ocl_calc;
+        switch(oclOp)
+        {
+            case ocAdd:
+                ocl_calc.OclHostSignedAdd(leftData,rightData,rResult,count1);
+                break;
+            case ocSub:
+                ocl_calc.OclHostSignedSub(leftData,rightData,rResult,count1);
+                break;
+            case ocMul:
+                ocl_calc.OclHostSignedMul(leftData,rightData,rResult,count1);
+                break;
+            case ocDiv:
+                ocl_calc.OclHostSignedDiv(leftData,rightData,rResult,count1);
+                break;
+            case ocMax:
+                ocl_calc.OclHostFormulaMax(srcData,rangeStart,rangeEnd,rResult,rowSize);
+                break;
+            case ocMin:
+                ocl_calc.OclHostFormulaMin(srcData,rangeStart,rangeEnd,rResult,rowSize);
+                break;
+            case ocAverage:
+                ocl_calc.OclHostFormulaAverage(srcData,rangeStart,rangeEnd,rResult,rowSize);
+                break;
+            default:
+                // NOTE(review): the write-back below still runs in this case
+                // and stores whatever rResult holds - confirm that is intended.
+                fprintf(stderr,"No OpenCL function for this calculation.\n");
+                break;
+        }
+        osl_getSystemTime(&aTimeAfter);
+        double diff = getTimeDiff(aTimeAfter, aTimeBefore);
+        fprintf(stderr,"OpenCL,diff...%f.\n",diff);
+
+        // We want to stuff the double data, which in rResult[i] from GPU calculated well, to UI view for users
+        for (sal_Int32 i = 0; i < mxGroup->mnLength; ++i)
+        {
+            // Address row i explicitly so the first iteration does not depend
+            // on where the loop above left aTmpPos.
+            aTmpPos.SetRow(mxGroup->mnStart + i);
+            ScFormulaCell* pDestx = mrDoc.GetFormulaCell(aTmpPos);
+            if (!pDestx)
+            {
+                // Fall through to the cleanup below instead of returning with
+                // the scratch buffers still allocated.
+                bResultsStored = false;
+                break;
+            }
+            formula::FormulaTokenRef xResult = new formula::FormulaDoubleToken(rResult[i]);
+            pDestx->SetResultToken(xResult.get());
+            pDestx->ResetDirty();
+            pDestx->SetChanged(true);
+        }
+    }
+
+    // free(NULL) is a no-op, so no guards are needed.
+    free(leftData);
+    free(rightData);
+    free(rangeStart);
+    free(rangeEnd);
+    free(rResult);
+    free(srcData);
+
+    if(getenv("SC_GPUSAMPLE")){
+        //fprintf(stderr,"FormulaGroupInterpreter::interpret(),iniflag...%d\n",ocl_calc.GetOpenclState());
+        //ocl_calc.OclTest();//opencl test sample for debug
+    }
+
+    if (!bResultsStored)
+        return false;
+#endif
 
     return true;
 }
diff --git a/sc/source/core/tool/interpr1.cxx b/sc/source/core/tool/interpr1.cxx
index afe994e..ce0ce41 100644
--- a/sc/source/core/tool/interpr1.cxx
+++ b/sc/source/core/tool/interpr1.cxx
@@ -4327,10 +4327,6 @@ void ScInterpreter::ScProduct()
 void ScInterpreter::ScAverage( bool bTextAsZero )
 {
     RTL_LOGFILE_CONTEXT_AUTHOR( aLogger, "sc", "er", "ScInterpreter::ScAverage" );
-#ifdef ENABLE_OPENCL
-    static OclCalc ocl_calc;
-    ocl_calc.OclTest();
-#endif
     PushDouble( IterateParameters( ifAVERAGE, bTextAsZero ) );
 }
 
diff --git a/sc/source/ui/app/scmod.cxx b/sc/source/ui/app/scmod.cxx
index ac3daad..05cef41 100644
--- a/sc/source/ui/app/scmod.cxx
+++ b/sc/source/ui/app/scmod.cxx
@@ -101,6 +101,10 @@
 #include "scabstdlg.hxx"
 #include "formula/errorcodes.hxx"
 
+#ifdef ENABLE_OPENCL
+#include "openclwrapper.hxx"
+#endif
+
 #define SC_IDLE_MIN     150
 #define SC_IDLE_MAX     3000
 #define SC_IDLE_STEP    75
@@ -148,6 +152,9 @@ ScModule::ScModule( SfxObjectFactory* pFact ) :
     mbIsInSharedDocLoading( false ),
     mbIsInSharedDocSaving( false )
 {
+#ifdef ENABLE_OPENCL
+    OclCalc::InitEnv();
+#endif
     //  im ctor ist der ResManager (DLL-Daten) noch nicht initialisiert!
 
     SetName(OUString("StarCalc"));       // fuer Basic
@@ -181,6 +188,9 @@ ScModule::ScModule( SfxObjectFactory* pFact ) :
 
 ScModule::~ScModule()
 {
+#ifdef ENABLE_OPENCL
+    OclCalc::ReleaseOpenclRunEnv();
+#endif
     OSL_ENSURE( !pSelTransfer, "Selection Transfer object not deleted" );
 
     //  InputHandler braucht nicht mehr geloescht zu werden (gibt keinen an der App mehr)