[Libreoffice-commits] core.git: Branch 'feature/calc-group-interpreter' - 2 commits - sc/source
Michael Meeks
michael.meeks at suse.com
Wed Jun 26 04:52:19 PDT 2013
sc/source/core/opencl/oclkernels.hxx | 144 ++-
sc/source/core/opencl/openclwrapper.cxx | 1469 +++++++++++++++++++-------------
sc/source/core/opencl/openclwrapper.hxx | 160 +--
sc/source/core/tool/formulagroup.cxx | 198 ++++
sc/source/core/tool/interpr1.cxx | 4
sc/source/ui/app/scmod.cxx | 10
6 files changed, 1290 insertions(+), 695 deletions(-)
New commits:
commit e984d8f9e3dbfa39c1e2a21f9e5d81d97d556fba
Author: Michael Meeks <michael.meeks at suse.com>
Date: Wed Jun 26 12:52:53 2013 +0100
avoid srand / time / rand calls.
diff --git a/sc/source/core/opencl/openclwrapper.cxx b/sc/source/core/opencl/openclwrapper.cxx
index bcb9cd8..b06af59 100644
--- a/sc/source/core/opencl/openclwrapper.cxx
+++ b/sc/source/core/opencl/openclwrapper.cxx
@@ -900,8 +900,6 @@ double OclCalc::OclProcess(cl_kernel_function function, double *data, formulax t
double OclCalc::OclTest() {
double data[NUM];
- srand((unsigned int) time(NULL));
-
for (int i = 0; i < NUM; i++) {
data[i] = sc::rng::uniform();
fprintf(stderr, "%f\t", data[i]);
@@ -912,10 +910,9 @@ double OclCalc::OclTest() {
double OclCalc::OclTestDll() {
double data[NUM];
- srand((unsigned int) time(NULL));
for (int i = 0; i < NUM; i++) {
- data[i] = rand() / (RAND_MAX + 1.0);
+ data[i] = sc::rng::uniform();
fprintf(stderr, "%f\t", data[i]);
}
OclProcess(&OclFormulaxDll, data, AVG);
commit 0b9381812bab1cd85925cbc8a707185fd0a13e2c
Author: Jing Xian <jingxian at multicorewareinc.com>
Date: Wed Jun 26 12:19:51 2013 +0100
more work on formula interpretation.
diff --git a/sc/source/core/opencl/oclkernels.hxx b/sc/source/core/opencl/oclkernels.hxx
index f9db447..3269f3a 100644
--- a/sc/source/core/opencl/oclkernels.hxx
+++ b/sc/source/core/opencl/oclkernels.hxx
@@ -6,17 +6,15 @@
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/
-
#ifndef _OCL_KERNEL_H_
#define _OCL_KERNEL_H_
#ifndef USE_EXTERNAL_KERNEL
#define KERNEL( ... )# __VA_ARGS__
+
/////////////////////////////////////////////
const char *kernel_src = KERNEL(
-//Add kernel here
-//use \n ... \n to define macro
__kernel void hello(__global uint *buffer)
{
@@ -27,83 +25,134 @@ buffer[idx]=idx;
}
__kernel void oclformula(__global float *data,
- const uint type)
+ const uint type)
{
- const unsigned int i = get_global_id(0);
-
- switch (type)
- {
- case 0: //MAX
- {
- //printf("%i ?%f>%f\n",i,data[2*i],data[2*i+1]);
- if(data[2*i]>data[2*i+1])
- data[i] = data[2*i];
- else
- data[i] = data[2*i+1];
- break;
- }
- case 1: //MIN
- {
- //printf("%d ?%d<%d\n",i,data[2*i],data[2*i+1]);
- if(data[2*i]<data[2*i+1])
- data[i] = data[2*i];
- else
- data[i] = data[2*i+1];
- break;
- }
- case 2: //SUM
- case 3: //AVG
- {
- //printf("%d %d+%d\n",i,data[2*i],data[2*i+1]);
- data[i] = data[2*i] + data[2*i+1];
- break;
- }
- default:
- break;
-
- }
+ const unsigned int i = get_global_id(0);
+
+ switch (type)
+ {
+ case 0: //MAX
+ {
+ //printf("%i ?%f>%f\n",i,data[2*i],data[2*i+1]);
+ if(data[2*i]>data[2*i+1])
+ data[i] = data[2*i];
+ else
+ data[i] = data[2*i+1];
+ break;
+ }
+ case 1: //MIN
+ {
+ //printf("%d ?%d<%d\n",i,data[2*i],data[2*i+1]);
+ if(data[2*i]<data[2*i+1])
+ data[i] = data[2*i];
+ else
+ data[i] = data[2*i+1];
+ break;
+ }
+ case 2: //SUM
+ case 3: //AVG
+ {
+ //printf("%d %d+%d\n",i,data[2*i],data[2*i+1]);
+ data[i] = data[2*i] + data[2*i+1];
+ break;
+ }
+ default:
+ break;
+
+ }
+}
+
+__kernel void oclSignedAdd(__global float *ltData,__global float *rtData,__global float *otData)
+{
+ const unsigned int id = get_global_id(0);
+ otData[id] = ltData[id] + rtData[id];
}
-__kernel void oclFormulaMin(__global float *data,
- const uint type)
+
+__kernel void oclSignedSub(__global float *ltData,__global float *rtData,__global float *otData)
{
+ const unsigned int id = get_global_id(0);
+ otData[id] = ltData[id] - rtData[id];
}
-__kernel void oclFormulaMax(__global float *data,
- const uint type)
+__kernel void oclSignedMul(__global float *ltData,__global float *rtData,__global float *otData)
{
+ int id = get_global_id(0);
+ otData[id] =ltData[id] * rtData[id];
+}
+
+__kernel void oclSignedDiv(__global float *ltData,__global float *rtData,__global float *otData)
+{
+ const unsigned int id = get_global_id(0);
+ otData[id] = ltData[id] / rtData[id];
+}
+
+__kernel void oclFormulaMin(__global float *input,__global int *start,__global int *end,__global float *output)
+{
+ const unsigned int id = get_global_id(0);
+ int i=0;
+ unsigned int startFlag = start[id];
+ unsigned int endFlag = end[id];
+ float min = input[startFlag];
+ for(i=startFlag;i<=endFlag;i++)
+ {
+ if(input[i]<min)
+ min = input[i];
+ }
+ output[id] = min;
+
+}
+
+__kernel void oclFormulaMax(__global float *input,__global int *start,__global int *end,__global float *output)
+{
+ const unsigned int id = get_global_id(0);
+ int i=0;
+ unsigned int startFlag = start[id];
+ unsigned int endFlag = end[id];
+ float max = input[startFlag];
+ for(i=startFlag;i<=endFlag;i++)
+ {
+ if(input[i]>max)
+ max = input[i];
+ }
+ output[id] = max;
}
__kernel void oclFormulaSum(__global float *data,
- const uint type)
+ const uint type)
{
}
__kernel void oclFormulaCount(__global float *data,
- const uint type)
+ const uint type)
{
}
-__kernel void oclFormulaAverage(__global float *data,
- const uint type)
+__kernel void oclFormulaAverage(__global float *input,__global int *start,__global int *end,__global float *output)
{
+ const unsigned int id = get_global_id(0);
+ int i=0;
+ float sum=0;
+ for(i = start[id];i<=end[id];i++)
+ sum += input[i];
+ output[id] = sum / (end[id]-start[id]+1);
}
__kernel void oclFormulaSumproduct(__global float *data,
- const uint type)
+ const uint type)
{
}
__kernel void oclFormulaMinverse(__global float *data,
- const uint type)
+ const uint type)
{
}
@@ -112,5 +161,4 @@ __kernel void oclFormulaMinverse(__global float *data,
#endif // USE_EXTERNAL_KERNEL
#endif //_OCL_KERNEL_H_
-
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sc/source/core/opencl/openclwrapper.cxx b/sc/source/core/opencl/openclwrapper.cxx
index 6c3935e..bcb9cd8 100644
--- a/sc/source/core/opencl/openclwrapper.cxx
+++ b/sc/source/core/opencl/openclwrapper.cxx
@@ -10,57 +10,140 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
-
#include "random.hxx"
#include "openclwrapper.hxx"
#include "oclkernels.hxx"
+#ifdef WIN32
+#include <Windows.h>
+#endif
+//#define USE_KERNEL_FILE
+using namespace std;
+GPUEnv OpenclDevice::gpuEnv;
+int OpenclDevice::isInited =0;
+#ifdef WIN32
-inline int OpenclDevice::add_kernel_cfg(int kCount, const char *kName) {
- strcpy(gpu_env.kernel_names[kCount], kName);
- gpu_env.kernel_count++;
- return 0;
+#define OPENCL_DLL_NAME "opencllo.dll"
+#define OCLERR -1
+#define OCLSUCCESS 1
+HINSTANCE HOpenclDll = NULL;
+ void *OpenclDll = NULL;
+
+int OpenclDevice::LoadOpencl()
+{
+ //fprintf(stderr, " LoadOpenclDllxx... \n");
+ OpenclDll = static_cast<HINSTANCE>(HOpenclDll);
+ OpenclDll = LoadLibrary(OPENCL_DLL_NAME);
+ if (!static_cast<HINSTANCE>(OpenclDll))
+ {
+ fprintf(stderr, " Load opencllo.dll failed! \n");
+ FreeLibrary(static_cast<HINSTANCE>(OpenclDll));
+ return OCLERR;
+ }
+ fprintf(stderr, " Load opencllo.dll successfully!\n");
+ return OCLSUCCESS;
}
-int OpenclDevice::regist_opencl_kernel() {
- if (!gpu_env.isUserCreated) {
- memset(&gpu_env, 0, sizeof(gpu_env));
- }
+void OpenclDevice::FreeOpenclDll()
+{
+ fprintf(stderr, " Free opencllo.dll ... \n");
+ if(!static_cast<HINSTANCE>(OpenclDll))
+ FreeLibrary(static_cast<HINSTANCE>(OpenclDll));
+}
+#endif
- gpu_env.file_count = 0; //argc;
- gpu_env.kernel_count = 0UL;
-
- add_kernel_cfg(0, (const char*) "hello");
- add_kernel_cfg(1, (const char*) "oclformula");
- add_kernel_cfg(2, (const char*) "oclFormulaMin");
- add_kernel_cfg(3, (const char*) "oclFormulaMax");
- add_kernel_cfg(4, (const char*) "oclFormulaSum");
- add_kernel_cfg(5, (const char*) "oclFormulaCount");
- add_kernel_cfg(6, (const char*) "oclFormulaAverage");
- add_kernel_cfg(7, (const char*) "oclFormulaSumproduct");
- add_kernel_cfg(8, (const char*) "oclFormulaMinverse");
+int OpenclDevice::InitEnv()
+{
+#ifdef WIN32
+ while(1){
+ if(1==LoadOpencl())
+ break;
+ }
+#endif
+ InitOpenclRunEnv(0,NULL);
+ return 1;
+}
+
+int OpenclDevice::ReleaseOpenclRunEnv() {
+ ReleaseOpenclEnv(&gpuEnv);
+#ifdef WIN32
+ FreeOpenclDll();
+#endif
+ return 1;
+}
+///////////////////////////////////////////////////////
+///////////////////////////////////////////////////////
+inline int OpenclDevice::AddKernelConfig(int kCount, const char *kName) {
+ strcpy(gpuEnv.kernelNames[kCount], kName);
+ gpuEnv.kernelCount++;
return 0;
}
-OpenclDevice::OpenclDevice() :
- isInited(0) {
+int OpenclDevice::RegistOpenclKernel() {
+ if (!gpuEnv.isUserCreated) {
+ memset(&gpuEnv, 0, sizeof(gpuEnv));
+ }
+
+ gpuEnv.fileCount = 0; //argc;
+ gpuEnv.kernelCount = 0UL;
+
+ AddKernelConfig(0, (const char*) "hello");
+ AddKernelConfig(1, (const char*) "oclformula");
+ AddKernelConfig(2, (const char*) "oclFormulaMin");
+ AddKernelConfig(3, (const char*) "oclFormulaMax");
+ AddKernelConfig(4, (const char*) "oclFormulaSum");
+ AddKernelConfig(5, (const char*) "oclFormulaCount");
+ AddKernelConfig(6, (const char*) "oclFormulaAverage");
+ AddKernelConfig(7, (const char*) "oclFormulaSumproduct");
+ AddKernelConfig(8, (const char*) "oclFormulaMinverse");
+
+ AddKernelConfig(9, (const char*) "oclSignedAdd");
+ AddKernelConfig(10, (const char*) "oclSignedSub");
+ AddKernelConfig(11, (const char*) "oclSignedMul");
+ AddKernelConfig(12, (const char*) "oclSignedDiv");
+ return 0;
+}
+OpenclDevice::OpenclDevice(){
+ //InitEnv();
}
OpenclDevice::~OpenclDevice() {
+ //ReleaseOpenclRunEnv();
+}
+int OpenclDevice::CheckKernelName(KernelEnv *envInfo,const char *kernelName){
+ //printf("CheckKernelName,total count of kernels...%d\n", gpuEnv.kernelCount);
+ int kCount;
+ for(kCount=0; kCount < gpuEnv.kernelCount; kCount++) {
+ if(strcasecmp(kernelName, gpuEnv.kernelNames[kCount]) == 0) {
+ printf("match %s kernel right\n",kernelName);
+ break;
+ }
+ }
+ envInfo->context = gpuEnv.context;
+ envInfo->commandQueue = gpuEnv.commandQueue;
+ envInfo->program = gpuEnv.programs[0];
+ envInfo->kernel = gpuEnv.kernels[kCount];
+ strcpy(envInfo->kernelName, kernelName);
+ if (envInfo == (KernelEnv *) NULL)
+ {
+ printf("get err func and env\n");
+ return 0;
+ }
+ return 1;
}
-#ifdef USE_KERNEL_FILE
-int OpenclDevice::convert_to_string(const char *filename, char **source) {
+
+int OpenclDevice::ConvertToString(const char *filename, char **source) {
int file_size;
size_t result;
FILE *file = NULL;
-
file_size = 0;
result = 0;
file = fopen(filename, "rb+");
- printf("open kernel file %s.\n", filename);
+ printf("open kernel file %s.\n",filename);
if (file != NULL) {
+ printf("Open ok!\n");
fseek(file, 0, SEEK_END);
file_size = ftell(file);
@@ -82,68 +165,41 @@ int OpenclDevice::convert_to_string(const char *filename, char **source) {
printf("open kernel file failed.\n");
return (0);
}
-#endif
-int OpenclDevice::binary_generated(cl_context context,
- const char * cl_file_name, FILE ** fhandle) {
- unsigned int i = 0;
- cl_int status;
-
- size_t numDevices;
-
- cl_device_id *devices;
-
- char *str = NULL;
-
- FILE *fd = NULL;
-
- status = clGetContextInfo(context, CL_CONTEXT_NUM_DEVICES,
- sizeof(numDevices), &numDevices, NULL);
-
- CHECK_OPENCL(status)
-
- devices = (cl_device_id*) malloc(sizeof(cl_device_id) * numDevices);
-
- if (devices == NULL) {
- return 0;
- }
-
- /* grab the handles to all of the devices in the context. */
- status = clGetContextInfo(context, CL_CONTEXT_DEVICES,
- sizeof(cl_device_id) * numDevices, devices, NULL);
-
- status = 0;
- /* dump out each binary into its own separate file. */
- for (i = 0; i < numDevices; i++) {
- char fileName[256] = { 0 }, cl_name[128] = { 0 };
-
- if (devices[i] != 0) {
- char deviceName[1024];
- status = clGetDeviceInfo(devices[i], CL_DEVICE_NAME,
- sizeof(deviceName), deviceName, NULL);
- CHECK_OPENCL(status)
- str = (char*) strstr(cl_file_name, (char*) ".cl");
- memcpy(cl_name, cl_file_name, str - cl_file_name);
- cl_name[str - cl_file_name] = '\0';
- sprintf(fileName, "./%s-%s.bin", cl_name, deviceName);
- fd = fopen(fileName, "rb");
- status = (fd != NULL) ? 1 : 0;
- }
- }
-
- if (devices != NULL) {
- free(devices);
- devices = NULL;
- }
-
- if (fd != NULL) {
- *fhandle = fd;
- }
+int OpenclDevice::BinaryGenerated(const char * clFileName, FILE ** fhandle) {
+ unsigned int i = 0;
+ cl_int status;
+ char *str = NULL;
+ FILE *fd = NULL;
+ cl_uint numDevices=0;
+ status = clGetDeviceIDs(gpuEnv.platform, // platform
+ CL_DEVICE_TYPE_GPU, // device_type
+ 0, // num_entries
+ NULL, // devices
+ &numDevices);
+ for (i = 0; i <numDevices; i++) {
+ char fileName[256] = { 0 }, cl_name[128] = { 0 };
+ if (gpuEnv.devices[i] != 0) {
+ char deviceName[1024];
+ status = clGetDeviceInfo(gpuEnv.devices[i], CL_DEVICE_NAME,sizeof(deviceName), deviceName, NULL);
+ CHECK_OPENCL(status);
+ str = (char*) strstr(clFileName, (char*) ".cl");
+ memcpy(cl_name, clFileName, str - clFileName);
+ cl_name[str - clFileName] = '\0';
+ sprintf(fileName, "./%s-%s.bin", cl_name, deviceName);
+ fd = fopen(fileName, "rb");
+ status = (fd != NULL) ? 1 : 0;
+ }
+ }
+ if (fd != NULL) {
+ *fhandle = fd;
+ }
+
+ return status;
- return status;
}
-int OpenclDevice::write_binary_to_file(const char* fileName, const char* birary,
+int OpenclDevice::WriteBinaryToFile(const char* fileName, const char* birary,
size_t numBytes) {
FILE *output = NULL;
output = fopen(fileName, "wb");
@@ -155,11 +211,12 @@ int OpenclDevice::write_binary_to_file(const char* fileName, const char* birary,
fclose(output);
return 1;
+
}
-int OpenclDevice::generat_bin_from_kernel_source(cl_program program,
- const char * cl_file_name) {
- unsigned int i = 0;
+int OpenclDevice::GeneratBinFromKernelSource(cl_program program,
+ const char * clFileName) {
+ unsigned int i = 0;
cl_int status;
size_t *binarySizes, numDevices;
cl_device_id *devices;
@@ -216,12 +273,12 @@ int OpenclDevice::generat_bin_from_kernel_source(cl_program program,
sizeof(deviceName), deviceName, NULL);
CHECK_OPENCL(status)
- str = (char*) strstr(cl_file_name, (char*) ".cl");
- memcpy(cl_name, cl_file_name, str - cl_file_name);
- cl_name[str - cl_file_name] = '\0';
+ str = (char*) strstr(clFileName, (char*) ".cl");
+ memcpy(cl_name, clFileName, str - clFileName);
+ cl_name[str - clFileName] = '\0';
sprintf(fileName, "./%s-%s.bin", cl_name, deviceName);
- if (!write_binary_to_file(fileName, binaries[i], binarySizes[i])) {
+ if (!WriteBinaryToFile(fileName, binaries[i], binarySizes[i])) {
printf("opencl-wrapper: write binary[%s] failds\n", fileName);
return 0;
} //else
@@ -254,164 +311,36 @@ int OpenclDevice::generat_bin_from_kernel_source(cl_program program,
return 1;
}
-int OpenclDevice::init_opencl_attr(OpenCLEnv * env) {
- if (gpu_env.isUserCreated) {
+int OpenclDevice::InitOpenclAttr(OpenCLEnv * env) {
+ if (gpuEnv.isUserCreated) {
return 1;
}
- gpu_env.context = env->context;
- gpu_env.platform = env->platform;
- gpu_env.dev = env->devices;
- gpu_env.command_queue = env->command_queue;
+ gpuEnv.context = env->context;
+ gpuEnv.platform = env->platform;
+ gpuEnv.dev = env->devices;
+ gpuEnv.commandQueue = env->commandQueue;
- gpu_env.isUserCreated = 1;
+ gpuEnv.isUserCreated = 1;
return 0;
}
-int OpenclDevice::create_kernel(char * kernelname, KernelEnv * env) {
+int OpenclDevice::CreateKernel(char * kernelname, KernelEnv * env) {
int status;
- env->kernel = clCreateKernel(gpu_env.programs[0], kernelname, &status);
- env->context = gpu_env.context;
- env->command_queue = gpu_env.command_queue;
+ env->kernel = clCreateKernel(gpuEnv.programs[0], kernelname, &status);
+ env->context = gpuEnv.context;
+ env->commandQueue = gpuEnv.commandQueue;
return status != CL_SUCCESS ? 1 : 0;
}
-int OpenclDevice::release_kernel(KernelEnv * env) {
+int OpenclDevice::ReleaseKernel(KernelEnv * env) {
int status = clReleaseKernel(env->kernel);
return status != CL_SUCCESS ? 1 : 0;
}
-int OpenclDevice::init_opencl_env(GPUEnv *gpu_info) {
- size_t length;
- cl_int status;
- cl_uint numPlatforms, numDevices;
- cl_platform_id *platforms;
- cl_context_properties cps[3];
- char platformName[100];
- unsigned int i;
-
- /*
- * Have a look at the available platforms.
- */
- if (!gpu_info->isUserCreated) {
- status = clGetPlatformIDs(0, NULL, &numPlatforms);
- if (status != CL_SUCCESS) {
- return (1);
- }
- gpu_info->platform = NULL;
- ;
- if (0 < numPlatforms) {
- platforms = (cl_platform_id*) malloc(
- numPlatforms * sizeof(cl_platform_id));
- if (platforms == (cl_platform_id*) NULL) {
- return (1);
- }
- status = clGetPlatformIDs(numPlatforms, platforms, NULL);
-
- if (status != CL_SUCCESS) {
- return (1);
- }
-
- for (i = 0; i < numPlatforms; i++) {
- status = clGetPlatformInfo(platforms[i], CL_PLATFORM_VENDOR,
- sizeof(platformName), platformName, NULL);
-
- if (status != CL_SUCCESS) {
- return (1);
- }
- gpu_info->platform = platforms[i];
-
- //if (!strcmp(platformName, "Intel(R) Coporation"))
- //if( !strcmp( platformName, "Advanced Micro Devices, Inc." ))
- {
- gpu_info->platform = platforms[i];
-
- status = clGetDeviceIDs(gpu_info->platform /* platform */,
- CL_DEVICE_TYPE_GPU /* device_type */,
- 0 /* num_entries */, NULL /* devices */,
- &numDevices);
-
- if (status != CL_SUCCESS) {
- return (1);
- }
-
- if (numDevices) {
- break;
- }
- }
- }
- free(platforms);
- }
- if (NULL == gpu_info->platform) {
- return (1);
- }
-
- /*
- * Use available platform.
- */
- cps[0] = CL_CONTEXT_PLATFORM;
- cps[1] = (cl_context_properties) gpu_info->platform;
- cps[2] = 0;
- /* Check for GPU. */
- gpu_info->dType = CL_DEVICE_TYPE_GPU;
- gpu_info->context = clCreateContextFromType(cps, gpu_info->dType, NULL,
- NULL, &status);
-
- if ((gpu_info->context == (cl_context) NULL)
- || (status != CL_SUCCESS)) {
- gpu_info->dType = CL_DEVICE_TYPE_CPU;
- gpu_info->context = clCreateContextFromType(cps, gpu_info->dType,
- NULL, NULL, &status);
- }
- if ((gpu_info->context == (cl_context) NULL)
- || (status != CL_SUCCESS)) {
- gpu_info->dType = CL_DEVICE_TYPE_DEFAULT;
- gpu_info->context = clCreateContextFromType(cps, gpu_info->dType,
- NULL, NULL, &status);
- }
- if ((gpu_info->context == (cl_context) NULL)
- || (status != CL_SUCCESS)) {
- return (1);
- }
- /* Detect OpenCL devices. */
- /* First, get the size of device list data */
- status = clGetContextInfo(gpu_info->context, CL_CONTEXT_DEVICES, 0,
- NULL, &length);
-
- if ((status != CL_SUCCESS) || (length == 0)) {
- return (1);
- }
- /* Now allocate memory for device list based on the size we got earlier */
- gpu_info->devices = (cl_device_id*) malloc(length);
- if (gpu_info->devices == (cl_device_id*) NULL) {
- return (1);
- }
- /* Now, get the device list data */
- status = clGetContextInfo(gpu_info->context, CL_CONTEXT_DEVICES, length,
- gpu_info->devices, NULL);
-
- if (status != CL_SUCCESS) {
- return (1);
- }
-
- /* Create OpenCL command queue. */
- gpu_info->command_queue = clCreateCommandQueue(gpu_info->context,
- gpu_info->devices[0], 0, &status);
-
- if (status != CL_SUCCESS) {
- return (1);
- }
- }
-
- status = clGetCommandQueueInfo(gpu_info->command_queue,
- CL_QUEUE_THREAD_HANDLE_AMD, 0, NULL, NULL);
-
- return 0;
-}
-
-int OpenclDevice::release_opencl_env(GPUEnv *gpu_info) {
+int OpenclDevice::ReleaseOpenclEnv(GPUEnv *gpuInfo) {
int i = 0;
int status = 0;
@@ -419,60 +348,44 @@ int OpenclDevice::release_opencl_env(GPUEnv *gpu_info) {
return 1;
}
- for (i = 0; i < gpu_env.file_count; i++) {
- if (gpu_env.programs[i]) {
- status = clReleaseProgram(gpu_env.programs[i]);
+ for (i = 0; i < gpuEnv.fileCount; i++) {
+ if (gpuEnv.programs[i]) {
+ status = clReleaseProgram(gpuEnv.programs[i]);
CHECK_OPENCL(status)
- gpu_env.programs[i] = NULL;
+ gpuEnv.programs[i] = NULL;
}
}
- if (gpu_env.command_queue) {
- clReleaseCommandQueue(gpu_env.command_queue);
- gpu_env.command_queue = NULL;
+ if (gpuEnv.commandQueue) {
+ clReleaseCommandQueue(gpuEnv.commandQueue);
+ gpuEnv.commandQueue = NULL;
}
- if (gpu_env.context) {
- clReleaseContext(gpu_env.context);
- gpu_env.context = NULL;
+ if (gpuEnv.context) {
+ clReleaseContext(gpuEnv.context);
+ gpuEnv.context = NULL;
}
isInited = 0;
- gpu_info->isUserCreated = 0;
- free(gpu_info->devices);
+ gpuInfo->isUserCreated = 0;
+ free(gpuInfo->devices);
return 1;
}
-int OpenclDevice::run_kernel_wrapper(cl_kernel_function function,
- char * kernel_name, void **usrdata) {
- printf("oclwrapper:run_kernel_wrapper...\n");
- if (register_kernel_wrapper(kernel_name, function) != 1) {
+int OpenclDevice::RunKernelWrapper(cl_kernel_function function,
+ const char * kernelName, void **usrdata) {
+ printf("oclwrapper:RunKernel_wrapper...\n");
+ if (RegisterKernelWrapper(kernelName, function) != 1) {
fprintf(stderr,
- "Error:run_kernel_wrapper:register_kernel_wrapper fail!\n");
+ "Error:RunKernel_wrapper:RegisterKernelWrapper fail!\n");
return -1;
}
- return (run_kernel(kernel_name, usrdata));
-}
-
-int OpenclDevice::register_kernel_wrapper(const char *kernel_name,
- cl_kernel_function function) {
- int i;
- printf("oclwrapper:register_kernel_wrapper...%d\n", gpu_env.kernel_count);
- for (i = 0; i < gpu_env.kernel_count; i++) {
- //printf("oclwrapper:register_kernel_wrapper kname...%s\n", kernel_name);
- //printf("oclwrapper:register_kernel_wrapper kname...%s\n", gpu_env.kernel_names[i]);
- if (strcasecmp(kernel_name, gpu_env.kernel_names[i]) == 0) {
- //printf("oclwrapper:register_kernel_wrapper if()...\n");
- gpu_env.kernel_functions[i] = function;
- return (1);
- }
- }
- return (0);
+ return (RunKernel(kernelName, usrdata));
}
-int OpenclDevice::cached_of_kerner_prg(const GPUEnv *gpu_env_cached,
- const char * cl_file_name) {
- int i;
- for (i = 0; i < gpu_env_cached->file_count; i++) {
- if (strcasecmp(gpu_env_cached->kernelSrcFile[i], cl_file_name) == 0) {
- if (gpu_env_cached->programs[i] != NULL) {
+int OpenclDevice::CachedOfKernerPrg(const GPUEnv *gpuEnvCached,
+ const char * clFileName) {
+ int i;
+ for (i = 0; i < gpuEnvCached->fileCount; i++) {
+ if (strcasecmp(gpuEnvCached->kernelSrcFile[i], clFileName) == 0) {
+ if (gpuEnvCached->programs[i] != NULL) {
return (1);
}
}
@@ -481,37 +394,30 @@ int OpenclDevice::cached_of_kerner_prg(const GPUEnv *gpu_env_cached,
return (0);
}
-int OpenclDevice::compile_kernel_file(GPUEnv *gpu_info, const char *build_option) {
+int OpenclDevice::CompileKernelFile(GPUEnv *gpuInfo, const char *buildOption) {
cl_int status;
-
size_t length;
-
char *buildLog = NULL, *binary;
-
const char *source;
size_t source_size[1];
-
int b_error, binary_status, binaryExisted, idx;
-
size_t numDevices;
-
cl_device_id *devices;
-
FILE *fd, *fd1;
const char* filename = "kernel.cl";
- if (cached_of_kerner_prg(gpu_info, filename) == 1) {
+ fprintf(stderr, "CompileKernelFile ... \n");
+ if (CachedOfKernerPrg(gpuInfo, filename) == 1) {
return (1);
}
- idx = gpu_info->file_count;
+ idx = gpuInfo->fileCount;
source = kernel_src;
source_size[0] = strlen(source);
-
binaryExisted = 0;
- if ((binaryExisted = binary_generated(gpu_info->context, filename, &fd)) == 1) {
- status = clGetContextInfo(gpu_info->context, CL_CONTEXT_NUM_DEVICES,
+ if ((binaryExisted = BinaryGenerated(filename, &fd)) == 1) {
+ status = clGetContextInfo(gpuInfo->context, CL_CONTEXT_NUM_DEVICES,
sizeof(numDevices), &numDevices, NULL);
CHECK_OPENCL(status)
@@ -543,11 +449,11 @@ int OpenclDevice::compile_kernel_file(GPUEnv *gpu_info, const char *build_option
fclose(fd);
fd = NULL;
// grab the handles to all of the devices in the context.
- status = clGetContextInfo(gpu_info->context, CL_CONTEXT_DEVICES,
+ status = clGetContextInfo(gpuInfo->context, CL_CONTEXT_DEVICES,
sizeof(cl_device_id) * numDevices, devices, NULL);
CHECK_OPENCL(status)
- gpu_info->programs[idx] = clCreateProgramWithBinary(gpu_info->context,
+ gpuInfo->programs[idx] = clCreateProgramWithBinary(gpuInfo->context,
numDevices, devices, &length, (const unsigned char**) &binary,
&binary_status, &status);
CHECK_OPENCL(status)
@@ -556,40 +462,37 @@ int OpenclDevice::compile_kernel_file(GPUEnv *gpu_info, const char *build_option
free(devices);
devices = NULL;
} else {
-
// create a CL program using the kernel source
- gpu_info->programs[idx] = clCreateProgramWithSource(gpu_info->context,
+ gpuEnv.programs[idx] = clCreateProgramWithSource(gpuEnv.context,
1, &source, source_size, &status);
- CHECK_OPENCL(status)
-
- printf("clCreateProgramWithSource.\n");
+ CHECK_OPENCL(status);
}
- if (gpu_info->programs[idx] == (cl_program) NULL) {
+ if (gpuInfo->programs[idx] == (cl_program) NULL) {
return (0);
}
//char options[512];
// create a cl program executable for all the devices specified
- if (!gpu_info->isUserCreated) {
- status = clBuildProgram(gpu_info->programs[idx], 1, gpu_info->devices,
- build_option, NULL, NULL);
+ if (!gpuInfo->isUserCreated) {
+ status = clBuildProgram(gpuInfo->programs[idx], 1, gpuInfo->devices,
+ buildOption, NULL, NULL);
CHECK_OPENCL(status)
} else {
- status = clBuildProgram(gpu_info->programs[idx], 1, &(gpu_info->dev),
- build_option, NULL, NULL);
+ status = clBuildProgram(gpuInfo->programs[idx], 1, &(gpuInfo->dev),
+ buildOption, NULL, NULL);
CHECK_OPENCL(status)
}
printf("BuildProgram.\n");
if (status != CL_SUCCESS) {
- if (!gpu_info->isUserCreated) {
- status = clGetProgramBuildInfo(gpu_info->programs[idx],
- gpu_info->devices[0], CL_PROGRAM_BUILD_LOG, 0, NULL,
+ if (!gpuInfo->isUserCreated) {
+ status = clGetProgramBuildInfo(gpuInfo->programs[idx],
+ gpuInfo->devices[0], CL_PROGRAM_BUILD_LOG, 0, NULL,
&length);
} else {
- status = clGetProgramBuildInfo(gpu_info->programs[idx],
- gpu_info->dev, CL_PROGRAM_BUILD_LOG, 0, NULL, &length);
+ status = clGetProgramBuildInfo(gpuInfo->programs[idx],
+ gpuInfo->dev, CL_PROGRAM_BUILD_LOG, 0, NULL, &length);
}
if (status != CL_SUCCESS) {
printf("opencl create build log fail\n");
@@ -599,13 +502,13 @@ int OpenclDevice::compile_kernel_file(GPUEnv *gpu_info, const char *build_option
if (buildLog == (char*) NULL) {
return (0);
}
- if (!gpu_info->isUserCreated) {
- status = clGetProgramBuildInfo(gpu_info->programs[idx],
- gpu_info->devices[0], CL_PROGRAM_BUILD_LOG, length,
+ if (!gpuInfo->isUserCreated) {
+ status = clGetProgramBuildInfo(gpuInfo->programs[idx],
+ gpuInfo->devices[0], CL_PROGRAM_BUILD_LOG, length,
buildLog, &length);
} else {
- status = clGetProgramBuildInfo(gpu_info->programs[idx],
- gpu_info->dev, CL_PROGRAM_BUILD_LOG, length, buildLog,
+ status = clGetProgramBuildInfo(gpuInfo->programs[idx],
+ gpuInfo->dev, CL_PROGRAM_BUILD_LOG, length, buildLog,
&length);
}
@@ -619,199 +522,35 @@ int OpenclDevice::compile_kernel_file(GPUEnv *gpu_info, const char *build_option
return (0);
}
- strcpy(gpu_env.kernelSrcFile[idx], filename);
+ strcpy(gpuEnv.kernelSrcFile[idx], filename);
if (binaryExisted == 0)
- generat_bin_from_kernel_source(gpu_env.programs[idx], filename);
+ GeneratBinFromKernelSource(gpuEnv.programs[idx], filename);
- gpu_info->file_count += 1;
+ gpuInfo->fileCount += 1;
return (1);
-}
-
-int OpenclDevice::compile_kernel_file(const char *filename, GPUEnv *gpu_info,
- const char *build_option) {
- cl_int status;
-
- size_t length;
-
-#ifdef USE_KERNEL_FILE
- char
- *source_str;
-#endif
- char *buildLog = NULL, *binary;
-
- const char *source;
- size_t source_size[1];
-
- int b_error, binary_status, binaryExisted, idx;
-
- size_t numDevices;
-
- cl_device_id *devices;
-
- FILE *fd, *fd1;
-
- if (cached_of_kerner_prg(gpu_info, filename) == 1) {
- return (1);
- }
-
- idx = gpu_info->file_count;
-#ifdef USE_KERNEL_FILE
- status = convert_to_string( filename, &source_str, gpu_info, idx );
-
- if( status == 0 )
- {
- printf("convert_to_string failed.\n");
- return(0);
- }
- source = source_str;
-#else
-
- source = kernel_src;
-#endif
- source_size[0] = strlen(source);
-
- binaryExisted = 0;
- if ((binaryExisted = binary_generated(gpu_info->context, filename, &fd))
- == 1) {
- status = clGetContextInfo(gpu_info->context, CL_CONTEXT_NUM_DEVICES,
- sizeof(numDevices), &numDevices, NULL);
- CHECK_OPENCL(status)
-
- devices = (cl_device_id*) malloc(sizeof(cl_device_id) * numDevices);
- if (devices == NULL) {
- return 0;
- }
-
- b_error = 0;
- length = 0;
- b_error |= fseek(fd, 0, SEEK_END) < 0;
- b_error |= (length = ftell(fd)) <= 0;
- b_error |= fseek(fd, 0, SEEK_SET) < 0;
- if (b_error) {
- return 0;
- }
-
- binary = (char*) malloc(length + 2);
- if (!binary) {
- return 0;
- }
-
- memset(binary, 0, length + 2);
- b_error |= fread(binary, 1, length, fd) != length;
- if (binary[length - 1] != '\n') {
- binary[length++] = '\n';
- }
-
- fclose(fd);
- fd = NULL;
- /* grab the handles to all of the devices in the context. */
- status = clGetContextInfo(gpu_info->context, CL_CONTEXT_DEVICES,
- sizeof(cl_device_id) * numDevices, devices, NULL);
- CHECK_OPENCL(status)
-
- gpu_info->programs[idx] = clCreateProgramWithBinary(gpu_info->context,
- numDevices, devices, &length, (const unsigned char**) &binary,
- &binary_status, &status);
- CHECK_OPENCL(status)
-
- free(binary);
- free(devices);
- devices = NULL;
- } else {
-
- // create a CL program using the kernel source
- gpu_info->programs[idx] = clCreateProgramWithSource(gpu_info->context,
- 1, &source, source_size, &status);
- CHECK_OPENCL(status)
-#ifdef USE_KERNEL_FILE
- free((char*)source);
-#endif
- printf("clCreateProgramWithSource.\n");
- }
-
- if (gpu_info->programs[idx] == (cl_program) NULL) {
- return (0);
- }
-
- //char options[512];
- // create a cl program executable for all the devices specified
- if (!gpu_info->isUserCreated) {
- status = clBuildProgram(gpu_info->programs[idx], 1, gpu_info->devices,
- build_option, NULL, NULL);
- CHECK_OPENCL(status)
- } else {
- status = clBuildProgram(gpu_info->programs[idx], 1, &(gpu_info->dev),
- build_option, NULL, NULL);
- CHECK_OPENCL(status)
- }
- printf("BuildProgram.\n");
-
- if (status != CL_SUCCESS) {
- if (!gpu_info->isUserCreated) {
- status = clGetProgramBuildInfo(gpu_info->programs[idx],
- gpu_info->devices[0], CL_PROGRAM_BUILD_LOG, 0, NULL,
- &length);
- } else {
- status = clGetProgramBuildInfo(gpu_info->programs[idx],
- gpu_info->dev, CL_PROGRAM_BUILD_LOG, 0, NULL, &length);
- }
- if (status != CL_SUCCESS) {
- printf("opencl create build log fail\n");
- return (0);
- }
- buildLog = (char*) malloc(length);
- if (buildLog == (char*) NULL) {
- return (0);
- }
- if (!gpu_info->isUserCreated) {
- status = clGetProgramBuildInfo(gpu_info->programs[idx],
- gpu_info->devices[0], CL_PROGRAM_BUILD_LOG, length,
- buildLog, &length);
- } else {
- status = clGetProgramBuildInfo(gpu_info->programs[idx],
- gpu_info->dev, CL_PROGRAM_BUILD_LOG, length, buildLog,
- &length);
- }
-
- fd1 = fopen("kernel-build.log", "w+");
- if (fd1 != NULL) {
- fwrite(buildLog, sizeof(char), length, fd1);
- fclose(fd1);
- }
- free(buildLog);
- return (0);
- }
- strcpy(gpu_env.kernelSrcFile[idx], filename);
-
- if (binaryExisted == 0)
- generat_bin_from_kernel_source(gpu_env.programs[idx], filename);
-
- gpu_info->file_count += 1;
-
- return (1);
}
-
-int OpenclDevice::get_kernel_env_and_func(const char *kernel_name,
+int OpenclDevice::GetKernelEnvAndFunc(const char *kernelName,
KernelEnv *env, cl_kernel_function *function) {
int i; //,program_idx ;
- for (i = 0; i < gpu_env.kernel_count; i++) {
- if (strcasecmp(kernel_name, gpu_env.kernel_names[i]) == 0) {
- env->context = gpu_env.context;
- env->command_queue = gpu_env.command_queue;
- env->program = gpu_env.programs[0];
- env->kernel = gpu_env.kernels[i];
- *function = gpu_env.kernel_functions[i];
+ printf("----------------OpenclDevice::GetKernelEnvAndFunc\n");
+ for (i = 0; i < gpuEnv.kernelCount; i++) {
+ if (strcasecmp(kernelName, gpuEnv.kernelNames[i]) == 0) {
+ env->context = gpuEnv.context;
+ env->commandQueue = gpuEnv.commandQueue;
+ env->program = gpuEnv.programs[0];
+ env->kernel = gpuEnv.kernels[i];
+ *function = gpuEnv.kernelFunctions[i];
return (1);
}
}
return (0);
}
-int OpenclDevice::run_kernel(const char *kernel_name, void **userdata) {
+int OpenclDevice::RunKernel(const char *kernelName, void **userdata) {
KernelEnv env;
cl_kernel_function function;
@@ -819,8 +558,8 @@ int OpenclDevice::run_kernel(const char *kernel_name, void **userdata) {
int status;
memset(&env, 0, sizeof(KernelEnv));
- status = get_kernel_env_and_func(kernel_name, &env, &function);
- strcpy(env.kernel_name, kernel_name);
+ status = GetKernelEnvAndFunc(kernelName, &env, &function);
+ strcpy(env.kernelName, kernelName);
if (status == 1) {
if (&env == (KernelEnv *) NULL
|| &function == (cl_kernel_function *) NULL) {
@@ -830,11 +569,9 @@ int OpenclDevice::run_kernel(const char *kernel_name, void **userdata) {
}
return (0);
}
-
-int OpenclDevice::init_opencl_run_env(int argc, const char *build_option_kernelfiles)
+int OpenclDevice::InitOpenclRunEnv(int argc, const char *buildOptionKernelfiles)
{
int status = 0;
-
if (MAX_CLKERNEL_NUM <= 0) {
return 1;
}
@@ -843,82 +580,177 @@ int OpenclDevice::init_opencl_run_env(int argc, const char *build_option_kernelf
}
if (!isInited) {
- printf("regist_opencl_kernel start.\n");
- regist_opencl_kernel();
+ RegistOpenclKernel();
//initialize devices, context, comand_queue
- status = init_opencl_env(&gpu_env);
+ status = InitOpenclRunEnv(&gpuEnv);
if (status) {
printf("init_opencl_env failed.\n");
return (1);
}
printf("init_opencl_env successed.\n");
- //initialize program, kernel_name, kernel_count
- status = compile_kernel_file( &gpu_env, build_option_kernelfiles);
- if (status == 0 || gpu_env.kernel_count == 0) {
- printf("compile_kernel_file failed.\n");
+ //initialize program, kernelName, kernelCount
+ status = CompileKernelFile( &gpuEnv, buildOptionKernelfiles);
+ if (status == 0 || gpuEnv.kernelCount == 0) {
+ printf("CompileKernelFile failed.\n");
return (1);
}
- printf("compile_kernel_file successed.\n");
+ printf("CompileKernelFile successed.\n");
isInited = 1;
}
-
return (0);
}
-int OpenclDevice::init_opencl_run_env(int argc, const char *argv_kernelfiles[],
- const char *build_option_kernelfiles) {
- int status = 0;
+int OpenclDevice::InitOpenclRunEnv(GPUEnv *gpuInfo)
+{
+ size_t length;
+ cl_int status;
+ cl_uint numPlatforms, numDevices;
+ cl_platform_id *platforms;
+ cl_context_properties cps[3];
+ char platformName[100];
+ unsigned int i;
- if (MAX_CLKERNEL_NUM <= 0) {
- return 1;
- }
- if ((argc > MAX_CLFILE_NUM) || (argc < 0)) {
- return 1;
- }
+ // Have a look at the available platforms.
- if (!isInited) {
- printf("regist_opencl_kernel start.\n");
- regist_opencl_kernel();
- //initialize devices, context, comand_queue
- status = init_opencl_env(&gpu_env);
- if (status) {
- printf("init_opencl_env failed.\n");
+ if (!gpuInfo->isUserCreated) {
+ status = clGetPlatformIDs(0, NULL, &numPlatforms);
+ if (status != CL_SUCCESS) {
return (1);
}
- printf("init_opencl_env successed.\n");
- //initialize program, kernel_name, kernel_count
- status = compile_kernel_file(argv_kernelfiles[0], &gpu_env,
- build_option_kernelfiles);
- if (status == 0 || gpu_env.kernel_count == 0) {
- printf("compile_kernel_file failed.\n");
+ gpuInfo->platform = NULL;
+
+ if (0 < numPlatforms) {
+ platforms = (cl_platform_id*) malloc(
+ numPlatforms * sizeof(cl_platform_id));
+ if (platforms == (cl_platform_id*) NULL) {
+ return (1);
+ }
+ status = clGetPlatformIDs(numPlatforms, platforms, NULL);
+
+ if (status != CL_SUCCESS) {
+ return (1);
+ }
+
+ for (i = 0; i < numPlatforms; i++) {
+ status = clGetPlatformInfo(platforms[i], CL_PLATFORM_VENDOR,
+ sizeof(platformName), platformName, NULL);
+
+ if (status != CL_SUCCESS) {
+ return (1);
+ }
+ gpuInfo->platform = platforms[i];
+
+ //if (!strcmp(platformName, "Intel(R) Coporation"))
+ //if( !strcmp( platformName, "Advanced Micro Devices, Inc." ))
+ {
+ gpuInfo->platform = platforms[i];
+
+ status = clGetDeviceIDs(gpuInfo->platform, // platform
+ CL_DEVICE_TYPE_GPU, // device_type
+ 0, // num_entries
+ NULL, // devices
+ &numDevices);
+
+ if (status != CL_SUCCESS) {
+ return (1);
+ }
+
+ if (numDevices) {
+ break;
+ }
+ }
+ }
+ free(platforms);
+ }
+ if (NULL == gpuInfo->platform) {
+ return (1);
+ }
+
+ // Use available platform.
+
+ cps[0] = CL_CONTEXT_PLATFORM;
+ cps[1] = (cl_context_properties) gpuInfo->platform;
+ cps[2] = 0;
+ // Check for GPU.
+ gpuInfo->dType = CL_DEVICE_TYPE_GPU;
+ gpuInfo->context = clCreateContextFromType(cps, gpuInfo->dType, NULL,
+ NULL, &status);
+
+ if ((gpuInfo->context == (cl_context) NULL)
+ || (status != CL_SUCCESS)) {
+ gpuInfo->dType = CL_DEVICE_TYPE_CPU;
+ gpuInfo->context = clCreateContextFromType(cps, gpuInfo->dType,
+ NULL, NULL, &status);
+ }
+ if ((gpuInfo->context == (cl_context) NULL)
+ || (status != CL_SUCCESS)) {
+ gpuInfo->dType = CL_DEVICE_TYPE_DEFAULT;
+ gpuInfo->context = clCreateContextFromType(cps, gpuInfo->dType,
+ NULL, NULL, &status);
+ }
+ if ((gpuInfo->context == (cl_context) NULL)
+ || (status != CL_SUCCESS)) {
+ return (1);
+ }
+ // Detect OpenCL devices.
+ // First, get the size of device list data
+ status = clGetContextInfo(gpuInfo->context, CL_CONTEXT_DEVICES, 0,
+ NULL, &length);
+ if ((status != CL_SUCCESS) || (length == 0)) {
+ return (1);
+ }
+ // Now allocate memory for device list based on the size we got earlier
+ gpuInfo->devices = (cl_device_id*) malloc(length);
+ if (gpuInfo->devices == (cl_device_id*) NULL) {
+ return (1);
+ }
+ // Now, get the device list data
+ status = clGetContextInfo(gpuInfo->context, CL_CONTEXT_DEVICES, length,
+ gpuInfo->devices, NULL);
+ if (status != CL_SUCCESS) {
+ return (1);
+ }
+
+ // Create OpenCL command queue.
+ gpuInfo->commandQueue = clCreateCommandQueue(gpuInfo->context,
+ gpuInfo->devices[0], 0, &status);
+
+ if (status != CL_SUCCESS) {
return (1);
}
- printf("compile_kernel_file successed.\n");
- isInited = 1;
}
- return (0);
-}
+ status = clGetCommandQueueInfo(gpuInfo->commandQueue,
+ CL_QUEUE_THREAD_HANDLE_AMD, 0, NULL, NULL);
+
+ return 0;
-int OpenclDevice::release_opencl_run_env() {
- return release_opencl_env(&gpu_env);
}
+int OpenclDevice::RegisterKernelWrapper(const char *kernelName,cl_kernel_function function)
+{
+ int i;
+ printf("oclwrapper:RegisterKernelWrapper...%d\n", gpuEnv.kernelCount);
+ for (i = 0; i < gpuEnv.kernelCount; i++)
+ {
+ if (strcasecmp(kernelName, gpuEnv.kernelNames[i]) == 0)
+ {
+ gpuEnv.kernelFunctions[i] = function;
+ return (1);
+ }
+ }
+ return (0);
+}
+
-void OpenclDevice::setOpenclState(int state) {
+void OpenclDevice::SetOpenclState(int state) {
+ //printf("OpenclDevice::setOpenclState...\n");
isInited = state;
}
-int OpenclDevice::getOpenclState() {
+int OpenclDevice::GetOpenclState() {
return isInited;
}
//ocldbg
-int OclFormulaMin(void ** usrdata, KernelEnv *env) { return 0; }
-int OclFormulaMax(void ** usrdata, KernelEnv *env) { return 0; }
-int OclFormulaSum(void ** usrdata, KernelEnv *env) { return 0; }
-int OclFormulaCount(void ** usrdata, KernelEnv *env) { return 0; }
-int OclFormulaAverage(void ** usrdata, KernelEnv *env) { return 0; }
-int OclFormulaSumproduct(void ** usrdata, KernelEnv *env) { return 0; }
-int OclFormulaMinverse(void ** usrdata, KernelEnv *env) { return 0; }
int OclFormulax(void ** usrdata, KernelEnv *env) {
fprintf(stderr, "In OpenclDevice,...Formula_proc\n");
@@ -958,16 +790,16 @@ int OclFormulax(void ** usrdata, KernelEnv *env) {
while (global_work_size[0] != 1) {
global_work_size[0] = global_work_size[0] / 2;
- status = clEnqueueNDRangeKernel(env->command_queue, env->kernel, 1,
+ status = clEnqueueNDRangeKernel(env->commandQueue, env->kernel, 1,
NULL, global_work_size, NULL, 0, NULL, NULL);
CHECK_OPENCL(status)
}
//fprintf(stderr, "\nIn OpenclDevice,...before clEnqueueReadBuffer\n");
- status = clEnqueueReadBuffer(env->command_queue, formula_data, CL_FALSE, 0,
+ status = clEnqueueReadBuffer(env->commandQueue, formula_data, CL_FALSE, 0,
sizeof(float), (void *) &tdata, 0, NULL, NULL);
CHECK_OPENCL(status)
- status = clFinish(env->command_queue);
+ status = clFinish(env->commandQueue);
CHECK_OPENCL(status)
//PPAStopCpuEvent(ppa_proc);
@@ -986,46 +818,572 @@ int OclFormulax(void ** usrdata, KernelEnv *env) {
return 0;
}
-double OclCalc::OclProcess(cl_kernel_function function, double *data,
- formulax type) {
- fprintf(stderr, "\In OpenclDevice, proc...begin\n");
- double ret = 0;
- void *usrdata[2];
+int OclFormulaxDll(void ** usrdata, KernelEnv *env) {
+
+ fprintf(stderr, "In OclFormulaxDll...\n");
+ cl_int clStatus;
+ int status;
+ size_t global_work_size[1];
+ float tdata[NUM];
+
+ double *data = (double *) usrdata[0];
+ const formulax type = *((const formulax *) usrdata[1]);
+ double ret = 0.0;
- usrdata[0] = (void *) data;
- usrdata[1] = (void *) &type;
+ for (int i = 0; i < NUM; i++) {
+ tdata[i] = (float) data[i];
+ }
- run_kernel_wrapper(function, "oclformula", usrdata);
- //fprintf(stderr, "\In OpenclDevice, proc...after run_kernel_wrapper\n");
- return ret;
+ env->kernel = clCreateKernel(env->program, "oclformula", &clStatus);
+ //printf("ScInterpreter::IterateParameters...after clCreateKernel.\n");
+ //fprintf(stderr, "\nIn OpenclDevice,...after clCreateKernel\n");
+ int size = NUM;
+
+ cl_mem formula_data = clCreateBuffer(env->context,
+ (cl_mem_flags) (CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR),
+ size * sizeof(float), (void *) tdata, &clStatus);
+ //fprintf(stderr, "\nIn OpenclDevice,...after clCreateBuffer\n");
+
+ status = clSetKernelArg(env->kernel, 0, sizeof(cl_mem),
+ (void *) &formula_data);
+ CHECK_OPENCL(status)
+ status = clSetKernelArg(env->kernel, 1, sizeof(unsigned int),
+ (void *) &type);
+ CHECK_OPENCL(status)
+
+ global_work_size[0] = size;
+ //fprintf(stderr, "\nIn OpenclDevice,...after global_work_size\n");
+ //PPAStartCpuEvent(ppa_proc);
+
+ while (global_work_size[0] != 1) {
+ global_work_size[0] = global_work_size[0] / 2;
+ status = clEnqueueNDRangeKernel(env->commandQueue, env->kernel, 1,
+ NULL, global_work_size, NULL, 0, NULL, NULL);
+ CHECK_OPENCL(status)
+
+ }
+ //fprintf(stderr, "\nIn OpenclDevice,...before clEnqueueReadBuffer\n");
+ status = clEnqueueReadBuffer(env->commandQueue, formula_data, CL_FALSE, 0,
+ sizeof(float), (void *) &tdata, 0, NULL, NULL);
+ CHECK_OPENCL(status)
+ status = clFinish(env->commandQueue);
+ CHECK_OPENCL(status)
+
+ //PPAStopCpuEvent(ppa_proc);
+ //fprintf(stderr, "\nIn OpenclDevice,...before clReleaseKernel\n");
+ status = clReleaseKernel(env->kernel);
+ CHECK_OPENCL(status)
+ status = clReleaseMemObject(formula_data);
+ CHECK_OPENCL(status)
+
+ if (type == AVG)
+ ret = (double) tdata[0] / NUM;
+ else
+ ret = (double) tdata[0];
+
+ printf("OclFormulaxDllxx:size = %d ret = %f.\n\n", NUM, ret);
+
+ return 0;
+}
+double OclCalc::OclProcess(cl_kernel_function function, double *data, formulax type)
+{
+ fprintf(stderr, "\n OpenclDevice, proc...begin\n");
+ double ret = 0;
+ void *usrdata[2];
+ usrdata[0] = (void *) data;
+ usrdata[1] = (void *) &type;
+ RunKernelWrapper(function, "oclformula", usrdata);
+ return ret;
}
double OclCalc::OclTest() {
double data[NUM];
+ srand((unsigned int) time(NULL));
+
for (int i = 0; i < NUM; i++) {
data[i] = sc::rng::uniform();
fprintf(stderr, "%f\t", data[i]);
}
OclProcess(&OclFormulax, data, AVG);
- //fprintf(stderr, "\nIn OpenclDevice,OclTest() after proc,data0...%f\n", data[0]);
+ return 0.0;
+}
+
+double OclCalc::OclTestDll() {
+ double data[NUM];
+ srand((unsigned int) time(NULL));
+ for (int i = 0; i < NUM; i++) {
+ data[i] = rand() / (RAND_MAX + 1.0);
+ fprintf(stderr, "%f\t", data[i]);
+ }
+ OclProcess(&OclFormulaxDll, data, AVG);
return 0.0;
}
OclCalc::OclCalc()
{
- OpenclDevice::init_opencl_run_env(0, NULL);
- OpenclDevice::setOpenclState(1);
- fprintf(stderr,"OclCalc:: init opencl.\n");
+ OpenclDevice::SetOpenclState(1);
+ fprintf(stderr,"OclCalc:: init opencl ok.\n");
}
OclCalc::~OclCalc()
{
- OpenclDevice::release_opencl_run_env();
- OpenclDevice::setOpenclState(0);
- fprintf(stderr,"OclCalc:: opencl end.\n");
+ OpenclDevice::SetOpenclState(0);
+ fprintf(stderr,"OclCalc:: opencl end ok.\n");
+}
+/////////////////////////////////////////////////////////////////////////////
+int OclCalc::OclHostFormulaMax(double *srcData,int *start,int *end,double *output,int size) {
+ KernelEnv env;
+ const char *kernelName = "oclFormulaMax";
+ CheckKernelName(&env,kernelName);
+ cl_int clStatus;
+ size_t global_work_size[1];
+ int alignSize = size + end[0]-start[0];
+
+ env.kernel = clCreateKernel(env.program,kernelName, &clStatus);
+ cl_int ret=0;
+ cl_mem inputCl = clCreateBuffer(env.context,(cl_mem_flags) (CL_MEM_READ_WRITE),
+ alignSize * sizeof(float), NULL, &clStatus);
+ cl_mem startCl = clCreateBuffer(env.context, (cl_mem_flags) (CL_MEM_READ_WRITE),
+ size * sizeof(unsigned int), NULL, &ret);
+ cl_mem endCl = clCreateBuffer(env.context, (cl_mem_flags) (CL_MEM_READ_WRITE),
+ size * sizeof(unsigned int), NULL, &ret);
+ cl_mem outputCl = clCreateBuffer(env.context, (cl_mem_flags) (CL_MEM_READ_WRITE),
+ size* sizeof(float), NULL, &ret);
+
+ float * hostMapSrc = (float *)clEnqueueMapBuffer(env.commandQueue,inputCl,CL_TRUE,CL_MAP_WRITE,0,alignSize * sizeof(float),0,NULL,NULL,NULL);
+ int * hostMapStart = (int *)clEnqueueMapBuffer(env.commandQueue,startCl,CL_TRUE,CL_MAP_WRITE,0,size * sizeof(unsigned int),0,NULL,NULL,NULL);
+ int * hostMapEnd = (int *)clEnqueueMapBuffer(env.commandQueue,endCl,CL_TRUE,CL_MAP_WRITE,0,size * sizeof(unsigned int),0,NULL,NULL,NULL);
+ for(int i=0;i<size;i++)
+ {
+ hostMapStart[i] = start[i];
+ hostMapEnd[i] = end[i];
+ }
+ for(int i=0;i<alignSize;i++)
+ hostMapSrc[i] = (float)srcData[i];
+ clEnqueueUnmapMemObject(env.commandQueue,inputCl,hostMapSrc,0,NULL,NULL);
+ clEnqueueUnmapMemObject(env.commandQueue,startCl,hostMapStart,0,NULL,NULL);
+ clEnqueueUnmapMemObject(env.commandQueue,endCl,hostMapEnd,0,NULL,NULL);
+
+ clStatus = clSetKernelArg(env.kernel, 0, sizeof(cl_mem),
+ (void *)&inputCl);
+ clStatus = clSetKernelArg(env.kernel, 1, sizeof(cl_mem),
+ (void *)&startCl);
+ clStatus = clSetKernelArg(env.kernel, 2, sizeof(cl_mem),
+ (void *)&endCl);
+ clStatus = clSetKernelArg(env.kernel, 3, sizeof(cl_mem),
+ (void *)&outputCl);
+ CHECK_OPENCL(clStatus);
+
+ global_work_size[0] = size;
+ clStatus = clEnqueueNDRangeKernel(env.commandQueue, env.kernel, 1,
+ NULL, global_work_size, NULL, 0, NULL, NULL);
+ CHECK_OPENCL(clStatus)
+
+ float * outPutMap = (float *)clEnqueueMapBuffer(env.commandQueue,outputCl,CL_TRUE,CL_MAP_READ,0,size*sizeof(float),0,NULL,NULL,NULL);
+ for(int i=0;i<size;i++)
+ output[i]=outPutMap[i];
+
+ clEnqueueUnmapMemObject(env.commandQueue,outputCl,outPutMap,0,NULL,NULL);
+ clStatus = clFinish(env.commandQueue);
+
+ CHECK_OPENCL(clStatus);
+ clStatus = clReleaseKernel(env.kernel);
+ CHECK_OPENCL(clStatus);
+ clStatus = clReleaseMemObject(inputCl);
+ CHECK_OPENCL(clStatus);
+ clStatus = clReleaseMemObject(startCl);
+ CHECK_OPENCL(clStatus);
+ clStatus = clReleaseMemObject(endCl);
+ CHECK_OPENCL(clStatus);
+ clStatus = clReleaseMemObject(outputCl);
+ CHECK_OPENCL(clStatus);
+ return 0;
+}
+int OclCalc::OclHostFormulaMin(double *srcData,int *start,int *end,double *output,int size) {
+ KernelEnv env;
+ const char *kernelName = "oclFormulaMin";
+ CheckKernelName(&env,kernelName);
+
+ cl_int clStatus;
+ size_t global_work_size[1];
+ int alignSize = size + end[0]-start[0];
+
+ env.kernel = clCreateKernel(env.program,kernelName, &clStatus);
+ cl_int ret=0;
+ cl_mem inputCl = clCreateBuffer(env.context,(cl_mem_flags) (CL_MEM_READ_WRITE),
+ alignSize * sizeof(float), NULL, &clStatus);
+ cl_mem startCl = clCreateBuffer(env.context, (cl_mem_flags) (CL_MEM_READ_WRITE),
+ size * sizeof(unsigned int), NULL, &ret);
+ cl_mem endCl = clCreateBuffer(env.context, (cl_mem_flags) (CL_MEM_READ_WRITE),
+ size * sizeof(unsigned int), NULL, &ret);
+ cl_mem outputCl = clCreateBuffer(env.context, (cl_mem_flags) (CL_MEM_READ_WRITE),
+ size* sizeof(float), NULL, &ret);
+
+ float * hostMapSrc = (float *)clEnqueueMapBuffer(env.commandQueue,inputCl,CL_TRUE,CL_MAP_WRITE,0,alignSize * sizeof(float),0,NULL,NULL,NULL);
+ int * hostMapStart = (int *)clEnqueueMapBuffer(env.commandQueue,startCl,CL_TRUE,CL_MAP_WRITE,0,size * sizeof(unsigned int),0,NULL,NULL,NULL);
+ int * hostMapEnd = (int *)clEnqueueMapBuffer(env.commandQueue,endCl,CL_TRUE,CL_MAP_WRITE,0,size * sizeof(unsigned int),0,NULL,NULL,NULL);
+ for(int i=0;i<size;i++)
+ {
+ hostMapStart[i] = start[i];
+ hostMapEnd[i] = end[i];
+ }
+ for(int i=0;i<alignSize;i++)
+ hostMapSrc[i] = (float)srcData[i];
+ clEnqueueUnmapMemObject(env.commandQueue,inputCl,hostMapSrc,0,NULL,NULL);
+ clEnqueueUnmapMemObject(env.commandQueue,startCl,hostMapStart,0,NULL,NULL);
+ clEnqueueUnmapMemObject(env.commandQueue,endCl,hostMapEnd,0,NULL,NULL);
+
+ clStatus = clSetKernelArg(env.kernel, 0, sizeof(cl_mem),
+ (void *)&inputCl);
+ clStatus = clSetKernelArg(env.kernel, 1, sizeof(cl_mem),
+ (void *)&startCl);
+ clStatus = clSetKernelArg(env.kernel, 2, sizeof(cl_mem),
+ (void *)&endCl);
+ clStatus = clSetKernelArg(env.kernel, 3, sizeof(cl_mem),
+ (void *)&outputCl);
+ CHECK_OPENCL(clStatus);
+
+ global_work_size[0] = size;
+ clStatus = clEnqueueNDRangeKernel(env.commandQueue, env.kernel, 1,
+ NULL, global_work_size, NULL, 0, NULL, NULL);
+ CHECK_OPENCL(clStatus)
+
+ float * outPutMap = (float *)clEnqueueMapBuffer(env.commandQueue,outputCl,CL_TRUE,CL_MAP_READ,0,size*sizeof(float),0,NULL,NULL,NULL);
+ for(int i=0;i<size;i++)
+ output[i]=outPutMap[i];
+
+ clEnqueueUnmapMemObject(env.commandQueue,outputCl,outPutMap,0,NULL,NULL);
+ clStatus = clFinish(env.commandQueue);
+
+ CHECK_OPENCL(clStatus);
+ clStatus = clReleaseKernel(env.kernel);
+ CHECK_OPENCL(clStatus);
+ clStatus = clReleaseMemObject(inputCl);
+ CHECK_OPENCL(clStatus);
+ clStatus = clReleaseMemObject(startCl);
+ CHECK_OPENCL(clStatus);
+ clStatus = clReleaseMemObject(endCl);
+ CHECK_OPENCL(clStatus);
+ clStatus = clReleaseMemObject(outputCl);
+ CHECK_OPENCL(clStatus);
+ return 0;
+}
+int OclCalc::OclHostFormulaAverage(double *srcData,int *start,int *end,double *output,int size) {
+ KernelEnv env;
+ const char *kernelName = "oclFormulaAverage";
+ CheckKernelName(&env,kernelName);
+
+ cl_int clStatus;
+ size_t global_work_size[1];
+ int alignSize = size + end[0]-start[0];
+
+ env.kernel = clCreateKernel(env.program, kernelName, &clStatus);
+ cl_int ret=0;
+ cl_mem inputCl = clCreateBuffer(env.context,(cl_mem_flags) (CL_MEM_READ_WRITE),
+ alignSize * sizeof(float), NULL, &clStatus);
+ cl_mem startCl = clCreateBuffer(env.context, (cl_mem_flags) (CL_MEM_READ_WRITE),
+ size * sizeof(unsigned int), NULL, &ret);
+ cl_mem endCl = clCreateBuffer(env.context, (cl_mem_flags) (CL_MEM_READ_WRITE),
+ size * sizeof(unsigned int), NULL, &ret);
+ cl_mem outputCl = clCreateBuffer(env.context, (cl_mem_flags) (CL_MEM_READ_WRITE),
+ size* sizeof(float), NULL, &ret);
+
+ float * hostMapSrc = (float *)clEnqueueMapBuffer(env.commandQueue,inputCl,CL_TRUE,CL_MAP_WRITE,0,alignSize * sizeof(float),0,NULL,NULL,NULL);
+ int * hostMapStart = (int *)clEnqueueMapBuffer(env.commandQueue,startCl,CL_TRUE,CL_MAP_WRITE,0,size * sizeof(unsigned int),0,NULL,NULL,NULL);
+ int * hostMapEnd = (int *)clEnqueueMapBuffer(env.commandQueue,endCl,CL_TRUE,CL_MAP_WRITE,0,size * sizeof(unsigned int),0,NULL,NULL,NULL);
+ for(int i=0;i<size;i++)
+ {
+ hostMapStart[i] = start[i];
+ hostMapEnd[i] = end[i];
+ }
+ for(int i=0;i<alignSize;i++)
+ hostMapSrc[i] = (float)srcData[i];
+ clEnqueueUnmapMemObject(env.commandQueue,inputCl,hostMapSrc,0,NULL,NULL);
+ clEnqueueUnmapMemObject(env.commandQueue,startCl,hostMapStart,0,NULL,NULL);
+ clEnqueueUnmapMemObject(env.commandQueue,endCl,hostMapEnd,0,NULL,NULL);
+
+ clStatus = clSetKernelArg(env.kernel, 0, sizeof(cl_mem),
+ (void *)&inputCl);
+ clStatus = clSetKernelArg(env.kernel, 1, sizeof(cl_mem),
+ (void *)&startCl);
+ clStatus = clSetKernelArg(env.kernel, 2, sizeof(cl_mem),
+ (void *)&endCl);
+ clStatus = clSetKernelArg(env.kernel, 3, sizeof(cl_mem),
+ (void *)&outputCl);
+ CHECK_OPENCL(clStatus);
+
+ global_work_size[0] = size;
+ clStatus = clEnqueueNDRangeKernel(env.commandQueue, env.kernel, 1,
+ NULL, global_work_size, NULL, 0, NULL, NULL);
+ CHECK_OPENCL(clStatus)
+
+ float * outPutMap = (float *)clEnqueueMapBuffer(env.commandQueue,outputCl,CL_TRUE,CL_MAP_READ,0,size*sizeof(float),0,NULL,NULL,NULL);
+ for(int i=0;i<size;i++)
+ output[i]=outPutMap[i];
+
+ clEnqueueUnmapMemObject(env.commandQueue,outputCl,outPutMap,0,NULL,NULL);
+ clStatus = clFinish(env.commandQueue);
+
+ CHECK_OPENCL(clStatus);
+ clStatus = clReleaseKernel(env.kernel);
+ CHECK_OPENCL(clStatus);
+ clStatus = clReleaseMemObject(inputCl);
+ CHECK_OPENCL(clStatus);
+ clStatus = clReleaseMemObject(startCl);
+ CHECK_OPENCL(clStatus);
+ clStatus = clReleaseMemObject(endCl);
+ CHECK_OPENCL(clStatus);
+ clStatus = clReleaseMemObject(outputCl);
+ CHECK_OPENCL(clStatus);
+ return 0;
+
+
+}
+
+
+int OclCalc::OclHostSignedAdd(double *lData,double *rData,double *rResult,int dSize) {
+
+ KernelEnv env;
+ int status;
+ const char *kernelName = "oclSignedAdd";
+ CheckKernelName(&env,kernelName);
+
+
+ cl_int clStatus;
+ size_t global_work_size[1];
+
+ env.kernel = clCreateKernel(env.program,kernelName, &clStatus);
+ cl_mem clLiftData = clCreateBuffer(env.context,
+ (cl_mem_flags) (CL_MEM_READ_WRITE),
+ dSize * sizeof(float), NULL, &clStatus);
+ cl_mem clRightData = clCreateBuffer(env.context,
+ (cl_mem_flags) (CL_MEM_READ_WRITE),
+ dSize * sizeof(float), NULL, &clStatus);
+ cl_mem clResult = clCreateBuffer(env.context,
+ (cl_mem_flags) (CL_MEM_READ_WRITE),
+ dSize * sizeof(float), NULL, &clStatus);
+
+ float * hostMapLeftData = (float *)clEnqueueMapBuffer(env.commandQueue,clLiftData,CL_TRUE,CL_MAP_WRITE,0,dSize*sizeof(float),0,NULL,NULL,NULL);
+ float * hostMapRightData = (float *)clEnqueueMapBuffer(env.commandQueue,clRightData,CL_TRUE,CL_MAP_WRITE,0,dSize*sizeof(float),0,NULL,NULL,NULL);
+ for(int i=0;i<dSize;i++)
+ {
+ hostMapLeftData[i] = (float)lData[i];
+ hostMapRightData[i] = (float)rData[i];
+ }
+ clEnqueueUnmapMemObject(env.commandQueue,clLiftData,hostMapLeftData,0,NULL,NULL);
+ clEnqueueUnmapMemObject(env.commandQueue,clRightData,hostMapRightData,0,NULL,NULL);
+
+ status = clSetKernelArg(env.kernel, 0, sizeof(cl_mem),
+ (void *)&clLiftData);
+ status = clSetKernelArg(env.kernel, 1, sizeof(cl_mem),
+ (void *)&clRightData);
+ status = clSetKernelArg(env.kernel, 2, sizeof(cl_mem),
+ (void *)&clResult);
+ CHECK_OPENCL(status)
+ global_work_size[0] = dSize;
+ status = clEnqueueNDRangeKernel(env.commandQueue, env.kernel, 1,
+ NULL, global_work_size, NULL, 0, NULL, NULL);
+ CHECK_OPENCL(status);
+
+ float * hostMapResult = (float *)clEnqueueMapBuffer(env.commandQueue,clResult,CL_TRUE,CL_MAP_READ,0,dSize*sizeof(float),0,NULL,NULL,NULL);
+ for(int i=0;i<dSize;i++)
+ rResult[i]=hostMapResult[i];
+ clEnqueueUnmapMemObject(env.commandQueue,clResult,hostMapResult,0,NULL,NULL);
+
+ CHECK_OPENCL(status);
+ status = clFinish(env.commandQueue);
+ CHECK_OPENCL(status);
+ status = clReleaseKernel(env.kernel);
+ CHECK_OPENCL(status);
+ status = clReleaseMemObject(clLiftData);
+ CHECK_OPENCL(status);
+ status = clReleaseMemObject(clRightData);
+ CHECK_OPENCL(status);
+ status = clReleaseMemObject(clResult);
+ CHECK_OPENCL(status);
+ return 0;
+}
+int OclCalc::OclHostSignedMul(double *lData,double *rData,double *rResult,int dSize) {
+ KernelEnv env;
+ int status;
+ const char *kernelName = "oclSignedMul";
+ CheckKernelName(&env,kernelName);
+
+
+ size_t global_work_size[1];
+ cl_int clStatus;
+ env.kernel = clCreateKernel(env.program, kernelName, &clStatus);
+ cl_mem clLiftData = clCreateBuffer(env.context,
+ (cl_mem_flags) (CL_MEM_READ_WRITE),
+ dSize * sizeof(float), NULL, &clStatus);
+ cl_mem clRightData = clCreateBuffer(env.context,
+ (cl_mem_flags) (CL_MEM_READ_WRITE),
+ dSize * sizeof(float), NULL, &clStatus);
+ cl_mem clResult = clCreateBuffer(env.context,
+ (cl_mem_flags) (CL_MEM_READ_WRITE),
+ dSize * sizeof(float), NULL, &clStatus);
+
+ float * hostMapLeftData = (float *)clEnqueueMapBuffer(env.commandQueue,clLiftData,CL_TRUE,CL_MAP_WRITE,0,dSize*sizeof(float),0,NULL,NULL,NULL);
+ float * hostMapRightData = (float *)clEnqueueMapBuffer(env.commandQueue,clRightData,CL_TRUE,CL_MAP_WRITE,0,dSize*sizeof(float),0,NULL,NULL,NULL);
+ for(int i=0;i<dSize;i++)
+ {
+ hostMapLeftData[i] = (float)lData[i];
+ hostMapRightData[i] = (float)rData[i];
+ }
+ clEnqueueUnmapMemObject(env.commandQueue,clLiftData,hostMapLeftData,0,NULL,NULL);
+ clEnqueueUnmapMemObject(env.commandQueue,clRightData,hostMapRightData,0,NULL,NULL);
+
+ status = clSetKernelArg(env.kernel, 0, sizeof(cl_mem),
+ (void *)&clLiftData);
+ status = clSetKernelArg(env.kernel, 1, sizeof(cl_mem),
+ (void *)&clRightData);
+ status = clSetKernelArg(env.kernel, 2, sizeof(cl_mem),
+ (void *)&clResult);
+ CHECK_OPENCL(status)
+ global_work_size[0] = dSize;
+ status = clEnqueueNDRangeKernel(env.commandQueue, env.kernel, 1,
+ NULL, global_work_size, NULL, 0, NULL, NULL);
+ CHECK_OPENCL(status);
+
+ float * hostMapResult = (float *)clEnqueueMapBuffer(env.commandQueue,clResult,CL_TRUE,CL_MAP_READ,0,dSize*sizeof(float),0,NULL,NULL,NULL);
+ for(int i=0;i<dSize;i++)
+ rResult[i]=hostMapResult[i];
+ clEnqueueUnmapMemObject(env.commandQueue,clResult,hostMapResult,0,NULL,NULL);
+
+ CHECK_OPENCL(status);
+ status = clFinish(env.commandQueue);
+ CHECK_OPENCL(status);
+ status = clReleaseKernel(env.kernel);
+ CHECK_OPENCL(status);
+ status = clReleaseMemObject(clLiftData);
+ CHECK_OPENCL(status);
+ status = clReleaseMemObject(clRightData);
+ CHECK_OPENCL(status);
+ status = clReleaseMemObject(clResult);
+ CHECK_OPENCL(status);
+ return 0;
+}
+int OclCalc::OclHostSignedSub(double *lData,double *rData,double *rResult,int dSize) {
+ KernelEnv env;
+ int status;
+ const char *kernelName = "oclSignedSub";
+ CheckKernelName(&env,kernelName);
+
+ cl_int clStatus;
+ size_t global_work_size[1];
+ env.kernel = clCreateKernel(env.program,kernelName, &clStatus);
+ cl_mem clLiftData = clCreateBuffer(env.context,
+ (cl_mem_flags) (CL_MEM_READ_WRITE),
+ dSize * sizeof(float), NULL, &clStatus);
+ cl_mem clRightData = clCreateBuffer(env.context,
+ (cl_mem_flags) (CL_MEM_READ_WRITE),
+ dSize * sizeof(float), NULL, &clStatus);
+ cl_mem clResult = clCreateBuffer(env.context,
+ (cl_mem_flags) (CL_MEM_READ_WRITE),
+ dSize * sizeof(float), NULL, &clStatus);
+
+ float * hostMapLeftData = (float *)clEnqueueMapBuffer(env.commandQueue,clLiftData,CL_TRUE,CL_MAP_WRITE,0,dSize*sizeof(float),0,NULL,NULL,NULL);
+ float * hostMapRightData = (float *)clEnqueueMapBuffer(env.commandQueue,clRightData,CL_TRUE,CL_MAP_WRITE,0,dSize*sizeof(float),0,NULL,NULL,NULL);
+ for(int i=0;i<dSize;i++)
+ {
+ hostMapLeftData[i] = (float)lData[i];
+ hostMapRightData[i] = (float)rData[i];
+ }
+ clEnqueueUnmapMemObject(env.commandQueue,clLiftData,hostMapLeftData,0,NULL,NULL);
+ clEnqueueUnmapMemObject(env.commandQueue,clRightData,hostMapRightData,0,NULL,NULL);
+
+ status = clSetKernelArg(env.kernel, 0, sizeof(cl_mem),
+ (void *)&clLiftData);
+ status = clSetKernelArg(env.kernel, 1, sizeof(cl_mem),
+ (void *)&clRightData);
+ status = clSetKernelArg(env.kernel, 2, sizeof(cl_mem),
+ (void *)&clResult);
+ CHECK_OPENCL(status)
+ global_work_size[0] = dSize;
+ status = clEnqueueNDRangeKernel(env.commandQueue, env.kernel, 1,
+ NULL, global_work_size, NULL, 0, NULL, NULL);
+ CHECK_OPENCL(status);
+
+ float * hostMapResult = (float *)clEnqueueMapBuffer(env.commandQueue,clResult,CL_TRUE,CL_MAP_READ,0,dSize*sizeof(float),0,NULL,NULL,NULL);
+ for(int i=0;i<dSize;i++)
+ rResult[i]=hostMapResult[i];
+ clEnqueueUnmapMemObject(env.commandQueue,clResult,hostMapResult,0,NULL,NULL);
+
+ CHECK_OPENCL(status);
+ status = clFinish(env.commandQueue);
+ CHECK_OPENCL(status);
+ status = clReleaseKernel(env.kernel);
+ CHECK_OPENCL(status);
+ status = clReleaseMemObject(clLiftData);
+ CHECK_OPENCL(status);
+ status = clReleaseMemObject(clRightData);
+ CHECK_OPENCL(status);
+ status = clReleaseMemObject(clResult);
+ CHECK_OPENCL(status);
+ return 0;
+}
+int OclCalc::OclHostSignedDiv(double *lData,double *rData,double *rResult,int dSize) {
+ KernelEnv env;
+ int status;
+ const char *kernelName = "oclSignedDiv";
+ CheckKernelName(&env,kernelName);
+
+
+ size_t global_work_size[1];
+ cl_int clStatus;
+ env.kernel = clCreateKernel(env.program,kernelName, &clStatus);
+ cl_mem clLiftData = clCreateBuffer(env.context,
+ (cl_mem_flags) (CL_MEM_READ_WRITE),
+ dSize * sizeof(float), NULL, &clStatus);
+ cl_mem clRightData = clCreateBuffer(env.context,
+ (cl_mem_flags) (CL_MEM_READ_WRITE),
+ dSize * sizeof(float), NULL, &clStatus);
+ cl_mem clResult = clCreateBuffer(env.context,
+ (cl_mem_flags) (CL_MEM_READ_WRITE),
+ dSize * sizeof(float), NULL, &clStatus);
+
+ float * hostMapLeftData = (float *)clEnqueueMapBuffer(env.commandQueue,clLiftData,CL_TRUE,CL_MAP_WRITE,0,dSize*sizeof(float),0,NULL,NULL,NULL);
+ float * hostMapRightData = (float *)clEnqueueMapBuffer(env.commandQueue,clRightData,CL_TRUE,CL_MAP_WRITE,0,dSize*sizeof(float),0,NULL,NULL,NULL);
+ for(int i=0;i<dSize;i++)
+ {
+ hostMapLeftData[i] = (float)lData[i];
+ hostMapRightData[i] = (float)rData[i];
+ }
+ clEnqueueUnmapMemObject(env.commandQueue,clLiftData,hostMapLeftData,0,NULL,NULL);
+ clEnqueueUnmapMemObject(env.commandQueue,clRightData,hostMapRightData,0,NULL,NULL);
+
+ status = clSetKernelArg(env.kernel, 0, sizeof(cl_mem),
+ (void *)&clLiftData);
+ status = clSetKernelArg(env.kernel, 1, sizeof(cl_mem),
+ (void *)&clRightData);
+ status = clSetKernelArg(env.kernel, 2, sizeof(cl_mem),
+ (void *)&clResult);
+ CHECK_OPENCL(status)
+ global_work_size[0] = dSize;
+ status = clEnqueueNDRangeKernel(env.commandQueue, env.kernel, 1,
+ NULL, global_work_size, NULL, 0, NULL, NULL);
+ CHECK_OPENCL(status);
+
+ float * hostMapResult = (float *)clEnqueueMapBuffer(env.commandQueue,clResult,CL_TRUE,CL_MAP_READ,0,dSize*sizeof(float),0,NULL,NULL,NULL);
+ for(int i=0;i<dSize;i++)
+ rResult[i]=hostMapResult[i];
+ clEnqueueUnmapMemObject(env.commandQueue,clResult,hostMapResult,0,NULL,NULL);
+
+ CHECK_OPENCL(status);
+ status = clFinish(env.commandQueue);
+ CHECK_OPENCL(status);
+ status = clReleaseKernel(env.kernel);
+ CHECK_OPENCL(status);
+ status = clReleaseMemObject(clLiftData);
+ CHECK_OPENCL(status);
+ status = clReleaseMemObject(clRightData);
+ CHECK_OPENCL(status);
+ status = clReleaseMemObject(clResult);
+ CHECK_OPENCL(status);
+ return 0;
}
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sc/source/core/opencl/openclwrapper.hxx b/sc/source/core/opencl/openclwrapper.hxx
index 4646954..d3b5354 100644
--- a/sc/source/core/opencl/openclwrapper.hxx
+++ b/sc/source/core/opencl/openclwrapper.hxx
@@ -5,16 +5,6 @@
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
- *
- * This file incorporates work covered by the following license notice:
- *
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed
- * with this work for additional information regarding copyright
- * ownership. The ASF licenses this file to you under the Apache
- * License, Version 2.0 (the "License"); you may not use this file
- * except in compliance with the License. You may obtain a copy of
- * the License at http://www.apache.org/licenses/LICENSE-2.0 .
*/
#ifndef _OPENCL_WRAPPER_H_
@@ -32,20 +22,20 @@
#define strcasecmp strcmp
#endif
#endif
-
+#define ENABLE_OPENCL //dbg
typedef struct _KernelEnv {
cl_context context;
- cl_command_queue command_queue;
+ cl_command_queue commandQueue;
cl_program program;
cl_kernel kernel;
- char kernel_name[150];
+ char kernelName[150];
} KernelEnv;
typedef struct _OpenCLEnv {
cl_platform_id platform;
cl_context context;
cl_device_id devices;
- cl_command_queue command_queue;
+ cl_command_queue commandQueue;
} OpenCLEnv;
#if defined __cplusplus
@@ -64,10 +54,10 @@ typedef int (*cl_kernel_function)(void **userdata, KernelEnv *kenv);
#define CHECK_OPENCL(status) \
if(status != CL_SUCCESS) \
{ \
- printf ("error code is %d.",status); \
+ printf ("error code is %d.\n",status); \
return (0); \
}
-#endif
+
#define MAX_KERNEL_STRING_LEN 64
#define MAX_CLFILE_NUM 50
@@ -77,27 +67,17 @@ if(status != CL_SUCCESS) \
typedef struct _GPUEnv {
//share vb in all modules in hb library
cl_platform_id platform;
-
cl_device_type dType;
-
cl_context context;
-
cl_device_id *devices;
-
cl_device_id dev;
-
- cl_command_queue command_queue;
-
+ cl_command_queue commandQueue;
cl_kernel kernels[MAX_CLFILE_NUM];
-
cl_program programs[MAX_CLFILE_NUM]; //one program object maps one kernel source file
-
char kernelSrcFile[MAX_CLFILE_NUM][256], //the max len of kernel file name is 256
- kernel_names[MAX_CLKERNEL_NUM][MAX_KERNEL_STRING_LEN + 1];
-
- cl_kernel_function kernel_functions[MAX_CLKERNEL_NUM];
-
- int kernel_count, file_count, // only one kernel file
+ kernelNames[MAX_CLKERNEL_NUM][MAX_KERNEL_STRING_LEN + 1];
+ cl_kernel_function kernelFunctions[MAX_CLKERNEL_NUM];
+ int kernelCount, fileCount, // only one kernel file
isUserCreated; // 1: created , 0:no create and needed to create by opencl wrapper
} GPUEnv;
@@ -107,66 +87,96 @@ typedef struct {
char *kernelStr;
} kernel_node;
+class OpenclCalcBase{
+public:
+ OpenclCalcBase(){};
+ virtual ~OpenclCalcBase(){};
+ virtual int OclHostSignedAdd(double *lData,double *rData,double *rResult,int rowSize)=0;
+ virtual int OclHostSignedSub(double *lData,double *rData,double *rResult,int rowSize)=0;
+ virtual int OclHostSignedMul(double *lData,double *rData,double *rResult,int rowSize)=0;
+ virtual int OclHostSignedDiv(double *lData,double *rData,double *rResult,int rowSize)=0;
+ virtual int OclHostFormulaMax(double *srcData,int *startPos,int *endPos,double *output,int outputSize)=0;
+ virtual int OclHostFormulaMin(double *srcData,int *startPos,int *endPos,double *output,int outputSize)=0;
+ virtual int OclHostFormulaAverage(double *srcData,int *startPos,int *endPos,double *output,int outputSize)=0;
+
+};
+
+
class OpenclDevice {
-private:
- GPUEnv gpu_env;
- int isInited;
public:
+ static GPUEnv gpuEnv;
+ static int isInited;
OpenclDevice();
~OpenclDevice();
- int regist_opencl_kernel();
- int convert_to_string(const char *filename, char **source);
- int binary_generated(cl_context context, const char * cl_file_name,
- FILE ** fhandle);
- int write_binary_to_file(const char* fileName, const char* birary,
- size_t numBytes);
- int generat_bin_from_kernel_source(cl_program program,
- const char * cl_file_name);
- int init_opencl_attr(OpenCLEnv * env);
- int create_kernel(char * kernelname, KernelEnv * env);
- int release_kernel(KernelEnv * env);
- int init_opencl_env(GPUEnv *gpu_info);
- int release_opencl_env(GPUEnv *gpu_info);
- int run_kernel_wrapper(cl_kernel_function function, char * kernel_name,
- void **usrdata);
- int register_kernel_wrapper(const char *kernel_name,
- cl_kernel_function function);
- int cached_of_kerner_prg(const GPUEnv *gpu_env_cached,
- const char * cl_file_name);
- int compile_kernel_file(GPUEnv *gpu_info, const char *build_option);
- int compile_kernel_file(const char *filename, GPUEnv *gpu_info,
- const char *build_option);
- int get_kernel_env_and_func(const char *kernel_name, KernelEnv *env,
- cl_kernel_function *function);
- int run_kernel(const char *kernel_name, void **userdata);
- int init_opencl_run_env(int argc, const char *build_option_kernelfiles);
- int init_opencl_run_env(int argc, const char *argv_kernelfiles[],
- const char *build_option_kernelfiles);
- int release_opencl_run_env();
- void setOpenclState(int state);
- int getOpenclState();
- inline int add_kernel_cfg(int kCount, const char *kName);
+ static int InitEnv();
+ static int RegistOpenclKernel();
+ static int ReleaseOpenclRunEnv();
+ static int InitOpenclRunEnv(GPUEnv *gpu);
+ static int ReleaseOpenclEnv(GPUEnv *gpuInfo);
+ static int CompileKernelFile(GPUEnv *gpuInfo, const char *buildOption);
+ static int InitOpenclRunEnv(int argc, const char *buildOptionKernelfiles);
+ static int CachedOfKernerPrg(const GPUEnv *gpuEnvCached, const char * clFileName);
+ static int GeneratBinFromKernelSource(cl_program program, const char * clFileName);
+ static int WriteBinaryToFile(const char* fileName, const char* birary, size_t numBytes);
+ static int BinaryGenerated(const char * clFileName, FILE ** fhandle);
+ static int CompileKernelFile(const char *filename, GPUEnv *gpuInfo, const char *buildOption);
+
+ int ReleaseKernel(KernelEnv * env);
+ int InitOpenclAttr(OpenCLEnv * env);
+ int CreateKernel(char * kernelname, KernelEnv * env);
+ int RunKernel(const char *kernelName, void **userdata);
+ int ConvertToString(const char *filename, char **source);
+ int CheckKernelName(KernelEnv *envInfo,const char *kernelName);
+ int RegisterKernelWrapper(const char *kernelName,cl_kernel_function function);
+ int RunKernelWrapper(cl_kernel_function function, const char * kernelName, void **usrdata);
+ int GetKernelEnvAndFunc(const char *kernelName, KernelEnv *env,cl_kernel_function *function);
+
+
+#ifdef WIN32
+ static int LoadOpencl();
+ static int OpenclInite();
+ static void FreeOpenclDll();
+#endif
+
+ int GetOpenclState();
+ void SetOpenclState(int state);
+ inline static int AddKernelConfig(int kCount, const char *kName);
};
#define NUM 4//(16*16*16)
typedef enum _formulax_ {
- MIN, MAX, SUM, AVG, COUNT, SUMPRODUCT, MINVERSE
+ MIN,
+ MAX,
+ SUM,
+ AVG,
+ COUNT,
+ SUMPRODUCT,
+ MINVERSE,
+ SIGNEDADD,
+ SIGNEDNUL,
+ SIGNEDDIV,
+ SIGNEDSUB
} formulax;
-class OclCalc: public OpenclDevice {
+
+class OclCalc: public OpenclDevice,OpenclCalcBase {
+
public:
OclCalc();
~OclCalc();
- double OclProcess(cl_kernel_function function, double *data, formulax type);
double OclTest();
+ double OclTestDll();
double OclMin();
- double OclMax();
- double OclSum();
- double OclCount();
- double OclAverage();
- double OclSumproduct();
- double OclMinverse();
-
+ double OclProcess(cl_kernel_function function, double *data, formulax type);
+ int OclHostSignedAdd(double *lData,double *rData,double *rResult,int rowSize);
+ int OclHostSignedSub(double *lData,double *rData,double *rResult,int rowSize);
+ int OclHostSignedMul(double *lData,double *rData,double *rResult,int rowSize);
+ int OclHostSignedDiv(double *lData,double *rData,double *rResult,int rowSize);
+ int OclHostFormulaMax(double *srcData,int *startPos,int *endPos,double *output,int outputSize);
+ int OclHostFormulaMin(double *srcData,int *startPos,int *endPos,double *output,int outputSize);
+ int OclHostFormulaAverage(double *srcData,int *startPos,int *endPos,double *output,int outputSize);
};
+#endif
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sc/source/core/tool/formulagroup.cxx b/sc/source/core/tool/formulagroup.cxx
index 1ee57b5..88c33cf 100644
--- a/sc/source/core/tool/formulagroup.cxx
+++ b/sc/source/core/tool/formulagroup.cxx
@@ -13,17 +13,64 @@
#include "tokenarray.hxx"
#include "compiler.hxx"
#include "interpre.hxx"
-
#include "formula/vectortoken.hxx"
+#ifdef ENABLE_OPENCL
+#include "openclwrapper.hxx"
+#endif
+
namespace sc {
FormulaGroupInterpreter::FormulaGroupInterpreter(
ScDocument& rDoc, const ScAddress& rTopPos, const ScFormulaCellGroupRef& xGroup, ScTokenArray& rCode) :
mrDoc(rDoc), maTopPos(rTopPos), mxGroup(xGroup), mrCode(rCode) {}
+/////time test dbg
+double getTimeDiff(const TimeValue& t1, const TimeValue& t2)
+{
+ double tv1 = t1.Seconds;
+ double tv2 = t2.Seconds;
+ tv1 += t1.Nanosec / 1000000000.0;
+ tv2 += t2.Nanosec / 1000000000.0;
+
+ return tv1 - tv2;
+}//dbg-t
+TimeValue aTimeBefore, aTimeAfter;
+///////////////////////////////////////
+
bool FormulaGroupInterpreter::interpret()
{
+#ifdef ENABLE_OPENCL //dbg
+ size_t rowSize = mxGroup->mnLength, srcSize = 0;
+ fprintf(stderr,"rowSize at begin is ...%ld.\n",rowSize);
+ int *rangeStart =NULL; // The first position for calculation,for example,the A1 in (=MAX(A1:A100))
+ int *rangeEnd = NULL; // The last position for calculation,for example, the A100 in (=MAX(A1:A100))
+ // The number of rows is available from p2->GetArrayLength()
+ int count1 =0,count2 =0,count3=0;
+ int oclOp=0;
+ double *srcData = NULL; // Point to the input data from CPU
+ double *rResult=NULL; // Point to the output data from GPU
+ double *leftData=NULL; // Left input for binary operator(+,-,*,/),for example,(=leftData+rightData)
+ double *rightData=NULL; // Right input for binary operator(+,-,*,/),for example,(=leftData/rightData)
+ // The rightData can't be zero for "/"
+
+ leftData = (double *)malloc(sizeof(double) * rowSize);
+ rightData = (double *)malloc(sizeof(double) * rowSize);
+ rResult = (double *)malloc(sizeof(double) * rowSize*2);// For 2 columns(B,C)
+ srcData = (double *)calloc(rowSize,sizeof(double));
+
+ rangeStart =(int *)malloc(sizeof(int) * rowSize);
+ rangeEnd =(int *)malloc(sizeof(int) * rowSize);
+
+ memset(rResult,0,rowSize);
+ if(NULL==leftData||NULL==rightData||
+ NULL==rResult||NULL==rangeStart||NULL==rangeEnd)
+ {
+ printf("malloc err\n");
+ return false;
+ }
+ // printf("rowSize is %d.\n",rowsize);
+#endif
// Until we implement group calculation for real, decompose the group into
// individual formula token arrays for individual calculation.
ScAddress aTmpPos = maTopPos;
@@ -51,12 +98,31 @@ bool FormulaGroupInterpreter::interpret()
size_t nRowEnd = p2->GetRefRowSize() - 1;
if (!p2->IsEndFixed())
nRowEnd += i;
-
size_t nRowSize = nRowEnd - nRowStart + 1;
ScMatrixRef pMat(new ScMatrix(nColSize, nRowSize, 0.0));
+#ifdef ENABLE_OPENCL
+ //srcSize = rowSize+nRowSize-rowSize%nRowSize;//align as nRowSize
+ //srcData = (double *)calloc(srcSize,sizeof(double));
+ rangeStart[i] = nRowStart;//record the start position
+ rangeEnd[i] = nRowEnd;//record the end position
+#endif
for (size_t nCol = 0; nCol < nColSize; ++nCol)
{
const double* pArray = rArrays[nCol];
+#ifdef ENABLE_OPENCL
+ //printf("pArray is %p.\n",pArray);
+ if( NULL==pArray )
+ {
+ fprintf(stderr,"Error: pArray is NULL!\n");
+ return false;
+ }
+ //fprintf(stderr,"(rowSize+nRowSize-1) is %d.\n",rowSize+nRowSize-1);
+ for( size_t u=0; u<rowSize; u++ )
+ {
+ srcData[u] = pArray[u];// note:rowSize<=srcSize
+ //fprintf(stderr,"srcData[%d] is %f.\n",u,srcData[u]);
+ }
+#endif
for (size_t nRow = 0; nRow < nRowSize; ++nRow)
{
if (nRowStart + nRow < p2->GetArrayLength())
@@ -80,16 +146,126 @@ bool FormulaGroupInterpreter::interpret()
if (!pDest)
return false;
- ScCompiler aComp(&mrDoc, aTmpPos, aCode2);
- aComp.SetGrammar(mrDoc.GetGrammar());
- aComp.CompileTokenArray(); // Create RPN token array.
- ScInterpreter aInterpreter(pDest, &mrDoc, aTmpPos, aCode2);
- aInterpreter.Interpret();
+#ifdef ENABLE_OPENCL
+ const formula::FormulaToken *pCur = aCode2.First();
+ aCode2.Reset();
+ while( ( pCur = aCode2.Next() ) != NULL )
+ {
+ OpCode eOp = pCur->GetOpCode();
+ if(eOp==0)
+ {
+ if(count3%2==0)
+ leftData[count1++] = pCur->GetDouble();
+ else
+ rightData[count2++] = pCur->GetDouble();
+ count3++;
+ }
+ else if( eOp!=ocOpen && eOp!=ocClose )
+ oclOp = eOp;
- pDest->SetResultToken(aInterpreter.GetResultToken().get());
- pDest->ResetDirty();
- pDest->SetChanged(true);
- }
+// if(count1>0){//dbg
+// fprintf(stderr,"leftData is %f.\n",leftData[count1-1]);
+// count1--;
+// }
+// if(count2>0){//dbg
+// fprintf(stderr,"rightData is %f.\n",rightData[count2-1]);
+// count2--;
+// }
+ }
+#endif
+ if(getenv("SC_FORMULAGROUP")&&(!getenv("SC_GPU"))){
+ fprintf(stderr,"ccCPU flow...\n\n");
+ ScCompiler aComp(&mrDoc, aTmpPos, aCode2);
+ aComp.SetGrammar(mrDoc.GetGrammar());
+ aComp.CompileTokenArray(); // Create RPN token array.
+ ScInterpreter aInterpreter(pDest, &mrDoc, aTmpPos, aCode2);
+ aInterpreter.Interpret();
+ pDest->SetResultToken(aInterpreter.GetResultToken().get());
+ pDest->ResetDirty();
+ pDest->SetChanged(true);
+ }
+ } // for loop end (mxGroup->mnLength)
+ // For GPU calculation
+#ifdef ENABLE_OPENCL //dbg: Using "export SC_FORMULAGROUP=1;export SC_GPU=1" to open if{} in terminal
+ if(getenv("SC_FORMULAGROUP")&&(getenv("SC_GPU"))){
+ fprintf(stderr,"ggGPU flow...\n\n");
+ printf(" oclOp is... %d\n",oclOp);
+osl_getSystemTime(&aTimeBefore);//timer
+ static OclCalc ocl_calc;
+ switch(oclOp)
+ {
+ case ocAdd:
+ ocl_calc.OclHostSignedAdd(leftData,rightData,rResult,count1);
+ break;
+ case ocSub:
+ ocl_calc.OclHostSignedSub(leftData,rightData,rResult,count1);
+ break;
+ case ocMul:
+ ocl_calc.OclHostSignedMul(leftData,rightData,rResult,count1);
+ break;
+ case ocDiv:
+ ocl_calc.OclHostSignedDiv(leftData,rightData,rResult,count1);
+ break;
+ case ocMax:
+ ocl_calc.OclHostFormulaMax(srcData,rangeStart,rangeEnd,rResult,rowSize);
+ break;
+ case ocMin:
+ ocl_calc.OclHostFormulaMin(srcData,rangeStart,rangeEnd,rResult,rowSize);
+ break;
+ case ocAverage:
+ ocl_calc.OclHostFormulaAverage(srcData,rangeStart,rangeEnd,rResult,rowSize);
+ break;
+ default:
+ fprintf(stderr,"No OpenCL function for this calculation.\n");
+ break;
+ }
+/////////////////////////////////////////////////////
+osl_getSystemTime(&aTimeAfter);
+double diff = getTimeDiff(aTimeAfter, aTimeBefore);
+//if (diff >= 1.0)
+{
+ fprintf(stderr,"OpenCL,diff...%f.\n",diff);
+
+}
+/////////////////////////////////////////////////////
+
+//rResult[i];
+// for(sal_Int32 i = 0; i < rowSize; ++i){//dbg output results
+// fprintf(stderr,"After GPU,rRsults[%d] is ...%f\n",i,rResult[i]);
+// }
+
+// Write the doubles the GPU computed into rResult[i] back to the formula cells so the results are shown to the user in the UI
+ for (sal_Int32 i = 0; i < mxGroup->mnLength; ++i)
+ {
+ ScFormulaCell* pDestx = mrDoc.GetFormulaCell(aTmpPos);
+ if (!pDestx)
+ return false;
+ formula::FormulaTokenRef xResult = new formula::FormulaDoubleToken(rResult[i]);
+ pDestx->SetResultToken(xResult.get());
+ pDestx->ResetDirty();
+ pDestx->SetChanged(true);
+ aTmpPos.SetRow(mxGroup->mnStart + i + 1);
+ }
+ }
+
+ if(leftData)
+ free(leftData);
+ if(rightData)
+ free(rightData);
+ if(rangeStart)
+ free(rangeStart);
+ if(rangeEnd)
+ free(rangeEnd);
+ if(rResult)
+ free(rResult);
+ if(srcData)
+ free(srcData);
+
+if(getenv("SC_GPUSAMPLE")){
+ //fprintf(stderr,"FormulaGroupInterpreter::interpret(),iniflag...%d\n",ocl_calc.GetOpenclState());
+ //ocl_calc.OclTest();//opencl test sample for debug
+}
+#endif
return true;
}
diff --git a/sc/source/core/tool/interpr1.cxx b/sc/source/core/tool/interpr1.cxx
index afe994e..ce0ce41 100644
--- a/sc/source/core/tool/interpr1.cxx
+++ b/sc/source/core/tool/interpr1.cxx
@@ -4327,10 +4327,6 @@ void ScInterpreter::ScProduct()
void ScInterpreter::ScAverage( bool bTextAsZero )
{
RTL_LOGFILE_CONTEXT_AUTHOR( aLogger, "sc", "er", "ScInterpreter::ScAverage" );
-#ifdef ENABLE_OPENCL
- static OclCalc ocl_calc;
- ocl_calc.OclTest();
-#endif
PushDouble( IterateParameters( ifAVERAGE, bTextAsZero ) );
}
diff --git a/sc/source/ui/app/scmod.cxx b/sc/source/ui/app/scmod.cxx
index ac3daad..05cef41 100644
--- a/sc/source/ui/app/scmod.cxx
+++ b/sc/source/ui/app/scmod.cxx
@@ -101,6 +101,10 @@
#include "scabstdlg.hxx"
#include "formula/errorcodes.hxx"
+#ifdef ENABLE_OPENCL
+#include "openclwrapper.hxx"
+#endif
+
#define SC_IDLE_MIN 150
#define SC_IDLE_MAX 3000
#define SC_IDLE_STEP 75
@@ -148,6 +152,9 @@ ScModule::ScModule( SfxObjectFactory* pFact ) :
mbIsInSharedDocLoading( false ),
mbIsInSharedDocSaving( false )
{
+#ifdef ENABLE_OPENCL
+ OclCalc::InitEnv();
+#endif
// im ctor ist der ResManager (DLL-Daten) noch nicht initialisiert!
SetName(OUString("StarCalc")); // fuer Basic
@@ -181,6 +188,9 @@ ScModule::ScModule( SfxObjectFactory* pFact ) :
ScModule::~ScModule()
{
+#ifdef ENABLE_OPENCL
+ OclCalc::ReleaseOpenclRunEnv();
+#endif
OSL_ENSURE( !pSelTransfer, "Selection Transfer object not deleted" );
// InputHandler braucht nicht mehr geloescht zu werden (gibt keinen an der App mehr)
More information about the Libreoffice-commits
mailing list