[Beignet] [PATCH] Add multi devices support in context.
junyan.he at inbox.com
junyan.he at inbox.com
Mon Oct 10 09:26:38 UTC 2016
From: Junyan He <junyan.he at intel.com>
In the future there may be more than one device on the platform,
so we need to support multiple devices within one context.
Signed-off-by: Junyan He <junyan.he at intel.com>
---
src/cl_api.c | 15 ++++++++++++---
src/cl_api_command_queue.c | 2 +-
src/cl_api_context.c | 4 ++--
src/cl_api_program.c | 14 ++++++++------
src/cl_command_queue.h | 1 +
src/cl_command_queue_gen7.c | 12 ++++++------
src/cl_context.c | 44 ++++++++++++++++++++++++++++++++++++--------
src/cl_context.h | 3 ++-
src/cl_device_id.c | 37 ++++++++++++++++++++++++++++---------
src/cl_device_id.h | 2 ++
src/cl_kernel.c | 3 ++-
src/cl_mem.c | 36 ++++++++++++++++++------------------
src/cl_program.c | 28 ++++++++++++++--------------
13 files changed, 132 insertions(+), 69 deletions(-)
diff --git a/src/cl_api.c b/src/cl_api.c
index 945d6c1..03388ec 100644
--- a/src/cl_api.c
+++ b/src/cl_api.c
@@ -153,7 +153,10 @@ clCreateCommandQueue(cl_context context,
cl_int err = CL_SUCCESS;
CHECK_CONTEXT (context);
- INVALID_DEVICE_IF (device != context->device);
+ err = cl_devices_list_include_check(context->device_num, context->devices, 1, &device);
+ if (err)
+ goto error;
+
INVALID_VALUE_IF (properties & ~(CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_PROFILING_ENABLE));
if(properties & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) {/*not supported now.*/
@@ -579,7 +582,10 @@ clBuildProgram(cl_program program,
/* Everything is easy. We only support one device anyway */
if (num_devices != 0) {
assert(program->ctx);
- INVALID_DEVICE_IF (device_list[0] != program->ctx->device);
+ err = cl_devices_list_include_check(program->ctx->device_num,
+ program->ctx->devices, num_devices, device_list);
+ if (err)
+ goto error;
}
assert(program->source_type == FROM_LLVM ||
@@ -621,7 +627,10 @@ clCompileProgram(cl_program program ,
/* Everything is easy. We only support one device anyway */
if (num_devices != 0) {
assert(program->ctx);
- INVALID_DEVICE_IF (device_list[0] != program->ctx->device);
+ err = cl_devices_list_include_check(program->ctx->device_num,
+ program->ctx->devices, num_devices, device_list);
+ if (err)
+ goto error;
}
/* TODO support create program from binary */
diff --git a/src/cl_api_command_queue.c b/src/cl_api_command_queue.c
index 426b9a0..0f458a3 100644
--- a/src/cl_api_command_queue.c
+++ b/src/cl_api_command_queue.c
@@ -37,7 +37,7 @@ clGetCommandQueueInfo(cl_command_queue command_queue,
src_ptr = &command_queue->ctx;
src_size = sizeof(cl_context);
} else if (param_name == CL_QUEUE_DEVICE) {
- src_ptr = &command_queue->ctx->device;
+ src_ptr = &command_queue->device;
src_size = sizeof(cl_device_id);
} else if (param_name == CL_QUEUE_REFERENCE_COUNT) {
cl_int ref = CL_OBJECT_GET_REF(command_queue);
diff --git a/src/cl_api_context.c b/src/cl_api_context.c
index 2160950..6c633fd 100644
--- a/src/cl_api_context.c
+++ b/src/cl_api_context.c
@@ -144,8 +144,8 @@ clGetContextInfo(cl_context context,
}
if (param_name == CL_CONTEXT_DEVICES) {
- src_ptr = &context->device;
- src_size = sizeof(cl_device_id);
+ src_ptr = context->devices;
+ src_size = sizeof(cl_device_id) * context->device_num;
} else if (param_name == CL_CONTEXT_NUM_DEVICES) {
cl_uint n = 1;
src_ptr = &n;
diff --git a/src/cl_api_program.c b/src/cl_api_program.c
index 43bc2c3..0281e68 100644
--- a/src/cl_api_program.c
+++ b/src/cl_api_program.c
@@ -17,6 +17,7 @@
*/
#include "cl_program.h"
#include "cl_context.h"
+#include "cl_device_id.h"
#include <string.h>
cl_int
@@ -47,9 +48,8 @@ clGetProgramInfo(cl_program program,
src_ptr = &num_dev;
src_size = sizeof(cl_uint);
} else if (param_name == CL_PROGRAM_DEVICES) {
- cl_device_id dev_id = program->ctx->device;
- src_ptr = &dev_id;
- src_size = sizeof(cl_device_id);
+ src_ptr = program->ctx->devices;
+ src_size = sizeof(cl_device_id) * program->ctx->device_num;
} else if (param_name == CL_PROGRAM_NUM_KERNELS) {
cl_uint kernels_num = program->ker_n;
src_ptr = &kernels_num;
@@ -129,14 +129,16 @@ clGetProgramBuildInfo(cl_program program,
const void *src_ptr = NULL;
size_t src_size = 0;
const char *ret_str = "";
+ cl_int err = CL_SUCCESS;
if (!CL_OBJECT_IS_PROGRAM(program)) {
return CL_INVALID_PROGRAM;
}
- if (device != program->ctx->device) {
- return CL_INVALID_DEVICE;
- }
+ err = cl_devices_list_include_check(program->ctx->device_num,
+ program->ctx->devices, 1, &device);
+ if (err != CL_SUCCESS)
+ return err;
if (param_name == CL_PROGRAM_BUILD_STATUS) {
src_ptr = &program->build_status;
diff --git a/src/cl_command_queue.h b/src/cl_command_queue.h
index 54a24f7..40127d6 100644
--- a/src/cl_command_queue.h
+++ b/src/cl_command_queue.h
@@ -44,6 +44,7 @@ struct _cl_command_queue {
_cl_base_object base;
_cl_command_queue_enqueue_worker worker;
cl_context ctx; /* Its parent context */
+ cl_device_id device; /* Its device */
cl_event* barrier_events; /* Point to array of non-complete user events that block this command queue */
cl_int barrier_events_num; /* Number of Non-complete user events */
cl_int barrier_events_size; /* The size of array that wait_events point to */
diff --git a/src/cl_command_queue_gen7.c b/src/cl_command_queue_gen7.c
index 37082a6..d1e4019 100644
--- a/src/cl_command_queue_gen7.c
+++ b/src/cl_command_queue_gen7.c
@@ -255,7 +255,7 @@ static void
cl_bind_stack(cl_gpgpu gpgpu, cl_kernel ker)
{
cl_context ctx = ker->program->ctx;
- cl_device_id device = ctx->device;
+ cl_device_id device = ctx->devices[0];
const int32_t per_lane_stack_sz = ker->stack_size;
const int32_t value = GBE_CURBE_EXTRA_ARGUMENT;
const int32_t sub_value = GBE_STACK_BUFFER;
@@ -271,7 +271,7 @@ cl_bind_stack(cl_gpgpu gpgpu, cl_kernel ker)
*/
assert(offset >= 0);
stack_sz *= interp_kernel_get_simd_width(ker->opaque);
- stack_sz *= device->max_compute_unit * ctx->device->max_thread_per_unit;
+ stack_sz *= device->max_compute_unit * ctx->devices[0]->max_thread_per_unit;
/* for some hardware, part of EUs are disabled with EU id reserved,
* it makes the active EU id larger than count of EUs within a subslice,
@@ -367,14 +367,14 @@ cl_command_queue_ND_range_gen7(cl_command_queue queue,
kernel.thread_n = thread_n = (local_sz + simd_sz - 1) / simd_sz;
kernel.curbe_sz = cst_sz;
- if (scratch_sz > ker->program->ctx->device->scratch_mem_size) {
+ if (scratch_sz > ker->program->ctx->devices[0]->scratch_mem_size) {
DEBUGP(DL_ERROR, "Out of scratch memory %d.", scratch_sz);
return CL_OUT_OF_RESOURCES;
}
/* Curbe step 1: fill the constant urb buffer data shared by all threads */
if (ker->curbe) {
kernel.slm_sz = cl_curbe_fill(ker, work_dim, global_wk_off, global_wk_sz, local_wk_sz, thread_n);
- if (kernel.slm_sz > ker->program->ctx->device->local_mem_size) {
+ if (kernel.slm_sz > ker->program->ctx->devices[0]->local_mem_size) {
DEBUGP(DL_ERROR, "Out of shared local memory %d.", kernel.slm_sz);
return CL_OUT_OF_RESOURCES;
}
@@ -385,9 +385,9 @@ cl_command_queue_ND_range_gen7(cl_command_queue queue,
/* Setup the kernel */
if (queue->props & CL_QUEUE_PROFILING_ENABLE)
- err = cl_gpgpu_state_init(gpgpu, ctx->device->max_compute_unit * ctx->device->max_thread_per_unit, cst_sz / 32, 1);
+ err = cl_gpgpu_state_init(gpgpu, ctx->devices[0]->max_compute_unit * ctx->devices[0]->max_thread_per_unit, cst_sz / 32, 1);
else
- err = cl_gpgpu_state_init(gpgpu, ctx->device->max_compute_unit * ctx->device->max_thread_per_unit, cst_sz / 32, 0);
+ err = cl_gpgpu_state_init(gpgpu, ctx->devices[0]->max_compute_unit * ctx->devices[0]->max_thread_per_unit, cst_sz / 32, 0);
if (err != 0)
goto error;
printf_num = interp_get_printf_num(printf_info);
diff --git a/src/cl_context.c b/src/cl_context.c
index 4617f11..e2ee4c0 100644
--- a/src/cl_context.c
+++ b/src/cl_context.c
@@ -273,7 +273,8 @@ cl_context_properties_process(const cl_context_properties *prop,
}
static cl_context
-cl_context_new(cl_context_prop props, const cl_context_properties* properties, cl_uint prop_len)
+cl_context_new(cl_context_prop props, const cl_context_properties* properties, cl_uint prop_len,
+ cl_uint dev_num, cl_device_id* all_dev)
{
cl_context ctx = cl_calloc(1, sizeof(_cl_context));
if (ctx == NULL)
@@ -299,6 +300,8 @@ cl_context_new(cl_context_prop props, const cl_context_properties* properties, c
}
CL_OBJECT_INIT_BASE(ctx, CL_OBJECT_CONTEXT_MAGIC);
+ ctx->devices = all_dev;
+ ctx->device_num = dev_num;
list_init(&ctx->queues);
list_init(&ctx->mem_objects);
list_init(&ctx->samplers);
@@ -320,27 +323,51 @@ cl_create_context(const cl_context_properties * properties,
cl_context ctx = NULL;
struct _cl_context_prop props;
cl_uint prop_len = 0;
+ cl_uint dev_num = 0;
+ cl_device_id* all_dev = NULL;
+ cl_uint i, j;
*errcode_ret = cl_context_properties_process(properties, &props, &prop_len);
if (*errcode_ret != CL_SUCCESS)
return NULL;
+ /* Filter out repeated devices. */
+ assert(num_devices > 0);
+ all_dev = cl_calloc(num_devices, sizeof(cl_device_id));
+ if (all_dev == NULL) {
+ *errcode_ret = CL_OUT_OF_HOST_MEMORY;
+ return NULL;
+ }
+ for (i = 0; i < num_devices; i++) {
+ for (j = 0; j < i; j++) {
+ if (devices[j] == devices[i]) {
+ break;
+ }
+ }
+
+ if (j != i) { // Find some duplicated one.
+ continue;
+ }
+
+ all_dev[dev_num] = devices[i];
+ dev_num++;
+ }
+ assert(dev_num == 1); // TODO: multi devices later.
+
/* We are good */
- ctx = cl_context_new(&props, properties, prop_len);
+ ctx = cl_context_new(&props, properties, prop_len, dev_num, all_dev);
if (ctx == NULL) {
*errcode_ret = CL_OUT_OF_HOST_MEMORY;
return NULL;
}
-
- /* Attach the device to the context */
- assert(num_devices == 1); // TODO: multi devices later.
- ctx->device = *devices;
+ /* cl_context_new stored all_dev in ctx->devices; drop our local reference. */
+ all_dev = NULL;
/* Save the user callback and user data*/
ctx->pfn_notify = pfn_notify;
ctx->user_data = user_data;
- cl_driver_set_atomic_flag(ctx->drv, ctx->device->atomic_test_result);
+ cl_driver_set_atomic_flag(ctx->drv, ctx->devices[0]->atomic_test_result);
initialize_env_var();
return ctx;
@@ -404,6 +431,7 @@ cl_context_create_queue(cl_context ctx,
/* We create the command queue and store it in the context list of queues */
TRY_ALLOC (queue, cl_command_queue_new(ctx));
queue->props = properties;
+ queue->device = device;
exit:
if (errcode_ret)
@@ -431,7 +459,7 @@ cl_context_get_static_kernel_from_bin(cl_context ctx, cl_int index,
CL_OBJECT_TAKE_OWNERSHIP(ctx, 1);
if (ctx->internal_prgs[index] == NULL) {
- ctx->internal_prgs[index] = cl_program_create_from_binary(ctx, 1, &ctx->device,
+ ctx->internal_prgs[index] = cl_program_create_from_binary(ctx, 1, &ctx->devices[0],
&size, (const unsigned char **)&str_kernel, &binary_status, &ret);
if (!ctx->internal_prgs[index]) {
diff --git a/src/cl_context.h b/src/cl_context.h
index e582fe2..4e7c2e8 100644
--- a/src/cl_context.h
+++ b/src/cl_context.h
@@ -104,7 +104,8 @@ typedef struct _cl_context_prop {
typedef struct _cl_context {
_cl_base_object base;
cl_driver drv; /* Handles HW or simulator */
- cl_device_id device; /* All information about the GPU device */
+ cl_device_id* devices; /* All devices belong to this context */
+ cl_uint device_num; /* Devices number of this context */
list_head queues; /* All command queues currently allocated */
cl_uint queue_num; /* All queue number currently allocated */
cl_uint queue_cookie; /* Cookie will change every time we change queue list. */
diff --git a/src/cl_device_id.c b/src/cl_device_id.c
index 268fa61..056cfc5 100644
--- a/src/cl_device_id.c
+++ b/src/cl_device_id.c
@@ -1341,22 +1341,22 @@ cl_get_kernel_max_wg_sz(cl_kernel kernel)
{
size_t work_group_size, thread_cnt;
int simd_width = interp_kernel_get_simd_width(kernel->opaque);
- int device_id = kernel->program->ctx->device->device_id;
+ int device_id = kernel->program->ctx->devices[0]->device_id;
if (!interp_kernel_use_slm(kernel->opaque)) {
if (!IS_BAYTRAIL_T(device_id) || simd_width == 16)
work_group_size = simd_width * 64;
else
- work_group_size = kernel->program->ctx->device->max_compute_unit *
- kernel->program->ctx->device->max_thread_per_unit * simd_width;
+ work_group_size = kernel->program->ctx->devices[0]->max_compute_unit *
+ kernel->program->ctx->devices[0]->max_thread_per_unit * simd_width;
} else {
- thread_cnt = kernel->program->ctx->device->max_compute_unit *
- kernel->program->ctx->device->max_thread_per_unit / kernel->program->ctx->device->sub_slice_count;
+ thread_cnt = kernel->program->ctx->devices[0]->max_compute_unit *
+ kernel->program->ctx->devices[0]->max_thread_per_unit / kernel->program->ctx->devices[0]->sub_slice_count;
if(thread_cnt > 64)
thread_cnt = 64;
work_group_size = thread_cnt * simd_width;
}
- if(work_group_size > kernel->program->ctx->device->max_work_group_size)
- work_group_size = kernel->program->ctx->device->max_work_group_size;
+ if(work_group_size > kernel->program->ctx->devices[0]->max_work_group_size)
+ work_group_size = kernel->program->ctx->devices[0]->max_work_group_size;
return work_group_size;
}
@@ -1372,7 +1372,7 @@ cl_get_kernel_workgroup_info(cl_kernel kernel,
int dimension = 0;
CHECK_KERNEL(kernel);
if (device == NULL)
- device = kernel->program->ctx->device;
+ device = kernel->program->ctx->devices[0];
if (UNLIKELY(is_gen_device(device) == CL_FALSE))
return CL_INVALID_DEVICE;
@@ -1444,7 +1444,7 @@ cl_get_kernel_subgroup_info(cl_kernel kernel,
{
int err = CL_SUCCESS;
if(device != NULL)
- if (kernel->program->ctx->device != device)
+ if (kernel->program->ctx->devices[0] != device)
return CL_INVALID_DEVICE;
CHECK_KERNEL(kernel);
@@ -1546,3 +1546,22 @@ cl_devices_list_check(cl_uint num_devices, const cl_device_id *devices)
return CL_SUCCESS;
}
+
+LOCAL cl_int /* Verify that every entry of devices_to_check also appears in devices. */
+cl_devices_list_include_check(cl_uint num_devices, const cl_device_id *devices,
+ cl_uint num_to_check, const cl_device_id *devices_to_check)
+{
+ cl_uint i, j;
+
+ for (i = 0; i < num_to_check; i++) { /* examine each candidate device in turn */
+ for (j = 0; j < num_devices; j++) { /* linear search, comparing device handles for equality */
+ if (devices_to_check[i] == devices[j])
+ break;
+ }
+
+ if (j == num_devices) /* inner loop ran to completion, so no match was found */
+ return CL_INVALID_DEVICE;
+ }
+
+ return CL_SUCCESS; /* every candidate matched; also the result when num_to_check is 0 */
+}
diff --git a/src/cl_device_id.h b/src/cl_device_id.h
index 12d6a6e..861f249 100644
--- a/src/cl_device_id.h
+++ b/src/cl_device_id.h
@@ -131,6 +131,8 @@ struct _cl_device_id {
CL_OBJECT_GET_REF(obj) >= 1))
extern cl_int cl_devices_list_check(cl_uint num_devices, const cl_device_id *devices);
+extern cl_int cl_devices_list_include_check(cl_uint num_devices, const cl_device_id *devices,
+ cl_uint num_to_check, const cl_device_id *devices_to_check);
/* Get a device from the given platform */
extern cl_int cl_get_device_ids(cl_platform_id platform,
diff --git a/src/cl_kernel.c b/src/cl_kernel.c
index b3f1e35..0e04d75 100644
--- a/src/cl_kernel.c
+++ b/src/cl_kernel.c
@@ -390,7 +390,8 @@ cl_kernel_setup(cl_kernel k, gbe_kernel opaque)
/* Get image data & size */
k->image_sz = interp_kernel_get_image_size(k->opaque);
assert(k->sampler_sz <= GEN_MAX_SURFACES);
- assert(k->image_sz <= ctx->device->max_read_image_args + ctx->device->max_write_image_args);
+ // TODO: Devices may be more than 1
+ assert(k->image_sz <= ctx->devices[0]->max_read_image_args + ctx->devices[0]->max_write_image_args);
if (k->image_sz > 0) {
TRY_ALLOC_NO_ERR(k->images, cl_calloc(k->image_sz, sizeof(k->images[0])));
interp_kernel_get_image_data(k->opaque, k->images);
diff --git a/src/cl_mem.c b/src/cl_mem.c
index abf6dd6..57ea003 100644
--- a/src/cl_mem.c
+++ b/src/cl_mem.c
@@ -114,10 +114,10 @@ cl_mem_allocate(enum cl_mem_type type,
#ifdef HAS_USERPTR
uint8_t bufCreated = 0;
- if (ctx->device->host_unified_memory) {
+ if (ctx->devices[0]->host_unified_memory) {
int page_size = getpagesize();
int cacheline_size = 0;
- cl_get_device_info(ctx->device, CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, sizeof(cacheline_size), &cacheline_size, NULL);
+ cl_get_device_info(ctx->devices[0], CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, sizeof(cacheline_size), &cacheline_size, NULL);
if (type == CL_MEM_BUFFER_TYPE) {
if (flags & CL_MEM_USE_HOST_PTR) {
@@ -162,7 +162,7 @@ cl_mem_allocate(enum cl_mem_type type,
// if create image from USE_HOST_PTR buffer, the buffer's base address need be aligned.
if(buffer->is_userptr) {
int base_alignement = 0;
- cl_get_device_info(ctx->device, CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT, sizeof(base_alignement), &base_alignement, NULL);
+ cl_get_device_info(ctx->devices[0], CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT, sizeof(base_alignement), &base_alignement, NULL);
if(ALIGN((unsigned long)buffer->host_ptr, base_alignement) != (unsigned long)buffer->host_ptr) {
err = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR;
goto error;
@@ -279,7 +279,7 @@ cl_mem_new_buffer(cl_context ctx,
goto error;
}
- if ((err = cl_get_device_info(ctx->device,
+ if ((err = cl_get_device_info(ctx->devices[0],
CL_DEVICE_MAX_MEM_ALLOC_SIZE,
sizeof(max_mem_size),
&max_mem_size,
@@ -381,7 +381,7 @@ cl_mem_new_sub_buffer(cl_mem buffer,
goto error;
}
- if (info->origin & (buffer->ctx->device->mem_base_addr_align / 8 - 1)) {
+ if (info->origin & (buffer->ctx->devices[0]->mem_base_addr_align / 8 - 1)) {
err = CL_MISALIGNED_SUB_BUFFER_OFFSET;
goto error;
}
@@ -613,7 +613,7 @@ _cl_mem_new_image(cl_context ctx,
h = 1;
depth = 1;
- if (UNLIKELY(w > ctx->device->image2d_max_width)) DO_IMAGE_ERROR;
+ if (UNLIKELY(w > ctx->devices[0]->image2d_max_width)) DO_IMAGE_ERROR;
if (UNLIKELY(data && min_pitch > pitch)) DO_IMAGE_ERROR;
if (UNLIKELY(data && (slice_pitch % pitch != 0))) DO_IMAGE_ERROR;
if (UNLIKELY(!data && pitch != 0)) DO_IMAGE_ERROR;
@@ -623,11 +623,11 @@ _cl_mem_new_image(cl_context ctx,
image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER) {
if (image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER) {
- if (UNLIKELY(w > ctx->device->image_mem_size)) DO_IMAGE_ERROR;
+ if (UNLIKELY(w > ctx->devices[0]->image_mem_size)) DO_IMAGE_ERROR;
/* This is an image1d buffer which exceeds normal image size restrication
We have to use a 2D image to simulate this 1D image. */
- h = (w + ctx->device->image2d_max_width - 1) / ctx->device->image2d_max_width;
- w = w > ctx->device->image2d_max_width ? ctx->device->image2d_max_width : w;
+ h = (w + ctx->devices[0]->image2d_max_width - 1) / ctx->devices[0]->image2d_max_width;
+ w = w > ctx->devices[0]->image2d_max_width ? ctx->devices[0]->image2d_max_width : w;
tiling = CL_NO_TILE;
} else if(image_type == CL_MEM_OBJECT_IMAGE2D && buffer != NULL) {
tiling = CL_NO_TILE;
@@ -640,8 +640,8 @@ _cl_mem_new_image(cl_context ctx,
if (data && pitch == 0)
pitch = min_pitch;
- if (UNLIKELY(w > ctx->device->image2d_max_width)) DO_IMAGE_ERROR;
- if (UNLIKELY(h > ctx->device->image2d_max_height)) DO_IMAGE_ERROR;
+ if (UNLIKELY(w > ctx->devices[0]->image2d_max_width)) DO_IMAGE_ERROR;
+ if (UNLIKELY(h > ctx->devices[0]->image2d_max_height)) DO_IMAGE_ERROR;
if (UNLIKELY(data && min_pitch > pitch)) DO_IMAGE_ERROR;
if (UNLIKELY(!data && pitch != 0 && buffer == NULL)) DO_IMAGE_ERROR;
@@ -661,11 +661,11 @@ _cl_mem_new_image(cl_context ctx,
size_t min_slice_pitch = pitch * h;
if (data && slice_pitch == 0)
slice_pitch = min_slice_pitch;
- if (UNLIKELY(w > ctx->device->image3d_max_width)) DO_IMAGE_ERROR;
- if (UNLIKELY(h > ctx->device->image3d_max_height)) DO_IMAGE_ERROR;
+ if (UNLIKELY(w > ctx->devices[0]->image3d_max_width)) DO_IMAGE_ERROR;
+ if (UNLIKELY(h > ctx->devices[0]->image3d_max_height)) DO_IMAGE_ERROR;
if (image_type == CL_MEM_OBJECT_IMAGE3D &&
- (UNLIKELY(depth > ctx->device->image3d_max_depth))) DO_IMAGE_ERROR
- else if (UNLIKELY(depth > ctx->device->image_max_array_size)) DO_IMAGE_ERROR;
+ (UNLIKELY(depth > ctx->devices[0]->image3d_max_depth))) DO_IMAGE_ERROR
+ else if (UNLIKELY(depth > ctx->devices[0]->image_max_array_size)) DO_IMAGE_ERROR;
if (UNLIKELY(data && min_pitch > pitch)) DO_IMAGE_ERROR;
if (UNLIKELY(data && min_slice_pitch > slice_pitch)) DO_IMAGE_ERROR;
if (UNLIKELY(!data && pitch != 0)) DO_IMAGE_ERROR;
@@ -677,9 +677,9 @@ _cl_mem_new_image(cl_context ctx,
#undef DO_IMAGE_ERROR
uint8_t enableUserptr = 0;
- if (enable_true_hostptr && ctx->device->host_unified_memory && data != NULL && (flags & CL_MEM_USE_HOST_PTR)) {
+ if (enable_true_hostptr && ctx->devices[0]->host_unified_memory && data != NULL && (flags & CL_MEM_USE_HOST_PTR)) {
int cacheline_size = 0;
- cl_get_device_info(ctx->device, CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, sizeof(cacheline_size), &cacheline_size, NULL);
+ cl_get_device_info(ctx->devices[0], CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, sizeof(cacheline_size), &cacheline_size, NULL);
if (ALIGN((unsigned long)data, cacheline_size) == (unsigned long)data &&
ALIGN(h, cl_buffer_get_tiling_align(ctx, CL_NO_TILE, 1)) == h &&
ALIGN(h * pitch * depth, cacheline_size) == h * pitch * depth && //h and pitch should same as aligned_h and aligned_pitch if enable userptr
@@ -856,7 +856,7 @@ _cl_mem_new_image_from_buffer(cl_context ctx,
goto error;
}
- if ((err = cl_get_device_info(ctx->device,
+ if ((err = cl_get_device_info(ctx->devices[0],
CL_DEVICE_IMAGE_MAX_BUFFER_SIZE,
sizeof(max_size),
&max_size,
diff --git a/src/cl_program.c b/src/cl_program.c
index 6e86675..7c7fb79 100644
--- a/src/cl_program.c
+++ b/src/cl_program.c
@@ -218,7 +218,7 @@ cl_program_create_from_binary(cl_context ctx,
assert(ctx);
INVALID_DEVICE_IF (num_devices != 1);
INVALID_DEVICE_IF (devices == NULL);
- INVALID_DEVICE_IF (devices[0] != ctx->device);
+ INVALID_DEVICE_IF (devices[0] != ctx->devices[0]);
INVALID_VALUE_IF (binaries == NULL);
INVALID_VALUE_IF (lengths == NULL);
@@ -255,7 +255,7 @@ cl_program_create_from_binary(cl_context ctx,
TRY_ALLOC(typed_binary, cl_calloc(lengths[0]+1, sizeof(char)));
memcpy(typed_binary+1, binaries[0], lengths[0]);
*typed_binary = 1;
- program->opaque = compiler_program_new_from_llvm_binary(program->ctx->device->device_id, typed_binary, program->binary_sz+1);
+ program->opaque = compiler_program_new_from_llvm_binary(program->ctx->devices[0]->device_id, typed_binary, program->binary_sz+1);
cl_free(typed_binary);
if (UNLIKELY(program->opaque == NULL)) {
err = CL_INVALID_PROGRAM;
@@ -273,7 +273,7 @@ cl_program_create_from_binary(cl_context ctx,
err= CL_INVALID_BINARY;
goto error;
}
- program->opaque = compiler_program_new_from_llvm_binary(program->ctx->device->device_id, program->binary, program->binary_sz);
+ program->opaque = compiler_program_new_from_llvm_binary(program->ctx->devices[0]->device_id, program->binary, program->binary_sz);
if (UNLIKELY(program->opaque == NULL)) {
err = CL_INVALID_PROGRAM;
@@ -282,7 +282,7 @@ cl_program_create_from_binary(cl_context ctx,
program->source_type = FROM_LLVM;
}
else if (isGenBinary((unsigned char*)program->binary)) {
- program->opaque = interp_program_new_from_binary(program->ctx->device->device_id, program->binary, program->binary_sz);
+ program->opaque = interp_program_new_from_binary(program->ctx->devices[0]->device_id, program->binary, program->binary_sz);
if (UNLIKELY(program->opaque == NULL)) {
err = CL_INVALID_PROGRAM;
goto error;
@@ -324,7 +324,7 @@ cl_program_create_with_built_in_kernles(cl_context ctx,
assert(ctx);
INVALID_DEVICE_IF (num_devices != 1);
INVALID_DEVICE_IF (devices == NULL);
- INVALID_DEVICE_IF (devices[0] != ctx->device);
+ INVALID_DEVICE_IF (devices[0] != ctx->devices[0]);
cl_int binary_status = CL_SUCCESS;
extern char cl_internal_built_in_kernel_str[];
@@ -332,7 +332,7 @@ cl_program_create_with_built_in_kernles(cl_context ctx,
char* p_built_in_kernel_str =cl_internal_built_in_kernel_str;
ctx->built_in_prgs = cl_program_create_from_binary(ctx, 1,
- &ctx->device,
+ &ctx->devices[0],
(size_t*)&cl_internal_built_in_kernel_str_size,
(const unsigned char **)&p_built_in_kernel_str,
&binary_status, &err);
@@ -358,7 +358,7 @@ cl_program_create_with_built_in_kernles(cl_context ctx,
kernel = strtok_r( local_kernel_names, delims , &saveptr);
while( kernel != NULL ) {
- matched_kernel = strstr(ctx->device->built_in_kernels, kernel);
+ matched_kernel = strstr(ctx->devices[0]->built_in_kernels, kernel);
if(matched_kernel){
for (i = 0; i < ctx->built_in_prgs->ker_n; ++i) {
assert(ctx->built_in_prgs->ker[i]);
@@ -398,7 +398,7 @@ cl_program_create_from_llvm(cl_context ctx,
assert(ctx);
INVALID_DEVICE_IF (num_devices != 1);
INVALID_DEVICE_IF (devices == NULL);
- INVALID_DEVICE_IF (devices[0] != ctx->device);
+ INVALID_DEVICE_IF (devices[0] != ctx->devices[0]);
INVALID_VALUE_IF (file_name == NULL);
program = cl_program_new(ctx);
@@ -407,7 +407,7 @@ cl_program_create_from_llvm(cl_context ctx,
goto error;
}
- program->opaque = compiler_program_new_from_llvm(ctx->device->device_id, file_name, NULL, NULL, NULL, program->build_log_max_sz, program->build_log, &program->build_log_sz, 1, NULL);
+ program->opaque = compiler_program_new_from_llvm(ctx->devices[0]->device_id, file_name, NULL, NULL, NULL, program->build_log_max_sz, program->build_log, &program->build_log_sz, 1, NULL);
if (UNLIKELY(program->opaque == NULL)) {
err = CL_INVALID_PROGRAM;
goto error;
@@ -504,7 +504,7 @@ static int check_cl_version_option(cl_program p, const char* options) {
ver1 = (s[10] - '0') * 10 + (s[12] - '0');
- if (cl_get_device_info(p->ctx->device, CL_DEVICE_OPENCL_C_VERSION, sizeof(version_str),
+ if (cl_get_device_info(p->ctx->devices[0], CL_DEVICE_OPENCL_C_VERSION, sizeof(version_str),
version_str, NULL) != CL_SUCCESS)
return 0;
@@ -572,7 +572,7 @@ cl_program_build(cl_program p, const char *options)
goto error;
}
- p->opaque = compiler_program_new_from_source(p->ctx->device->device_id, p->source, p->build_log_max_sz, options, p->build_log, &p->build_log_sz);
+ p->opaque = compiler_program_new_from_source(p->ctx->devices[0]->device_id, p->source, p->build_log_max_sz, options, p->build_log, &p->build_log_sz);
if (UNLIKELY(p->opaque == NULL)) {
if (p->build_log_sz > 0 && strstr(p->build_log, "error: error reading 'options'"))
err = CL_INVALID_BUILD_OPTIONS;
@@ -600,7 +600,7 @@ cl_program_build(cl_program p, const char *options)
/* Create all the kernels */
TRY (cl_program_load_gen_program, p);
} else if (p->source_type == FROM_BINARY && p->binary_type != CL_PROGRAM_BINARY_TYPE_EXECUTABLE) {
- p->opaque = interp_program_new_from_binary(p->ctx->device->device_id, p->binary, p->binary_sz);
+ p->opaque = interp_program_new_from_binary(p->ctx->devices[0]->device_id, p->binary, p->binary_sz);
if (UNLIKELY(p->opaque == NULL)) {
err = CL_BUILD_PROGRAM_FAILURE;
goto error;
@@ -692,7 +692,7 @@ cl_program_link(cl_context context,
goto error;
}
- p->opaque = compiler_program_new_gen_program(context->device->device_id, NULL, NULL, NULL);
+ p->opaque = compiler_program_new_gen_program(context->devices[0]->device_id, NULL, NULL, NULL);
for(i = 0; i < num_input_programs; i++) {
// if program create with llvm binary, need deserilize first to get module.
if(input_programs[i])
@@ -827,7 +827,7 @@ cl_program_compile(cl_program p,
}
}
- p->opaque = compiler_program_compile_from_source(p->ctx->device->device_id, p->source, temp_header_path,
+ p->opaque = compiler_program_compile_from_source(p->ctx->devices[0]->device_id, p->source, temp_header_path,
p->build_log_max_sz, options, p->build_log, &p->build_log_sz);
char rm_path[255]="rm ";
--
2.7.4
More information about the Beignet
mailing list