[Beignet] [PATCH] Add multi devices support in context.

junyan.he at inbox.com junyan.he at inbox.com
Thu Nov 10 06:45:25 UTC 2016


From: Junyan He <junyan.he at intel.com>

In the future there may be more than one device on the platform,
so we need to support multiple devices within one context.

Signed-off-by: Junyan He <junyan.he at intel.com>
---
 src/cl_api.c                | 37 +++++++++++++++++++++----------------
 src/cl_api_command_queue.c  |  2 +-
 src/cl_api_context.c        |  6 +++---
 src/cl_command_queue.h      |  1 +
 src/cl_command_queue_gen7.c | 12 ++++++------
 src/cl_context.c            | 42 ++++++++++++++++++++++++++++++++++++------
 src/cl_context.h            |  5 +++--
 src/cl_device_id.c          | 18 +++++++++---------
 src/cl_kernel.c             |  2 +-
 src/cl_mem.c                | 38 +++++++++++++++++++-------------------
 src/cl_program.c            | 28 ++++++++++++++--------------
 11 files changed, 114 insertions(+), 77 deletions(-)

diff --git a/src/cl_api.c b/src/cl_api.c
index 5ed8c99..93d1a16 100644
--- a/src/cl_api.c
+++ b/src/cl_api.c
@@ -85,13 +85,6 @@ cl_check_device_type(cl_device_type device_type)
   return CL_SUCCESS;
 }
 
-static cl_int
-cl_device_id_is_ok(const cl_device_id device)
-{
-  if(UNLIKELY(device == NULL)) return CL_FALSE;
-  return device != cl_get_gt_device() ? CL_FALSE : CL_TRUE;
-}
-
 cl_int
 clGetPlatformIDs(cl_uint          num_entries,
                  cl_platform_id * platforms,
@@ -179,7 +172,10 @@ clCreateCommandQueue(cl_context                   context,
   cl_int err = CL_SUCCESS;
   CHECK_CONTEXT (context);
 
-  INVALID_DEVICE_IF (device != context->device);
+  err = cl_devices_list_include_check(context->device_num, context->devices, 1, &device);
+  if (err)
+    goto error;
+
   INVALID_VALUE_IF (properties & ~(CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_PROFILING_ENABLE));
 
   if(properties & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) {/*not supported now.*/
@@ -206,7 +202,7 @@ clCreateCommandQueueWithProperties(cl_context                         context,
   cl_uint queue_sz = 0xFFFFFFFF;
   CHECK_CONTEXT (context);
 
-  INVALID_DEVICE_IF (device != context->device);
+  INVALID_DEVICE_IF (device != context->devices[0]);
   if(properties)
   {
     cl_ulong que_type;
@@ -1136,7 +1132,10 @@ clBuildProgram(cl_program            program,
   /* Everything is easy. We only support one device anyway */
   if (num_devices != 0) {
     assert(program->ctx);
-    INVALID_DEVICE_IF (device_list[0] != program->ctx->device);
+    err = cl_devices_list_include_check(program->ctx->device_num,
+                                        program->ctx->devices, num_devices, device_list);
+    if (err)
+      goto error;
   }
 
   assert(program->source_type == FROM_LLVM ||
@@ -1178,7 +1177,10 @@ clCompileProgram(cl_program            program ,
   /* Everything is easy. We only support one device anyway */
   if (num_devices != 0) {
     assert(program->ctx);
-    INVALID_DEVICE_IF (device_list[0] != program->ctx->device);
+    err = cl_devices_list_include_check(program->ctx->device_num,
+                                        program->ctx->devices, num_devices, device_list);
+    if (err)
+      goto error;
   }
 
   /* TODO support create program from binary */
@@ -1262,11 +1264,11 @@ clGetProgramInfo(cl_program       program,
     cl_context context = program->ctx;
     FILL_GETINFO_RET (cl_context, 1, &context, CL_SUCCESS);
   } else if (param_name == CL_PROGRAM_NUM_DEVICES) {
-    cl_uint num_dev = 1; // Just 1 dev now.
+    cl_uint num_dev = program->ctx->device_num;
     FILL_GETINFO_RET (cl_uint, 1, &num_dev, CL_SUCCESS);
   } else if (param_name == CL_PROGRAM_DEVICES) {
-    cl_device_id dev_id = program->ctx->device;
-    FILL_GETINFO_RET (cl_device_id, 1, &dev_id, CL_SUCCESS);
+    cl_device_id* dev_id = program->ctx->devices;
+    FILL_GETINFO_RET (cl_device_id, program->ctx->device_num, dev_id, CL_SUCCESS);
   } else if (param_name == CL_PROGRAM_NUM_KERNELS) {
     cl_uint kernels_num = program->ker_n;
     FILL_GETINFO_RET (cl_uint, 1, &kernels_num, CL_SUCCESS);
@@ -1341,7 +1343,10 @@ clGetProgramBuildInfo(cl_program             program,
   char * ret_str = "";
 
   CHECK_PROGRAM (program);
-  INVALID_DEVICE_IF (device != program->ctx->device);
+  err = cl_devices_list_include_check(program->ctx->device_num,
+                                      program->ctx->devices, 1, &device);
+  if (err != CL_SUCCESS)
+    return err;
 
   if (param_name == CL_PROGRAM_BUILD_STATUS) {
     FILL_GETINFO_RET (cl_build_status, 1, &program->build_status, CL_SUCCESS);
@@ -1669,7 +1674,7 @@ cl_mem clCreatePipe (cl_context context,
     err = CL_INVALID_PIPE_SIZE;
     goto error;
   }
-  if ((err = cl_get_device_info(context->device,
+  if ((err = cl_get_device_info(context->devices[0],
                                 CL_DEVICE_PIPE_MAX_PACKET_SIZE,
                                 sizeof(device_max_size),
                                 &device_max_size,
diff --git a/src/cl_api_command_queue.c b/src/cl_api_command_queue.c
index 4ddebe3..149d445 100644
--- a/src/cl_api_command_queue.c
+++ b/src/cl_api_command_queue.c
@@ -37,7 +37,7 @@ clGetCommandQueueInfo(cl_command_queue command_queue,
     src_ptr = &command_queue->ctx;
     src_size = sizeof(cl_context);
   } else if (param_name == CL_QUEUE_DEVICE) {
-    src_ptr = &command_queue->ctx->device;
+    src_ptr = &command_queue->device;
     src_size = sizeof(cl_device_id);
   } else if (param_name == CL_QUEUE_REFERENCE_COUNT) {
     cl_int ref = CL_OBJECT_GET_REF(command_queue);
diff --git a/src/cl_api_context.c b/src/cl_api_context.c
index 2160950..a77c5a2 100644
--- a/src/cl_api_context.c
+++ b/src/cl_api_context.c
@@ -144,10 +144,10 @@ clGetContextInfo(cl_context context,
   }
 
   if (param_name == CL_CONTEXT_DEVICES) {
-    src_ptr = &context->device;
-    src_size = sizeof(cl_device_id);
+    src_ptr = context->devices;
+    src_size = sizeof(cl_device_id) * context->device_num;
   } else if (param_name == CL_CONTEXT_NUM_DEVICES) {
-    cl_uint n = 1;
+    cl_uint n = context->device_num;
     src_ptr = &n;
     src_size = sizeof(cl_uint);
   } else if (param_name == CL_CONTEXT_REFERENCE_COUNT) {
diff --git a/src/cl_command_queue.h b/src/cl_command_queue.h
index 9eb1b09..c9e0d64 100644
--- a/src/cl_command_queue.h
+++ b/src/cl_command_queue.h
@@ -44,6 +44,7 @@ struct _cl_command_queue {
   _cl_base_object base;
   _cl_command_queue_enqueue_worker worker;
   cl_context ctx;                      /* Its parent context */
+  cl_device_id device;                 /* Its device */
   cl_event* barrier_events;            /* Point to array of non-complete user events that block this command queue */
   cl_int barrier_events_num;           /* Number of Non-complete user events */
   cl_int barrier_events_size;          /* The size of array that wait_events point to */
diff --git a/src/cl_command_queue_gen7.c b/src/cl_command_queue_gen7.c
index 4487360..f6f6a2a 100644
--- a/src/cl_command_queue_gen7.c
+++ b/src/cl_command_queue_gen7.c
@@ -273,7 +273,7 @@ static void
 cl_bind_stack(cl_gpgpu gpgpu, cl_kernel ker)
 {
   cl_context ctx = ker->program->ctx;
-  cl_device_id device = ctx->device;
+  cl_device_id device = ctx->devices[0];
   const int32_t per_lane_stack_sz = ker->stack_size;
   const int32_t value = GBE_CURBE_EXTRA_ARGUMENT;
   const int32_t sub_value = GBE_STACK_BUFFER;
@@ -289,7 +289,7 @@ cl_bind_stack(cl_gpgpu gpgpu, cl_kernel ker)
    */
   assert(offset_stack_buffer >= 0);
   stack_sz *= interp_kernel_get_simd_width(ker->opaque);
-  stack_sz *= device->max_compute_unit * ctx->device->max_thread_per_unit;
+  stack_sz *= device->max_compute_unit * ctx->devices[0]->max_thread_per_unit;
 
   /* for some hardware, part of EUs are disabled with EU id reserved,
    * it makes the active EU id larger than count of EUs within a subslice,
@@ -394,14 +394,14 @@ cl_command_queue_ND_range_gen7(cl_command_queue queue,
   kernel.thread_n = thread_n = (local_sz + simd_sz - 1) / simd_sz;
   kernel.curbe_sz = cst_sz;
 
-  if (scratch_sz > ker->program->ctx->device->scratch_mem_size) {
+  if (scratch_sz > ker->program->ctx->devices[0]->scratch_mem_size) {
     DEBUGP(DL_ERROR, "Out of scratch memory %d.", scratch_sz);
     return CL_OUT_OF_RESOURCES;
   }
   /* Curbe step 1: fill the constant urb buffer data shared by all threads */
   if (ker->curbe) {
     kernel.slm_sz = cl_curbe_fill(ker, work_dim, global_wk_off, global_wk_sz,local_wk_sz_use ,local_wk_sz, thread_n);
-    if (kernel.slm_sz > ker->program->ctx->device->local_mem_size) {
+    if (kernel.slm_sz > ker->program->ctx->devices[0]->local_mem_size) {
       DEBUGP(DL_ERROR, "Out of shared local memory %d.", kernel.slm_sz);
       return CL_OUT_OF_RESOURCES;
     }
@@ -412,9 +412,9 @@ cl_command_queue_ND_range_gen7(cl_command_queue queue,
 
   /* Setup the kernel */
   if (queue->props & CL_QUEUE_PROFILING_ENABLE)
-    err = cl_gpgpu_state_init(gpgpu, ctx->device->max_compute_unit * ctx->device->max_thread_per_unit, cst_sz / 32, 1);
+    err = cl_gpgpu_state_init(gpgpu, ctx->devices[0]->max_compute_unit * ctx->devices[0]->max_thread_per_unit, cst_sz / 32, 1);
   else
-    err = cl_gpgpu_state_init(gpgpu, ctx->device->max_compute_unit * ctx->device->max_thread_per_unit, cst_sz / 32, 0);
+    err = cl_gpgpu_state_init(gpgpu, ctx->devices[0]->max_compute_unit * ctx->devices[0]->max_thread_per_unit, cst_sz / 32, 0);
   if (err != 0)
     goto error;
   printf_num = interp_get_printf_num(printf_info);
diff --git a/src/cl_context.c b/src/cl_context.c
index 4417e3b..922e199 100644
--- a/src/cl_context.c
+++ b/src/cl_context.c
@@ -268,6 +268,10 @@ cl_create_context(const cl_context_properties *  properties,
   cl_context ctx = NULL;
   cl_int err = CL_SUCCESS;
   cl_uint prop_len = 0;
+  cl_uint dev_num = 0;
+  cl_device_id* all_dev = NULL;
+  cl_uint i, j;
+
   /* XXX */
   FATAL_IF (num_devices != 1, "Only one device is supported");
 
@@ -275,8 +279,31 @@ cl_create_context(const cl_context_properties *  properties,
   if (UNLIKELY(((err = cl_context_properties_process(properties, &props, &prop_len)) != CL_SUCCESS)))
     goto error;
 
+  /* Filter out repeated device. */
+  assert(num_devices > 0);
+  all_dev = cl_calloc(num_devices, sizeof(cl_device_id));
+  if (all_dev == NULL) {
+    *errcode_ret = CL_OUT_OF_HOST_MEMORY;
+    return NULL;
+  }
+  for (i = 0; i < num_devices; i++) {
+    for (j = 0; j < i; j++) {
+      if (devices[j] == devices[i]) {
+        break;
+      }
+    }
+
+    if (j != i) { // Find some duplicated one.
+      continue;
+    }
+
+    all_dev[dev_num] = devices[i];
+    dev_num++;
+  }
+  assert(dev_num == 1); // TODO: multi devices later.
+
   /* We are good */
-  if (UNLIKELY((ctx = cl_context_new(&props)) == NULL)) {
+  if (UNLIKELY((ctx = cl_context_new(&props, dev_num, all_dev)) == NULL)) {
     err = CL_OUT_OF_HOST_MEMORY;
     goto error;
   }
@@ -286,13 +313,13 @@ cl_create_context(const cl_context_properties *  properties,
     memcpy(ctx->prop_user, properties, sizeof(cl_context_properties)*prop_len);
   }
   ctx->prop_len = prop_len;
-  /* Attach the device to the context */
-  ctx->device = *devices;
+  /* cl_context_new will use all_dev. */
+  all_dev = NULL;
 
   /* Save the user callback and user data*/
   ctx->pfn_notify = pfn_notify;
   ctx->user_data = user_data;
-  cl_driver_set_atomic_flag(ctx->drv, ctx->device->atomic_test_result);
+  cl_driver_set_atomic_flag(ctx->drv, ctx->devices[0]->atomic_test_result);
 
 exit:
   if (errcode_ret != NULL)
@@ -305,12 +332,14 @@ error:
 }
 
 LOCAL cl_context
-cl_context_new(struct _cl_context_prop *props)
+cl_context_new(struct _cl_context_prop *props, cl_uint dev_num, cl_device_id* all_dev)
 {
   cl_context ctx = NULL;
 
   TRY_ALLOC_NO_ERR (ctx, CALLOC(struct _cl_context));
   CL_OBJECT_INIT_BASE(ctx, CL_OBJECT_CONTEXT_MAGIC);
+  ctx->devices = all_dev;
+  ctx->device_num = dev_num;
   list_init(&ctx->queues);
   list_init(&ctx->mem_objects);
   list_init(&ctx->samplers);
@@ -387,6 +416,7 @@ cl_context_create_queue(cl_context ctx,
   /* We create the command queue and store it in the context list of queues */
   TRY_ALLOC (queue, cl_command_queue_new(ctx));
   queue->props = properties;
+  queue->device = device;
 
 exit:
   if (errcode_ret)
@@ -414,7 +444,7 @@ cl_context_get_static_kernel_from_bin(cl_context ctx, cl_int index,
 
   CL_OBJECT_TAKE_OWNERSHIP(ctx, 1);
   if (ctx->internal_prgs[index] == NULL) {
-    ctx->internal_prgs[index] = cl_program_create_from_binary(ctx, 1, &ctx->device,
+    ctx->internal_prgs[index] = cl_program_create_from_binary(ctx, 1, &ctx->devices[0],
       &size, (const unsigned char **)&str_kernel, &binary_status, &ret);
 
     if (!ctx->internal_prgs[index]) {
diff --git a/src/cl_context.h b/src/cl_context.h
index 268e7b9..366fa94 100644
--- a/src/cl_context.h
+++ b/src/cl_context.h
@@ -101,7 +101,8 @@ struct _cl_context_prop {
 struct _cl_context {
   _cl_base_object base;
   cl_driver drv;                    /* Handles HW or simulator */
-  cl_device_id device;              /* All information about the GPU device */
+  cl_device_id* devices;            /* All devices belong to this context */
+  cl_uint device_num;               /* Devices number of this context */
   list_head queues;                 /* All command queues currently allocated */
   cl_uint queue_num;                /* All queue number currently allocated */
   cl_uint queue_cookie;             /* Cookie will change every time we change queue list. */
@@ -158,7 +159,7 @@ extern cl_context cl_create_context(const cl_context_properties*,
                                     cl_int*);
 
 /* Allocate and initialize a context */
-extern cl_context cl_context_new(struct _cl_context_prop *);
+extern cl_context cl_context_new(struct _cl_context_prop *prop, cl_uint dev_num, cl_device_id* all_dev);
 
 /* Destroy and deallocate a context */
 extern void cl_context_delete(cl_context);
diff --git a/src/cl_device_id.c b/src/cl_device_id.c
index 2bc172c..f73d9d2 100644
--- a/src/cl_device_id.c
+++ b/src/cl_device_id.c
@@ -1408,22 +1408,22 @@ cl_get_kernel_max_wg_sz(cl_kernel kernel)
 {
   size_t work_group_size, thread_cnt;
   int simd_width = interp_kernel_get_simd_width(kernel->opaque);
-  int device_id = kernel->program->ctx->device->device_id;
+  int device_id = kernel->program->ctx->devices[0]->device_id;
   if (!interp_kernel_use_slm(kernel->opaque)) {
     if (!IS_BAYTRAIL_T(device_id) || simd_width == 16)
       work_group_size = simd_width * 64;
     else
-      work_group_size = kernel->program->ctx->device->max_compute_unit *
-                        kernel->program->ctx->device->max_thread_per_unit * simd_width;
+      work_group_size = kernel->program->ctx->devices[0]->max_compute_unit *
+                        kernel->program->ctx->devices[0]->max_thread_per_unit * simd_width;
   } else {
-    thread_cnt = kernel->program->ctx->device->max_compute_unit *
-                 kernel->program->ctx->device->max_thread_per_unit / kernel->program->ctx->device->sub_slice_count;
+    thread_cnt = kernel->program->ctx->devices[0]->max_compute_unit *
+                 kernel->program->ctx->devices[0]->max_thread_per_unit / kernel->program->ctx->devices[0]->sub_slice_count;
     if(thread_cnt > 64)
       thread_cnt = 64;
     work_group_size = thread_cnt * simd_width;
   }
-  if(work_group_size > kernel->program->ctx->device->max_work_group_size)
-    work_group_size = kernel->program->ctx->device->max_work_group_size;
+  if(work_group_size > kernel->program->ctx->devices[0]->max_work_group_size)
+    work_group_size = kernel->program->ctx->devices[0]->max_work_group_size;
   return work_group_size;
 }
 
@@ -1439,7 +1439,7 @@ cl_get_kernel_workgroup_info(cl_kernel kernel,
   int dimension = 0;
   CHECK_KERNEL(kernel);
   if (device == NULL)
-    device = kernel->program->ctx->device;
+    device = kernel->program->ctx->devices[0];
   if (UNLIKELY(is_gen_device(device) == CL_FALSE))
     return CL_INVALID_DEVICE;
 
@@ -1511,7 +1511,7 @@ cl_get_kernel_subgroup_info(cl_kernel kernel,
 {
   int err = CL_SUCCESS;
   if(device != NULL)
-    if (kernel->program->ctx->device != device)
+    if (kernel->program->ctx->devices[0] != device)
       return CL_INVALID_DEVICE;
 
   CHECK_KERNEL(kernel);
diff --git a/src/cl_kernel.c b/src/cl_kernel.c
index 49bbaf0..4366147 100644
--- a/src/cl_kernel.c
+++ b/src/cl_kernel.c
@@ -453,7 +453,7 @@ cl_kernel_setup(cl_kernel k, gbe_kernel opaque)
   /* Get image data & size */
   k->image_sz = interp_kernel_get_image_size(k->opaque);
   assert(k->sampler_sz <= GEN_MAX_SURFACES);
-  assert(k->image_sz <= ctx->device->max_read_image_args + ctx->device->max_write_image_args);
+  assert(k->image_sz <= ctx->devices[0]->max_read_image_args + ctx->devices[0]->max_write_image_args);
   if (k->image_sz > 0) {
     TRY_ALLOC_NO_ERR(k->images, cl_calloc(k->image_sz, sizeof(k->images[0])));
     interp_kernel_get_image_data(k->opaque, k->images);
diff --git a/src/cl_mem.c b/src/cl_mem.c
index e662d8f..175b96d 100644
--- a/src/cl_mem.c
+++ b/src/cl_mem.c
@@ -333,10 +333,10 @@ cl_mem_allocate(enum cl_mem_type type,
 
 #ifdef HAS_USERPTR
     uint8_t bufCreated = 0;
-    if (ctx->device->host_unified_memory) {
+    if (ctx->devices[0]->host_unified_memory) {
       int page_size = getpagesize();
       int cacheline_size = 0;
-      cl_get_device_info(ctx->device, CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, sizeof(cacheline_size), &cacheline_size, NULL);
+      cl_get_device_info(ctx->devices[0], CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, sizeof(cacheline_size), &cacheline_size, NULL);
 
       if (type == CL_MEM_BUFFER_TYPE) {
         if (flags & CL_MEM_USE_HOST_PTR) {
@@ -390,7 +390,7 @@ cl_mem_allocate(enum cl_mem_type type,
       // if create image from USE_HOST_PTR buffer, the buffer's base address need be aligned.
       if(buffer->is_userptr) {
         int base_alignement = 0;
-        cl_get_device_info(ctx->device, CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT, sizeof(base_alignement), &base_alignement, NULL);
+        cl_get_device_info(ctx->devices[0], CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT, sizeof(base_alignement), &base_alignement, NULL);
         if(ALIGN((unsigned long)buffer->host_ptr, base_alignement) != (unsigned long)buffer->host_ptr) {
           err = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR;
           goto error;
@@ -507,7 +507,7 @@ cl_mem_new_buffer(cl_context ctx,
     goto error;
   }
 
-  if ((err = cl_get_device_info(ctx->device,
+  if ((err = cl_get_device_info(ctx->devices[0],
                                 CL_DEVICE_MAX_MEM_ALLOC_SIZE,
                                 sizeof(max_mem_size),
                                 &max_mem_size,
@@ -609,7 +609,7 @@ cl_mem_new_sub_buffer(cl_mem buffer,
     goto error;
   }
 
-  if (info->origin & (buffer->ctx->device->mem_base_addr_align / 8 - 1)) {
+  if (info->origin & (buffer->ctx->devices[0]->mem_base_addr_align / 8 - 1)) {
     err = CL_MISALIGNED_SUB_BUFFER_OFFSET;
     goto error;
   }
@@ -743,7 +743,7 @@ void* cl_mem_svm_allocate(cl_context ctx, cl_svm_mem_flags flags,
   if(UNLIKELY(alignment & (alignment - 1)))
     return NULL;
 
-  if ((err = cl_get_device_info(ctx->device,
+  if ((err = cl_get_device_info(ctx->devices[0],
                                  CL_DEVICE_MAX_MEM_ALLOC_SIZE,
                                  sizeof(max_mem_size),
                                  &max_mem_size,
@@ -979,7 +979,7 @@ _cl_mem_new_image(cl_context ctx,
 
     h = 1;
     depth = 1;
-    if (UNLIKELY(w > ctx->device->image2d_max_width)) DO_IMAGE_ERROR;
+    if (UNLIKELY(w > ctx->devices[0]->image2d_max_width)) DO_IMAGE_ERROR;
     if (UNLIKELY(data && min_pitch > pitch)) DO_IMAGE_ERROR;
     if (UNLIKELY(data && (slice_pitch % pitch != 0))) DO_IMAGE_ERROR;
     if (UNLIKELY(!data && pitch != 0)) DO_IMAGE_ERROR;
@@ -989,11 +989,11 @@ _cl_mem_new_image(cl_context ctx,
              image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER) {
 
     if (image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER) {
-      if (UNLIKELY(w > ctx->device->image_mem_size)) DO_IMAGE_ERROR;
+      if (UNLIKELY(w > ctx->devices[0]->image_mem_size)) DO_IMAGE_ERROR;
       /* This is an image1d buffer which exceeds normal image size restrication
          We have to use a 2D image to simulate this 1D image. */
-      h = (w + ctx->device->image2d_max_width - 1) / ctx->device->image2d_max_width;
-      w = w > ctx->device->image2d_max_width ? ctx->device->image2d_max_width : w;
+      h = (w + ctx->devices[0]->image2d_max_width - 1) / ctx->devices[0]->image2d_max_width;
+      w = w > ctx->devices[0]->image2d_max_width ? ctx->devices[0]->image2d_max_width : w;
       tiling = CL_NO_TILE;
     } else if(image_type == CL_MEM_OBJECT_IMAGE2D && buffer != NULL) {
       tiling = CL_NO_TILE;
@@ -1006,8 +1006,8 @@ _cl_mem_new_image(cl_context ctx,
     if (data && pitch == 0)
       pitch = min_pitch;
 
-    if (UNLIKELY(w > ctx->device->image2d_max_width)) DO_IMAGE_ERROR;
-    if (UNLIKELY(h > ctx->device->image2d_max_height)) DO_IMAGE_ERROR;
+    if (UNLIKELY(w > ctx->devices[0]->image2d_max_width)) DO_IMAGE_ERROR;
+    if (UNLIKELY(h > ctx->devices[0]->image2d_max_height)) DO_IMAGE_ERROR;
     if (UNLIKELY(data && min_pitch > pitch)) DO_IMAGE_ERROR;
     if (UNLIKELY(!data && pitch != 0 && buffer == NULL)) DO_IMAGE_ERROR;
 
@@ -1027,11 +1027,11 @@ _cl_mem_new_image(cl_context ctx,
     size_t min_slice_pitch = pitch * h;
     if (data && slice_pitch == 0)
       slice_pitch = min_slice_pitch;
-    if (UNLIKELY(w > ctx->device->image3d_max_width)) DO_IMAGE_ERROR;
-    if (UNLIKELY(h > ctx->device->image3d_max_height)) DO_IMAGE_ERROR;
+    if (UNLIKELY(w > ctx->devices[0]->image3d_max_width)) DO_IMAGE_ERROR;
+    if (UNLIKELY(h > ctx->devices[0]->image3d_max_height)) DO_IMAGE_ERROR;
     if (image_type == CL_MEM_OBJECT_IMAGE3D &&
-       (UNLIKELY(depth > ctx->device->image3d_max_depth))) DO_IMAGE_ERROR
-    else if (UNLIKELY(depth > ctx->device->image_max_array_size)) DO_IMAGE_ERROR;
+       (UNLIKELY(depth > ctx->devices[0]->image3d_max_depth))) DO_IMAGE_ERROR
+    else if (UNLIKELY(depth > ctx->devices[0]->image_max_array_size)) DO_IMAGE_ERROR;
     if (UNLIKELY(data && min_pitch > pitch)) DO_IMAGE_ERROR;
     if (UNLIKELY(data && min_slice_pitch > slice_pitch)) DO_IMAGE_ERROR;
     if (UNLIKELY(!data && pitch != 0)) DO_IMAGE_ERROR;
@@ -1043,9 +1043,9 @@ _cl_mem_new_image(cl_context ctx,
 #undef DO_IMAGE_ERROR
 
   uint8_t enableUserptr = 0;
-  if (enable_true_hostptr && ctx->device->host_unified_memory && data != NULL && (flags & CL_MEM_USE_HOST_PTR)) {
+  if (enable_true_hostptr && ctx->devices[0]->host_unified_memory && data != NULL && (flags & CL_MEM_USE_HOST_PTR)) {
     int cacheline_size = 0;
-    cl_get_device_info(ctx->device, CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, sizeof(cacheline_size), &cacheline_size, NULL);
+    cl_get_device_info(ctx->devices[0], CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, sizeof(cacheline_size), &cacheline_size, NULL);
     if (ALIGN((unsigned long)data, cacheline_size) == (unsigned long)data &&
         ALIGN(h, cl_buffer_get_tiling_align(ctx, CL_NO_TILE, 1)) == h &&
         ALIGN(h * pitch * depth, cacheline_size) == h * pitch * depth && //h and pitch should same as aligned_h and aligned_pitch if enable userptr
@@ -1222,7 +1222,7 @@ _cl_mem_new_image_from_buffer(cl_context ctx,
     goto error;
   }
 
-  if ((err = cl_get_device_info(ctx->device,
+  if ((err = cl_get_device_info(ctx->devices[0],
                                 CL_DEVICE_IMAGE_MAX_BUFFER_SIZE,
                                 sizeof(max_size),
                                 &max_size,
diff --git a/src/cl_program.c b/src/cl_program.c
index 3be31bd..2e0b5ba 100644
--- a/src/cl_program.c
+++ b/src/cl_program.c
@@ -268,7 +268,7 @@ cl_program_create_from_binary(cl_context             ctx,
   assert(ctx);
   INVALID_DEVICE_IF (num_devices != 1);
   INVALID_DEVICE_IF (devices == NULL);
-  INVALID_DEVICE_IF (devices[0] != ctx->device);
+  INVALID_DEVICE_IF (devices[0] != ctx->devices[0]);
   INVALID_VALUE_IF (binaries == NULL);
   INVALID_VALUE_IF (lengths == NULL);
 
@@ -305,7 +305,7 @@ cl_program_create_from_binary(cl_context             ctx,
     TRY_ALLOC(typed_binary, cl_calloc(lengths[0]+1, sizeof(char)));
     memcpy(typed_binary+1, binaries[0], lengths[0]);
     *typed_binary = 1;
-    program->opaque = compiler_program_new_from_llvm_binary(program->ctx->device->device_id, typed_binary, program->binary_sz+1);
+    program->opaque = compiler_program_new_from_llvm_binary(program->ctx->devices[0]->device_id, typed_binary, program->binary_sz+1);
     cl_free(typed_binary);
     if (UNLIKELY(program->opaque == NULL)) {
       err = CL_INVALID_PROGRAM;
@@ -323,7 +323,7 @@ cl_program_create_from_binary(cl_context             ctx,
       err= CL_INVALID_BINARY;
       goto error;
     }
-    program->opaque = compiler_program_new_from_llvm_binary(program->ctx->device->device_id, program->binary, program->binary_sz);
+    program->opaque = compiler_program_new_from_llvm_binary(program->ctx->devices[0]->device_id, program->binary, program->binary_sz);
 
     if (UNLIKELY(program->opaque == NULL)) {
       err = CL_INVALID_PROGRAM;
@@ -332,7 +332,7 @@ cl_program_create_from_binary(cl_context             ctx,
     program->source_type = FROM_LLVM;
   }
   else if (isGenBinary((unsigned char*)program->binary)) {
-    program->opaque = interp_program_new_from_binary(program->ctx->device->device_id, program->binary, program->binary_sz);
+    program->opaque = interp_program_new_from_binary(program->ctx->devices[0]->device_id, program->binary, program->binary_sz);
     if (UNLIKELY(program->opaque == NULL)) {
       err = CL_INVALID_PROGRAM;
       goto error;
@@ -374,7 +374,7 @@ cl_program_create_with_built_in_kernles(cl_context     ctx,
   assert(ctx);
   INVALID_DEVICE_IF (num_devices != 1);
   INVALID_DEVICE_IF (devices == NULL);
-  INVALID_DEVICE_IF (devices[0] != ctx->device);
+  INVALID_DEVICE_IF (devices[0] != ctx->devices[0]);
 
   cl_int binary_status = CL_SUCCESS;
   extern char cl_internal_built_in_kernel_str[];
@@ -382,7 +382,7 @@ cl_program_create_with_built_in_kernles(cl_context     ctx,
   char* p_built_in_kernel_str =cl_internal_built_in_kernel_str;
 
   ctx->built_in_prgs = cl_program_create_from_binary(ctx, 1,
-                                                          &ctx->device,
+                                                          &ctx->devices[0],
                                                           (size_t*)&cl_internal_built_in_kernel_str_size,
                                                           (const unsigned char **)&p_built_in_kernel_str,
                                                           &binary_status, &err);
@@ -408,7 +408,7 @@ cl_program_create_with_built_in_kernles(cl_context     ctx,
 
   kernel = strtok_r( local_kernel_names, delims , &saveptr);
   while( kernel != NULL ) {
-    matched_kernel = strstr(ctx->device->built_in_kernels, kernel);
+    matched_kernel = strstr(ctx->devices[0]->built_in_kernels, kernel);
     if(matched_kernel){
       for (i = 0; i < ctx->built_in_prgs->ker_n; ++i) {
         assert(ctx->built_in_prgs->ker[i]);
@@ -448,7 +448,7 @@ cl_program_create_from_llvm(cl_context ctx,
   assert(ctx);
   INVALID_DEVICE_IF (num_devices != 1);
   INVALID_DEVICE_IF (devices == NULL);
-  INVALID_DEVICE_IF (devices[0] != ctx->device);
+  INVALID_DEVICE_IF (devices[0] != ctx->devices[0]);
   INVALID_VALUE_IF (file_name == NULL);
 
   program = cl_program_new(ctx);
@@ -457,7 +457,7 @@ cl_program_create_from_llvm(cl_context ctx,
       goto error;
   }
 
-  program->opaque = compiler_program_new_from_llvm(ctx->device->device_id, file_name, NULL, NULL, NULL, program->build_log_max_sz, program->build_log, &program->build_log_sz, 1, NULL);
+  program->opaque = compiler_program_new_from_llvm(ctx->devices[0]->device_id, file_name, NULL, NULL, NULL, program->build_log_max_sz, program->build_log, &program->build_log_sz, 1, NULL);
   if (UNLIKELY(program->opaque == NULL)) {
     err = CL_INVALID_PROGRAM;
     goto error;
@@ -554,7 +554,7 @@ static int check_cl_version_option(cl_program p, const char* options) {
 
     ver1 = (s[10] - '0') * 10 + (s[12] - '0');
 
-    if (cl_get_device_info(p->ctx->device, CL_DEVICE_OPENCL_C_VERSION, sizeof(version_str),
+    if (cl_get_device_info(p->ctx->devices[0], CL_DEVICE_OPENCL_C_VERSION, sizeof(version_str),
                                   version_str, NULL) != CL_SUCCESS)
       return 0;
 
@@ -622,7 +622,7 @@ cl_program_build(cl_program p, const char *options)
       goto error;
     }
 
-    p->opaque = compiler_program_new_from_source(p->ctx->device->device_id, p->source, p->build_log_max_sz, options, p->build_log, &p->build_log_sz);
+    p->opaque = compiler_program_new_from_source(p->ctx->devices[0]->device_id, p->source, p->build_log_max_sz, options, p->build_log, &p->build_log_sz);
     if (UNLIKELY(p->opaque == NULL)) {
       if (p->build_log_sz > 0 && strstr(p->build_log, "error: error reading 'options'"))
         err = CL_INVALID_BUILD_OPTIONS;
@@ -650,7 +650,7 @@ cl_program_build(cl_program p, const char *options)
     /* Create all the kernels */
     TRY (cl_program_load_gen_program, p);
   } else if (p->source_type == FROM_BINARY && p->binary_type != CL_PROGRAM_BINARY_TYPE_EXECUTABLE) {
-    p->opaque = interp_program_new_from_binary(p->ctx->device->device_id, p->binary, p->binary_sz);
+    p->opaque = interp_program_new_from_binary(p->ctx->devices[0]->device_id, p->binary, p->binary_sz);
     if (UNLIKELY(p->opaque == NULL)) {
       err = CL_BUILD_PROGRAM_FAILURE;
       goto error;
@@ -745,7 +745,7 @@ cl_program_link(cl_context            context,
     goto error;
   }
 
-  p->opaque = compiler_program_new_gen_program(context->device->device_id, NULL, NULL, NULL);
+  p->opaque = compiler_program_new_gen_program(context->devices[0]->device_id, NULL, NULL, NULL);
   for(i = 0; i < num_input_programs; i++) {
     // if program create with llvm binary, need deserilize first to get module.
     if(input_programs[i])
@@ -884,7 +884,7 @@ cl_program_compile(cl_program            p,
       }
     }
 
-    p->opaque = compiler_program_compile_from_source(p->ctx->device->device_id, p->source, temp_header_path,
+    p->opaque = compiler_program_compile_from_source(p->ctx->devices[0]->device_id, p->source, temp_header_path,
         p->build_log_max_sz, options, p->build_log, &p->build_log_sz);
 
     char rm_path[255]="rm ";
-- 
2.7.4





More information about the Beignet mailing list