[Beignet] [PATCH 1/2] Add clEnqueueMapBuffer and clEnqueueMapImage non-blocking map support.

Yang Rong rong.r.yang at intel.com
Thu Aug 22 01:05:22 PDT 2013


There is a unsync map function drm_intel_gem_bo_map_unsynchronized in drm, that can
be used to do non-blocking map. But this function only map gtt, so force to use map
gtt for all clEnqueueMapBuffer and clEnqueueMapImage.

Signed-off-by: Yang Rong <rong.r.yang at intel.com>
---
 src/cl_api.c             | 81 +++++++++++++++++++++++++++++++++++++++++++++--
 src/cl_driver.h          |  4 +++
 src/cl_driver_defs.c     |  1 +
 src/cl_enqueue.c         | 82 +++++++-----------------------------------------
 src/cl_mem.c             | 14 +++++++--
 src/cl_mem.h             |  5 ++-
 src/intel/intel_driver.c |  1 +
 7 files changed, 113 insertions(+), 75 deletions(-)

diff --git a/src/cl_api.c b/src/cl_api.c
index 4f048ee..d4fdb7f 100644
--- a/src/cl_api.c
+++ b/src/cl_api.c
@@ -1576,6 +1576,9 @@ clEnqueueMapBuffer(cl_command_queue  command_queue,
                    cl_int *          errcode_ret)
 {
   cl_int err = CL_SUCCESS;
+  void *ptr = NULL;
+  void *mem_ptr = NULL;
+  cl_int slot = -1;
   enqueue_data *data, no_wait_data = { 0 };
 
   CHECK_QUEUE(command_queue);
@@ -1602,6 +1605,69 @@ clEnqueueMapBuffer(cl_command_queue  command_queue,
     goto error;
   }
 
+  if (!(ptr = cl_mem_map_auto(buffer, CL_FALSE))) {
+    err = CL_MAP_FAILURE;
+    goto error;
+  }
+
+  ptr = (char*)ptr + offset;
+
+  if(buffer->flags & CL_MEM_USE_HOST_PTR) {
+    assert(buffer->host_ptr);
+    //memcpy(buffer->host_ptr + offset, ptr, size);
+    mem_ptr = buffer->host_ptr + offset;
+  } else {
+    mem_ptr = ptr;
+  }
+
+  /* Record the mapped address. */
+  if (!buffer->mapped_ptr_sz) {
+    buffer->mapped_ptr_sz = 16;
+    buffer->mapped_ptr = (cl_mapped_ptr *)malloc(
+          sizeof(cl_mapped_ptr) * buffer->mapped_ptr_sz);
+    if (!buffer->mapped_ptr) {
+      cl_mem_unmap_auto (buffer);
+      err = CL_OUT_OF_HOST_MEMORY;
+      ptr = NULL;
+      goto error;
+    }
+
+    memset(buffer->mapped_ptr, 0, buffer->mapped_ptr_sz * sizeof(cl_mapped_ptr));
+    slot = 0;
+  } else {
+   int i = 0;
+    for (; i < buffer->mapped_ptr_sz; i++) {
+      if (buffer->mapped_ptr[i].ptr == NULL) {
+        slot = i;
+        break;
+      }
+   }
+
+    if (i == buffer->mapped_ptr_sz) {
+      cl_mapped_ptr *new_ptr = (cl_mapped_ptr *)malloc(
+          sizeof(cl_mapped_ptr) * buffer->mapped_ptr_sz * 2);
+      if (!new_ptr) {
+       cl_mem_unmap_auto (buffer);
+        err = CL_OUT_OF_HOST_MEMORY;
+        ptr = NULL;
+        goto error;
+      }
+      memset(new_ptr, 0, 2 * buffer->mapped_ptr_sz * sizeof(cl_mapped_ptr));
+      memcpy(new_ptr, buffer->mapped_ptr,
+             buffer->mapped_ptr_sz * sizeof(cl_mapped_ptr));
+      slot = buffer->mapped_ptr_sz;
+      buffer->mapped_ptr_sz *= 2;
+      free(buffer->mapped_ptr);
+      buffer->mapped_ptr = new_ptr;
+    }
+  }
+
+  assert(slot != -1);
+  buffer->mapped_ptr[slot].ptr = mem_ptr;
+  buffer->mapped_ptr[slot].v_ptr = ptr;
+  buffer->mapped_ptr[slot].size = size;
+  buffer->map_ref++;
+
   TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, buffer->ctx);
 
   data = &no_wait_data;
@@ -1610,6 +1676,7 @@ clEnqueueMapBuffer(cl_command_queue  command_queue,
   data->offset      = offset;
   data->size        = size;
   data->map_flags   = map_flags;
+  data->ptr         = ptr;
 
   if(handle_events(command_queue, num_events_in_wait_list, event_wait_list,
                    event, data, CL_COMMAND_READ_BUFFER) == CL_ENQUEUE_EXECUTE_IMM) {
@@ -1620,7 +1687,7 @@ clEnqueueMapBuffer(cl_command_queue  command_queue,
 error:
   if (errcode_ret)
     *errcode_ret = err;
-  return data->ptr;
+  return mem_ptr;
 }
 
 void *
@@ -1638,6 +1705,7 @@ clEnqueueMapImage(cl_command_queue   command_queue,
                   cl_int *           errcode_ret)
 {
   cl_int err = CL_SUCCESS;
+  void *ptr  = NULL;
   enqueue_data *data, no_wait_data = { 0 };
 
   CHECK_QUEUE(command_queue);
@@ -1673,6 +1741,14 @@ clEnqueueMapImage(cl_command_queue   command_queue,
     goto error;
   }
 
+  if (!(ptr = cl_mem_map_auto(image, CL_FALSE))) {
+    err = CL_MAP_FAILURE;
+    goto error;
+  }
+
+  size_t offset = image->bpp*origin[0] + image->row_pitch*origin[1] + image->slice_pitch*origin[2];
+  ptr = (char*)ptr + offset;
+
   TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, image->ctx);
 
   data = &no_wait_data;
@@ -1683,6 +1759,7 @@ clEnqueueMapImage(cl_command_queue   command_queue,
   data->row_pitch   = *image_row_pitch;
   data->slice_pitch = *image_slice_pitch;
   data->map_flags   = map_flags;
+  data->ptr         = ptr;
 
   if(handle_events(command_queue, num_events_in_wait_list, event_wait_list,
                    event, data, CL_COMMAND_READ_BUFFER) == CL_ENQUEUE_EXECUTE_IMM) {
@@ -1693,7 +1770,7 @@ clEnqueueMapImage(cl_command_queue   command_queue,
 error:
   if (errcode_ret)
     *errcode_ret = err;
-  return data->ptr; //TODO: map and unmap first
+  return ptr; //TODO: map and unmap first
 }
 
 cl_int
diff --git a/src/cl_driver.h b/src/cl_driver.h
index 1a0ec38..0ce03fe 100644
--- a/src/cl_driver.h
+++ b/src/cl_driver.h
@@ -257,6 +257,10 @@ extern cl_buffer_unmap_cb *cl_buffer_unmap;
 typedef int (cl_buffer_map_gtt_cb)(cl_buffer);
 extern cl_buffer_map_gtt_cb *cl_buffer_map_gtt;
 
+/* Map a buffer in the GTT domain, non waiting the GPU read or write*/
+typedef int (cl_buffer_map_gtt_unsync_cb)(cl_buffer);
+extern cl_buffer_map_gtt_unsync_cb *cl_buffer_map_gtt_unsync;
+
 /* Unmap a buffer in the GTT domain */
 typedef int (cl_buffer_unmap_gtt_cb)(cl_buffer);
 extern cl_buffer_unmap_gtt_cb *cl_buffer_unmap_gtt;
diff --git a/src/cl_driver_defs.c b/src/cl_driver_defs.c
index e7412de..7c4c866 100644
--- a/src/cl_driver_defs.c
+++ b/src/cl_driver_defs.c
@@ -36,6 +36,7 @@ LOCAL cl_buffer_unreference_cb *cl_buffer_unreference = NULL;
 LOCAL cl_buffer_map_cb *cl_buffer_map = NULL;
 LOCAL cl_buffer_unmap_cb *cl_buffer_unmap = NULL;
 LOCAL cl_buffer_map_gtt_cb *cl_buffer_map_gtt = NULL;
+LOCAL cl_buffer_map_gtt_unsync_cb *cl_buffer_map_gtt_unsync = NULL;
 LOCAL cl_buffer_unmap_gtt_cb *cl_buffer_unmap_gtt = NULL;
 LOCAL cl_buffer_get_virtual_cb *cl_buffer_get_virtual = NULL;
 LOCAL cl_buffer_get_size_cb *cl_buffer_get_size = NULL;
diff --git a/src/cl_enqueue.c b/src/cl_enqueue.c
index a112cc4..a1c2be9 100644
--- a/src/cl_enqueue.c
+++ b/src/cl_enqueue.c
@@ -32,7 +32,7 @@ cl_int cl_enqueue_read_buffer(enqueue_data* data)
   cl_int err = CL_SUCCESS;
   void* src_ptr;
 
-  if (!(src_ptr = cl_mem_map_auto(data->mem_obj))) {
+  if (!(src_ptr = cl_mem_map_auto(data->mem_obj, CL_TRUE))) {
     err = CL_MAP_FAILURE;
     goto error;
   }
@@ -50,7 +50,7 @@ cl_int cl_enqueue_write_buffer(enqueue_data *data)
   cl_int err = CL_SUCCESS;
   void* dst_ptr;
 
-  if (!(dst_ptr = cl_mem_map_auto(data->mem_obj))) {
+  if (!(dst_ptr = cl_mem_map_auto(data->mem_obj, CL_TRUE))) {
     err = CL_MAP_FAILURE;
     goto error;
   }
@@ -72,7 +72,7 @@ cl_int cl_enqueue_read_image(enqueue_data *data)
   const size_t* origin = data->origin;
   const size_t* region = data->region;
 
-  if (!(src_ptr = cl_mem_map_auto(image))) {
+  if (!(src_ptr = cl_mem_map_auto(image, CL_TRUE))) {
     err = CL_MAP_FAILURE;
     goto error;
   }
@@ -116,7 +116,7 @@ cl_int cl_enqueue_write_image(enqueue_data *data)
   const size_t *origin = data->origin;
   const size_t *region = data->region;
 
-  if (!(dst_ptr = cl_mem_map_auto(image))) {
+  if (!(dst_ptr = cl_mem_map_auto(image, CL_TRUE))) {
     err = CL_MAP_FAILURE;
     goto error;
   }
@@ -156,93 +156,35 @@ cl_int cl_enqueue_map_buffer(enqueue_data *data)
 
   void *ptr = NULL;
   cl_int err = CL_SUCCESS;
-  void *mem_ptr = NULL;
-  cl_int slot = -1;
   cl_mem buffer = data->mem_obj;
-
-  if (!(ptr = cl_mem_map_auto(buffer))) {
+  //because using unsync map in clEnqueueMapBuffer, so force use map_gtt here
+  if (!(ptr = cl_mem_map_gtt(buffer))) {
     err = CL_MAP_FAILURE;
+    goto error;
   }
-
   ptr = (char*)ptr + data->offset;
+  assert(data->ptr == ptr);
 
   if(buffer->flags & CL_MEM_USE_HOST_PTR) {
     assert(buffer->host_ptr);
     memcpy(buffer->host_ptr + data->offset, ptr, data->size);
-    mem_ptr = buffer->host_ptr + data->offset;
-  } else {
-    mem_ptr = ptr;
-  }
-
-  /* Record the mapped address. */
-  if (!buffer->mapped_ptr_sz) {
-    buffer->mapped_ptr_sz = 16;
-    buffer->mapped_ptr = (cl_mapped_ptr *)malloc(
-          sizeof(cl_mapped_ptr) * buffer->mapped_ptr_sz);
-    if (!buffer->mapped_ptr) {
-      cl_mem_unmap_auto (buffer);
-      err = CL_OUT_OF_HOST_MEMORY;
-      ptr = NULL;
-      goto error;
-    }
-
-    memset(buffer->mapped_ptr, 0, buffer->mapped_ptr_sz * sizeof(cl_mapped_ptr));
-    slot = 0;
-  } else {
-   int i = 0;
-    for (; i < buffer->mapped_ptr_sz; i++) {
-      if (buffer->mapped_ptr[i].ptr == NULL) {
-        slot = i;
-        break;
-      }
-   }
-
-    if (i == buffer->mapped_ptr_sz) {
-      cl_mapped_ptr *new_ptr = (cl_mapped_ptr *)malloc(
-          sizeof(cl_mapped_ptr) * buffer->mapped_ptr_sz * 2);
-      if (!new_ptr) {
-       cl_mem_unmap_auto (buffer);
-        err = CL_OUT_OF_HOST_MEMORY;
-        ptr = NULL;
-        goto error;
-      }
-      memset(new_ptr, 0, 2 * buffer->mapped_ptr_sz * sizeof(cl_mapped_ptr));
-      memcpy(new_ptr, buffer->mapped_ptr,
-             buffer->mapped_ptr_sz * sizeof(cl_mapped_ptr));
-      slot = buffer->mapped_ptr_sz;
-      buffer->mapped_ptr_sz *= 2;
-      free(buffer->mapped_ptr);
-      buffer->mapped_ptr = new_ptr;
-    }
   }
 
-  assert(slot != -1);
-  buffer->mapped_ptr[slot].ptr = mem_ptr;
-  buffer->mapped_ptr[slot].v_ptr = ptr;
-  buffer->mapped_ptr[slot].size = data->size;
-  buffer->map_ref++;
-
-  data->ptr = mem_ptr;
-
 error:
   return err;
 }
 
 cl_int cl_enqueue_map_image(enqueue_data *data)
 {
-  void *ptr = NULL;
   cl_int err = CL_SUCCESS;
-
   cl_mem image = data->mem_obj;
-  const size_t *origin = data->origin;
-
-  if (!(ptr = cl_mem_map_auto(image))) {
+  void *ptr = NULL;
+  //because using unsync map in clEnqueueMapImage, so force use map_gtt here
+  if (!(ptr = cl_mem_map_gtt(image))) {
     err = CL_MAP_FAILURE;
     goto error;
   }
-
-  size_t offset = image->bpp*origin[0] + image->row_pitch*origin[1] + image->slice_pitch*origin[2];
-  data->ptr = (char*)ptr + offset;
+  assert(data->ptr == (char*)ptr + data->offset);
 
 error:
   return err;
diff --git a/src/cl_mem.c b/src/cl_mem.c
index f794ce7..5ce25e4 100644
--- a/src/cl_mem.c
+++ b/src/cl_mem.c
@@ -291,7 +291,7 @@ cl_mem_copy_image(cl_mem image,
 		  size_t slice_pitch,
 		  void* host_ptr)
 {
-  char* dst_ptr = cl_mem_map_auto(image);
+  char* dst_ptr = cl_mem_map_auto(image, CL_TRUE);
 
   if (row_pitch == image->row_pitch &&
       (image->depth == 1 || slice_pitch == image->slice_pitch))
@@ -552,6 +552,14 @@ cl_mem_map_gtt(cl_mem mem)
   return cl_buffer_get_virtual(mem->bo);
 }
 
+LOCAL void *
+cl_mem_map_gtt_unsync(cl_mem mem)
+{
+  cl_buffer_map_gtt_unsync(mem->bo);
+  assert(cl_buffer_get_virtual(mem->bo));
+  return cl_buffer_get_virtual(mem->bo);
+}
+
 LOCAL cl_int
 cl_mem_unmap_gtt(cl_mem mem)
 {
@@ -560,8 +568,10 @@ cl_mem_unmap_gtt(cl_mem mem)
 }
 
 LOCAL void*
-cl_mem_map_auto(cl_mem mem)
+cl_mem_map_auto(cl_mem mem, cl_bool sync)
 {
+  if(sync == CL_FALSE)
+    return cl_mem_map_gtt_unsync(mem);  //drm only support map gtt unsync map
   if (mem->is_image && mem->tiling != CL_NO_TILE)
     return cl_mem_map_gtt(mem);
   else
diff --git a/src/cl_mem.h b/src/cl_mem.h
index 1b1709a..1826306 100644
--- a/src/cl_mem.h
+++ b/src/cl_mem.h
@@ -123,11 +123,14 @@ extern cl_int cl_mem_unmap(cl_mem);
 /* Directly map a memory object in GTT mode */
 extern void *cl_mem_map_gtt(cl_mem);
 
+/* Directly map a memory object in GTT mode, with out waiting gpu idle */
+extern void *cl_mem_map_gtt_unsync(cl_mem);
+
 /* Unmap a memory object in GTT mode */
 extern cl_int cl_mem_unmap_gtt(cl_mem);
 
 /* Directly map a memory object - tiled images are mapped in GTT mode */
-extern void *cl_mem_map_auto(cl_mem);
+extern void *cl_mem_map_auto(cl_mem, cl_bool);
 
 /* Unmap a memory object - tiled images are unmapped in GTT mode */
 extern cl_int cl_mem_unmap_auto(cl_mem);
diff --git a/src/intel/intel_driver.c b/src/intel/intel_driver.c
index 6c6b9fb..9959447 100644
--- a/src/intel/intel_driver.c
+++ b/src/intel/intel_driver.c
@@ -519,6 +519,7 @@ intel_setup_callbacks(void)
   cl_buffer_unmap = (cl_buffer_unmap_cb *) drm_intel_bo_unmap;
   cl_buffer_map_gtt = (cl_buffer_map_gtt_cb *) drm_intel_gem_bo_map_gtt;
   cl_buffer_unmap_gtt = (cl_buffer_unmap_gtt_cb *) drm_intel_gem_bo_unmap_gtt;
+  cl_buffer_map_gtt_unsync = (cl_buffer_map_gtt_unsync_cb *) drm_intel_gem_bo_map_unsynchronized;
   cl_buffer_get_virtual = (cl_buffer_get_virtual_cb *) drm_intel_bo_get_virtual;
   cl_buffer_get_size = (cl_buffer_get_size_cb *) drm_intel_bo_get_size;
   cl_buffer_pin = (cl_buffer_pin_cb *) drm_intel_bo_pin;
-- 
1.8.1.2



More information about the Beignet mailing list