[Beignet] [PATCH 05/19] Runtime: fix fill image event assert and some SVM rebase errors.
Yang Rong
rong.r.yang at intel.com
Mon Nov 28 11:32:31 UTC 2016
Pass the enqueue event through cl_image_fill to cl_command_queue_ND_range so clEnqueueFillImage no longer hits the event assert, extend the command-type assert in cl_event_new to cover the SVM command types, and fix the SVM paths in cl_mem_allocate and cl_mem_delete that were broken by the rebase. Split intel_gpgpu_upload_curbes into gen7 and gen8 variants so gen8+ writes the full buffer offset into the curbe, and remove the unused function cl_context_add_svm.
Signed-off-by: Yang Rong <rong.r.yang at intel.com>
---
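A side note on the intel_gpgpu_upload_curbes split below: the gen8 path stores
the relocated buffer address through a size_t pointer (64 bits on the usual
builds), while gen7 keeps the 32-bit store. A rough standalone sketch of that
difference (not Beignet code; write_curbe_pointer and is_gen8_or_later are
illustrative names only):

#include <stdint.h>
#include <string.h>

/* On gen7 the GTT offset fits in 32 bits, but on gen8+ the presumed address
 * (bo->offset64) may not, so patching it into the curbe slot through a
 * uint32_t pointer would truncate the value the GPU later reads. */
static void write_curbe_pointer(unsigned char *slot, uint64_t gpu_addr,
                                int is_gen8_or_later)
{
  if (is_gen8_or_later) {
    uint64_t wide = gpu_addr;              /* as intel_gpgpu_upload_curbes_gen8 does */
    memcpy(slot, &wide, sizeof(wide));
  } else {
    uint32_t narrow = (uint32_t)gpu_addr;  /* as intel_gpgpu_upload_curbes_gen7 does */
    memcpy(slot, &narrow, sizeof(narrow));
  }
}

Presumably this matters once gen8+ buffers can sit above 4 GB in the
per-process address space, which is also why intel_set_gpgpu_callbacks now
picks the curbe upload routine per generation instead of sharing one.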
src/cl_api_mem.c | 2 +-
src/cl_context.c | 14 +-------------
src/cl_context.h | 2 --
src/cl_event.c | 2 +-
src/cl_mem.c | 35 +++++++++++++++++------------------
src/cl_mem.h | 2 +-
src/intel/intel_gpgpu.c | 41 +++++++++++++++++++++++++++++++++++++----
7 files changed, 58 insertions(+), 40 deletions(-)
diff --git a/src/cl_api_mem.c b/src/cl_api_mem.c
index 7314a48..36a1421 100644
--- a/src/cl_api_mem.c
+++ b/src/cl_api_mem.c
@@ -2215,7 +2215,7 @@ clEnqueueFillImage(cl_command_queue command_queue,
break;
}
- err = cl_image_fill(command_queue, fill_color, image, origin, region);
+ err = cl_image_fill(command_queue, e, fill_color, image, origin, region);
if (err != CL_SUCCESS) {
break;
}
diff --git a/src/cl_context.c b/src/cl_context.c
index 4417e3b..c2adf3f 100644
--- a/src/cl_context.c
+++ b/src/cl_context.c
@@ -81,19 +81,6 @@ cl_context_add_mem(cl_context ctx, cl_mem mem) {
}
LOCAL void
-cl_context_add_svm(cl_context ctx, cl_mem mem) {
- assert(mem->ctx == NULL);
- cl_context_add_ref(ctx);
-
- CL_OBJECT_LOCK(ctx);
- list_add_tail(&mem->base.node, &ctx->svm_objects);
- ctx->svm_object_num++;
- CL_OBJECT_UNLOCK(ctx);
-
- mem->ctx = ctx;
-}
-
-LOCAL void
cl_context_remove_mem(cl_context ctx, cl_mem mem) {
assert(mem->ctx == ctx);
CL_OBJECT_LOCK(ctx);
@@ -476,6 +463,7 @@ cl_context_get_svm_from_ptr(cl_context ctx, const void * p)
buf = (cl_mem)list_entry(pos, _cl_base_object, node);
if(buf->host_ptr == NULL) continue;
if(buf->is_svm == 0) continue;
+ if(buf->type != CL_MEM_SVM_TYPE) continue;
if((size_t)buf->host_ptr <= (size_t)p &&
(size_t)p < ((size_t)buf->host_ptr + buf->size))
return buf;
diff --git a/src/cl_context.h b/src/cl_context.h
index 268e7b9..caa57dc 100644
--- a/src/cl_context.h
+++ b/src/cl_context.h
@@ -107,8 +107,6 @@ struct _cl_context {
cl_uint queue_cookie; /* Cookie will change every time we change queue list. */
list_head mem_objects; /* All memory object currently allocated */
cl_uint mem_object_num; /* All memory number currently allocated */
- list_head svm_objects; /* All svm object currently allocated */
- cl_uint svm_object_num; /* All svm number currently allocated */
list_head samplers; /* All sampler object currently allocated */
cl_uint sampler_num; /* All sampler number currently allocated */
list_head events; /* All event object currently allocated */
diff --git a/src/cl_event.c b/src/cl_event.c
index 0804dbd..58ec2c5 100644
--- a/src/cl_event.c
+++ b/src/cl_event.c
@@ -101,7 +101,7 @@ cl_event_new(cl_context ctx, cl_command_queue queue, cl_command_type type,
list_init(&e->callbacks);
list_init(&e->enqueue_node);
- assert(type >= CL_COMMAND_NDRANGE_KERNEL && type <= CL_COMMAND_FILL_IMAGE);
+ assert(type >= CL_COMMAND_NDRANGE_KERNEL && type <= CL_COMMAND_SVM_UNMAP);
e->event_type = type;
if (type == CL_COMMAND_USER) {
e->status = CL_SUBMITTED;
diff --git a/src/cl_mem.c b/src/cl_mem.c
index 798daaf..f856ba3 100644
--- a/src/cl_mem.c
+++ b/src/cl_mem.c
@@ -345,19 +345,19 @@ cl_mem_allocate(enum cl_mem_type type,
mem->is_svm = 1;
/* userptr not support tiling */
if (!is_tiled) {
- if ((ALIGN((unsigned long)host_ptr, cacheline_size) == (unsigned long)host_ptr) &&
+ if(svm_mem != NULL) { //SVM always paged alignment
+ mem->offset = 0;
+ mem->is_userptr = 1;
+ mem->bo = svm_mem->bo;
+ cl_mem_add_ref(svm_mem);
+ bufCreated = 1;
+ } else if ((ALIGN((unsigned long)host_ptr, cacheline_size) == (unsigned long)host_ptr) &&
(ALIGN((unsigned long)sz, cacheline_size) == (unsigned long)sz)) {
void* aligned_host_ptr = (void*)(((unsigned long)host_ptr) & (~(page_size - 1)));
mem->offset = host_ptr - aligned_host_ptr;
mem->is_userptr = 1;
size_t aligned_sz = ALIGN((mem->offset + sz), page_size);
-
- if(svm_mem != NULL) {
- mem->bo = svm_mem->bo;
- cl_mem_add_ref(svm_mem);
- } else
- mem->bo = cl_buffer_alloc_userptr(bufmgr, "CL userptr memory object", aligned_host_ptr, aligned_sz, 0);
-
+ mem->bo = cl_buffer_alloc_userptr(bufmgr, "CL userptr memory object", aligned_host_ptr, aligned_sz, 0);
bufCreated = 1;
}
}
@@ -1404,14 +1404,6 @@ cl_mem_delete(cl_mem mem)
}
}
- if(mem->is_svm && mem->type != CL_MEM_SVM_TYPE) {
- cl_mem svm_mem = cl_context_get_svm_from_ptr(mem->ctx, mem->host_ptr);
- if(svm_mem)
- cl_mem_delete(svm_mem);
- }
- /* Remove it from the list */
- cl_context_remove_mem(mem->ctx, mem);
-
/* Someone still mapped, unmap */
if(mem->map_ref > 0) {
assert(mem->mapped_ptr);
@@ -1451,10 +1443,17 @@ cl_mem_delete(cl_mem mem)
buffer->parent->subs = buffer->sub_next;
pthread_mutex_unlock(&buffer->parent->sub_lock);
cl_mem_delete((cl_mem )(buffer->parent));
+ } else if (mem->is_svm && mem->type != CL_MEM_SVM_TYPE) {
+ cl_mem svm_mem = cl_context_get_svm_from_ptr(mem->ctx, mem->host_ptr);
+ if (svm_mem != NULL)
+ cl_mem_delete(svm_mem);
} else if (LIKELY(mem->bo != NULL)) {
cl_buffer_unreference(mem->bo);
}
+ /* Remove it from the list */
+ cl_context_remove_mem(mem->ctx, mem);
+
if ((mem->is_userptr &&
(mem->flags & CL_MEM_ALLOC_HOST_PTR) &&
(mem->type != CL_MEM_SUBBUFFER_TYPE)) ||
@@ -1645,7 +1644,7 @@ cl_mem_copy(cl_command_queue queue, cl_event event, cl_mem src_buf, cl_mem dst_b
}
LOCAL cl_int
-cl_image_fill(cl_command_queue queue, const void * pattern, struct _cl_mem_image* src_image,
+cl_image_fill(cl_command_queue queue, cl_event e, const void * pattern, struct _cl_mem_image* src_image,
const size_t * origin, const size_t * region)
{
cl_int ret = CL_SUCCESS;
@@ -1725,7 +1724,7 @@ cl_image_fill(cl_command_queue queue, const void * pattern, struct _cl_mem_image
cl_kernel_set_arg(ker, 6, sizeof(cl_int), &origin[1]);
cl_kernel_set_arg(ker, 7, sizeof(cl_int), &origin[2]);
- ret = cl_command_queue_ND_range(queue, ker, NULL, 3, global_off, global_sz, local_sz);
+ ret = cl_command_queue_ND_range(queue, ker, e, 3, global_off, global_sz, local_sz);
cl_kernel_delete(ker);
src_image->intel_fmt = savedIntelFmt;
return ret;
diff --git a/src/cl_mem.h b/src/cl_mem.h
index 5a384ed..7510d13 100644
--- a/src/cl_mem.h
+++ b/src/cl_mem.h
@@ -260,7 +260,7 @@ extern cl_int cl_mem_copy(cl_command_queue queue, cl_event event, cl_mem src_buf
extern cl_int cl_mem_fill(cl_command_queue queue, cl_event e, const void * pattern, size_t pattern_size,
cl_mem buffer, size_t offset, size_t size);
-extern cl_int cl_image_fill(cl_command_queue queue, const void * pattern, struct _cl_mem_image*,
+extern cl_int cl_image_fill(cl_command_queue queue, cl_event e, const void * pattern, struct _cl_mem_image*,
const size_t *, const size_t *);
/* api clEnqueueCopyBufferRect help function */
diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c
index d56d35d..b36c21a 100644
--- a/src/intel/intel_gpgpu.c
+++ b/src/intel/intel_gpgpu.c
@@ -1713,7 +1713,7 @@ intel_gpgpu_build_idrt_gen9(intel_gpgpu_t *gpgpu, cl_gpgpu_kernel *kernel)
}
static int
-intel_gpgpu_upload_curbes(intel_gpgpu_t *gpgpu, const void* data, uint32_t size)
+intel_gpgpu_upload_curbes_gen7(intel_gpgpu_t *gpgpu, const void* data, uint32_t size)
{
unsigned char *curbe = NULL;
cl_gpgpu_kernel *k = gpgpu->ker;
@@ -1731,7 +1731,38 @@ intel_gpgpu_upload_curbes(intel_gpgpu_t *gpgpu, const void* data, uint32_t size)
/* Now put all the relocations for our flat address space */
for (i = 0; i < k->thread_n; ++i)
for (j = 0; j < gpgpu->binded_n; ++j) {
- *(uint32_t*)(curbe + gpgpu->binded_offset[j]+i*k->curbe_sz) = gpgpu->binded_buf[j]->offset + gpgpu->target_buf_offset[j];
+ *(uint32_t *)(curbe + gpgpu->binded_offset[j]+i*k->curbe_sz) = gpgpu->binded_buf[j]->offset64 + gpgpu->target_buf_offset[j];
+ drm_intel_bo_emit_reloc(gpgpu->aux_buf.bo,
+ gpgpu->aux_offset.curbe_offset + gpgpu->binded_offset[j]+i*k->curbe_sz,
+ gpgpu->binded_buf[j],
+ gpgpu->target_buf_offset[j],
+ I915_GEM_DOMAIN_RENDER,
+ I915_GEM_DOMAIN_RENDER);
+ }
+ dri_bo_unmap(gpgpu->aux_buf.bo);
+ return 0;
+}
+
+static int
+intel_gpgpu_upload_curbes_gen8(intel_gpgpu_t *gpgpu, const void* data, uint32_t size)
+{
+ unsigned char *curbe = NULL;
+ cl_gpgpu_kernel *k = gpgpu->ker;
+ uint32_t i, j;
+
+ /* Upload the data first */
+ if (dri_bo_map(gpgpu->aux_buf.bo, 1) != 0) {
+ fprintf(stderr, "%s:%d: %s.\n", __FILE__, __LINE__, strerror(errno));
+ return -1;
+ }
+ assert(gpgpu->aux_buf.bo->virtual);
+ curbe = (unsigned char *) (gpgpu->aux_buf.bo->virtual + gpgpu->aux_offset.curbe_offset);
+ memcpy(curbe, data, size);
+
+ /* Now put all the relocations for our flat address space */
+ for (i = 0; i < k->thread_n; ++i)
+ for (j = 0; j < gpgpu->binded_n; ++j) {
+ *(size_t *)(curbe + gpgpu->binded_offset[j]+i*k->curbe_sz) = gpgpu->binded_buf[j]->offset64 + gpgpu->target_buf_offset[j];
drm_intel_bo_emit_reloc(gpgpu->aux_buf.bo,
gpgpu->aux_offset.curbe_offset + gpgpu->binded_offset[j]+i*k->curbe_sz,
gpgpu->binded_buf[j],
@@ -2427,7 +2458,6 @@ intel_set_gpgpu_callbacks(int device_id)
cl_gpgpu_set_stack = (cl_gpgpu_set_stack_cb *) intel_gpgpu_set_stack;
cl_gpgpu_state_init = (cl_gpgpu_state_init_cb *) intel_gpgpu_state_init;
cl_gpgpu_set_perf_counters = (cl_gpgpu_set_perf_counters_cb *) intel_gpgpu_set_perf_counters;
- cl_gpgpu_upload_curbes = (cl_gpgpu_upload_curbes_cb *) intel_gpgpu_upload_curbes;
cl_gpgpu_alloc_constant_buffer = (cl_gpgpu_alloc_constant_buffer_cb *) intel_gpgpu_alloc_constant_buffer;
cl_gpgpu_states_setup = (cl_gpgpu_states_setup_cb *) intel_gpgpu_states_setup;
cl_gpgpu_upload_samplers = (cl_gpgpu_upload_samplers_cb *) intel_gpgpu_upload_samplers;
@@ -2476,7 +2506,8 @@ intel_set_gpgpu_callbacks(int device_id)
intel_gpgpu_load_idrt = intel_gpgpu_load_idrt_gen8;
cl_gpgpu_bind_sampler = (cl_gpgpu_bind_sampler_cb *) intel_gpgpu_bind_sampler_gen8;
intel_gpgpu_pipe_control = intel_gpgpu_pipe_control_gen8;
- intel_gpgpu_select_pipeline = intel_gpgpu_select_pipeline_gen7;
+ intel_gpgpu_select_pipeline = intel_gpgpu_select_pipeline_gen7;
+ cl_gpgpu_upload_curbes = (cl_gpgpu_upload_curbes_cb *) intel_gpgpu_upload_curbes_gen8;
return;
}
if (IS_GEN9(device_id)) {
@@ -2496,9 +2527,11 @@ intel_set_gpgpu_callbacks(int device_id)
cl_gpgpu_bind_sampler = (cl_gpgpu_bind_sampler_cb *) intel_gpgpu_bind_sampler_gen8;
intel_gpgpu_pipe_control = intel_gpgpu_pipe_control_gen8;
intel_gpgpu_select_pipeline = intel_gpgpu_select_pipeline_gen9;
+ cl_gpgpu_upload_curbes = (cl_gpgpu_upload_curbes_cb *) intel_gpgpu_upload_curbes_gen8;
return;
}
+ cl_gpgpu_upload_curbes = (cl_gpgpu_upload_curbes_cb *) intel_gpgpu_upload_curbes_gen7;
intel_gpgpu_set_base_address = intel_gpgpu_set_base_address_gen7;
intel_gpgpu_load_vfe_state = intel_gpgpu_load_vfe_state_gen7;
cl_gpgpu_walker = (cl_gpgpu_walker_cb *)intel_gpgpu_walker_gen7;
--
2.1.4