[Beignet] [PATCH 6/6 newRT] Add cl_context_gen file.
junyan.he at inbox.com
junyan.he at inbox.com
Tue Mar 28 08:25:37 UTC 2017
From: Junyan He <junyan.he at intel.com>
This file will implement all the logic specific to GEN.
Signed-off-by: Junyan He <junyan.he at intel.com>
---
src/gen/cl_context_gen.c | 195 +++++++++++++++++++++++++++++++++++++++++++++++
src/gen/cl_gen.h | 55 +++++++++++++
2 files changed, 250 insertions(+)
create mode 100644 src/gen/cl_context_gen.c
diff --git a/src/gen/cl_context_gen.c b/src/gen/cl_context_gen.c
new file mode 100644
index 0000000..7bc4fc0
--- /dev/null
+++ b/src/gen/cl_context_gen.c
@@ -0,0 +1,195 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#include "cl_gen.h"
+
+#define DECL_INTERNAL_KERN(NAME) /* declares extern cl_internal_<NAME>_str[] and cl_internal_<NAME>_str_size */ \
+ extern char cl_internal_##NAME##_str[]; \
+ extern size_t cl_internal_##NAME##_str_size;
+
+DECL_INTERNAL_KERN(block_motion_estimate_intel) /* NOTE(review): declared here but not referenced by gen_internals_kernels in this file */
+DECL_INTERNAL_KERN(copy_buf_align16)
+DECL_INTERNAL_KERN(copy_buf_align4)
+DECL_INTERNAL_KERN(copy_buffer_to_image_2d_align16)
+DECL_INTERNAL_KERN(copy_buffer_to_image_2d)
+DECL_INTERNAL_KERN(copy_buffer_to_image_3d)
+DECL_INTERNAL_KERN(copy_buf_rect_align4)
+DECL_INTERNAL_KERN(copy_buf_rect)
+DECL_INTERNAL_KERN(copy_buf_unalign_dst_offset)
+DECL_INTERNAL_KERN(copy_buf_unalign_same_offset)
+DECL_INTERNAL_KERN(copy_buf_unalign_src_offset)
+DECL_INTERNAL_KERN(copy_image_1d_array_to_1d_array)
+DECL_INTERNAL_KERN(copy_image_1d_to_1d)
+DECL_INTERNAL_KERN(copy_image_2d_array_to_2d_array)
+DECL_INTERNAL_KERN(copy_image_2d_array_to_2d)
+DECL_INTERNAL_KERN(copy_image_2d_array_to_3d)
+DECL_INTERNAL_KERN(copy_image_2d_to_2d_array)
+DECL_INTERNAL_KERN(copy_image_2d_to_2d)
+DECL_INTERNAL_KERN(copy_image_2d_to_3d)
+DECL_INTERNAL_KERN(copy_image_2d_to_buffer_align16)
+DECL_INTERNAL_KERN(copy_image_2d_to_buffer)
+DECL_INTERNAL_KERN(copy_image_3d_to_2d_array)
+DECL_INTERNAL_KERN(copy_image_3d_to_2d)
+DECL_INTERNAL_KERN(copy_image_3d_to_3d)
+DECL_INTERNAL_KERN(copy_image_3d_to_buffer)
+DECL_INTERNAL_KERN(fill_buf_align128)
+DECL_INTERNAL_KERN(fill_buf_align2)
+DECL_INTERNAL_KERN(fill_buf_align4)
+DECL_INTERNAL_KERN(fill_buf_align8)
+DECL_INTERNAL_KERN(fill_buf_unalign)
+DECL_INTERNAL_KERN(fill_image_1d_array)
+DECL_INTERNAL_KERN(fill_image_1d)
+DECL_INTERNAL_KERN(fill_image_2d_array)
+DECL_INTERNAL_KERN(fill_image_2d)
+DECL_INTERNAL_KERN(fill_image_3d)
+
+#define REF_INTERNAL_KERN(NAME) (cl_internal_##NAME##_str), &(cl_internal_##NAME##_str_size) /* expands to two initializers: the _str array, then the address of its _str_size */
+
+static struct { /* One entry per internal kernel: where its program binary lives and which kernel to create from it */
+ cl_int index; /* enum cl_internal_kernel_type_gen value of this entry */
+ void *program_binary; /* the cl_internal_<name>_str array, passed as the program binary */
+ size_t *size; /* points to the matching cl_internal_<name>_str_size */
+ char *kernel_name; /* kernel name handed to cl_kernel_create() */
+} gen_internals_kernels[] = { /* looked up by position with the enum value, so entries must stay in enum order */
+ {CL_ENQUEUE_COPY_BUFFER_ALIGN4, REF_INTERNAL_KERN(copy_buf_align4), "__cl_copy_region_align4"},
+ {CL_ENQUEUE_COPY_BUFFER_ALIGN16, REF_INTERNAL_KERN(copy_buf_align16), "__cl_copy_region_align16"},
+ {CL_ENQUEUE_COPY_BUFFER_UNALIGN_SAME_OFFSET, REF_INTERNAL_KERN(copy_buf_unalign_same_offset), "__cl_copy_region_unalign_same_offset"},
+ {CL_ENQUEUE_COPY_BUFFER_UNALIGN_DST_OFFSET, REF_INTERNAL_KERN(copy_buf_unalign_dst_offset), "__cl_copy_region_unalign_dst_offset"},
+ {CL_ENQUEUE_COPY_BUFFER_UNALIGN_SRC_OFFSET, REF_INTERNAL_KERN(copy_buf_unalign_src_offset), "__cl_copy_region_unalign_src_offset"},
+ {CL_ENQUEUE_COPY_BUFFER_RECT, REF_INTERNAL_KERN(copy_buf_rect), "__cl_copy_buffer_rect"},
+ {CL_ENQUEUE_COPY_BUFFER_RECT_ALIGN4, REF_INTERNAL_KERN(copy_buf_rect_align4), "__cl_copy_buffer_rect_align4"},
+ {CL_ENQUEUE_COPY_IMAGE_1D_TO_1D, REF_INTERNAL_KERN(copy_image_1d_to_1d), "__cl_copy_image_1d_to_1d"},
+ {CL_ENQUEUE_COPY_IMAGE_2D_TO_2D, REF_INTERNAL_KERN(copy_image_2d_to_2d), "__cl_copy_image_2d_to_2d"},
+ {CL_ENQUEUE_COPY_IMAGE_3D_TO_2D, REF_INTERNAL_KERN(copy_image_3d_to_2d), "__cl_copy_image_3d_to_2d"},
+ {CL_ENQUEUE_COPY_IMAGE_2D_TO_3D, REF_INTERNAL_KERN(copy_image_2d_to_3d), "__cl_copy_image_2d_to_3d"},
+ {CL_ENQUEUE_COPY_IMAGE_3D_TO_3D, REF_INTERNAL_KERN(copy_image_3d_to_3d), "__cl_copy_image_3d_to_3d"},
+ {CL_ENQUEUE_COPY_IMAGE_2D_TO_2D_ARRAY, REF_INTERNAL_KERN(copy_image_2d_to_2d_array), "__cl_copy_image_2d_to_2d_array"},
+ {CL_ENQUEUE_COPY_IMAGE_1D_ARRAY_TO_1D_ARRAY, REF_INTERNAL_KERN(copy_image_1d_array_to_1d_array), "__cl_copy_image_1d_array_to_1d_array"},
+ {CL_ENQUEUE_COPY_IMAGE_2D_ARRAY_TO_2D_ARRAY, REF_INTERNAL_KERN(copy_image_2d_array_to_2d_array), "__cl_copy_image_2d_array_to_2d_array"},
+ {CL_ENQUEUE_COPY_IMAGE_2D_ARRAY_TO_2D, REF_INTERNAL_KERN(copy_image_2d_array_to_2d), "__cl_copy_image_2d_array_to_2d"},
+ {CL_ENQUEUE_COPY_IMAGE_2D_ARRAY_TO_3D, REF_INTERNAL_KERN(copy_image_2d_array_to_3d), "__cl_copy_image_2d_array_to_3d"},
+ {CL_ENQUEUE_COPY_IMAGE_3D_TO_2D_ARRAY, REF_INTERNAL_KERN(copy_image_3d_to_2d_array), "__cl_copy_image_3d_to_2d_array"},
+ {CL_ENQUEUE_COPY_IMAGE_2D_TO_BUFFER, REF_INTERNAL_KERN(copy_image_2d_to_buffer), "__cl_copy_image_2d_to_buffer"},
+ {CL_ENQUEUE_COPY_IMAGE_2D_TO_BUFFER_ALIGN16, REF_INTERNAL_KERN(copy_image_2d_to_buffer_align16), "__cl_copy_image_2d_to_buffer_align16"},
+ {CL_ENQUEUE_COPY_IMAGE_3D_TO_BUFFER, REF_INTERNAL_KERN(copy_image_3d_to_buffer), "__cl_copy_image_3d_to_buffer"},
+ {CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_2D, REF_INTERNAL_KERN(copy_buffer_to_image_2d), "__cl_copy_buffer_to_image_2d"},
+ {CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_2D_ALIGN16, REF_INTERNAL_KERN(copy_buffer_to_image_2d_align16), "__cl_copy_buffer_to_image_2d_align16"},
+ {CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_3D, REF_INTERNAL_KERN(copy_buffer_to_image_3d), "__cl_copy_buffer_to_image_3d"},
+ {CL_ENQUEUE_FILL_BUFFER_UNALIGN, REF_INTERNAL_KERN(fill_buf_unalign), "__cl_fill_region_unalign"},
+ {CL_ENQUEUE_FILL_BUFFER_ALIGN2, REF_INTERNAL_KERN(fill_buf_align2), "__cl_fill_region_align2"},
+ {CL_ENQUEUE_FILL_BUFFER_ALIGN4, REF_INTERNAL_KERN(fill_buf_align4), "__cl_fill_region_align4"},
+ {CL_ENQUEUE_FILL_BUFFER_ALIGN8_8, REF_INTERNAL_KERN(fill_buf_align8), "__cl_fill_region_align8_2"},
+ {CL_ENQUEUE_FILL_BUFFER_ALIGN8_16, REF_INTERNAL_KERN(fill_buf_align8), "__cl_fill_region_align8_4"},
+ {CL_ENQUEUE_FILL_BUFFER_ALIGN8_32, REF_INTERNAL_KERN(fill_buf_align8), "__cl_fill_region_align8_8"},
+ {CL_ENQUEUE_FILL_BUFFER_ALIGN8_64, REF_INTERNAL_KERN(fill_buf_align8), "__cl_fill_region_align8_16"},
+ {CL_ENQUEUE_FILL_BUFFER_ALIGN128, REF_INTERNAL_KERN(fill_buf_align128), "__cl_fill_region_align128"},
+ {CL_ENQUEUE_FILL_IMAGE_1D, REF_INTERNAL_KERN(fill_image_1d), "__cl_fill_image_1d"},
+ {CL_ENQUEUE_FILL_IMAGE_1D_ARRAY, REF_INTERNAL_KERN(fill_image_1d_array), "__cl_fill_image_1d_array"},
+ {CL_ENQUEUE_FILL_IMAGE_2D, REF_INTERNAL_KERN(fill_image_2d), "__cl_fill_image_2d"},
+ {CL_ENQUEUE_FILL_IMAGE_2D_ARRAY, REF_INTERNAL_KERN(fill_image_2d_array), "__cl_fill_image_2d_array"},
+ {CL_ENQUEUE_FILL_IMAGE_3D, REF_INTERNAL_KERN(fill_image_3d), "__cl_fill_image_3d"},
+};
+
+/*
+ * Allocate the GEN private context data for `device`.
+ *
+ * Stores `device` in the embedded ctx_base and hands the new object back as
+ * an opaque pointer. Returns NULL when CL_CALLOC yields NULL. `ctx` is not
+ * used by this function.
+ */
+LOCAL void *
+cl_context_new_gen(cl_device_id device, cl_context ctx)
+{
+  cl_context_gen gen_priv = CL_CALLOC(1, sizeof(*gen_priv));
+
+  if (gen_priv != NULL) {
+    gen_priv->ctx_base.device = device;
+  }
+
+  return gen_priv;
+}
+
+/*
+ * Create the driver behind the GEN private context data of `ctx` on `device`.
+ *
+ * On success the driver is also published in ctx->drv, the Gen version is
+ * copied from the driver, and the device's atomic test result is forwarded
+ * to it. Returns CL_OUT_OF_RESOURCES when intel_driver_create() returns
+ * NULL, CL_SUCCESS otherwise.
+ */
+LOCAL cl_int
+cl_context_create_gen(cl_device_id device, cl_context ctx)
+{
+  cl_context_gen gen_priv = NULL;
+  DEV_PRIVATE_DATA(ctx, device, gen_priv);
+
+  gen_priv->drv = intel_driver_create(&ctx->props);
+  if (gen_priv->drv != NULL) {
+    ctx->drv = gen_priv->drv; //TODO: delete later
+
+    gen_priv->ver = gen_priv->drv->gen_ver;
+    intel_driver_set_atomic_flag(gen_priv->drv, device->atomic_test_result);
+    return CL_SUCCESS;
+  }
+
+  return CL_OUT_OF_RESOURCES;
+}
+
+/*
+ * Tear down the GEN private context data of `ctx` for `device`: delete its
+ * driver, then free the structure itself.
+ *
+ * NOTE(review): cl_context_create_gen() also stores the driver in ctx->drv;
+ * that copy is not cleared here -- confirm nothing reads it afterwards.
+ */
+LOCAL void
+cl_context_delete_gen(cl_device_id device, cl_context ctx)
+{
+  cl_context_gen gen_priv = NULL;
+  DEV_PRIVATE_DATA(ctx, device, gen_priv);
+
+  /* Release the driver first, then the private data that owned it. */
+  intel_driver_delete(gen_priv->drv);
+  gen_priv->drv = NULL;
+
+  CL_FREE(gen_priv);
+}
+
+/*
+ * Return the internal (built-in) kernel identified by `index` for `device`,
+ * building and caching it on first use.
+ *
+ * The program/kernel pair is cached on the device in internal_program[] and
+ * internal_kernels[]. The build runs outside the device lock, so two callers
+ * can build the same kernel at once; the one that finds the cache already
+ * filled drops its own copy and returns the cached kernel.
+ *
+ * Returns NULL when `index` is out of range, or when creating the program,
+ * building it, or creating the kernel fails. Debug builds still assert on
+ * those conditions; the explicit checks keep builds compiled with NDEBUG
+ * from indexing out of bounds or passing a NULL program on.
+ */
+LOCAL cl_kernel
+cl_context_get_builtin_kernel_gen(cl_context ctx, cl_device_id device, cl_int index)
+{
+  cl_device_id_gen dev_gen = (cl_device_id_gen)device;
+  cl_int binary_status = CL_SUCCESS;
+  cl_int err = CL_SUCCESS;
+  cl_program prog = NULL;
+  cl_kernel ker = NULL;
+
+  assert(index >= 0 && index < CL_INTERNAL_KERNEL_MAX);
+  if (index < 0 || index >= CL_INTERNAL_KERNEL_MAX) {
+    return NULL;
+  }
+
+  /* The table is looked up by position, so it has to follow the enum order. */
+  assert(gen_internals_kernels[index].index == index);
+
+  CL_OBJECT_LOCK(device);
+  if (dev_gen->internal_program[index] == NULL) {
+    assert(dev_gen->internal_kernels[index] == NULL);
+  } else {
+    ker = dev_gen->internal_kernels[index];
+    assert(ker);
+  }
+  CL_OBJECT_UNLOCK(device);
+
+  if (ker) {
+    return ker;
+  }
+
+  prog = cl_program_create_from_binary(ctx, 1, &device, gen_internals_kernels[index].size,
+                                       (const unsigned char **)&gen_internals_kernels[index].program_binary,
+                                       &binary_status, &err);
+  assert(err == CL_SUCCESS);
+  if (err != CL_SUCCESS || prog == NULL) {
+    goto error;
+  }
+
+  err = cl_program_build(prog, NULL, 1, &device);
+  assert(err == CL_SUCCESS);
+  if (err != CL_SUCCESS) {
+    goto error;
+  }
+
+  cl_program_take_out_of_context(prog);
+  ker = cl_kernel_create(prog, gen_internals_kernels[index].kernel_name, &err);
+  assert(err == CL_SUCCESS);
+  if (err != CL_SUCCESS || ker == NULL) {
+    goto error;
+  }
+
+  /* Cache the build result on the device. */
+  CL_OBJECT_LOCK(device);
+  if (dev_gen->internal_program[index] == NULL) {
+    dev_gen->internal_program[index] = prog;
+    dev_gen->internal_kernels[index] = ker;
+  } else {
+    /* Another caller cached this kernel first: drop ours, use theirs. */
+    cl_kernel_delete(ker);
+    cl_program_delete(prog);
+    ker = dev_gen->internal_kernels[index];
+    assert(ker);
+  }
+  CL_OBJECT_UNLOCK(device);
+
+  return ker;
+
+error:
+  /* Release whatever was created before the failure. */
+  if (ker) {
+    cl_kernel_delete(ker);
+  }
+  if (prog) {
+    cl_program_delete(prog);
+  }
+  return NULL;
+}
diff --git a/src/gen/cl_gen.h b/src/gen/cl_gen.h
index c4294eb..6cdc405 100644
--- a/src/gen/cl_gen.h
+++ b/src/gen/cl_gen.h
@@ -196,4 +196,59 @@ extern cl_int cl_mem_copy_buffer_to_image_gen(cl_command_queue queue, cl_event e
cl_mem image, const size_t src_offset,
const size_t *dst_origin, const size_t *region);
+/*********************************** Context *****************************************/
+enum cl_internal_kernel_type_gen { // All internal kernel types for gen
+ CL_INTERNAL_KERNEL_MIN = 0,
+ CL_ENQUEUE_COPY_BUFFER_ALIGN4 = 0,
+ CL_ENQUEUE_COPY_BUFFER_ALIGN16,
+ CL_ENQUEUE_COPY_BUFFER_UNALIGN_SAME_OFFSET,
+ CL_ENQUEUE_COPY_BUFFER_UNALIGN_DST_OFFSET,
+ CL_ENQUEUE_COPY_BUFFER_UNALIGN_SRC_OFFSET,
+ CL_ENQUEUE_COPY_BUFFER_RECT,
+ CL_ENQUEUE_COPY_BUFFER_RECT_ALIGN4,
+ CL_ENQUEUE_COPY_IMAGE_1D_TO_1D, //copy image 1d to image 1d
+ CL_ENQUEUE_COPY_IMAGE_2D_TO_2D, //copy image 2d to image 2d
+ CL_ENQUEUE_COPY_IMAGE_3D_TO_2D, //copy image 3d to image 2d
+ CL_ENQUEUE_COPY_IMAGE_2D_TO_3D, //copy image 2d to image 3d
+ CL_ENQUEUE_COPY_IMAGE_3D_TO_3D, //copy image 3d to image 3d
+ CL_ENQUEUE_COPY_IMAGE_2D_TO_2D_ARRAY, //copy image 2d to image 2d array
+ CL_ENQUEUE_COPY_IMAGE_1D_ARRAY_TO_1D_ARRAY, //copy image 1d array to image 1d array
+ CL_ENQUEUE_COPY_IMAGE_2D_ARRAY_TO_2D_ARRAY, //copy image 2d array to image 2d array
+ CL_ENQUEUE_COPY_IMAGE_2D_ARRAY_TO_2D, //copy image 2d array to image 2d
+ CL_ENQUEUE_COPY_IMAGE_2D_ARRAY_TO_3D, //copy image 2d array to image 3d
+ CL_ENQUEUE_COPY_IMAGE_3D_TO_2D_ARRAY, //copy image 3d to image 2d array
+ CL_ENQUEUE_COPY_IMAGE_2D_TO_BUFFER, //copy image 2d to buffer
+ CL_ENQUEUE_COPY_IMAGE_2D_TO_BUFFER_ALIGN16,
+ CL_ENQUEUE_COPY_IMAGE_3D_TO_BUFFER, //copy image 3d to buffer
+ CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_2D, //copy buffer to image 2d
+ CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_2D_ALIGN16,
+ CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_3D, //copy buffer to image 3d
+ CL_ENQUEUE_FILL_BUFFER_UNALIGN, //fill buffer with 1 aligned pattern, pattern size=1
+ CL_ENQUEUE_FILL_BUFFER_ALIGN2, //fill buffer with 2 aligned pattern, pattern size=2
+ CL_ENQUEUE_FILL_BUFFER_ALIGN4, //fill buffer with 4 aligned pattern, pattern size=4
+ CL_ENQUEUE_FILL_BUFFER_ALIGN8_8, //fill buffer via the fill_buf_align8 program, pattern size=8
+ CL_ENQUEUE_FILL_BUFFER_ALIGN8_16, //fill buffer via the fill_buf_align8 program, pattern size=16
+ CL_ENQUEUE_FILL_BUFFER_ALIGN8_32, //fill buffer via the fill_buf_align8 program, pattern size=32
+ CL_ENQUEUE_FILL_BUFFER_ALIGN8_64, //fill buffer via the fill_buf_align8 program, pattern size=64
+ CL_ENQUEUE_FILL_BUFFER_ALIGN128, //fill buffer with 128 aligned pattern, pattern size=128
+ CL_ENQUEUE_FILL_IMAGE_1D, //fill image 1d
+ CL_ENQUEUE_FILL_IMAGE_1D_ARRAY, //fill image 1d array
+ CL_ENQUEUE_FILL_IMAGE_2D, //fill image 2d
+ CL_ENQUEUE_FILL_IMAGE_2D_ARRAY, //fill image 2d array
+ CL_ENQUEUE_FILL_IMAGE_3D, //fill image 3d
+ CL_INTERNAL_KERNEL_MAX // number of internal kernel types; valid indices are below this
+};
+
+typedef struct _cl_context_gen { /* GEN private data of a context for one device (fetched with DEV_PRIVATE_DATA in cl_context_gen.c) */
+ _cl_context_for_device ctx_base; /* Common part; its .device points to the device this data belongs to */
+ intel_driver_t *drv; /* Driver from intel_driver_create(); handles HW or simulator */
+ uint32_t ver; /* Gen version, copied from drv->gen_ver */
+} _cl_context_gen;
+typedef _cl_context_gen *cl_context_gen; /* Pointer form used by the cl_context_*_gen functions */
+
+extern void *cl_context_new_gen(cl_device_id device, cl_context ctx); /* allocates a _cl_context_gen; NULL if the allocation fails */
+extern cl_int cl_context_create_gen(cl_device_id device, cl_context ctx); /* creates the driver; CL_OUT_OF_RESOURCES if that fails */
+extern void cl_context_delete_gen(cl_device_id device, cl_context ctx); /* deletes the driver and frees the private data */
+extern cl_kernel cl_context_get_builtin_kernel_gen(cl_context ctx, cl_device_id device, cl_int index); /* index is an enum cl_internal_kernel_type_gen value */
+
#endif /* End of __CL_GEN_H__ */
--
2.7.4
More information about the Beignet
mailing list