[Beignet] [PATCH 5/8] HSW: Use the drm flag I915_EXEC_ENABLE_SLM to set L3 control config.
He Junyan
junyan.he at inbox.com
Wed Jun 4 23:29:24 PDT 2014
Hi
I found that the patch uses
drm_intel_gem_context_create, which calls the kernel's
DRM_IOCTL_I915_GEM_CONTEXT_CREATE ioctl.
This is only implemented in kernels after version 3.10.
So if the kernel version is earlier than 3.10, the assertion
assert(driver->ctx);
will fail.
So beignet will no longer support old kernel versions;
I think we should update the README.
On Mon, 2014-05-12 at 23:12 +0800, Yang Rong wrote:
> Because LRI commands will be converted to NOOP, add the I915_EXEC_ENABLE_SLM
> flag to the drm kernel driver, to enable SLM in the L3. Set the flag when
> the application uses SLM. Still keep the L3 config in the batch buffer for fulsim.
> Also create and use OpenCL's own context when executing, to avoid affecting other contexts.
>
> Signed-off-by: Yang Rong <rong.r.yang at intel.com>
> ---
> src/intel/intel_batchbuffer.c | 10 +++++++++-
> src/intel/intel_batchbuffer.h | 3 +++
> src/intel/intel_driver.c | 19 ++++++++++++++++++
> src/intel/intel_driver.h | 1 +
> src/intel/intel_gpgpu.c | 46 +++++++++++++++++++++++++++++++++++++++----
> 5 files changed, 74 insertions(+), 5 deletions(-)
>
> diff --git a/src/intel/intel_batchbuffer.c b/src/intel/intel_batchbuffer.c
> index 62eedd0..19dc901 100644
> --- a/src/intel/intel_batchbuffer.c
> +++ b/src/intel/intel_batchbuffer.c
> @@ -74,6 +74,7 @@ intel_batchbuffer_reset(intel_batchbuffer_t *batch, size_t sz)
> batch->ptr = batch->map;
> batch->atomic = 0;
> batch->last_bo = batch->buffer;
> + batch->enable_slm = 0;
> }
>
> LOCAL void
> @@ -119,7 +120,14 @@ intel_batchbuffer_flush(intel_batchbuffer_t *batch)
> if (!is_locked)
> intel_driver_lock_hardware(batch->intel);
>
> - dri_bo_exec(batch->buffer, used, 0, 0, 0);
> + int flag = I915_EXEC_RENDER;
> + if(batch->enable_slm) {
> + /* use the hard code here temp, must change to
> + * I915_EXEC_ENABLE_SLM when it drm accept the patch */
> + flag |= (1<<13);
> + }
> + drm_intel_gem_bo_context_exec(batch->buffer, batch->intel->ctx, used, flag);
> +
> if (!is_locked)
> intel_driver_unlock_hardware(batch->intel);
>
> diff --git a/src/intel/intel_batchbuffer.h b/src/intel/intel_batchbuffer.h
> index 74f1790..0c3bc13 100644
> --- a/src/intel/intel_batchbuffer.h
> +++ b/src/intel/intel_batchbuffer.h
> @@ -83,6 +83,9 @@ typedef struct intel_batchbuffer
> uint32_t size;
> uint8_t *map;
> uint8_t *ptr;
> + /** HSW: can't set LRI in batch buffer, set I915_EXEC_ENABLE_SLM
> + * flag when call exec. */
> + uint8_t enable_slm;
> int atomic;
> } intel_batchbuffer_t;
>
> diff --git a/src/intel/intel_driver.c b/src/intel/intel_driver.c
> index ef97835..08d6bc0 100644
> --- a/src/intel/intel_driver.c
> +++ b/src/intel/intel_driver.c
> @@ -106,6 +106,7 @@ intel_driver_delete(intel_driver_t *driver)
> {
> if (driver == NULL)
> return;
> +
> if (driver->bufmgr)
> drm_intel_bufmgr_destroy(driver->bufmgr);
> cl_free(driver);
> @@ -139,6 +140,21 @@ intel_driver_memman_init(intel_driver_t *driver)
> drm_intel_bufmgr_gem_enable_reuse(driver->bufmgr);
> }
>
> +static void
> +intel_driver_context_init(intel_driver_t *driver)
> +{
> + driver->ctx = drm_intel_gem_context_create(driver->bufmgr);
> + assert(driver->ctx);
> +}
> +
> +static void
> +intel_driver_context_destroy(intel_driver_t *driver)
> +{
> + if(driver->ctx)
> + drm_intel_gem_context_destroy(driver->ctx);
> + driver->ctx = NULL;
> +}
> +
> static void
> intel_driver_init(intel_driver_t *driver, int dev_fd)
> {
> @@ -151,6 +167,7 @@ intel_driver_init(intel_driver_t *driver, int dev_fd)
> intel_driver_get_param(driver, I915_PARAM_CHIPSET_ID, &driver->device_id);
> assert(res);
> intel_driver_memman_init(driver);
> + intel_driver_context_init(driver);
>
> #if EMULATE_GEN
> driver->gen_ver = EMULATE_GEN;
> @@ -364,6 +381,7 @@ intel_get_device_id(void)
> assert(driver != NULL);
> intel_driver_open(driver, NULL);
> intel_device_id = driver->device_id;
> + intel_driver_context_destroy(driver);
> intel_driver_close(driver);
> intel_driver_terminate(driver);
> intel_driver_delete(driver);
> @@ -376,6 +394,7 @@ cl_intel_driver_delete(intel_driver_t *driver)
> {
> if (driver == NULL)
> return;
> + intel_driver_context_destroy(driver);
> intel_driver_close(driver);
> intel_driver_terminate(driver);
> intel_driver_delete(driver);
> diff --git a/src/intel/intel_driver.h b/src/intel/intel_driver.h
> index a01d881..34efbbb 100644
> --- a/src/intel/intel_driver.h
> +++ b/src/intel/intel_driver.h
> @@ -78,6 +78,7 @@ typedef struct _XDisplay Display;
> typedef struct intel_driver
> {
> dri_bufmgr *bufmgr;
> + drm_intel_context *ctx;
> int fd;
> int device_id;
> int gen_ver;
> diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c
> index 603a075..103a4b2 100644
> --- a/src/intel/intel_gpgpu.c
> +++ b/src/intel/intel_gpgpu.c
> @@ -118,6 +118,8 @@ struct intel_gpgpu
>
> typedef struct intel_gpgpu intel_gpgpu_t;
>
> +typedef void (intel_gpgpu_set_L3_t)(intel_gpgpu_t *gpgpu, uint32_t use_slm);
> +intel_gpgpu_set_L3_t *intel_gpgpu_set_L3 = NULL;
>
> static void
> intel_gpgpu_sync(void *buf)
> @@ -330,8 +332,9 @@ intel_gpgpu_pipe_control(intel_gpgpu_t *gpgpu)
> }
>
> static void
> -intel_gpgpu_set_L3(intel_gpgpu_t *gpgpu, uint32_t use_slm)
> +intel_gpgpu_set_L3_gen7(intel_gpgpu_t *gpgpu, uint32_t use_slm)
> {
> + /* still set L3 in batch buffer for fulsim. */
> BEGIN_BATCH(gpgpu->batch, 6);
> OUT_BATCH(gpgpu->batch, CMD_LOAD_REGISTER_IMM | 1); /* length - 2 */
> OUT_BATCH(gpgpu->batch, GEN7_L3_CNTL_REG2_ADDRESS_OFFSET);
> @@ -346,7 +349,37 @@ intel_gpgpu_set_L3(intel_gpgpu_t *gpgpu, uint32_t use_slm)
> OUT_BATCH(gpgpu->batch, gpgpu_l3_config_reg2[8]);
> else
> OUT_BATCH(gpgpu->batch, gpgpu_l3_config_reg2[4]);
> - ADVANCE_BATCH(gpgpu->batch);
> + ADVANCE_BATCH(gpgpu->batch);
> +
> + //To set L3 in HSW, enable the flag I915_EXEC_ENABLE_SLM flag when exec
> + if(use_slm)
> + gpgpu->batch->enable_slm = 1;
> + intel_gpgpu_pipe_control(gpgpu);
> +}
> +
> +static void
> +intel_gpgpu_set_L3_gen75(intel_gpgpu_t *gpgpu, uint32_t use_slm)
> +{
> + /* still set L3 in batch buffer for fulsim. */
> + BEGIN_BATCH(gpgpu->batch, 6);
> + OUT_BATCH(gpgpu->batch, CMD_LOAD_REGISTER_IMM | 1); /* length - 2 */
> + OUT_BATCH(gpgpu->batch, GEN7_L3_CNTL_REG2_ADDRESS_OFFSET);
> + if (use_slm)
> + OUT_BATCH(gpgpu->batch, gpgpu_l3_config_reg1[8]);
> + else
> + OUT_BATCH(gpgpu->batch, gpgpu_l3_config_reg1[4]);
> +
> + OUT_BATCH(gpgpu->batch, CMD_LOAD_REGISTER_IMM | 1); /* length - 2 */
> + OUT_BATCH(gpgpu->batch, GEN7_L3_CNTL_REG3_ADDRESS_OFFSET);
> + if (use_slm)
> + OUT_BATCH(gpgpu->batch, gpgpu_l3_config_reg2[8]);
> + else
> + OUT_BATCH(gpgpu->batch, gpgpu_l3_config_reg2[4]);
> + ADVANCE_BATCH(gpgpu->batch);
> +
> + //To set L3 in HSW, enable the flag I915_EXEC_ENABLE_SLM flag when exec
> + if(use_slm)
> + gpgpu->batch->enable_slm = 1;
> intel_gpgpu_pipe_control(gpgpu);
> }
>
> @@ -355,6 +388,7 @@ intel_gpgpu_batch_start(intel_gpgpu_t *gpgpu)
> {
> intel_batchbuffer_start_atomic(gpgpu->batch, 256);
> intel_gpgpu_pipe_control(gpgpu);
> + assert(intel_gpgpu_set_L3);
> intel_gpgpu_set_L3(gpgpu, gpgpu->ker->use_slm);
> intel_gpgpu_select_pipeline(gpgpu);
> intel_gpgpu_set_base_address(gpgpu);
> @@ -1113,10 +1147,14 @@ intel_set_gpgpu_callbacks(int device_id)
> cl_gpgpu_ref_batch_buf = (cl_gpgpu_ref_batch_buf_cb *)intel_gpgpu_ref_batch_buf;
> cl_gpgpu_unref_batch_buf = (cl_gpgpu_unref_batch_buf_cb *)intel_gpgpu_unref_batch_buf;
>
> - if (IS_HASWELL(device_id))
> + if (IS_HASWELL(device_id)) {
> cl_gpgpu_bind_image = (cl_gpgpu_bind_image_cb *) intel_gpgpu_bind_image_gen75;
> - else if (IS_IVYBRIDGE(device_id))
> + intel_gpgpu_set_L3 = intel_gpgpu_set_L3_gen75;
> + }
> + else if (IS_IVYBRIDGE(device_id)) {
> cl_gpgpu_bind_image = (cl_gpgpu_bind_image_cb *) intel_gpgpu_bind_image_gen7;
> + intel_gpgpu_set_L3 = intel_gpgpu_set_L3_gen7;
> + }
> else
> assert(0);
> }
More information about the Beignet
mailing list