[Intel-gfx] [PATCH] drm/i915/tgl: Extend MI_SEMAPHORE_WAIT
Mika Kuoppala
mika.kuoppala at linux.intel.com
Tue Sep 17 13:17:58 UTC 2019
Chris Wilson <chris at chris-wilson.co.uk> writes:
> On Tigerlake, MI_SEMAPHORE_WAIT grew an extra dword, so be sure to
> update the length field and emit that extra parameter and any padding
> noop as required.
>
> v2: Define the token shift while we are adding the updated MI_SEMAPHORE_WAIT
> v3: Use int instead of bool in the addition so that readers are not left
> wondering about the intricacies of the C spec. Now they just have to
> worry what the integer value of a boolean operation is...
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Mika Kuoppala <mika.kuoppala at linux.intel.com>
> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio at intel.com>
> Cc: Michal Winiarski <michal.winiarski at intel.com>
Reviewed-by: Mika Kuoppala <mika.kuoppala at linux.intel.com>
> ---
> drivers/gpu/drm/i915/gt/intel_gpu_commands.h | 3 +
> drivers/gpu/drm/i915/gt/intel_lrc.c | 71 ++++++++++++++++++--
> drivers/gpu/drm/i915/i915_pci.c | 1 -
> drivers/gpu/drm/i915/i915_request.c | 21 ++++--
> 4 files changed, 84 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
> index fbad403ab7ac..f78b13d74e17 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
> +++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
> @@ -112,6 +112,7 @@
> #define MI_SEMAPHORE_SIGNAL MI_INSTR(0x1b, 0) /* GEN8+ */
> #define MI_SEMAPHORE_TARGET(engine) ((engine)<<15)
> #define MI_SEMAPHORE_WAIT MI_INSTR(0x1c, 2) /* GEN8+ */
> +#define MI_SEMAPHORE_WAIT_TOKEN MI_INSTR(0x1c, 3) /* GEN12+ */
> #define MI_SEMAPHORE_POLL (1 << 15)
> #define MI_SEMAPHORE_SAD_GT_SDD (0 << 12)
> #define MI_SEMAPHORE_SAD_GTE_SDD (1 << 12)
> @@ -119,6 +120,8 @@
> #define MI_SEMAPHORE_SAD_LTE_SDD (3 << 12)
> #define MI_SEMAPHORE_SAD_EQ_SDD (4 << 12)
> #define MI_SEMAPHORE_SAD_NEQ_SDD (5 << 12)
> +#define MI_SEMAPHORE_TOKEN_MASK REG_GENMASK(9, 5)
> +#define MI_SEMAPHORE_TOKEN_SHIFT 5
> #define MI_STORE_DWORD_IMM MI_INSTR(0x20, 1)
> #define MI_STORE_DWORD_IMM_GEN4 MI_INSTR(0x20, 2)
> #define MI_MEM_VIRTUAL (1 << 22) /* 945,g33,965 */
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index a3f0e4999744..a99166a2d2eb 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -2879,6 +2879,22 @@ static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
> return gen8_emit_fini_breadcrumb_footer(request, cs);
> }
>
> +static u32 *
> +gen11_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
> +{
> + cs = gen8_emit_ggtt_write_rcs(cs,
> + request->fence.seqno,
> + request->timeline->hwsp_offset,
> + PIPE_CONTROL_CS_STALL |
> + PIPE_CONTROL_TILE_CACHE_FLUSH |
> + PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
> + PIPE_CONTROL_DEPTH_CACHE_FLUSH |
> + PIPE_CONTROL_DC_FLUSH_ENABLE |
> + PIPE_CONTROL_FLUSH_ENABLE);
> +
> + return gen8_emit_fini_breadcrumb_footer(request, cs);
> +}
> +
> /*
> * Note that the CS instruction pre-parser will not stall on the breadcrumb
> * flush and will continue pre-fetching the instructions after it before the
> @@ -2897,8 +2913,49 @@ static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
> * All the above applies only to the instructions themselves. Non-inline data
> * used by the instructions is not pre-fetched.
> */
> -static u32 *gen11_emit_fini_breadcrumb_rcs(struct i915_request *request,
> - u32 *cs)
> +
> +static u32 *gen12_emit_preempt_busywait(struct i915_request *request, u32 *cs)
> +{
> + *cs++ = MI_SEMAPHORE_WAIT_TOKEN |
> + MI_SEMAPHORE_GLOBAL_GTT |
> + MI_SEMAPHORE_POLL |
> + MI_SEMAPHORE_SAD_EQ_SDD;
> + *cs++ = 0;
> + *cs++ = intel_hws_preempt_address(request->engine);
> + *cs++ = 0;
> + *cs++ = 0;
> + *cs++ = MI_NOOP;
> +
> + return cs;
> +}
> +
> +static __always_inline u32*
> +gen12_emit_fini_breadcrumb_footer(struct i915_request *request, u32 *cs)
> +{
> + *cs++ = MI_USER_INTERRUPT;
> +
> + *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
> + if (intel_engine_has_semaphores(request->engine))
> + cs = gen12_emit_preempt_busywait(request, cs);
> +
> + request->tail = intel_ring_offset(request, cs);
> + assert_ring_tail_valid(request->ring, request->tail);
> +
> + return gen8_emit_wa_tail(request, cs);
> +}
> +
> +static u32 *gen12_emit_fini_breadcrumb(struct i915_request *request, u32 *cs)
> +{
> + cs = gen8_emit_ggtt_write(cs,
> + request->fence.seqno,
> + request->timeline->hwsp_offset,
> + 0);
> +
> + return gen12_emit_fini_breadcrumb_footer(request, cs);
> +}
> +
> +static u32 *
> +gen12_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
> {
> cs = gen8_emit_ggtt_write_rcs(cs,
> request->fence.seqno,
> @@ -2910,7 +2967,7 @@ static u32 *gen11_emit_fini_breadcrumb_rcs(struct i915_request *request,
> PIPE_CONTROL_DC_FLUSH_ENABLE |
> PIPE_CONTROL_FLUSH_ENABLE);
>
> - return gen8_emit_fini_breadcrumb_footer(request, cs);
> + return gen12_emit_fini_breadcrumb_footer(request, cs);
> }
>
> static void execlists_park(struct intel_engine_cs *engine)
> @@ -2939,9 +2996,6 @@ void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
> engine->flags |= I915_ENGINE_HAS_PREEMPTION;
> }
>
> - if (INTEL_GEN(engine->i915) >= 12) /* XXX disabled for debugging */
> - engine->flags &= ~I915_ENGINE_HAS_SEMAPHORES;
> -
> if (engine->class != COPY_ENGINE_CLASS && INTEL_GEN(engine->i915) >= 12)
> engine->flags |= I915_ENGINE_HAS_RELATIVE_MMIO;
> }
> @@ -2971,6 +3025,8 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine)
> engine->emit_flush = gen8_emit_flush;
> engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb;
> engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb;
> + if (INTEL_GEN(engine->i915) >= 12)
> + engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb;
>
> engine->set_default_submission = intel_execlists_set_default_submission;
>
> @@ -3016,6 +3072,9 @@ static void rcs_submission_override(struct intel_engine_cs *engine)
> {
> switch (INTEL_GEN(engine->i915)) {
> case 12:
> + engine->emit_flush = gen11_emit_flush_render;
> + engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_rcs;
> + break;
> case 11:
> engine->emit_flush = gen11_emit_flush_render;
> engine->emit_fini_breadcrumb = gen11_emit_fini_breadcrumb_rcs;
> diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
> index e4a26bbd8788..fe6941c8fc99 100644
> --- a/drivers/gpu/drm/i915/i915_pci.c
> +++ b/drivers/gpu/drm/i915/i915_pci.c
> @@ -797,7 +797,6 @@ static const struct intel_device_info intel_tigerlake_12_info = {
> .display.has_modular_fia = 1,
> .engine_mask =
> BIT(RCS0) | BIT(BCS0) | BIT(VECS0) | BIT(VCS0) | BIT(VCS2),
> - .has_logical_ring_preemption = false, /* XXX disabled for debugging */
> .engine_mask = BIT(RCS0), /* XXX reduced for debugging */
> };
>
> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> index 754a78364a63..3ecf92aa5fc1 100644
> --- a/drivers/gpu/drm/i915/i915_request.c
> +++ b/drivers/gpu/drm/i915/i915_request.c
> @@ -783,7 +783,9 @@ emit_semaphore_wait(struct i915_request *to,
> struct i915_request *from,
> gfp_t gfp)
> {
> + const int has_token = INTEL_GEN(to->i915) >= 12;
> u32 hwsp_offset;
> + int len;
> u32 *cs;
> int err;
>
> @@ -810,7 +812,11 @@ emit_semaphore_wait(struct i915_request *to,
> if (err)
> return err;
>
> - cs = intel_ring_begin(to, 4);
> + len = 4;
> + if (has_token)
> + len += 2;
> +
> + cs = intel_ring_begin(to, len);
> if (IS_ERR(cs))
> return PTR_ERR(cs);
>
> @@ -822,13 +828,18 @@ emit_semaphore_wait(struct i915_request *to,
> * (post-wrap) values than they were expecting (and so wait
> * forever).
> */
> - *cs++ = MI_SEMAPHORE_WAIT |
> - MI_SEMAPHORE_GLOBAL_GTT |
> - MI_SEMAPHORE_POLL |
> - MI_SEMAPHORE_SAD_GTE_SDD;
> + *cs++ = (MI_SEMAPHORE_WAIT |
> + MI_SEMAPHORE_GLOBAL_GTT |
> + MI_SEMAPHORE_POLL |
> + MI_SEMAPHORE_SAD_GTE_SDD) +
> + has_token;
> *cs++ = from->fence.seqno;
> *cs++ = hwsp_offset;
> *cs++ = 0;
> + if (has_token) {
> + *cs++ = 0;
> + *cs++ = MI_NOOP;
> + }
>
> intel_ring_advance(to, cs);
> to->sched.semaphores |= from->engine->mask;
> --
> 2.23.0
More information about the Intel-gfx
mailing list