[Intel-gfx] [PATCH v2 4/5] drm/i915/execlists: Read the context-status buffer from the HWSP
Zhenyu Wang
zhenyuw at linux.intel.com
Thu Jul 13 09:14:34 UTC 2017
On 2017.07.13 10:00:25 +0100, Chris Wilson wrote:
> The engine provides a mirror of the CSB in the HWSP. If we use the
> cacheable reads from the HWSP, we can shave off a few mmio reads per
> context-switch interrupt (which are quite frequent!). Just removing a
> couple of mmio is not enough to actually reduce any latency, but a small
> reduction in overall cpu usage.
Unfortunately, gvt's current execlist emulation depends on MMIO CSB reads
for guest workloads and does not update the guest HWSP, so this can't work for a guest.
We need to fix that in gvt; reducing MMIO traps would be a good benefit for vGPU too.
But for now we might have to fall back to mmio mode if a vgpu is active, and once
the gvt host is fixed, it will notify through pvinfo to enable this.
>
> Much appreciation for Ben dropping the bombshell that the CSB was in the
> HWSP and for Michel in digging out the details.
>
> v2: Don't be lazy, add the defines for the indices.
> v3: Include the HWSP in debugfs/i915_engine_info
>
> Suggested-by: Ben Widawsky <benjamin.widawsky at intel.com>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Michel Thierry <michel.thierry at intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> Cc: Mika Kuoppala <mika.kuoppala at intel.com>
> Acked-by: Michel Thierry <michel.thierry at intel.com>
> ---
> drivers/gpu/drm/i915/i915_debugfs.c | 7 +++++--
> drivers/gpu/drm/i915/intel_lrc.c | 10 +++++-----
> drivers/gpu/drm/i915/intel_ringbuffer.h | 2 ++
> 3 files changed, 12 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 620c9218d1c1..5fd01c14a3ec 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -3384,6 +3384,7 @@ static int i915_engine_info(struct seq_file *m, void *unused)
> upper_32_bits(addr), lower_32_bits(addr));
>
> if (i915.enable_execlists) {
> + const u32 *hws = &engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX];
> u32 ptr, read, write;
> unsigned int idx;
>
> @@ -3404,10 +3405,12 @@ static int i915_engine_info(struct seq_file *m, void *unused)
> write += GEN8_CSB_ENTRIES;
> while (read < write) {
> idx = ++read % GEN8_CSB_ENTRIES;
> - seq_printf(m, "\tExeclist CSB[%d]: 0x%08x, context: %d\n",
> + seq_printf(m, "\tExeclist CSB[%d]: 0x%08x [0x%08x in hwsp], context: %d [%d in hwsp]\n",
> idx,
> I915_READ(RING_CONTEXT_STATUS_BUF_LO(engine, idx)),
> - I915_READ(RING_CONTEXT_STATUS_BUF_HI(engine, idx)));
> + hws[idx * 2],
> + I915_READ(RING_CONTEXT_STATUS_BUF_HI(engine, idx)),
> + hws[idx * 2 + 1]);
> }
>
> rcu_read_lock();
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 3469badedbe0..a887379b004d 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -547,8 +547,9 @@ static void intel_lrc_irq_handler(unsigned long data)
> while (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted)) {
> u32 __iomem *csb_mmio =
> dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine));
> - u32 __iomem *buf =
> - dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0));
> + /* The HWSP contains a (cacheable) mirror of the CSB */
> + const u32 *buf =
> + &engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX];
> unsigned int head, tail;
>
> /* The write will be ordered by the uncached read (itself
> @@ -590,13 +591,12 @@ static void intel_lrc_irq_handler(unsigned long data)
> * status notifier.
> */
>
> - status = readl(buf + 2 * head);
> + status = buf[2 * head];
> if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
> continue;
>
> /* Check the context/desc id for this event matches */
> - GEM_DEBUG_BUG_ON(readl(buf + 2 * head + 1) !=
> - port->context_id);
> + GEM_DEBUG_BUG_ON(buf[2 * head + 1] != port->context_id);
>
> rq = port_unpack(port, &count);
> GEM_BUG_ON(count == 0);
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index d33c93444c0d..2c55cfa14fb5 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -496,6 +496,8 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value)
> #define I915_GEM_HWS_SCRATCH_INDEX 0x40
> #define I915_GEM_HWS_SCRATCH_ADDR (I915_GEM_HWS_SCRATCH_INDEX << MI_STORE_DWORD_INDEX_SHIFT)
>
> +#define I915_HWS_CSB_BUF0_INDEX 0x10
> +
> struct intel_ring *
> intel_engine_create_ring(struct intel_engine_cs *engine, int size);
> int intel_ring_pin(struct intel_ring *ring,
> --
> 2.13.2
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
--
Open Source Technology Center, Intel ltd.
$gpg --keyserver wwwkeys.pgp.net --recv-keys 4D781827
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 195 bytes
Desc: not available
URL: <https://lists.freedesktop.org/archives/intel-gfx/attachments/20170713/e889c06d/attachment.sig>
More information about the Intel-gfx
mailing list