[Intel-gfx] [PATCH v2] drm/i915/execlists: Unify CSB access pointers
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Thu Jun 28 11:21:44 UTC 2018
On 28/06/2018 12:13, Chris Wilson wrote:
> Following the removal of the last workarounds, the only CSB mmio access
> is for the old vGPU interface. The mmio registers presented by vGPU do
> not require forcewake and can be treated as ordinary volatile memory,
> i.e. they behave just like the HWSP access just at a different location.
> We can reduce the CSB access to a set of read/write/buffer pointers and
> treat the various paths identically and not worry about forcewake.
> (Forcewake is nightmare for worstcase latency, and we want to process
> this all with irqsoff -- no latency allowed!)
>
> v2: Comments, comments, comments. Well, 2 bonus comments.
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> ---
> drivers/gpu/drm/i915/intel_engine_cs.c | 12 ---
> drivers/gpu/drm/i915/intel_lrc.c | 133 ++++++++++++------------
> drivers/gpu/drm/i915/intel_ringbuffer.h | 23 ++--
> 3 files changed, 82 insertions(+), 86 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
> index d3264bd6e9dc..7209c22798e6 100644
> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
> @@ -25,7 +25,6 @@
> #include <drm/drm_print.h>
>
> #include "i915_drv.h"
> -#include "i915_vgpu.h"
> #include "intel_ringbuffer.h"
> #include "intel_lrc.h"
>
> @@ -456,21 +455,10 @@ static void intel_engine_init_batch_pool(struct intel_engine_cs *engine)
> i915_gem_batch_pool_init(&engine->batch_pool, engine);
> }
>
> -static bool csb_force_mmio(struct drm_i915_private *i915)
> -{
> - /* Older GVT emulation depends upon intercepting CSB mmio */
> - if (intel_vgpu_active(i915) && !intel_vgpu_has_hwsp_emulation(i915))
> - return true;
> -
> - return false;
> -}
> -
> static void intel_engine_init_execlist(struct intel_engine_cs *engine)
> {
> struct intel_engine_execlists * const execlists = &engine->execlists;
>
> - execlists->csb_use_mmio = csb_force_mmio(engine->i915);
> -
> execlists->port_mask = 1;
> BUILD_BUG_ON_NOT_POWER_OF_2(execlists_num_ports(execlists));
> GEM_BUG_ON(execlists_num_ports(execlists) > EXECLIST_MAX_PORTS);
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 91656eb2f2db..8531a5b6f6ff 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -137,6 +137,7 @@
> #include <drm/i915_drm.h>
> #include "i915_drv.h"
> #include "i915_gem_render_state.h"
> +#include "i915_vgpu.h"
> #include "intel_lrc_reg.h"
> #include "intel_mocs.h"
> #include "intel_workarounds.h"
> @@ -953,44 +954,40 @@ static void process_csb(struct intel_engine_cs *engine)
> {
> struct intel_engine_execlists * const execlists = &engine->execlists;
> struct execlist_port *port = execlists->port;
> - struct drm_i915_private *i915 = engine->i915;
> -
> - /* The HWSP contains a (cacheable) mirror of the CSB */
> - const u32 *buf =
> - &engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX];
> - unsigned int head, tail;
> - bool fw = false;
> + const u32 * const buf = execlists->csb_status;
> + u8 head, tail;
>
> /* Clear before reading to catch new interrupts */
> clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
> smp_mb__after_atomic();
>
> - if (unlikely(execlists->csb_use_mmio)) {
> - intel_uncore_forcewake_get(i915, execlists->fw_domains);
> - fw = true;
> -
> - buf = (u32 * __force)
> - (i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0)));
> -
> - head = readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
> - tail = GEN8_CSB_WRITE_PTR(head);
> - head = GEN8_CSB_READ_PTR(head);
> - execlists->csb_head = head;
> - } else {
> - const int write_idx =
> - intel_hws_csb_write_index(i915) -
> - I915_HWS_CSB_BUF0_INDEX;
> + /*
> + * Note that csb_write, csb_status may be either in HWSP or mmio.
> + * When reading from the csb_write mmio register, we have to be
> + * careful to only use the GEN8_CSB_WRITE_PTR portion, which is
> + * the low 4bits. As it happens we know the next 4bits are always
> + * zero and so we can simply masked off the low u8 of the register
> + * and treat it identically to reading from the HWSP (without having
> + * to use explicit shifting and masking, and probably bifurcating
> + * the code to handle the legacy mmio read).
> + */
> + head = execlists->csb_head;
> + tail = READ_ONCE(*execlists->csb_write);
> + GEM_TRACE("%s cs-irq head=%d, tail=%d\n", engine->name, head, tail);
> + if (unlikely(head == tail))
> + return;
>
> - head = execlists->csb_head;
> - tail = READ_ONCE(buf[write_idx]);
> - rmb(); /* Hopefully paired with a wmb() in HW */
> - }
> - GEM_TRACE("%s cs-irq head=%d [%d%s], tail=%d [%d%s]\n",
> - engine->name,
> - head, GEN8_CSB_READ_PTR(readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?",
> - tail, GEN8_CSB_WRITE_PTR(readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?");
> + /*
> + * Hopefully paired with a wmb() in HW!
> + *
> + * We must complete the read of the write pointer before any reads
> + * from the CSB, so that we do not see stale values. Without an rmb
> + * (lfence) the HW may speculatively perform the CSB[] reads *before*
> + * we perform the READ_ONCE(*csb_write).
> + */
> + rmb();
>
> - while (head != tail) {
> + do {
> struct i915_request *rq;
> unsigned int status;
> unsigned int count;
> @@ -1016,12 +1013,12 @@ static void process_csb(struct intel_engine_cs *engine)
> * status notifier.
> */
>
> - status = READ_ONCE(buf[2 * head]); /* maybe mmio! */
> GEM_TRACE("%s csb[%d]: status=0x%08x:0x%08x, active=0x%x\n",
> engine->name, head,
> - status, buf[2*head + 1],
> + buf[2 * head + 0], buf[2 * head + 1],
> execlists->active);
>
> + status = buf[2 * head];
> if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
> GEN8_CTX_STATUS_PREEMPTED))
> execlists_set_active(execlists,
> @@ -1103,16 +1100,11 @@ static void process_csb(struct intel_engine_cs *engine)
> } else {
> port_set(port, port_pack(rq, count));
> }
> - }
> -
> - if (head != execlists->csb_head) {
> - execlists->csb_head = head;
> - writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8),
> - i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
> - }
> + } while (head != tail);
>
> - if (unlikely(fw))
> - intel_uncore_forcewake_put(i915, execlists->fw_domains);
> + writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8),
> + execlists->csb_read);
> + execlists->csb_head = head;
> }
>
> /*
> @@ -2430,28 +2422,11 @@ logical_ring_default_irqs(struct intel_engine_cs *engine)
> static void
> logical_ring_setup(struct intel_engine_cs *engine)
> {
> - struct drm_i915_private *dev_priv = engine->i915;
> - enum forcewake_domains fw_domains;
> -
> intel_engine_setup_common(engine);
>
> /* Intentionally left blank. */
> engine->buffer = NULL;
>
> - fw_domains = intel_uncore_forcewake_for_reg(dev_priv,
> - RING_ELSP(engine),
> - FW_REG_WRITE);
> -
> - fw_domains |= intel_uncore_forcewake_for_reg(dev_priv,
> - RING_CONTEXT_STATUS_PTR(engine),
> - FW_REG_READ | FW_REG_WRITE);
> -
> - fw_domains |= intel_uncore_forcewake_for_reg(dev_priv,
> - RING_CONTEXT_STATUS_BUF_BASE(engine),
> - FW_REG_READ);
> -
> - engine->execlists.fw_domains = fw_domains;
> -
> tasklet_init(&engine->execlists.tasklet,
> execlists_submission_tasklet, (unsigned long)engine);
>
> @@ -2459,34 +2434,56 @@ logical_ring_setup(struct intel_engine_cs *engine)
> logical_ring_default_irqs(engine);
> }
>
> +static bool csb_force_mmio(struct drm_i915_private *i915)
> +{
> + /* Older GVT emulation depends upon intercepting CSB mmio */
> + return intel_vgpu_active(i915) && !intel_vgpu_has_hwsp_emulation(i915);
> +}
> +
> static int logical_ring_init(struct intel_engine_cs *engine)
> {
> + struct drm_i915_private *i915 = engine->i915;
> + struct intel_engine_execlists * const execlists = &engine->execlists;
> int ret;
>
> ret = intel_engine_init_common(engine);
> if (ret)
> goto error;
>
> - if (HAS_LOGICAL_RING_ELSQ(engine->i915)) {
> - engine->execlists.submit_reg = engine->i915->regs +
> + if (HAS_LOGICAL_RING_ELSQ(i915)) {
> + execlists->submit_reg = i915->regs +
> i915_mmio_reg_offset(RING_EXECLIST_SQ_CONTENTS(engine));
> - engine->execlists.ctrl_reg = engine->i915->regs +
> + execlists->ctrl_reg = i915->regs +
> i915_mmio_reg_offset(RING_EXECLIST_CONTROL(engine));
> } else {
> - engine->execlists.submit_reg = engine->i915->regs +
> + execlists->submit_reg = i915->regs +
> i915_mmio_reg_offset(RING_ELSP(engine));
> }
>
> - engine->execlists.preempt_complete_status = ~0u;
> - if (engine->i915->preempt_context) {
> + execlists->preempt_complete_status = ~0u;
> + if (i915->preempt_context) {
> struct intel_context *ce =
> - to_intel_context(engine->i915->preempt_context, engine);
> + to_intel_context(i915->preempt_context, engine);
>
> - engine->execlists.preempt_complete_status =
> + execlists->preempt_complete_status =
> upper_32_bits(ce->lrc_desc);
> }
>
> - engine->execlists.csb_head = GEN8_CSB_ENTRIES - 1;
> + execlists->csb_head = GEN8_CSB_ENTRIES - 1;
> + execlists->csb_read =
> + i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine));
> + if (csb_force_mmio(i915)) {
> + execlists->csb_status = (u32 __force *)
> + (i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0)));
> +
> + execlists->csb_write = (u32 __force *)execlists->csb_read;
> + } else {
> + execlists->csb_status =
> + &engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX];
> +
> + execlists->csb_write =
> + &engine->status_page.page_addr[intel_hws_csb_write_index(i915)];
> + }
>
> return 0;
>
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 78f01a35823a..970dbb3c9812 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -300,19 +300,30 @@ struct intel_engine_execlists {
> struct rb_node *first;
>
> /**
> - * @fw_domains: forcewake domains for irq tasklet
> + * @csb_head: context status buffer head
> */
> - unsigned int fw_domains;
> + unsigned int csb_head;
>
> /**
> - * @csb_head: context status buffer head
> + * @csb_read: control register for Context Switch buffer
> + *
> + * Note this register is always in mmio.
> */
> - unsigned int csb_head;
> + u32 __iomem *csb_read;
>
> /**
> - * @csb_use_mmio: access csb through mmio, instead of hwsp
> + * @csb_write: control register for Context Switch buffer
> + *
> + * Note this register may be either mmio or HWSP shadow.
> + */
> + u32 *csb_write;
> +
> + /**
> + * @csb_status: status array for Context Switch buffer
> + *
> + * Note these register may be either mmio or HWSP shadow.
> */
> - bool csb_use_mmio;
> + u32 *csb_status;
>
> /**
> * @preempt_complete_status: expected CSB upon completing preemption
>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
Regards,
Tvrtko
More information about the Intel-gfx
mailing list