[Intel-gfx] [PATCH 5/9] drm/i915/execlists: Unify CSB access pointers
Chris Wilson
chris at chris-wilson.co.uk
Thu Jun 28 10:10:58 UTC 2018
Quoting Tvrtko Ursulin (2018-06-28 11:02:17)
>
> On 27/06/2018 22:07, Chris Wilson wrote:
> > Following the removal of the last workarounds, the only CSB mmio access
> > is for the old vGPU interface. The mmio registers presented by vGPU do
> > not require forcewake and can be treated as ordinary volatile memory,
> > i.e. they behave just like the HWSP access just at a different location.
> > We can reduce the CSB access to a set of read/write/buffer pointers and
> > treat the various paths identically and not worry about forcewake.
> > (Forcewake is nightmare for worstcase latency, and we want to process
> > this all with irqsoff -- no latency allowed!)
> >
> > Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> > ---
> > drivers/gpu/drm/i915/intel_engine_cs.c | 12 ---
> > drivers/gpu/drm/i915/intel_lrc.c | 116 ++++++++++--------------
> > drivers/gpu/drm/i915/intel_ringbuffer.h | 23 +++--
> > 3 files changed, 65 insertions(+), 86 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
> > index d3264bd6e9dc..7209c22798e6 100644
> > --- a/drivers/gpu/drm/i915/intel_engine_cs.c
> > +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
> > @@ -25,7 +25,6 @@
> > #include <drm/drm_print.h>
> >
> > #include "i915_drv.h"
> > -#include "i915_vgpu.h"
> > #include "intel_ringbuffer.h"
> > #include "intel_lrc.h"
> >
> > @@ -456,21 +455,10 @@ static void intel_engine_init_batch_pool(struct intel_engine_cs *engine)
> > i915_gem_batch_pool_init(&engine->batch_pool, engine);
> > }
> >
> > -static bool csb_force_mmio(struct drm_i915_private *i915)
> > -{
> > - /* Older GVT emulation depends upon intercepting CSB mmio */
> > - if (intel_vgpu_active(i915) && !intel_vgpu_has_hwsp_emulation(i915))
> > - return true;
> > -
> > - return false;
> > -}
> > -
> > static void intel_engine_init_execlist(struct intel_engine_cs *engine)
> > {
> > struct intel_engine_execlists * const execlists = &engine->execlists;
> >
> > - execlists->csb_use_mmio = csb_force_mmio(engine->i915);
> > -
> > execlists->port_mask = 1;
> > BUILD_BUG_ON_NOT_POWER_OF_2(execlists_num_ports(execlists));
> > GEM_BUG_ON(execlists_num_ports(execlists) > EXECLIST_MAX_PORTS);
> > diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> > index 91656eb2f2db..368a8c74d11d 100644
> > --- a/drivers/gpu/drm/i915/intel_lrc.c
> > +++ b/drivers/gpu/drm/i915/intel_lrc.c
> > @@ -137,6 +137,7 @@
> > #include <drm/i915_drm.h>
> > #include "i915_drv.h"
> > #include "i915_gem_render_state.h"
> > +#include "i915_vgpu.h"
> > #include "intel_lrc_reg.h"
> > #include "intel_mocs.h"
> > #include "intel_workarounds.h"
> > @@ -953,44 +954,23 @@ static void process_csb(struct intel_engine_cs *engine)
> > {
> > struct intel_engine_execlists * const execlists = &engine->execlists;
> > struct execlist_port *port = execlists->port;
> > - struct drm_i915_private *i915 = engine->i915;
> > -
> > - /* The HWSP contains a (cacheable) mirror of the CSB */
> > - const u32 *buf =
> > - &engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX];
> > - unsigned int head, tail;
> > - bool fw = false;
> > + const u32 * const buf = execlists->csb_status;
> > + u8 head, tail;
> >
> > /* Clear before reading to catch new interrupts */
> > clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
> > smp_mb__after_atomic();
> >
> > - if (unlikely(execlists->csb_use_mmio)) {
> > - intel_uncore_forcewake_get(i915, execlists->fw_domains);
> > - fw = true;
> > -
> > - buf = (u32 * __force)
> > - (i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0)));
> > + /* Note that csb_write, csb_status may be either in HWSP or mmio */
> > + head = execlists->csb_head;
> > + tail = READ_ONCE(*execlists->csb_write);
>
> Under GVT when this is emulated mmio I think you need to mask and shift
> it with GEN8_CSB_WRITE_PTR.
Shift is 0, mask is applied by u8.
> > + GEM_TRACE("%s cs-irq head=%d, tail=%d\n", engine->name, head, tail);
> > + if (unlikely(head == tail))
> > + return;
> >
> > - head = readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
> > - tail = GEN8_CSB_WRITE_PTR(head);
> > - head = GEN8_CSB_READ_PTR(head);
> > - execlists->csb_head = head;
> > - } else {
> > - const int write_idx =
> > - intel_hws_csb_write_index(i915) -
> > - I915_HWS_CSB_BUF0_INDEX;
> > + rmb(); /* Hopefully paired with a wmb() in HW */
> >
> > - head = execlists->csb_head;
> > - tail = READ_ONCE(buf[write_idx]);
> > - rmb(); /* Hopefully paired with a wmb() in HW */
> > - }
> > - GEM_TRACE("%s cs-irq head=%d [%d%s], tail=%d [%d%s]\n",
> > - engine->name,
> > - head, GEN8_CSB_READ_PTR(readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?",
> > - tail, GEN8_CSB_WRITE_PTR(readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?");
> > -
> > - while (head != tail) {
> > + do {
>
> Why convert while to do-while? Early unlikely return above handles the
> void process_csb calls? Would the same effect happen if you put unlikely
> in the while (head != tail) condition and would be simpler?
The earlier return to circumvent the lfence.
> > struct i915_request *rq;
> > unsigned int status;
> > unsigned int count;
> > @@ -1016,12 +996,12 @@ static void process_csb(struct intel_engine_cs *engine)
> > * status notifier.
> > */
> >
> > - status = READ_ONCE(buf[2 * head]); /* maybe mmio! */
> > GEM_TRACE("%s csb[%d]: status=0x%08x:0x%08x, active=0x%x\n",
> > engine->name, head,
> > - status, buf[2*head + 1],
> > + buf[2 * head + 0], buf[2 * head + 1],
> > execlists->active);
> >
> > + status = buf[2 * head];
>
> Why not leave before GEM_TRACE above, as is, and then diff is smaller?
You made me correct the GEM_TRACE!
Looking at it I still prefer 2 buf[] vs the mixed local/buf[].
> > if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
> > GEN8_CTX_STATUS_PREEMPTED))
> > execlists_set_active(execlists,
> > @@ -1103,16 +1083,11 @@ static void process_csb(struct intel_engine_cs *engine)
> > } else {
> > port_set(port, port_pack(rq, count));
> > }
> > - }
> > + } while (head != tail);
> >
> > - if (head != execlists->csb_head) {
> > - execlists->csb_head = head;
> > - writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8),
> > - i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
> > - }
> > -
> > - if (unlikely(fw))
> > - intel_uncore_forcewake_put(i915, execlists->fw_domains);
> > + writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8),
> > + execlists->csb_read);
> > + execlists->csb_head = head;
>
> I guess early return helps to avoid this, but since it is going away in
> a following patch maybe it is not worth it.
lfence!
-Chris
More information about the Intel-gfx
mailing list