[Intel-gfx] [PATCH 5/9] drm/i915/execlists: Unify CSB access pointers

Chris Wilson chris at chris-wilson.co.uk
Thu Jun 28 10:10:58 UTC 2018


Quoting Tvrtko Ursulin (2018-06-28 11:02:17)
> 
> On 27/06/2018 22:07, Chris Wilson wrote:
> > Following the removal of the last workarounds, the only CSB mmio access
> > is for the old vGPU interface. The mmio registers presented by vGPU do
> > not require forcewake and can be treated as ordinary volatile memory,
> > i.e. they behave just like the HWSP access just at a different location.
> > We can reduce the CSB access to a set of read/write/buffer pointers and
> > treat the various paths identically and not worry about forcewake.
> > (Forcewake is nightmare for worstcase latency, and we want to process
> > this all with irqsoff -- no latency allowed!)
> > 
> > Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> > ---
> >   drivers/gpu/drm/i915/intel_engine_cs.c  |  12 ---
> >   drivers/gpu/drm/i915/intel_lrc.c        | 116 ++++++++++--------------
> >   drivers/gpu/drm/i915/intel_ringbuffer.h |  23 +++--
> >   3 files changed, 65 insertions(+), 86 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
> > index d3264bd6e9dc..7209c22798e6 100644
> > --- a/drivers/gpu/drm/i915/intel_engine_cs.c
> > +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
> > @@ -25,7 +25,6 @@
> >   #include <drm/drm_print.h>
> >   
> >   #include "i915_drv.h"
> > -#include "i915_vgpu.h"
> >   #include "intel_ringbuffer.h"
> >   #include "intel_lrc.h"
> >   
> > @@ -456,21 +455,10 @@ static void intel_engine_init_batch_pool(struct intel_engine_cs *engine)
> >       i915_gem_batch_pool_init(&engine->batch_pool, engine);
> >   }
> >   
> > -static bool csb_force_mmio(struct drm_i915_private *i915)
> > -{
> > -     /* Older GVT emulation depends upon intercepting CSB mmio */
> > -     if (intel_vgpu_active(i915) && !intel_vgpu_has_hwsp_emulation(i915))
> > -             return true;
> > -
> > -     return false;
> > -}
> > -
> >   static void intel_engine_init_execlist(struct intel_engine_cs *engine)
> >   {
> >       struct intel_engine_execlists * const execlists = &engine->execlists;
> >   
> > -     execlists->csb_use_mmio = csb_force_mmio(engine->i915);
> > -
> >       execlists->port_mask = 1;
> >       BUILD_BUG_ON_NOT_POWER_OF_2(execlists_num_ports(execlists));
> >       GEM_BUG_ON(execlists_num_ports(execlists) > EXECLIST_MAX_PORTS);
> > diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> > index 91656eb2f2db..368a8c74d11d 100644
> > --- a/drivers/gpu/drm/i915/intel_lrc.c
> > +++ b/drivers/gpu/drm/i915/intel_lrc.c
> > @@ -137,6 +137,7 @@
> >   #include <drm/i915_drm.h>
> >   #include "i915_drv.h"
> >   #include "i915_gem_render_state.h"
> > +#include "i915_vgpu.h"
> >   #include "intel_lrc_reg.h"
> >   #include "intel_mocs.h"
> >   #include "intel_workarounds.h"
> > @@ -953,44 +954,23 @@ static void process_csb(struct intel_engine_cs *engine)
> >   {
> >       struct intel_engine_execlists * const execlists = &engine->execlists;
> >       struct execlist_port *port = execlists->port;
> > -     struct drm_i915_private *i915 = engine->i915;
> > -
> > -     /* The HWSP contains a (cacheable) mirror of the CSB */
> > -     const u32 *buf =
> > -             &engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX];
> > -     unsigned int head, tail;
> > -     bool fw = false;
> > +     const u32 * const buf = execlists->csb_status;
> > +     u8 head, tail;
> >   
> >       /* Clear before reading to catch new interrupts */
> >       clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
> >       smp_mb__after_atomic();
> >   
> > -     if (unlikely(execlists->csb_use_mmio)) {
> > -             intel_uncore_forcewake_get(i915, execlists->fw_domains);
> > -             fw = true;
> > -
> > -             buf = (u32 * __force)
> > -                     (i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0)));
> > +     /* Note that csb_write, csb_status may be either in HWSP or mmio */
> > +     head = execlists->csb_head;
> > +     tail = READ_ONCE(*execlists->csb_write);
> 
> Under GVT when this is emulated mmio I think you need to mask and shift 
> it with GEN8_CSB_WRITE_PTR.

Shift is 0, mask is applied by u8.

> > +     GEM_TRACE("%s cs-irq head=%d, tail=%d\n", engine->name, head, tail);
> > +     if (unlikely(head == tail))
> > +             return;
> >   
> > -             head = readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
> > -             tail = GEN8_CSB_WRITE_PTR(head);
> > -             head = GEN8_CSB_READ_PTR(head);
> > -             execlists->csb_head = head;
> > -     } else {
> > -             const int write_idx =
> > -                     intel_hws_csb_write_index(i915) -
> > -                     I915_HWS_CSB_BUF0_INDEX;
> > +     rmb(); /* Hopefully paired with a wmb() in HW */
> >   
> > -             head = execlists->csb_head;
> > -             tail = READ_ONCE(buf[write_idx]);
> > -             rmb(); /* Hopefully paired with a wmb() in HW */
> > -     }
> > -     GEM_TRACE("%s cs-irq head=%d [%d%s], tail=%d [%d%s]\n",
> > -               engine->name,
> > -               head, GEN8_CSB_READ_PTR(readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?",
> > -               tail, GEN8_CSB_WRITE_PTR(readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?");
> > -
> > -     while (head != tail) {
> > +     do {
> 
> Why convert while to do-while? Early unlikely return above handles the 
> void process_csb calls? Would the same effect happen if you put unlikely 
> in the while (head != tail) condition and would be simpler?

The earlier return to circumvent the lfence.

> >               struct i915_request *rq;
> >               unsigned int status;
> >               unsigned int count;
> > @@ -1016,12 +996,12 @@ static void process_csb(struct intel_engine_cs *engine)
> >                * status notifier.
> >                */
> >   
> > -             status = READ_ONCE(buf[2 * head]); /* maybe mmio! */
> >               GEM_TRACE("%s csb[%d]: status=0x%08x:0x%08x, active=0x%x\n",
> >                         engine->name, head,
> > -                       status, buf[2*head + 1],
> > +                       buf[2 * head + 0], buf[2 * head + 1],
> >                         execlists->active);
> >   
> > +             status = buf[2 * head];
> 
> Why not leave before GEM_TRACE above, as is, and then diff is smaller?

You made me correct the GEM_TRACE!

Looking at it I still prefer 2 buf[] vs the mixed local/buf[].

> >               if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
> >                             GEN8_CTX_STATUS_PREEMPTED))
> >                       execlists_set_active(execlists,
> > @@ -1103,16 +1083,11 @@ static void process_csb(struct intel_engine_cs *engine)
> >               } else {
> >                       port_set(port, port_pack(rq, count));
> >               }
> > -     }
> > +     } while (head != tail);
> >   
> > -     if (head != execlists->csb_head) {
> > -             execlists->csb_head = head;
> > -             writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8),
> > -                    i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
> > -     }
> > -
> > -     if (unlikely(fw))
> > -             intel_uncore_forcewake_put(i915, execlists->fw_domains);
> > +     writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8),
> > +            execlists->csb_read);
> > +     execlists->csb_head = head;
> 
> I guess early return helps to avoid this, but since it is going away in 
> a following patch maybe it is not worth it.

lfence!
-Chris


More information about the Intel-gfx mailing list