[Intel-gfx] [PATCH 5/9] drm/i915/execlists: Unify CSB access pointers

Tvrtko Ursulin tvrtko.ursulin at linux.intel.com
Thu Jun 28 10:58:26 UTC 2018


On 28/06/2018 11:10, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2018-06-28 11:02:17)
>>
>> On 27/06/2018 22:07, Chris Wilson wrote:
>>> Following the removal of the last workarounds, the only CSB mmio access
>>> is for the old vGPU interface. The mmio registers presented by vGPU do
>>> not require forcewake and can be treated as ordinary volatile memory,
>>> i.e. they behave just like the HWSP access just at a different location.
>>> We can reduce the CSB access to a set of read/write/buffer pointers and
>>> treat the various paths identically and not worry about forcewake.
>>> (Forcewake is nightmare for worstcase latency, and we want to process
>>> this all with irqsoff -- no latency allowed!)
>>>
>>> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
>>> ---
>>>    drivers/gpu/drm/i915/intel_engine_cs.c  |  12 ---
>>>    drivers/gpu/drm/i915/intel_lrc.c        | 116 ++++++++++--------------
>>>    drivers/gpu/drm/i915/intel_ringbuffer.h |  23 +++--
>>>    3 files changed, 65 insertions(+), 86 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
>>> index d3264bd6e9dc..7209c22798e6 100644
>>> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
>>> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
>>> @@ -25,7 +25,6 @@
>>>    #include <drm/drm_print.h>
>>>    
>>>    #include "i915_drv.h"
>>> -#include "i915_vgpu.h"
>>>    #include "intel_ringbuffer.h"
>>>    #include "intel_lrc.h"
>>>    
>>> @@ -456,21 +455,10 @@ static void intel_engine_init_batch_pool(struct intel_engine_cs *engine)
>>>        i915_gem_batch_pool_init(&engine->batch_pool, engine);
>>>    }
>>>    
>>> -static bool csb_force_mmio(struct drm_i915_private *i915)
>>> -{
>>> -     /* Older GVT emulation depends upon intercepting CSB mmio */
>>> -     if (intel_vgpu_active(i915) && !intel_vgpu_has_hwsp_emulation(i915))
>>> -             return true;
>>> -
>>> -     return false;
>>> -}
>>> -
>>>    static void intel_engine_init_execlist(struct intel_engine_cs *engine)
>>>    {
>>>        struct intel_engine_execlists * const execlists = &engine->execlists;
>>>    
>>> -     execlists->csb_use_mmio = csb_force_mmio(engine->i915);
>>> -
>>>        execlists->port_mask = 1;
>>>        BUILD_BUG_ON_NOT_POWER_OF_2(execlists_num_ports(execlists));
>>>        GEM_BUG_ON(execlists_num_ports(execlists) > EXECLIST_MAX_PORTS);
>>> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
>>> index 91656eb2f2db..368a8c74d11d 100644
>>> --- a/drivers/gpu/drm/i915/intel_lrc.c
>>> +++ b/drivers/gpu/drm/i915/intel_lrc.c
>>> @@ -137,6 +137,7 @@
>>>    #include <drm/i915_drm.h>
>>>    #include "i915_drv.h"
>>>    #include "i915_gem_render_state.h"
>>> +#include "i915_vgpu.h"
>>>    #include "intel_lrc_reg.h"
>>>    #include "intel_mocs.h"
>>>    #include "intel_workarounds.h"
>>> @@ -953,44 +954,23 @@ static void process_csb(struct intel_engine_cs *engine)
>>>    {
>>>        struct intel_engine_execlists * const execlists = &engine->execlists;
>>>        struct execlist_port *port = execlists->port;
>>> -     struct drm_i915_private *i915 = engine->i915;
>>> -
>>> -     /* The HWSP contains a (cacheable) mirror of the CSB */
>>> -     const u32 *buf =
>>> -             &engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX];
>>> -     unsigned int head, tail;
>>> -     bool fw = false;
>>> +     const u32 * const buf = execlists->csb_status;
>>> +     u8 head, tail;
>>>    
>>>        /* Clear before reading to catch new interrupts */
>>>        clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
>>>        smp_mb__after_atomic();
>>>    
>>> -     if (unlikely(execlists->csb_use_mmio)) {
>>> -             intel_uncore_forcewake_get(i915, execlists->fw_domains);
>>> -             fw = true;
>>> -
>>> -             buf = (u32 * __force)
>>> -                     (i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0)));
>>> +     /* Note that csb_write, csb_status may be either in HWSP or mmio */
>>> +     head = execlists->csb_head;
>>> +     tail = READ_ONCE(*execlists->csb_write);
>>
>> Under GVT when this is emulated mmio I think you need to mask and shift
>> it with GEN8_CSB_WRITE_PTR.
> 
> Shift is 0, mask is applied by u8.

Evil. :) Put a comment please.

>>> +     GEM_TRACE("%s cs-irq head=%d, tail=%d\n", engine->name, head, tail);
>>> +     if (unlikely(head == tail))
>>> +             return;
>>>    
>>> -             head = readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
>>> -             tail = GEN8_CSB_WRITE_PTR(head);
>>> -             head = GEN8_CSB_READ_PTR(head);
>>> -             execlists->csb_head = head;
>>> -     } else {
>>> -             const int write_idx =
>>> -                     intel_hws_csb_write_index(i915) -
>>> -                     I915_HWS_CSB_BUF0_INDEX;
>>> +     rmb(); /* Hopefully paired with a wmb() in HW */
>>>    
>>> -             head = execlists->csb_head;
>>> -             tail = READ_ONCE(buf[write_idx]);
>>> -             rmb(); /* Hopefully paired with a wmb() in HW */
>>> -     }
>>> -     GEM_TRACE("%s cs-irq head=%d [%d%s], tail=%d [%d%s]\n",
>>> -               engine->name,
>>> -               head, GEN8_CSB_READ_PTR(readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?",
>>> -               tail, GEN8_CSB_WRITE_PTR(readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?");
>>> -
>>> -     while (head != tail) {
>>> +     do {
>>
>> Why convert while to do-while? Early unlikely return above handles the
>> void process_csb calls? Would the same effect happen if you put unlikely
>> in the while (head != tail) condition and would be simpler?
> 
> The earlier return to circumvent the lfence.

Oh that one.. so why it is safe to compare head and tail before the rmb 
since we need the rmb afterwards?

> 
>>>                struct i915_request *rq;
>>>                unsigned int status;
>>>                unsigned int count;
>>> @@ -1016,12 +996,12 @@ static void process_csb(struct intel_engine_cs *engine)
>>>                 * status notifier.
>>>                 */
>>>    
>>> -             status = READ_ONCE(buf[2 * head]); /* maybe mmio! */
>>>                GEM_TRACE("%s csb[%d]: status=0x%08x:0x%08x, active=0x%x\n",
>>>                          engine->name, head,
>>> -                       status, buf[2*head + 1],
>>> +                       buf[2 * head + 0], buf[2 * head + 1],
>>>                          execlists->active);
>>>    
>>> +             status = buf[2 * head];
>>
>> Why not leave before GEM_TRACE above, as is, and then diff is smaller?
> 
> You made me correct the GEM_TRACE!

Hm?

> Looking at it I still prefer 2 buf[] vs the mixed local/buf[].

Not touching lines which don't need touching trumps in my book. But OK.

Regards,

Tvrtko

> 
>>>                if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
>>>                              GEN8_CTX_STATUS_PREEMPTED))
>>>                        execlists_set_active(execlists,
>>> @@ -1103,16 +1083,11 @@ static void process_csb(struct intel_engine_cs *engine)
>>>                } else {
>>>                        port_set(port, port_pack(rq, count));
>>>                }
>>> -     }
>>> +     } while (head != tail);
>>>    
>>> -     if (head != execlists->csb_head) {
>>> -             execlists->csb_head = head;
>>> -             writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8),
>>> -                    i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
>>> -     }
>>> -
>>> -     if (unlikely(fw))
>>> -             intel_uncore_forcewake_put(i915, execlists->fw_domains);
>>> +     writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8),
>>> +            execlists->csb_read);
>>> +     execlists->csb_head = head;
>>
>> I guess early return helps to avoid this, but since it is going away in
>> a following patch maybe it is not worth it.
> 
> lfence!
> -Chris
> 


More information about the Intel-gfx mailing list