[Intel-gfx] [PATCH 5/9] drm/i915/execlists: Unify CSB access pointers
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Thu Jun 28 10:58:26 UTC 2018
On 28/06/2018 11:10, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2018-06-28 11:02:17)
>>
>> On 27/06/2018 22:07, Chris Wilson wrote:
>>> Following the removal of the last workarounds, the only CSB mmio access
>>> is for the old vGPU interface. The mmio registers presented by vGPU do
>>> not require forcewake and can be treated as ordinary volatile memory,
>>> i.e. they behave just like the HWSP access just at a different location.
>>> We can reduce the CSB access to a set of read/write/buffer pointers and
>>> treat the various paths identically and not worry about forcewake.
>>> (Forcewake is nightmare for worstcase latency, and we want to process
>>> this all with irqsoff -- no latency allowed!)
>>>
>>> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
>>> ---
>>> drivers/gpu/drm/i915/intel_engine_cs.c | 12 ---
>>> drivers/gpu/drm/i915/intel_lrc.c | 116 ++++++++++--------------
>>> drivers/gpu/drm/i915/intel_ringbuffer.h | 23 +++--
>>> 3 files changed, 65 insertions(+), 86 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
>>> index d3264bd6e9dc..7209c22798e6 100644
>>> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
>>> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
>>> @@ -25,7 +25,6 @@
>>> #include <drm/drm_print.h>
>>>
>>> #include "i915_drv.h"
>>> -#include "i915_vgpu.h"
>>> #include "intel_ringbuffer.h"
>>> #include "intel_lrc.h"
>>>
>>> @@ -456,21 +455,10 @@ static void intel_engine_init_batch_pool(struct intel_engine_cs *engine)
>>> i915_gem_batch_pool_init(&engine->batch_pool, engine);
>>> }
>>>
>>> -static bool csb_force_mmio(struct drm_i915_private *i915)
>>> -{
>>> - /* Older GVT emulation depends upon intercepting CSB mmio */
>>> - if (intel_vgpu_active(i915) && !intel_vgpu_has_hwsp_emulation(i915))
>>> - return true;
>>> -
>>> - return false;
>>> -}
>>> -
>>> static void intel_engine_init_execlist(struct intel_engine_cs *engine)
>>> {
>>> struct intel_engine_execlists * const execlists = &engine->execlists;
>>>
>>> - execlists->csb_use_mmio = csb_force_mmio(engine->i915);
>>> -
>>> execlists->port_mask = 1;
>>> BUILD_BUG_ON_NOT_POWER_OF_2(execlists_num_ports(execlists));
>>> GEM_BUG_ON(execlists_num_ports(execlists) > EXECLIST_MAX_PORTS);
>>> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
>>> index 91656eb2f2db..368a8c74d11d 100644
>>> --- a/drivers/gpu/drm/i915/intel_lrc.c
>>> +++ b/drivers/gpu/drm/i915/intel_lrc.c
>>> @@ -137,6 +137,7 @@
>>> #include <drm/i915_drm.h>
>>> #include "i915_drv.h"
>>> #include "i915_gem_render_state.h"
>>> +#include "i915_vgpu.h"
>>> #include "intel_lrc_reg.h"
>>> #include "intel_mocs.h"
>>> #include "intel_workarounds.h"
>>> @@ -953,44 +954,23 @@ static void process_csb(struct intel_engine_cs *engine)
>>> {
>>> struct intel_engine_execlists * const execlists = &engine->execlists;
>>> struct execlist_port *port = execlists->port;
>>> - struct drm_i915_private *i915 = engine->i915;
>>> -
>>> - /* The HWSP contains a (cacheable) mirror of the CSB */
>>> - const u32 *buf =
>>> - &engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX];
>>> - unsigned int head, tail;
>>> - bool fw = false;
>>> + const u32 * const buf = execlists->csb_status;
>>> + u8 head, tail;
>>>
>>> /* Clear before reading to catch new interrupts */
>>> clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
>>> smp_mb__after_atomic();
>>>
>>> - if (unlikely(execlists->csb_use_mmio)) {
>>> - intel_uncore_forcewake_get(i915, execlists->fw_domains);
>>> - fw = true;
>>> -
>>> - buf = (u32 * __force)
>>> - (i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0)));
>>> + /* Note that csb_write, csb_status may be either in HWSP or mmio */
>>> + head = execlists->csb_head;
>>> + tail = READ_ONCE(*execlists->csb_write);
>>
>> Under GVT when this is emulated mmio I think you need to mask and shift
>> it with GEN8_CSB_WRITE_PTR.
>
> Shift is 0, mask is applied by u8.
Evil. :) Put a comment please.
>>> + GEM_TRACE("%s cs-irq head=%d, tail=%d\n", engine->name, head, tail);
>>> + if (unlikely(head == tail))
>>> + return;
>>>
>>> - head = readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
>>> - tail = GEN8_CSB_WRITE_PTR(head);
>>> - head = GEN8_CSB_READ_PTR(head);
>>> - execlists->csb_head = head;
>>> - } else {
>>> - const int write_idx =
>>> - intel_hws_csb_write_index(i915) -
>>> - I915_HWS_CSB_BUF0_INDEX;
>>> + rmb(); /* Hopefully paired with a wmb() in HW */
>>>
>>> - head = execlists->csb_head;
>>> - tail = READ_ONCE(buf[write_idx]);
>>> - rmb(); /* Hopefully paired with a wmb() in HW */
>>> - }
>>> - GEM_TRACE("%s cs-irq head=%d [%d%s], tail=%d [%d%s]\n",
>>> - engine->name,
>>> - head, GEN8_CSB_READ_PTR(readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?",
>>> - tail, GEN8_CSB_WRITE_PTR(readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?");
>>> -
>>> - while (head != tail) {
>>> + do {
>>
>> Why convert while to do-while? Early unlikely return above handles the
>> void process_csb calls? Would the same effect happen if you put unlikely
>> in the while (head != tail) condition and would be simpler?
>
> The earlier return to circumvent the lfence.
Oh that one.. so why it is safe to compare head and tail before the rmb
since we need the rmb afterwards?
>
>>> struct i915_request *rq;
>>> unsigned int status;
>>> unsigned int count;
>>> @@ -1016,12 +996,12 @@ static void process_csb(struct intel_engine_cs *engine)
>>> * status notifier.
>>> */
>>>
>>> - status = READ_ONCE(buf[2 * head]); /* maybe mmio! */
>>> GEM_TRACE("%s csb[%d]: status=0x%08x:0x%08x, active=0x%x\n",
>>> engine->name, head,
>>> - status, buf[2*head + 1],
>>> + buf[2 * head + 0], buf[2 * head + 1],
>>> execlists->active);
>>>
>>> + status = buf[2 * head];
>>
>> Why not leave before GEM_TRACE above, as is, and then diff is smaller?
>
> You made me correct the GEM_TRACE!
Hm?
> Looking at it I still prefer 2 buf[] vs the mixed local/buf[].
Not touching lines which don't need touching trumps in my book. But OK.
Regards,
Tvrtko
>
>>> if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
>>> GEN8_CTX_STATUS_PREEMPTED))
>>> execlists_set_active(execlists,
>>> @@ -1103,16 +1083,11 @@ static void process_csb(struct intel_engine_cs *engine)
>>> } else {
>>> port_set(port, port_pack(rq, count));
>>> }
>>> - }
>>> + } while (head != tail);
>>>
>>> - if (head != execlists->csb_head) {
>>> - execlists->csb_head = head;
>>> - writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8),
>>> - i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
>>> - }
>>> -
>>> - if (unlikely(fw))
>>> - intel_uncore_forcewake_put(i915, execlists->fw_domains);
>>> + writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8),
>>> + execlists->csb_read);
>>> + execlists->csb_head = head;
>>
>> I guess early return helps to avoid this, but since it is going away in
>> a following patch maybe it is not worth it.
>
> lfence!
> -Chris
>
More information about the Intel-gfx
mailing list