[Intel-gfx] [PATCH] drm/i915: Broadwell expands ACTHD to 64bit

Fri Mar 21 11:50:05 CET 2014

On 03/21/2014 10:14 AM, Chris Wilson wrote:
> On Fri, Mar 21, 2014 at 10:03:38AM +0000, Tvrtko Ursulin wrote:
>>
>> On 03/20/2014 09:48 PM, Chris Wilson wrote:
>>> As Broadwell has an increased virtual address size, it requires more
>>> than 32 bits to store offsets into its address space. This includes the
>>> debug registers to track the current HEAD of the individual rings, which
>>> may be anywhere within the per-process address spaces. In order to find
>>> the full location, we need to read the high bits from a second register.
>>> We then also need to expand our storage to keep track of the larger
>>> address.
>>>
>>> v2: Carefully read the two registers to catch wraparound between
>>>      the reads.
>>>
>>> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
>>> Cc: Ben Widawsky <benjamin.widawsky at intel.com>
>>> Cc: Timo Aaltonen <tjaalton at ubuntu.com>
>>> Cc: Tvrtko Ursulin <tvrtko.ursulin at linux.intel.com>
>>> ---
>>>   drivers/gpu/drm/i915/i915_drv.h         |  2 +-
>>>   drivers/gpu/drm/i915/i915_gpu_error.c   |  2 +-
>>>   drivers/gpu/drm/i915/i915_irq.c         |  8 +++++---
>>>   drivers/gpu/drm/i915/i915_reg.h         |  1 +
>>>   drivers/gpu/drm/i915/intel_ringbuffer.c | 30 ++++++++++++++++++++++++------
>>>   drivers/gpu/drm/i915/intel_ringbuffer.h |  6 +++---
>>>   6 files changed, 35 insertions(+), 14 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
>>> index 54182536dc46..d5a4a14d6723 100644
>>> --- a/drivers/gpu/drm/i915/i915_drv.h
>>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>>> @@ -354,12 +354,12 @@ struct drm_i915_error_state {
>>>   		u32 ipeir;
>>>   		u32 ipehr;
>>>   		u32 instdone;
>>> -		u32 acthd;
>>>   		u32 bbstate;
>>>   		u32 instpm;
>>>   		u32 instps;
>>>   		u32 seqno;
>>>   		u64 bbaddr;
>>> +		u64 acthd;
>>>   		u32 fault_reg;
>>>   		u32 faddr;
>>>   		u32 rc_psmi; /* sleep state */
>>> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
>>> index b153a16ead0a..9519aa240614 100644
>>> --- a/drivers/gpu/drm/i915/i915_gpu_error.c
>>> +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
>>> @@ -248,7 +248,7 @@ static void i915_ring_error_state(struct drm_i915_error_state_buf *m,
>>>   	err_printf(m, "  TAIL: 0x%08x\n", ring->tail);
>>>   	err_printf(m, "  CTL: 0x%08x\n", ring->ctl);
>>>   	err_printf(m, "  HWS: 0x%08x\n", ring->hws);
>>> -	err_printf(m, "  ACTHD: 0x%08x\n", ring->acthd);
>>> +	err_printf(m, "  ACTHD: 0x%08llx\n", ring->acthd);
>>
>> I thought the conclusion elsewhere in the thread for this was to include
>> all 64 bits in the output?
>
> Eh? They are... There's a second patch to change the way we print 64bit
> values.

Ok, missed that.

>>>   	err_printf(m, "  IPEIR: 0x%08x\n", ring->ipeir);
>>>   	err_printf(m, "  IPEHR: 0x%08x\n", ring->ipehr);
>>>   	err_printf(m, "  INSTDONE: 0x%08x\n", ring->instdone);
>>> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
>>> index 77dbef6af185..9caae9840c78 100644
>>> --- a/drivers/gpu/drm/i915/i915_irq.c
>>> +++ b/drivers/gpu/drm/i915/i915_irq.c
>>> @@ -2507,7 +2507,8 @@ static struct intel_ring_buffer *
>>>   semaphore_waits_for(struct intel_ring_buffer *ring, u32 *seqno)
>>>   {
>>>   	struct drm_i915_private *dev_priv = ring->dev->dev_private;
>>> -	u32 cmd, ipehr, acthd, acthd_min;
>>> +	u64 acthd, acthd_min;
>>> +	u32 cmd, ipehr;
>>>
>>>   	ipehr = I915_READ(RING_IPEHR(ring->mmio_base));
>>>   	if ((ipehr & ~(0x3 << 16)) !=
>>> @@ -2563,7 +2564,7 @@ static void semaphore_clear_deadlocks(struct drm_i915_private *dev_priv)
>>>   }
>>>
>>>   static enum intel_ring_hangcheck_action
>>> -ring_stuck(struct intel_ring_buffer *ring, u32 acthd)
>>> +ring_stuck(struct intel_ring_buffer *ring, u64 acthd)
>>>   {
>>>   	struct drm_device *dev = ring->dev;
>>>   	struct drm_i915_private *dev_priv = dev->dev_private;
>>> @@ -2631,7 +2632,8 @@ static void i915_hangcheck_elapsed(unsigned long data)
>>>   		return;
>>>
>>>   	for_each_ring(ring, dev_priv, i) {
>>> -		u32 seqno, acthd;
>>> +		u64 acthd;
>>> +		u32 seqno;
>>>   		bool busy = true;
>>>
>>>   		semaphore_clear_deadlocks(dev_priv);
>>> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
>>> index f010ff7e7e2a..3c464d307a2b 100644
>>> --- a/drivers/gpu/drm/i915/i915_reg.h
>>> +++ b/drivers/gpu/drm/i915/i915_reg.h
>>> @@ -708,6 +708,7 @@ enum punit_power_well {
>>>   #define BLT_HWS_PGA_GEN7	(0x04280)
>>>   #define VEBOX_HWS_PGA_GEN7	(0x04380)
>>>   #define RING_ACTHD(base)	((base)+0x74)
>>> +#define RING_ACTHD_UDW(base)	((base)+0x5c)
>>>   #define RING_NOPID(base)	((base)+0x94)
>>>   #define RING_IMR(base)		((base)+0xa8)
>>>   #define RING_TIMESTAMP(base)	((base)+0x358)
>>> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
>>> index 7a01911c16f8..45d8011e5a6c 100644
>>> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
>>> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
>>> @@ -417,13 +417,28 @@ static void ring_write_tail(struct intel_ring_buffer *ring,
>>>   	I915_WRITE_TAIL(ring, value);
>>>   }
>>>
>>> -u32 intel_ring_get_active_head(struct intel_ring_buffer *ring)
>>> +u64 intel_ring_get_active_head(struct intel_ring_buffer *ring)
>>>   {
>>>   	drm_i915_private_t *dev_priv = ring->dev->dev_private;
>>> -	u32 acthd_reg = INTEL_INFO(ring->dev)->gen >= 4 ?
>>> -			RING_ACTHD(ring->mmio_base) : ACTHD;
>>> +	u64 acthd;
>>>
>>> -	return I915_READ(acthd_reg);
>>> +	if (INTEL_INFO(ring->dev)->gen >= 8) {
>>> +		u32 upper, lower, tmp;
>>> +
>>> +		tmp = I915_READ(RING_ACTHD_UDW(ring->mmio_base));
>>> +		do {
>>> +			upper = tmp;
>>> +			lower = I915_READ(RING_ACTHD(ring->mmio_base));
>>> +			tmp = I915_READ(RING_ACTHD_UDW(ring->mmio_base));
>>> +		} while (upper != tmp);
>>
>> Looks good. A slightly more defensive approach would be to only retry
>> once and log something horrible if the upper word wraps twice. Ben's
>> suggestion to also validate that the lower dword has really wrapped
>> makes sense as well, if stuffing more and more conditionals in this
>> call path is not a problem.
>
> We don't elsewhere. If you go crazy, there is an indeterminate time
> lapse between the two reads, so the wrap around may be undetectable. As
> it is an expected condition that will happen eventually, we don't even
> need to log it, just handle it. Particularly as I was thinking of

No, I think you misunderstood me. I said "slightly more defensive" just in 
the sense that in case of weird hardware failures you have a potentially 
infinite loop now, where you don't really need a loop - probabilities 
strongly suggest you cannot get two upper dword wraps between the reads. 
So it is enough to read the upper dword twice, without the loop. Same 
effect, slightly more defensive in reality.

A lower dword WARN (what Ben suggests, I think) would be another level of 
defensiveness, to double-check that the wrap looks sensible if one was detected.

> refactoring this as I915_READ64_2x32(lower_reg, upper_reg);

That's good.

Tvrtko