[PATCH v7] drm/amdgpu: handle IH ring1 overflow

Thu Nov 25 18:03:11 UTC 2021

Am 2021-11-25 um 12:52 p.m. schrieb Felix Kuehling:
> Am 2021-11-25 um 10:16 a.m. schrieb Philip Yang:
>> IH ring1 is used to process GPU retry fault, overflow is enabled to
>> drain retry fault because we want receive other interrupts while
>> handling retry fault to recover range. There is no overflow flag set
>> when wptr pass rptr. Use timestamp of rptr and wptr to handle overflow
>> and drain retry fault.
>>
>> If fault timestamp goes backward, the fault is filtered and should not
>> be processed. Drain fault is finished if latest_decoded_timestamp is
>> equal to or larger than checkpoint timestamp.
> If there can be multiple faults with the same time stamp, then this is
> not sufficient because it would allow a stale fault with the same
> timestamp to sneak through.
>
> For example there are 3 faults with the same timestamp in the ring:
>
>     ...     <- rptr
>     ...
>     fault1
>     fault2
>     fault3  <- wptr
>
> The timestamp is taken from fault3, the current wptr.
> amdgpu_ih_wait_on_checkpoint_process_ts returns when the rptr reaches
> fault1 because it has the same timestamp.
>
>     fault1  <- rptr
>     fault2
>     fault3  <- wptr
>
> At that time fault2 and fault3 are still stale faults that could lead to
> a VM fault.
>
> You would need to wait for latest_decoded_timestamp to be truly greater
> than the checkpoint (or the ring to be empty) to be sure that you've
> seen all stale faults. Other than that, this patch looks good to me.
>
> Regards,
>   Felix
>
>
>> Add amdgpu_ih_function interface decode_iv_ts for different chips to get
>> timestamp from IV entry with different iv size and timestamp offset.
>> amdgpu_ih_decode_iv_ts_helper is used for vega10, vega20, navi10.
>>
>> Signed-off-by: Philip Yang <Philip.Yang at amd.com>
>> ---
>>  drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c |  8 +++-
>>  drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h |  3 +-
>>  drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c  | 57 ++++++++++++-------------
>>  drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h  | 16 ++++++-
>>  drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c  |  2 +-
>>  drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c   |  2 +-
>>  drivers/gpu/drm/amd/amdgpu/navi10_ih.c  |  1 +
>>  drivers/gpu/drm/amd/amdgpu/vega10_ih.c  |  1 +
>>  drivers/gpu/drm/amd/amdgpu/vega20_ih.c  |  1 +
>>  drivers/gpu/drm/amd/amdkfd/kfd_svm.c    |  2 +-
>>  10 files changed, 56 insertions(+), 37 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
>> index 45761d0328c7..45e08677207d 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
>> @@ -350,6 +350,7 @@ static inline uint64_t amdgpu_gmc_fault_key(uint64_t addr, uint16_t pasid)
>>   * amdgpu_gmc_filter_faults - filter VM faults
>>   *
>>   * @adev: amdgpu device structure
>> + * @ih: interrupt ring that the fault received from
>>   * @addr: address of the VM fault
>>   * @pasid: PASID of the process causing the fault
>>   * @timestamp: timestamp of the fault
>> @@ -358,7 +359,8 @@ static inline uint64_t amdgpu_gmc_fault_key(uint64_t addr, uint16_t pasid)
>>   * True if the fault was filtered and should not be processed further.
>>   * False if the fault is a new one and needs to be handled.
>>   */
>> -bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev, uint64_t addr,
>> +bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev,
>> +			      struct amdgpu_ih_ring *ih, uint64_t addr,
>>  			      uint16_t pasid, uint64_t timestamp)
>>  {
>>  	struct amdgpu_gmc *gmc = &adev->gmc;
>> @@ -366,6 +368,10 @@ bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev, uint64_t addr,
>>  	struct amdgpu_gmc_fault *fault;
>>  	uint32_t hash;
>>  
>> +	/* Stale retry fault if timestamp goes backward */
>> +	if (amdgpu_ih_ts_after(timestamp, ih->latest_decoded_timestamp))
>> +		return true;
>> +
>>  	/* If we don't have space left in the ring buffer return immediately */
>>  	stamp = max(timestamp, AMDGPU_GMC_FAULT_TIMEOUT + 1) -
>>  		AMDGPU_GMC_FAULT_TIMEOUT;
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
>> index e55201134a01..8458cebc6d5b 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
>> @@ -316,7 +316,8 @@ void amdgpu_gmc_gart_location(struct amdgpu_device *adev,
>>  			      struct amdgpu_gmc *mc);
>>  void amdgpu_gmc_agp_location(struct amdgpu_device *adev,
>>  			     struct amdgpu_gmc *mc);
>> -bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev, uint64_t addr,
>> +bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev,
>> +			      struct amdgpu_ih_ring *ih, uint64_t addr,
>>  			      uint16_t pasid, uint64_t timestamp);
>>  void amdgpu_gmc_filter_faults_remove(struct amdgpu_device *adev, uint64_t addr,
>>  				     uint16_t pasid);
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
>> index 0c7963dfacad..8d02f975f915 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
>> @@ -164,52 +164,32 @@ void amdgpu_ih_ring_write(struct amdgpu_ih_ring *ih, const uint32_t *iv,
>>  	}
>>  }
>>  
>> -/* Waiter helper that checks current rptr matches or passes checkpoint wptr */
>> -static bool amdgpu_ih_has_checkpoint_processed(struct amdgpu_device *adev,
>> -					struct amdgpu_ih_ring *ih,
>> -					uint32_t checkpoint_wptr,
>> -					uint32_t *prev_rptr)
>> -{
>> -	uint32_t cur_rptr = ih->rptr | (*prev_rptr & ~ih->ptr_mask);
>> -
>> -	/* rptr has wrapped. */
>> -	if (cur_rptr < *prev_rptr)
>> -		cur_rptr += ih->ptr_mask + 1;
>> -	*prev_rptr = cur_rptr;
>> -
>> -	/* check ring is empty to workaround missing wptr overflow flag */
>> -	return cur_rptr >= checkpoint_wptr ||
>> -	       (cur_rptr & ih->ptr_mask) == amdgpu_ih_get_wptr(adev, ih);
>> -}
>> -
>>  /**
>> - * amdgpu_ih_wait_on_checkpoint_process - wait to process IVs up to checkpoint
>> + * amdgpu_ih_wait_on_checkpoint_process_ts - wait to process IVs up to checkpoint
>>   *
>>   * @adev: amdgpu_device pointer
>>   * @ih: ih ring to process
>>   *
>>   * Used to ensure ring has processed IVs up to the checkpoint write pointer.
>>   */
>> -int amdgpu_ih_wait_on_checkpoint_process(struct amdgpu_device *adev,
>> +int amdgpu_ih_wait_on_checkpoint_process_ts(struct amdgpu_device *adev,
>>  					struct amdgpu_ih_ring *ih)
>>  {
>> -	uint32_t checkpoint_wptr, rptr;
>> +	uint32_t checkpoint_wptr;
>> +	uint64_t checkpoint_ts;
>> +	long timeout = HZ;
>>  
>>  	if (!ih->enabled || adev->shutdown)
>>  		return -ENODEV;
>>  
>>  	checkpoint_wptr = amdgpu_ih_get_wptr(adev, ih);
>> -	/* Order wptr with rptr. */
>> +	/* Order wptr with ring data. */
>>  	rmb();
>> -	rptr = READ_ONCE(ih->rptr);
>> -
>> -	/* wptr has wrapped. */
>> -	if (rptr > checkpoint_wptr)
>> -		checkpoint_wptr += ih->ptr_mask + 1;
>> +	checkpoint_ts = amdgpu_ih_decode_iv_ts(adev, ih, checkpoint_wptr, -1);
>>  
>> -	return wait_event_interruptible(ih->wait_process,
>> -				amdgpu_ih_has_checkpoint_processed(adev, ih,
>> -						checkpoint_wptr, &rptr));
>> +	return wait_event_interruptible_timeout(ih->wait_process,
>> +		    !amdgpu_ih_ts_after(ih->latest_decoded_timestamp, checkpoint_ts),
>> +		    timeout);
Your code actually does this correctly (waiting for a timestamt that's
truly greater than the checkpoint), only the commit description was
wrong. But I think you have a chance of getting a timeout when IH never
sends an interrupt with a larger timestamp, e.g. because you've already
handled all the faults before calling
amdgpu_ih_wait_on_checkpoint_process_ts and no new faults are generated.
So it may be good to add an extra check for the ring being empty to
avoid that.

Regards,
  Felix


>>  }
>>  
>>  /**
>> @@ -298,4 +278,21 @@ void amdgpu_ih_decode_iv_helper(struct amdgpu_device *adev,
>>  
>>  	/* wptr/rptr are in bytes! */
>>  	ih->rptr += 32;
>> +	if (amdgpu_ih_ts_after(ih->latest_decoded_timestamp, entry->timestamp))
>> +		ih->latest_decoded_timestamp = entry->timestamp;
>> +}
>> +
>> +uint64_t amdgpu_ih_decode_iv_ts_helper(struct amdgpu_ih_ring *ih, u32 rptr,
>> +				       signed int offset)
>> +{
>> +	uint32_t iv_size = 32;
>> +	uint32_t ring_index;
>> +	uint32_t dw1, dw2;
>> +
>> +	rptr += iv_size * offset;
>> +	ring_index = (rptr & ih->ptr_mask) >> 2;
>> +
>> +	dw1 = le32_to_cpu(ih->ring[ring_index + 1]);
>> +	dw2 = le32_to_cpu(ih->ring[ring_index + 2]);
>> +	return dw1 | ((u64)(dw2 & 0xffff) << 32);
>>  }
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
>> index 0649b59830a5..322e1521287b 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
>> @@ -68,20 +68,30 @@ struct amdgpu_ih_ring {
>>  
>>  	/* For waiting on IH processing at checkpoint. */
>>  	wait_queue_head_t wait_process;
>> +	uint64_t		latest_decoded_timestamp;
>>  };
>>  
>> +/* return true if time stamp t2 is after t1 with 48bit wrap around */
>> +#define amdgpu_ih_ts_after(t1, t2) \
>> +		(((int64_t)((t2) << 16) - (int64_t)((t1) << 16)) > 0LL)
>> +
>>  /* provided by the ih block */
>>  struct amdgpu_ih_funcs {
>>  	/* ring read/write ptr handling, called from interrupt context */
>>  	u32 (*get_wptr)(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih);
>>  	void (*decode_iv)(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih,
>>  			  struct amdgpu_iv_entry *entry);
>> +	uint64_t (*decode_iv_ts)(struct amdgpu_ih_ring *ih, u32 rptr,
>> +				 signed int offset);
>>  	void (*set_rptr)(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih);
>>  };
>>  
>>  #define amdgpu_ih_get_wptr(adev, ih) (adev)->irq.ih_funcs->get_wptr((adev), (ih))
>>  #define amdgpu_ih_decode_iv(adev, iv) \
>>  	(adev)->irq.ih_funcs->decode_iv((adev), (ih), (iv))
>> +#define amdgpu_ih_decode_iv_ts(adev, ih, rptr, offset) \
>> +	(WARN_ON_ONCE(!(adev)->irq.ih_funcs->decode_iv_ts) ? 0 : \
>> +	(adev)->irq.ih_funcs->decode_iv_ts((ih), (rptr), (offset)))
>>  #define amdgpu_ih_set_rptr(adev, ih) (adev)->irq.ih_funcs->set_rptr((adev), (ih))
>>  
>>  int amdgpu_ih_ring_init(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih,
>> @@ -89,10 +99,12 @@ int amdgpu_ih_ring_init(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih,
>>  void amdgpu_ih_ring_fini(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih);
>>  void amdgpu_ih_ring_write(struct amdgpu_ih_ring *ih, const uint32_t *iv,
>>  			  unsigned int num_dw);
>> -int amdgpu_ih_wait_on_checkpoint_process(struct amdgpu_device *adev,
>> -					struct amdgpu_ih_ring *ih);
>> +int amdgpu_ih_wait_on_checkpoint_process_ts(struct amdgpu_device *adev,
>> +					    struct amdgpu_ih_ring *ih);
>>  int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih);
>>  void amdgpu_ih_decode_iv_helper(struct amdgpu_device *adev,
>>  				struct amdgpu_ih_ring *ih,
>>  				struct amdgpu_iv_entry *entry);
>> +uint64_t amdgpu_ih_decode_iv_ts_helper(struct amdgpu_ih_ring *ih, u32 rptr,
>> +				       signed int offset);
>>  #endif
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
>> index 3ec5ff5a6dbe..d696c4754bea 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
>> @@ -107,7 +107,7 @@ static int gmc_v10_0_process_interrupt(struct amdgpu_device *adev,
>>  
>>  		/* Process it onyl if it's the first fault for this address */
>>  		if (entry->ih != &adev->irq.ih_soft &&
>> -		    amdgpu_gmc_filter_faults(adev, addr, entry->pasid,
>> +		    amdgpu_gmc_filter_faults(adev, entry->ih, addr, entry->pasid,
>>  					     entry->timestamp))
>>  			return 1;
>>  
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>> index cb82404df534..7490ce8295c1 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>> @@ -523,7 +523,7 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
>>  
>>  		/* Process it onyl if it's the first fault for this address */
>>  		if (entry->ih != &adev->irq.ih_soft &&
>> -		    amdgpu_gmc_filter_faults(adev, addr, entry->pasid,
>> +		    amdgpu_gmc_filter_faults(adev, entry->ih, addr, entry->pasid,
>>  					     entry->timestamp))
>>  			return 1;
>>  
>> diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
>> index 38241cf0e1f1..8ce5b8ca1fd7 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
>> @@ -716,6 +716,7 @@ static const struct amd_ip_funcs navi10_ih_ip_funcs = {
>>  static const struct amdgpu_ih_funcs navi10_ih_funcs = {
>>  	.get_wptr = navi10_ih_get_wptr,
>>  	.decode_iv = amdgpu_ih_decode_iv_helper,
>> +	.decode_iv_ts = amdgpu_ih_decode_iv_ts_helper,
>>  	.set_rptr = navi10_ih_set_rptr
>>  };
>>  
>> diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
>> index a9ca6988009e..3070466f54e1 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
>> @@ -640,6 +640,7 @@ const struct amd_ip_funcs vega10_ih_ip_funcs = {
>>  static const struct amdgpu_ih_funcs vega10_ih_funcs = {
>>  	.get_wptr = vega10_ih_get_wptr,
>>  	.decode_iv = amdgpu_ih_decode_iv_helper,
>> +	.decode_iv_ts = amdgpu_ih_decode_iv_ts_helper,
>>  	.set_rptr = vega10_ih_set_rptr
>>  };
>>  
>> diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c
>> index f51dfc38ac65..3b4eb8285943 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c
>> @@ -688,6 +688,7 @@ const struct amd_ip_funcs vega20_ih_ip_funcs = {
>>  static const struct amdgpu_ih_funcs vega20_ih_funcs = {
>>  	.get_wptr = vega20_ih_get_wptr,
>>  	.decode_iv = amdgpu_ih_decode_iv_helper,
>> +	.decode_iv_ts = amdgpu_ih_decode_iv_ts_helper,
>>  	.set_rptr = vega20_ih_set_rptr
>>  };
>>  
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>> index 10868d5b549f..663489ae56d7 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>> @@ -1974,7 +1974,7 @@ static void svm_range_drain_retry_fault(struct svm_range_list *svms)
>>  
>>  		pr_debug("drain retry fault gpu %d svms %p\n", i, svms);
>>  
>> -		amdgpu_ih_wait_on_checkpoint_process(pdd->dev->adev,
>> +		amdgpu_ih_wait_on_checkpoint_process_ts(pdd->dev->adev,
>>  						     &pdd->dev->adev->irq.ih1);
>>  		pr_debug("drain retry fault gpu %d svms 0x%p done\n", i, svms);
>>  	}