[PATCH 5/5] drm/amdgpu: Improve the software ring priority scheduler

Christian König christian.koenig at amd.com
Tue Oct 18 11:24:56 UTC 2022


Am 18.10.22 um 11:08 schrieb jiadong.zhu at amd.com:
> From: Jiadong Zhu <Jiadong.Zhu at amd.com>
>
> Using the drm scheduler, the software rings' scheduling threads with different
> priorities have the same opportunity to get the spinlock and copy its packages
> into the real ring. Though preemption may happen for the low priority ring, it
> will catch up with the high priority ring by copying more data (the resubmit
> package and the current ibs) on the next calling of set_wptr. As a result, the
> priority is not quite effective.
>
> Here are some details to improve the priority of software rings at the bottom
> of drm scheduler by slowing down the low priority thread with following
> strategy.
> 1. If all the high priority fences are signaled which indicates gpu is idle
>     while there are low priority packages to submit, no delay happens.
> 2. When there are unsignaled fences on high priority rings, we account for the
>     portion of the ibs sent from the low priority ring. If the portion exceeds
> >     a certain threshold (e.g., 30%), a timeout wait happens on low priority
>     threads till more high priority ibs submitted.
> 3. The mechanism is started when the first time mcbp triggered, ended when all
>     the high priority fences are signaled.

This is a really big NAK for this approach since it will break GPU reset 
and can lead to deadlocks. You can't sleep in any of the callbacks 
waiting for the hardware to do something.

What we could do instead is to insert a dependency for the low priority 
ring. E.g. in amdgpu_job_dependency() return the latest high priority 
fence whenever a low priority job is about to be scheduled.

Regards,
Christian.

>
> Cc: Christian Koenig <Christian.Koenig at amd.com>
> Cc: Michel Dänzer <michel at daenzer.net>
> Signed-off-by: Jiadong Zhu <Jiadong.Zhu at amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c | 93 ++++++++++++++++++--
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h |  3 +
>   2 files changed, 90 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
> index 41b057b9358e..eac89094f1d1 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
> @@ -27,7 +27,13 @@
>   #include "amdgpu_ring.h"
>   #include "amdgpu.h"
>   
> +/* The interval in jiffies after which the fallback resubmission happens */
>   #define AMDGPU_MUX_RESUBMIT_JIFFIES_TIMEOUT (HZ / 2)
> +
> +/* Maximum waiting jiffies on low priority ring thread */
> +#define AMDGPU_MUX_DELAY_JIFFIES_TIMEOUT (HZ / 10)
> +
> +/* The time threshold of an unsignaled fence that triggers mcbp */
>   #define AMDGPU_MAX_LAST_UNSIGNALED_THRESHOLD_US 10000
>   
>   static const struct ring_info {
> @@ -47,6 +53,69 @@ static inline struct amdgpu_mux_entry *amdgpu_ring_mux_sw_entry(struct amdgpu_ri
>   			&mux->ring_entry[ring->entry_index] : NULL;
>   }
>   
> +static uint32_t ring_priority_to_ratio_pct(unsigned int hw_prio)
> +{
> +	uint32_t ratio;
> +
> +	switch (hw_prio) {
> +	case AMDGPU_RING_PRIO_DEFAULT:
> +		ratio = 30;
> +		break;
> +	case AMDGPU_RING_PRIO_2:
> +		ratio = 100;
> +		break;
> +	default:
> +		ratio = 100;
> +	}
> +	return ratio;
> +}
> +
> +static void reset_wcnt_on_all_rings(struct amdgpu_ring_mux *mux)
> +{
> +	int i;
> +
> +	for (i = 0; i < mux->num_ring_entries; i++)
> +		mux->ring_entry[i].w_cnt = 0;
> +}
> +
> +/*
> + * Decide if the low priority ring task should be delayed when there are high
> + * priority ibs ongoing. If all the high priority fences are signaled at that
> + * time, gpu is idle, we do not need to wait. Otherwise we calculate the
> + * percentage of portions copying ibs on the current ring and compare with the
> + * threshold according to the priority.
> + */
> +static bool proceed_on_ring(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring)
> +{
> +	struct amdgpu_ring *high_prio_ring;
> +	u64 current_cnt, total_cnt = 0;
> +	int i;
> +
> +	for (i = 0; i < mux->num_ring_entries; i++) {
> +		if (mux->ring_entry[i].ring->hw_prio > AMDGPU_RING_PRIO_DEFAULT) {
> +			high_prio_ring = mux->ring_entry[i].ring;
> +			break;
> +		}
> +	}
> +
> +	/* All high priority fences signaled indicates the gpu is idle. */
> +	if (amdgpu_fence_count_emitted(high_prio_ring) == 0) {
> +		reset_wcnt_on_all_rings(mux);
> +		return true;
> +	}
> +
> +	for (i = 0; i < mux->num_ring_entries; i++) {
> +		if (mux->ring_entry[i].ring->hw_prio == ring->hw_prio)
> +			current_cnt = mux->ring_entry[i].w_cnt;
> +		total_cnt += mux->ring_entry[i].w_cnt;
> +	}
> +
> +	if (total_cnt == 0)
> +		return true;
> +
> +	return ring_priority_to_ratio_pct(ring->hw_prio) > current_cnt * 100 / total_cnt;
> +}
> +
>   /* copy packages on sw ring range[begin, end) */
>   static void amdgpu_ring_mux_copy_pkt_from_sw_ring(struct amdgpu_ring_mux *mux,
>   						  struct amdgpu_ring *ring,
> @@ -73,6 +142,13 @@ static void amdgpu_ring_mux_copy_pkt_from_sw_ring(struct amdgpu_ring_mux *mux,
>   	}
>   }
>   
> +/* Delay the low priority task depending on the high priority rings' fence signal condition */
> +static void wait_on_ring(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring)
> +{
> +	wait_event_interruptible_timeout(mux->wait, proceed_on_ring(mux, ring),
> +					 AMDGPU_MUX_DELAY_JIFFIES_TIMEOUT);
> +}
> +
>   static void amdgpu_mux_resubmit_chunks(struct amdgpu_ring_mux *mux)
>   {
>   	struct amdgpu_mux_entry *e = NULL;
> @@ -126,7 +202,6 @@ static void amdgpu_ring_mux_schedule_resubmit(struct amdgpu_ring_mux *mux)
>   static void amdgpu_mux_resubmit_fallback(struct timer_list *t)
>   {
>   	struct amdgpu_ring_mux *mux = from_timer(mux, t, resubmit_timer);
> -
>   	if (!spin_trylock(&mux->lock)) {
>   		amdgpu_ring_mux_schedule_resubmit(mux);
>   		DRM_ERROR("reschedule resubmit\n");
> @@ -158,6 +233,7 @@ int amdgpu_ring_mux_init(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring,
>   	}
>   
>   	spin_lock_init(&mux->lock);
> +	init_waitqueue_head(&mux->wait);
>   	timer_setup(&mux->resubmit_timer, amdgpu_mux_resubmit_fallback, 0);
>   
>   	return 0;
> @@ -205,8 +281,10 @@ void amdgpu_ring_mux_set_wptr(struct amdgpu_ring_mux *mux, struct amdgpu_ring *r
>   {
>   	struct amdgpu_mux_entry *e;
>   
> -	spin_lock(&mux->lock);
> +	if (ring->hw_prio <= AMDGPU_RING_PRIO_DEFAULT && !proceed_on_ring(mux, ring))
> +		wait_on_ring(mux, ring);
>   
> +	spin_lock(&mux->lock);
>   	if (ring->hw_prio <= AMDGPU_RING_PRIO_DEFAULT)
>   		amdgpu_mux_resubmit_chunks(mux);
>   
> @@ -238,7 +316,12 @@ void amdgpu_ring_mux_set_wptr(struct amdgpu_ring_mux *mux, struct amdgpu_ring *r
>   	} else {
>   		e->end_ptr_in_hw_ring = mux->real_ring->wptr;
>   	}
> +
> +	e->w_cnt++;
>   	spin_unlock(&mux->lock);
> +
> +	if (ring->hw_prio > AMDGPU_RING_PRIO_DEFAULT)
> +		wake_up_interruptible(&mux->wait);
>   }
>   
>   u64 amdgpu_ring_mux_get_wptr(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring)
> @@ -373,7 +456,9 @@ int amdgpu_mcbp_trigger_preempt(struct amdgpu_ring_mux *mux)
>   	spin_lock(&mux->lock);
>   	mux->pending_trailing_fence_signaled = true;
>   	r = amdgpu_ring_preempt_ib(mux->real_ring);
> +	reset_wcnt_on_all_rings(mux);
>   	spin_unlock(&mux->lock);
> +
>   	return r;
>   }
>   
> @@ -408,10 +493,6 @@ void amdgpu_ring_mux_start_ib(struct amdgpu_ring_mux *mux, struct amdgpu_ring *r
>   	struct amdgpu_mux_entry *e;
>   	struct amdgpu_mux_chunk *chunk;
>   
> -	spin_lock(&mux->lock);
> -	amdgpu_mux_resubmit_chunks(mux);
> -	spin_unlock(&mux->lock);
> -
>   	e = amdgpu_ring_mux_sw_entry(mux, ring);
>   	if (!e) {
>   		DRM_ERROR("cannot find entry!\n");
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
> index aa758ebc86ae..a99647e33c9e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
> @@ -39,6 +39,7 @@ struct amdgpu_ring;
>    * @sw_rptr: the read pointer in software ring.
>    * @sw_wptr: the write pointer in software ring.
>    * @list: list head for amdgpu_mux_chunk
> + * @w_cnt: the write count of the current ring.
>    */
>   struct amdgpu_mux_entry {
>   	struct                  amdgpu_ring *ring;
> @@ -48,6 +49,7 @@ struct amdgpu_mux_entry {
>   	u64                     sw_rptr;
>   	u64                     sw_wptr;
>   	struct list_head        list;
> +	u64                     w_cnt;
>   };
>   
>   struct amdgpu_ring_mux {
> @@ -64,6 +66,7 @@ struct amdgpu_ring_mux {
>   	struct timer_list       resubmit_timer;
>   
>   	bool                    pending_trailing_fence_signaled;
> +	wait_queue_head_t       wait;
>   };
>   
>   /**



More information about the amd-gfx mailing list