[PATCH] drm/i915/gvt: Give new born vGPU higher scheduling chance
Zhenyu Wang
zhenyuw at linux.intel.com
Wed Aug 22 09:51:17 UTC 2018
On 2018.08.22 16:45:35 +0800, Hang Yuan wrote:
> On 08/22/2018 04:18 PM, Zhenyu Wang wrote:
> > On 2018.08.22 16:03:38 +0800, Hang Yuan wrote:
> > > On 08/21/2018 10:23 AM, Zhenyu Wang wrote:
> > > > This tries to give a newly created vGPU a higher scheduling chance:
> > > > it is not only added to the head of the sched list, but also gets
> > > > higher workload scheduling priority for 5 seconds after we start to
> > > > schedule it. This allows fast GPU execution during VM boot and
> > > > ensures the guest driver can set up the required state in time.
> > > >
> > > > This fixes a recent failure seen on one VM, with multiple Linux VMs
> > > > running, on a kernel with commit 2621cefaa42b3 ("drm/i915: Provide a
> > > > timeout to i915_gem_wait_for_idle() on setup"), whose shorter setup
> > > > timeout caused context state initialization to fail.
> > > >
> > > > Cc: Yuan Hang <hang.yuan at intel.com>
> > > > Signed-off-by: Zhenyu Wang <zhenyuw at linux.intel.com>
> > > > ---
> > > > drivers/gpu/drm/i915/gvt/sched_policy.c | 34 ++++++++++++++++++++-----
> > > > 1 file changed, 27 insertions(+), 7 deletions(-)
> > > >
> > > > diff --git a/drivers/gpu/drm/i915/gvt/sched_policy.c b/drivers/gpu/drm/i915/gvt/sched_policy.c
> > > > index 09d7bb72b4ff..0d28702ed545 100644
> > > > --- a/drivers/gpu/drm/i915/gvt/sched_policy.c
> > > > +++ b/drivers/gpu/drm/i915/gvt/sched_policy.c
> > > > @@ -47,11 +47,15 @@ static bool vgpu_has_pending_workload(struct intel_vgpu *vgpu)
> > > > return false;
> > > > }
> > > > +/* We give 5 seconds higher prio for vGPU during start */
> > > > +#define GVT_SCHED_VGPU_PRI_TIME 5
> > > > +
> > > > struct vgpu_sched_data {
> > > > struct list_head lru_list;
> > > > struct intel_vgpu *vgpu;
> > > > bool active;
> > > > -
> > > > + bool pri_sched;
> > > > + ktime_t pri_time;
> > > > ktime_t sched_in_time;
> > > > ktime_t sched_time;
> > > > ktime_t left_ts;
> > > > @@ -183,6 +187,14 @@ static struct intel_vgpu *find_busy_vgpu(struct gvt_sched_data *sched_data)
> > > > if (!vgpu_has_pending_workload(vgpu_data->vgpu))
> > > > continue;
> > > > + if (vgpu_data->pri_sched) {
> > > > + if (ktime_before(ktime_get(), vgpu_data->pri_time)) {
> > > > + vgpu = vgpu_data->vgpu;
> > > > + break;
> > > > + } else
> > > > + vgpu_data->pri_sched = false;
> > > > + }
> > > > +
> > > > /* Return the vGPU only if it has time slice left */
> > > > if (vgpu_data->left_ts > 0) {
> > > > vgpu = vgpu_data->vgpu;
> > > > @@ -202,6 +214,7 @@ static void tbs_sched_func(struct gvt_sched_data *sched_data)
> > > > struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler;
> > > > struct vgpu_sched_data *vgpu_data;
> > > > struct intel_vgpu *vgpu = NULL;
> > > > +
> > > > /* no active vgpu or has already had a target */
> > > > if (list_empty(&sched_data->lru_runq_head) || scheduler->next_vgpu)
> > > > goto out;
> > > > @@ -209,12 +222,13 @@ static void tbs_sched_func(struct gvt_sched_data *sched_data)
> > > > vgpu = find_busy_vgpu(sched_data);
> > > > if (vgpu) {
> > > > scheduler->next_vgpu = vgpu;
> > > > -
> > > > - /* Move the last used vGPU to the tail of lru_list */
> > > > vgpu_data = vgpu->sched_data;
> > > > - list_del_init(&vgpu_data->lru_list);
> > > > - list_add_tail(&vgpu_data->lru_list,
> > > > - &sched_data->lru_runq_head);
> > > > + if (!vgpu_data->pri_sched) {
> > > > + /* Move the last used vGPU to the tail of lru_list */
> > > > + list_del_init(&vgpu_data->lru_list);
> > > > + list_add_tail(&vgpu_data->lru_list,
> > > > + &sched_data->lru_runq_head);
> > > > + }
> > > > } else {
> > > > scheduler->next_vgpu = gvt->idle_vgpu;
> > > > }
> > > Henry: I just have a concern here. If another Windows guest is already
> > > running, then with the new born vGPU the Windows guest will not be scheduled
> > > for 5 seconds, which exceeds the Windows default TDR time of 2 seconds.
> > >
> >
> > 5 seconds is the length of time we apply the higher sched policy to a new
> > born vGPU. E.g. if the new born vGPU has no workload, it won't be scheduled,
> > but it still stays at the head of lru_list. However, if the new born vGPU keeps
> > issuing workloads during those 5 seconds, it would cause other guests not to be
> > scheduled. Maybe we can do better for the new born vGPU even while it is under
> > the higher sched policy, by looking for any other hungry vGPU? Other suggestions?
> >
> Henry: Since the problem that this patch wants to solve happens in
> __intel_engines_record_defaults, which I understand is the first i915_request
> of the vGPU, how about giving higher priority only to the first workload of
> the new born vGPU instead of all workloads in the 5 seconds?
All workloads from the new born vGPU have a higher chance to be scheduled during the 5s.
> Or the gvt scheduler could calculate all pending workloads' waiting time, and
> promote their priority if the waiting time exceeds a threshold, to avoid TDR?
>
I was thinking to check whether vgpu->left_ts == vgpu->allocated_ts, which means
that vgpu hasn't been scheduled in our default period; then we can move it toward
the front of the lru list. Maybe 5s is too large, we might just change it to 2s.
> > > > @@ -328,11 +342,17 @@ static void tbs_sched_start_schedule(struct intel_vgpu *vgpu)
> > > > {
> > > > struct gvt_sched_data *sched_data = vgpu->gvt->scheduler.sched_data;
> > > > struct vgpu_sched_data *vgpu_data = vgpu->sched_data;
> > > > + ktime_t now;
> > > > if (!list_empty(&vgpu_data->lru_list))
> > > > return;
> > > > - list_add_tail(&vgpu_data->lru_list, &sched_data->lru_runq_head);
> > > > + now = ktime_get();
> > > > + vgpu_data->pri_time = ktime_add(now,
> > > > + ktime_set(GVT_SCHED_VGPU_PRI_TIME, 0));
> > > > + vgpu_data->pri_sched = true;
> > > > +
> > > > + list_add(&vgpu_data->lru_list, &sched_data->lru_runq_head);
> > > > if (!hrtimer_active(&sched_data->timer))
> > > > hrtimer_start(&sched_data->timer, ktime_add_ns(ktime_get(),
> > > >
> > >
> >
>
--
Open Source Technology Center, Intel ltd.
$gpg --keyserver wwwkeys.pgp.net --recv-keys 4D781827
More information about the intel-gvt-dev mailing list