[PATCH v5 4/8] drm/i915/gvt: introduce the vGPU LRU list
Gao, Ping A
ping.a.gao at intel.com
Wed Mar 29 03:26:37 UTC 2017
On 2017/3/29 11:02, Tian, Kevin wrote:
>> From: Ping Gao
>> Sent: Tuesday, March 28, 2017 2:48 PM
>>
>> vGPU LRU list is introduced to record the schedule status of all the vGPUs, the
> the -> 'so the'
>
>> longest unscheduled vGPU get the highest priority to schedule, it's mainly
> break sentence. ", it's" -> ". It's"
>
>> used to avoid guest driver timeout when its vGPU is not scheduled for a long
>> time. Besides that it keeps the round-robin scheduling fairness when
>> scheduler needs to choose a new head because of vGPU stop/kill.
> round-robin policy is there not just because of vGPU stop/kill. It's still
> the default policy when there is no urgent case from LRU list. You may
> want to fix this description.
>
>> Signed-off-by: Ping Gao <ping.a.gao at intel.com>
>> ---
>> drivers/gpu/drm/i915/gvt/sched_policy.c | 76
>> ++++++++++++++++++++++++++++++---
>> 1 file changed, 71 insertions(+), 5 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/gvt/sched_policy.c
>> b/drivers/gpu/drm/i915/gvt/sched_policy.c
>> index c6523b9..5e167eb 100644
>> --- a/drivers/gpu/drm/i915/gvt/sched_policy.c
>> +++ b/drivers/gpu/drm/i915/gvt/sched_policy.c
>> @@ -49,6 +49,7 @@ static bool vgpu_has_pending_workload(struct
>> intel_vgpu *vgpu)
>>
>> struct vgpu_sched_data {
>> struct list_head list;
>> + struct list_head lru_list;
>> struct intel_vgpu *vgpu;
>>
>> ktime_t sched_in_time;
>> @@ -65,6 +66,7 @@ struct gvt_sched_data {
>> struct hrtimer timer;
>> unsigned long period;
>> struct list_head runq_head;
>> + struct list_head lru_vgpu_head;
> just lru_head to be consistent with runq_head.
>
>> };
>>
>> static void try_to_schedule_next_vgpu(struct intel_gvt *gvt) @@ -141,6
>> +143,36 @@ static struct intel_vgpu *find_busy_vgpu(struct list_head *head,
>> return vgpu;
>> }
>>
>> +/* 1.5 second */
>> +#define VGPU_TDR_THRES_MS 1500
>> +
>> +static struct intel_vgpu *find_longest_unsched_vgpu(struct list_head
>> +*lru_vgpu_head) {
>> + struct list_head *pos;
>> + struct vgpu_sched_data *vgpu_data;
>> + struct intel_vgpu *vgpu = NULL;
>> +
>> + /* Find out the first active vGPU which has
>> + * pending workload in the lru list, the longest
>> + * unsched vGPU is the first one after head.
>> + */
>> + list_for_each(pos, lru_vgpu_head) {
>> + vgpu_data = container_of(pos, struct vgpu_sched_data,
>> lru_list);
>> + if (vgpu_has_pending_workload(vgpu_data->vgpu)) {
>> + vgpu = vgpu_data->vgpu;
>> + break;
>> + }
>> + }
>> +
>> + /* Return the vGPU if it's pending time exceed the threshold */
>> + if (vgpu && (ktime_to_ms(ktime_sub(ktime_get(),
>> + vgpu-
>>> last_ctx_submit_time)) >
>> +
>> VGPU_TDR_THRES_MS))
>> + return vgpu;
>> +
>> + return NULL;
>> +}
>> +
>> static struct list_head *get_sched_head(struct gvt_sched_data *sched_data)
>> {
>> struct intel_gvt *gvt = sched_data->gvt; @@ -152,8 +184,17 @@
>> static struct list_head *get_sched_head(struct gvt_sched_data *sched_data)
>> cur_vgpu_data = scheduler->current_vgpu->sched_data;
>> head = &cur_vgpu_data->list;
>> } else {
>> + struct vgpu_sched_data *lru_vgpu_data;
>> +
>> gvt_dbg_sched("no current vgpu search from q head\n");
>> - head = &sched_data->runq_head;
>> +
>> + /* Choose new head according to lru list to avoid
>> + * unfairness when current_vgpu set to NULL because
>> + * of the vGPU stop/kill.
>> + */
>> + lru_vgpu_data = container_of(sched_data-
>>> lru_vgpu_head.next,
>> + struct vgpu_sched_data, lru_list);
>> + head = lru_vgpu_data->list.prev;
> Since later lru_list is specially handled, why do you need
> handle it here?
>
>> + next_vgpu = find_longest_unsched_vgpu(&sched_data-
>>> lru_vgpu_head);
> Also above sequence looks different from your earlier commit
> msg. You choose lru list here only when there is no current vgpu,
> i.e. not treating it as higher priority than normal runq.
When current_vgpu is set to NULL, there is no way to know where to start
picking up a vGPU in the runq list. The code here finds out which vGPU is the
least recently scheduled one, and then starts picking from that vGPU in the runq.
So the LRU list has two purposes:
1. It helps to find the urgent vGPU.
2. It helps to find a proper point in the runq from which to start picking
when current_vgpu is set to NULL, or is the idle vGPU in the next patch.
I need to change the commit msg :)
>
>
>> }
>>
>> return head;
>> @@ -161,8 +202,9 @@ static struct list_head *get_sched_head(struct
>> gvt_sched_data *sched_data)
>>
>> static struct intel_vgpu *pickup_next_vgpu(struct gvt_sched_data
>> *sched_data) {
>> - struct intel_vgpu *next_vgpu = NULL;
>> - struct list_head *head = NULL;
>> + struct vgpu_sched_data *next_vgpu_data;
>> + struct intel_vgpu *next_vgpu;
>> + struct list_head *head;
>>
>> /* The scheduler follows round-robin style, sched
>> * head means where start to choose next vGPU, it's @@ -172,8
>> +214,22 @@ static struct intel_vgpu *pickup_next_vgpu(struct
>> gvt_sched_data *sched_data)
>> */
>> head = get_sched_head(sched_data);
>>
>> - /* Choose the vGPU which has pending workload. */
>> - next_vgpu = find_busy_vgpu(head, sched_data);
>> + /* Find out the vGPU which have not been scheduled near a max
>> + * threshold, then execute it immediately to avoid guest TDR.
>> + */
>> + next_vgpu = find_longest_unsched_vgpu(&sched_data-
>>> lru_vgpu_head);
>> + if (next_vgpu) {
>> + next_vgpu_data = next_vgpu->sched_data;
>> +
>> + /* Move the longest unsched vGPU after
>> + * current vGPU to keep fairness round-robin.
>> + */
>> + list_del_init(&next_vgpu_data->list);
>> + list_add(&next_vgpu_data->list, head);
>> + } else {
>> + /* Choose the vGPU which has pending workload */
>> + next_vgpu = find_busy_vgpu(head, sched_data);
> it's another proof that looks you don't need lru logic in
> get_sched_head since it's only used here.
>
>> + }
>>
>> return next_vgpu;
>> }
>> @@ -184,6 +240,7 @@ static void tbs_sched_func(struct gvt_sched_data
>> *sched_data) {
>> struct intel_gvt *gvt = sched_data->gvt;
>> struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler;
>> + struct vgpu_sched_data *vgpu_data;
>> struct intel_vgpu *vgpu = NULL;
>>
>> /* no active vgpu or has already had a target */ @@ -195,6 +252,11
>> @@ static void tbs_sched_func(struct gvt_sched_data *sched_data)
>> if (vgpu) {
>> scheduler->next_vgpu = vgpu;
>> gvt_dbg_sched("pick next vgpu %d\n", vgpu->id);
>> +
>> + /* Move the last used vGPU to the tail of lru_list */
>> + vgpu_data = vgpu->sched_data;
>> + list_del_init(&vgpu_data->lru_list);
>> + list_add_tail(&vgpu_data->lru_list, &sched_data-
>>> lru_vgpu_head);
>> }
>> out:
>> if (scheduler->next_vgpu) {
>> @@ -238,6 +300,7 @@ static int tbs_sched_init(struct intel_gvt *gvt)
>> return -ENOMEM;
>>
>> INIT_LIST_HEAD(&data->runq_head);
>> + INIT_LIST_HEAD(&data->lru_vgpu_head);
>> hrtimer_init(&data->timer, CLOCK_MONOTONIC,
>> HRTIMER_MODE_ABS);
>> data->timer.function = tbs_timer_fn;
>> data->period = GVT_DEFAULT_TIME_SLICE; @@ -270,6 +333,7 @@
>> static int tbs_sched_init_vgpu(struct intel_vgpu *vgpu)
>>
>> data->vgpu = vgpu;
>> INIT_LIST_HEAD(&data->list);
>> + INIT_LIST_HEAD(&data->lru_list);
>>
>> vgpu->sched_data = data;
>>
>> @@ -291,6 +355,7 @@ static void tbs_sched_start_schedule(struct
>> intel_vgpu *vgpu)
>> return;
>>
>> list_add_tail(&vgpu_data->list, &sched_data->runq_head);
>> + list_add_tail(&vgpu_data->lru_list, &sched_data->lru_vgpu_head);
>>
>> if (!hrtimer_active(&sched_data->timer))
>> hrtimer_start(&sched_data->timer,
>> ktime_add_ns(ktime_get(), @@ -302,6 +367,7 @@ static void
>> tbs_sched_stop_schedule(struct intel_vgpu *vgpu)
>> struct vgpu_sched_data *vgpu_data = vgpu->sched_data;
>>
>> list_del_init(&vgpu_data->list);
>> + list_del_init(&vgpu_data->lru_list);
>> }
>>
>> static struct intel_gvt_sched_policy_ops tbs_schedule_ops = {
>> --
>> 2.7.4
>>
>> _______________________________________________
>> intel-gvt-dev mailing list
>> intel-gvt-dev at lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/intel-gvt-dev
More information about the intel-gvt-dev
mailing list