[PATCH v4 6/9] drm/i915/gvt: introduce the vGPU LRU list
Gao, Ping A
ping.a.gao at intel.com
Mon Mar 20 12:23:36 UTC 2017
Will address all comments in the next version.
On 2017/3/15 11:16, Tian, Kevin wrote:
>> From: Ping Gao
>> Sent: Wednesday, March 8, 2017 2:25 PM
>>
>> vGPU LRU list introduced to record the schedule status of all the vGPUs, the
> 'is introduced'
>
>> longest unsched vGPU get the highest priority to schedule, it's mainly used to
> unscheduled
>
>> avoid guest driver trigger TDR because of vGPU service is non-available for a
> avoid guest driver timeout when its vGPU is not scheduled for a long time
>
>> long time. Besides that it keep the round-robin scheduling fairness when
> keep->keeps
>
>> scheduler need choose a new head beacause of vGPU stop/kill.
>>
>> Signed-off-by: Ping Gao <ping.a.gao at intel.com>
>> ---
>> drivers/gpu/drm/i915/gvt/sched_policy.c | 77 ++++++++++++++++++++++++++++++---
>> 1 file changed, 72 insertions(+), 5 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/gvt/sched_policy.c b/drivers/gpu/drm/i915/gvt/sched_policy.c
>> index e8a9db1..e0311c8 100644
>> --- a/drivers/gpu/drm/i915/gvt/sched_policy.c
>> +++ b/drivers/gpu/drm/i915/gvt/sched_policy.c
>> @@ -49,6 +49,7 @@ static bool vgpu_has_pending_workload(struct intel_vgpu *vgpu)
>>
>> struct vgpu_sched_data {
>> struct list_head list;
>> + struct list_head lru_list;
>> struct intel_vgpu *vgpu;
>>
>> /* per-vgpu sched stats */
>> @@ -66,6 +67,7 @@ struct gvt_sched_data {
>> struct hrtimer timer;
>> unsigned long period;
>> struct list_head runq_head;
>> + struct list_head lru_vgpu_head;
>> };
>>
>> static void try_to_schedule_next_vgpu(struct intel_gvt *gvt)
>> @@ -148,6 +150,37 @@ static struct intel_vgpu *get_vgpu_has_workload(struct list_head *head,
>> return vgpu;
>> }
>>
>> +/* 1.5 second */
>> +#define VGPU_TDR_THRES_MS ((cycles_t)tsc_khz * 1500)
>> +#define is_idle_vgpu(vgpu) ((vgpu)->id == 0)
> break lines
>
>> +
>> +static struct intel_vgpu *find_longest_unsched_vgpu(struct list_head *lru_vgpu_head)
>> +{
>> + struct list_head *pos;
>> + struct vgpu_sched_data *vgpu_data;
>> + struct intel_vgpu *vgpu = NULL;
>> +
>> + /* Find out the first active vGPU which has
>> + * pending workload in the lru list, the longest
>> + * unsched vGPU is in the head.
> head->candidate?
>
>> + */
>> + list_for_each(pos, lru_vgpu_head) {
>> + vgpu_data = container_of(pos, struct vgpu_sched_data, lru_list);
>> + if (!is_idle_vgpu(vgpu_data->vgpu) &&
>> + vgpu_has_pending_workload(vgpu_data->vgpu)) {
>> + vgpu = vgpu_data->vgpu;
>> + break;
>> + }
>> + }
>> +
>> + /* Return the vGPU if its pending time exceeds the threshold */
>> + if (vgpu && (get_cycles() - vgpu->last_ctx_submit_time >
>> + VGPU_TDR_THRES_MS))
>> + return vgpu;
>> +
>> + return NULL;
>> +}
>> +
>> static struct list_head *get_sched_head(struct gvt_sched_data *sched_data)
>> {
>> struct intel_gvt *gvt = sched_data->gvt;
>> @@ -159,8 +192,17 @@ static struct list_head *get_sched_head(struct gvt_sched_data *sched_data)
>> cur_vgpu_data = scheduler->current_vgpu->sched_data;
>> head = &cur_vgpu_data->list;
>> } else {
>> + struct vgpu_sched_data *lru_vgpu_data;
>> +
>> gvt_dbg_sched("no current vgpu search from q head\n");
>> - head = &sched_data->runq_head;
>> +
>> + /* Choose new head according to lru list to avoid
>> + * unfairness when current_vgpu set to NULL because
>> + * of the vGPU stop/kill.
>> + */
>> + lru_vgpu_data = container_of(sched_data->lru_vgpu_head.next,
>> + struct vgpu_sched_data, lru_list);
>> + head = lru_vgpu_data->list.prev;
>> }
>>
>> return head;
>> @@ -168,8 +210,9 @@ static struct list_head *get_sched_head(struct gvt_sched_data *sched_data)
>>
>> static struct intel_vgpu *pickup_next_vgpu(struct gvt_sched_data *sched_data)
>> {
>> - struct intel_vgpu *next_vgpu = NULL;
>> - struct list_head *head = NULL;
>> + struct vgpu_sched_data *next_vgpu_data;
>> + struct intel_vgpu *next_vgpu;
>> + struct list_head *head;
>>
>> /* The scheduler is follow round-robin style, sched
>> * head means where start to choose next vGPU, it's
>> @@ -179,8 +222,22 @@ static struct intel_vgpu *pickup_next_vgpu(struct gvt_sched_data *sched_data)
>> */
>> head = get_sched_head(sched_data);
>>
>> - /* Choose the vGPU which has pending workload. */
>> - next_vgpu = get_vgpu_has_workload(head, sched_data);
>> + /* Find out the vGPU which have not been scheduled near a max
>> + * threshold, then execute it immediately to avoid guest TDR.
>> + */
>> + next_vgpu = find_longest_unsched_vgpu(&sched_data->lru_vgpu_head);
>> + if (next_vgpu) {
>> + next_vgpu_data = next_vgpu->sched_data;
>> +
>> + /* Move the longest unsched vGPU after
>> + * current vGPU to keep fairness round-robin.
>> + */
>> + list_del_init(&next_vgpu_data->list);
>> + list_add(&next_vgpu_data->list, head);
>> + } else {
>> + /* Choose the vGPU which has pending workload */
>> + next_vgpu = get_vgpu_has_workload(head, sched_data);
>> + }
>>
>> return next_vgpu;
>> }
>> @@ -191,6 +248,7 @@ static void tbs_sched_func(struct gvt_sched_data *sched_data)
>> {
>> struct intel_gvt *gvt = sched_data->gvt;
>> struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler;
>> + struct vgpu_sched_data *vgpu_data;
>> struct intel_vgpu *vgpu = NULL;
>>
>> /* no active vgpu or has already had a target */
>> @@ -203,6 +261,11 @@ static void tbs_sched_func(struct gvt_sched_data *sched_data)
>> if (vgpu) {
>> scheduler->next_vgpu = vgpu;
>> gvt_dbg_sched("pick next vgpu %d\n", vgpu->id);
>> +
>> + /* Move the last used vGPU to the tail of lru_list */
>> + vgpu_data = vgpu->sched_data;
>> + list_del_init(&vgpu_data->lru_list);
>> + list_add_tail(&vgpu_data->lru_list, &sched_data->lru_vgpu_head);
>> }
>> out:
>> if (scheduler->next_vgpu) {
>> @@ -246,6 +309,7 @@ static int tbs_sched_init(struct intel_gvt *gvt)
>> return -ENOMEM;
>>
>> INIT_LIST_HEAD(&data->runq_head);
>> + INIT_LIST_HEAD(&data->lru_vgpu_head);
>> hrtimer_init(&data->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
>> data->timer.function = tbs_timer_fn;
>> data->period = GVT_DEFAULT_TIME_SLICE;
>> @@ -278,6 +342,7 @@ static int tbs_sched_init_vgpu(struct intel_vgpu *vgpu)
>>
>> data->vgpu = vgpu;
>> INIT_LIST_HEAD(&data->list);
>> + INIT_LIST_HEAD(&data->lru_list);
>>
>> vgpu->sched_data = data;
>> return 0;
>> @@ -298,6 +363,7 @@ static void tbs_sched_start_schedule(struct intel_vgpu *vgpu)
>> return;
>>
>> list_add_tail(&vgpu_data->list, &sched_data->runq_head);
>> + list_add_tail(&vgpu_data->lru_list, &sched_data->lru_vgpu_head);
>>
>> atomic_inc(&vgpu->gvt->num_vgpu_sched);
>> if (atomic_read(&vgpu->gvt->num_vgpu_sched) == HAS_ACTIVE_VGPU_SCHED)
>> @@ -314,6 +380,7 @@ static void tbs_sched_stop_schedule(struct intel_vgpu *vgpu)
>> return;
>>
>> list_del_init(&vgpu_data->list);
>> + list_del_init(&vgpu_data->lru_list);
>>
>> atomic_dec(&vgpu->gvt->num_vgpu_sched);
>> if (atomic_read(&vgpu->gvt->num_vgpu_sched) == ONLY_IDLE_VGPU_SCHED)
>> --
>> 2.7.4
>>
>> _______________________________________________
>> intel-gvt-dev mailing list
>> intel-gvt-dev at lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/intel-gvt-dev
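For readers following the thread, the scheme in the quoted patch is easy to model outside the kernel. Below is a minimal userspace C sketch of the same LRU discipline, not the GVT code itself: every name in it (mock_vgpu, lru[], lru_touch, TDR_THRES) is invented for illustration, standing in for vgpu_sched_data.lru_list, lru_vgpu_head, the list_del_init()/list_add_tail() pair in tbs_sched_func(), and VGPU_TDR_THRES_MS respectively. The least recently scheduled vGPU sits at the front of the LRU order; a vGPU that has waited past the threshold is picked out of turn; whatever gets scheduled is rotated to the tail.

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

#define NR_VGPU   4
#define TDR_THRES 1500	/* mock "cycles"; stands in for VGPU_TDR_THRES_MS */

struct mock_vgpu {
	int id;			/* id 0 plays the idle-vGPU role */
	bool pending;		/* has pending workload */
	unsigned long last_submit;
};

static struct mock_vgpu *lru[NR_VGPU];	/* lru[0] = least recently scheduled */

/* Move the entry at @pos to the tail, like the list_del_init() +
 * list_add_tail() pair in tbs_sched_func(). */
static void lru_touch(int pos)
{
	struct mock_vgpu *v = lru[pos];

	memmove(&lru[pos], &lru[pos + 1], (NR_VGPU - pos - 1) * sizeof(*lru));
	lru[NR_VGPU - 1] = v;
}

/* Mirror of find_longest_unsched_vgpu(): take the first active vGPU with
 * pending work, and return it only if it has waited past the threshold. */
static struct mock_vgpu *find_longest_unsched(unsigned long now)
{
	for (int i = 0; i < NR_VGPU; i++) {
		struct mock_vgpu *v = lru[i];

		if (v->id != 0 && v->pending)
			return (now - v->last_submit > TDR_THRES) ? v : NULL;
	}
	return NULL;
}

int main(void)
{
	struct mock_vgpu vgpus[NR_VGPU] = {
		{ .id = 0 },
		{ .id = 1, .pending = true, .last_submit = 0 },
		{ .id = 2, .pending = true, .last_submit = 900 },
		{ .id = 3, .pending = true, .last_submit = 950 },
	};

	for (int i = 0; i < NR_VGPU; i++)
		lru[i] = &vgpus[i];

	/* vGPU 1 has waited 2000 "cycles" > TDR_THRES: it preempts the
	 * normal round-robin pick, then rotates to the LRU tail. */
	struct mock_vgpu *v = find_longest_unsched(2000);

	if (v) {
		printf("anti-starvation pick: vgpu %d\n", v->id);
		for (int i = 0; i < NR_VGPU; i++) {
			if (lru[i] == v) {
				lru_touch(i);
				break;
			}
		}
	}

	printf("new lru order:");
	for (int i = 0; i < NR_VGPU; i++)
		printf(" %d", lru[i]->id);
	printf("\n");
	return 0;
}

Compiled standalone, this prints "anti-starvation pick: vgpu 1" followed by "new lru order: 0 2 3 1": the starved vGPU is serviced at once and then rotated to the tail, so normal round-robin resumes with no vGPU permanently favoured, which is the fairness property the commit message describes.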
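One detail worth spelling out is the unit reasoning behind VGPU_TDR_THRES_MS: tsc_khz is the TSC frequency in kHz, which is numerically the number of TSC cycles per millisecond, so (cycles_t)tsc_khz * 1500 is 1.5 seconds expressed in cycles, directly comparable with get_cycles() - last_ctx_submit_time with no division on the hot path. A quick standalone check, assuming a hypothetical 2.4 GHz TSC (the 2400000 below is an example value; the kernel reads the real tsc_khz at boot):

#include <stdio.h>

int main(void)
{
	unsigned long tsc_khz = 2400000;	/* assumed 2.4 GHz TSC */
	unsigned long thres = tsc_khz * 1500;	/* mirrors VGPU_TDR_THRES_MS */

	/* 3600000000 cycles / 2.4e9 cycles per second = 1.5 seconds */
	printf("threshold = %lu cycles = %.1f s\n",
	       thres, (double)thres / (tsc_khz * 1000.0));
	return 0;
}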