[PATCH v4 6/9] drm/i915/gvt: introduce the vGPU LRU list

Tian, Kevin kevin.tian at intel.com
Wed Mar 15 03:16:16 UTC 2017


> From: Ping Gao
> Sent: Wednesday, March 8, 2017 2:25 PM
> 
> vGPU LRU list introduced to record the schedule status of all the vGPUs, the

'is introduced'

> longest unsched vGPU get the highest priority to schedule, it's mainly used to

unscheduled

> avoid guest driver trigger TDR because of vGPU service is non-available for a

avoid guest driver timeout when its vGPU is not scheduled for a long time

> long time. Besides that it keep the round-robin scheduling fairness when

keep->keeps

> scheduler need choose a new head beacause of vGPU stop/kill.
> 
> Signed-off-by: Ping Gao <ping.a.gao at intel.com>
> ---
>  drivers/gpu/drm/i915/gvt/sched_policy.c | 77
> ++++++++++++++++++++++++++++++---
>  1 file changed, 72 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gvt/sched_policy.c
> b/drivers/gpu/drm/i915/gvt/sched_policy.c
> index e8a9db1..e0311c8 100644
> --- a/drivers/gpu/drm/i915/gvt/sched_policy.c
> +++ b/drivers/gpu/drm/i915/gvt/sched_policy.c
> @@ -49,6 +49,7 @@ static bool vgpu_has_pending_workload(struct
> intel_vgpu *vgpu)
> 
>  struct vgpu_sched_data {
>  	struct list_head list;
> +	struct list_head lru_list;
>  	struct intel_vgpu *vgpu;
> 
>  	/* per-vgpu sched stats */
> @@ -66,6 +67,7 @@ struct gvt_sched_data {
>  	struct hrtimer timer;
>  	unsigned long period;
>  	struct list_head runq_head;
> +	struct list_head lru_vgpu_head;
>  };
> 
>  static void try_to_schedule_next_vgpu(struct intel_gvt *gvt) @@ -148,6
> +150,37 @@ static struct intel_vgpu *get_vgpu_has_workload(struct
> list_head *head,
>  	return vgpu;
>  }
> 
> +/* 1.5 second */
> +#define VGPU_TDR_THRES_MS  ((cycles_t)tsc_khz * 1500) #define
> +is_idle_vgpu(vgpu) ((vgpu)->id == 0)

Please break these into separate lines — each #define should be on its own line, e.g.:

	/* 1.5 second */
	#define VGPU_TDR_THRES_MS	((cycles_t)tsc_khz * 1500)
	#define is_idle_vgpu(vgpu)	((vgpu)->id == 0)

> +
> +static struct intel_vgpu *find_longest_unsched_vgpu(struct list_head
> +*lru_vgpu_head) {
> +	struct list_head *pos;
> +	struct vgpu_sched_data *vgpu_data;
> +	struct intel_vgpu *vgpu = NULL;
> +
> +	/* Find out the first active vGPU which has
> +	 * pending workload in the lru list, the longest
> +	 * unsched vGPU is in the head.

"is in the head" -> "is the first candidate"? (the LRU head holds the longest-unscheduled vGPU, i.e. the first candidate to pick)

> +	 */
> +	list_for_each(pos, lru_vgpu_head) {
> +		vgpu_data = container_of(pos, struct vgpu_sched_data,
> lru_list);
> +		if (!is_idle_vgpu(vgpu_data->vgpu) &&
> +				vgpu_has_pending_workload(vgpu_data-
> >vgpu)) {
> +			vgpu = vgpu_data->vgpu;
> +			break;
> +		}
> +	}
> +
> +	/* Return the vGPU if it's pending time exceed the threshold */
> +	if (vgpu && (get_cycles() - vgpu->last_ctx_submit_time >
> +			VGPU_TDR_THRES_MS))
> +		return vgpu;
> +
> +	return NULL;
> +}
> +
>  static struct list_head *get_sched_head(struct gvt_sched_data *sched_data)
> {
>  	struct intel_gvt *gvt = sched_data->gvt; @@ -159,8 +192,17 @@
> static struct list_head *get_sched_head(struct gvt_sched_data *sched_data)
>  		cur_vgpu_data = scheduler->current_vgpu->sched_data;
>  		head = &cur_vgpu_data->list;
>  	} else {
> +		struct vgpu_sched_data *lru_vgpu_data;
> +
>  		gvt_dbg_sched("no current vgpu search from q head\n");
> -		head = &sched_data->runq_head;
> +
> +		/* Choose new head according to lru list to avoid
> +		 * unfairness when current_vgpu set to NULL because
> +		 * of the vGPU stop/kill.
> +		 */
> +		lru_vgpu_data = container_of(sched_data-
> >lru_vgpu_head.next,
> +					struct vgpu_sched_data, lru_list);
> +		head = lru_vgpu_data->list.prev;
>  	}
> 
>  	return head;
> @@ -168,8 +210,9 @@ static struct list_head *get_sched_head(struct
> gvt_sched_data *sched_data)
> 
>  static struct intel_vgpu *pickup_next_vgpu(struct gvt_sched_data
> *sched_data)  {
> -	struct intel_vgpu *next_vgpu = NULL;
> -	struct list_head *head = NULL;
> +	struct vgpu_sched_data *next_vgpu_data;
> +	struct intel_vgpu *next_vgpu;
> +	struct list_head *head;
> 
>  	/* The scheduler is follow round-robin style, sched
>  	 * head means where start to choose next vGPU, it's @@ -179,8
> +222,22 @@ static struct intel_vgpu *pickup_next_vgpu(struct
> gvt_sched_data *sched_data)
>  	 */
>  	head = get_sched_head(sched_data);
> 
> -	/* Choose the vGPU which has pending workload. */
> -	next_vgpu = get_vgpu_has_workload(head, sched_data);
> +	/* Find out the vGPU which have not been scheduled near a max
> +	 * threshold, then execute it immediately to avoid guest TDR.
> +	 */
> +	next_vgpu = find_longest_unsched_vgpu(&sched_data-
> >lru_vgpu_head);
> +	if (next_vgpu) {
> +		next_vgpu_data = next_vgpu->sched_data;
> +
> +		/* Move the longest unsched vGPU after
> +		 * current vGPU to keep fairness round-robin.
> +		 */
> +		list_del_init(&next_vgpu_data->list);
> +		list_add(&next_vgpu_data->list, head);
> +	} else {
> +		/* Choose the vGPU which has pending workload */
> +		next_vgpu = get_vgpu_has_workload(head, sched_data);
> +	}
> 
>  	return next_vgpu;
>  }
> @@ -191,6 +248,7 @@ static void tbs_sched_func(struct gvt_sched_data
> *sched_data)  {
>  	struct intel_gvt *gvt = sched_data->gvt;
>  	struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler;
> +	struct vgpu_sched_data *vgpu_data;
>  	struct intel_vgpu *vgpu = NULL;
> 
>  	/* no active vgpu or has already had a target */ @@ -203,6 +261,11
> @@ static void tbs_sched_func(struct gvt_sched_data *sched_data)
>  	if (vgpu) {
>  		scheduler->next_vgpu = vgpu;
>  		gvt_dbg_sched("pick next vgpu %d\n", vgpu->id);
> +
> +		/* Move the last used vGPU to the tail of lru_list */
> +		vgpu_data = vgpu->sched_data;
> +		list_del_init(&vgpu_data->lru_list);
> +		list_add_tail(&vgpu_data->lru_list, &sched_data-
> >lru_vgpu_head);
>  	}
>  out:
>  	if (scheduler->next_vgpu) {
> @@ -246,6 +309,7 @@ static int tbs_sched_init(struct intel_gvt *gvt)
>  		return -ENOMEM;
> 
>  	INIT_LIST_HEAD(&data->runq_head);
> +	INIT_LIST_HEAD(&data->lru_vgpu_head);
>  	hrtimer_init(&data->timer, CLOCK_MONOTONIC,
> HRTIMER_MODE_ABS);
>  	data->timer.function = tbs_timer_fn;
>  	data->period = GVT_DEFAULT_TIME_SLICE; @@ -278,6 +342,7 @@
> static int tbs_sched_init_vgpu(struct intel_vgpu *vgpu)
> 
>  	data->vgpu = vgpu;
>  	INIT_LIST_HEAD(&data->list);
> +	INIT_LIST_HEAD(&data->lru_list);
> 
>  	vgpu->sched_data = data;
>  	return 0;
> @@ -298,6 +363,7 @@ static void tbs_sched_start_schedule(struct
> intel_vgpu *vgpu)
>  		return;
> 
>  	list_add_tail(&vgpu_data->list, &sched_data->runq_head);
> +	list_add_tail(&vgpu_data->lru_list, &sched_data->lru_vgpu_head);
> 
>  	atomic_inc(&vgpu->gvt->num_vgpu_sched);
>  	if (atomic_read(&vgpu->gvt->num_vgpu_sched) ==
> HAS_ACTIVE_VGPU_SCHED) @@ -314,6 +380,7 @@ static void
> tbs_sched_stop_schedule(struct intel_vgpu *vgpu)
>  		return;
> 
>  	list_del_init(&vgpu_data->list);
> +	list_del_init(&vgpu_data->lru_list);
> 
>  	atomic_dec(&vgpu->gvt->num_vgpu_sched);
>  	if (atomic_read(&vgpu->gvt->num_vgpu_sched) ==
> ONLY_IDLE_VGPU_SCHED)
> --
> 2.7.4
> 
> _______________________________________________
> intel-gvt-dev mailing list
> intel-gvt-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gvt-dev


More information about the intel-gvt-dev mailing list