[PATCH v2 6/9] drm/i915/gvt: introduce the vGPU LRU list
Gao, Ping A
ping.a.gao at intel.com
Tue Feb 21 06:36:03 UTC 2017
On 2017/2/17 17:43, Tian, Kevin wrote:
>> From: Ping Gao
>> Sent: Tuesday, February 14, 2017 12:26 PM
>>
>> vGPU LRU list used to record the schedule status of all the vGPUs, the
> 'used to' means LRU list is already in old code. If new you should say
> like "LRU list is introduced to"
Sure, thanks!
>> longest unsched vGPU get the highest priority to schedule, it's
>> mainly used to void guest driver trigger TDR because of vGPU service
> void -> avoid
Sure, thanks!
>> is unavailable for a long time. Besides that, it keeps the round-robin
>> scheduling fairness when the scheduler needs to choose a new head because
>> of vGPU stop/kill.
>>
>> Signed-off-by: Ping Gao <ping.a.gao at intel.com>
>> ---
>> drivers/gpu/drm/i915/gvt/sched_policy.c | 75 ++++++++++++++++++++++++++++++---
>> 1 file changed, 70 insertions(+), 5 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/gvt/sched_policy.c b/drivers/gpu/drm/i915/gvt/sched_policy.c
>> index c174ce6..4dcbdc3 100644
>> --- a/drivers/gpu/drm/i915/gvt/sched_policy.c
>> +++ b/drivers/gpu/drm/i915/gvt/sched_policy.c
>> @@ -49,6 +49,7 @@ static bool vgpu_has_pending_workload(struct intel_vgpu *vgpu)
>>
>> struct tbs_vgpu_data {
>> struct list_head list;
>> + struct list_head lru_list;
>> struct intel_vgpu *vgpu;
>> /* put some per-vgpu sched stats here */
>> int64_t sched_in_time;
>> @@ -65,6 +66,7 @@ struct tbs_sched_data {
>> struct hrtimer timer;
>> unsigned long period;
>> struct list_head runq_head;
>> + struct list_head lru_vgpu_head;
>> };
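To spell out how the two lists relate, here is a sketch of the struct
with comments, based on how the rest of this patch uses the lists;
every vGPU sits on both lists through its two list_head members:

    struct tbs_sched_data {
            struct intel_gvt *gvt;
            struct hrtimer timer;
            unsigned long period;
            /* Round-robin run queue of vGPUs under scheduling. */
            struct list_head runq_head;
            /* The same vGPUs ordered by scheduling recency: head is
             * the longest unscheduled vGPU, tail the most recently
             * scheduled one.
             */
            struct list_head lru_vgpu_head;
    };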
>>
>> static void try_to_schedule_next_vgpu(struct intel_gvt *gvt)
>> @@ -141,6 +143,35 @@ static struct intel_vgpu *get_vgpu_timeslice_left(struct list_head *head,
>> return vgpu;
>> }
>>
>> +/* 1.5 seconds */
>> +#define VGPU_TDR_THRES_MS ((cycles_t)tsc_khz * 1500)
>> +
>> +static struct intel_vgpu *get_vgpu_longest_unsched(struct list_head *lru_vgpu_head)
> find_longest_unsched_vgpu
OK!
>> +{
>> + struct list_head *pos;
>> + struct tbs_vgpu_data *vgpu_data;
>> + struct intel_vgpu *vgpu = NULL;
>> +
>> + /* Find out the first real vGPU which has
>> + * pending workload in the lru list.
>> + */
> what do you mean by 'real' vGPU?
All vGPUs are 'real' here except the idle_vgpu.
>> + list_for_each(pos, lru_vgpu_head) {
>> + vgpu_data = container_of(pos, struct tbs_vgpu_data, lru_list);
>> + if (vgpu_data->vgpu->id != 0 &&
> better to define macro like is_idle_vgpu(vgpu)
OK.
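A minimal sketch of that helper, assuming the idle vGPU is always the
one created with id 0 (which is what the open-coded check relies on):

    /* The idle vGPU is a placeholder that never carries guest
     * workload; every other vGPU is a 'real' one.
     */
    #define is_idle_vgpu(vgpu) ((vgpu)->id == 0)

The loop body above then becomes:

    if (!is_idle_vgpu(vgpu_data->vgpu) &&
        vgpu_has_pending_workload(vgpu_data->vgpu)) {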
>> + vgpu_has_pending_workload(vgpu_data->vgpu)) {
>> + vgpu = vgpu_data->vgpu;
>> + break;
>> + }
> need explaining that longest one is in the head.
Sure.
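Will add a comment along these lines; it relies on the maintenance in
tbs_sched_func() below, where the vGPU just picked is moved to the tail:

    /* lru_vgpu_head is ordered by scheduling recency: every vGPU
     * picked by the scheduler is moved to the tail, so the head is
     * always the longest unscheduled vGPU. The first vGPU with
     * pending workload found while walking from the head is
     * therefore the one to pick.
     */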
>> + }
>> +
>> + /* Return the vGPU if its pending time exceeds the threshold */
>> + if (vgpu && (get_cycles() - vgpu->last_ctx_submit_time >
>> + VGPU_TDR_THRES_MS))
> last_ctx_submit_time or last_ctx_switch_out_time?
The guest TDR timer starts at the context submission point, so
last_ctx_submit_time is the right field here; there is no need to care
about the ctx_switch_out_time.
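For reference on the units: tsc_khz is the TSC frequency in kHz, which
is numerically the number of TSC cycles per millisecond, so

    (cycles_t)tsc_khz * 1500    /* cycles/ms * 1500 ms = cycles in 1.5 s */

is directly comparable with the get_cycles() delta. Strictly speaking
the value is in cycles rather than milliseconds, so a name like
VGPU_TDR_THRES_CYCLES might be clearer.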
>> + return vgpu;
>> +
>> + return NULL;
>> +}
>> +
>> static struct list_head *get_sched_head(struct tbs_sched_data *sched_data)
>> {
>> struct intel_gvt *gvt = sched_data->gvt;
>> @@ -152,8 +183,17 @@ static struct list_head *get_sched_head(struct tbs_sched_data *sched_data)
>> cur_vgpu_data = scheduler->current_vgpu->sched_data;
>> head = &cur_vgpu_data->list;
>> } else {
>> + struct tbs_vgpu_data *lru_vgpu_data;
>> +
>> gvt_dbg_sched("no current vgpu search from q head\n");
>> - head = &sched_data->runq_head;
>> +
>> + /* Choose the new head according to the lru list to avoid
>> + * unfairness when current_vgpu is set to NULL because of
>> + * a vGPU stop/kill.
>> + */
>> + lru_vgpu_data = container_of(sched_data->lru_vgpu_head.next,
>> + struct tbs_vgpu_data, lru_list);
>> + head = lru_vgpu_data->list.prev;
>> }
>>
>> return head;
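To make the head choice above easier to follow, a comment like this
could be added; it is a sketch assuming get_vgpu_timeslice_left()
starts its scan at head->next, as the 'sched head' comment in
pickup_next_vgpu() describes:

    /* lru_vgpu_head.next is the longest unscheduled vGPU. By
     * pointing head at that vGPU's run-queue predecessor, the
     * scan starting at head->next resumes the round-robin from
     * the least recently scheduled vGPU.
     */
    head = lru_vgpu_data->list.prev;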
>> @@ -161,16 +201,31 @@ static struct list_head *get_sched_head(struct tbs_sched_data *sched_data)
>>
>> static struct intel_vgpu *pickup_next_vgpu(struct tbs_sched_data *sched_data)
>> {
>> - struct intel_vgpu *next_vgpu = NULL;
>> - struct list_head *head = NULL;
>> + struct tbs_vgpu_data *next_vgpu_data;
>> + struct intel_vgpu *next_vgpu;
>> + struct list_head *head;
>>
>> /* The scheduler follows a round-robin style; the sched
>> * head decides where to start choosing the next vGPU, which
>> * is important to keep fairness. */
>> head = get_sched_head(sched_data);
>>
>> - /* Choose the vGPU which has timeslice left */
>> - next_vgpu = get_vgpu_timeslice_left(head, sched_data);
>> + /* Find the vGPU that has not been scheduled for close to the
>> + * max threshold, then execute it immediately to avoid guest TDR.
>> + */
>> + next_vgpu = get_vgpu_longest_unsched(&sched_data->lru_vgpu_head);
>> + if (next_vgpu) {
>> + next_vgpu_data = next_vgpu->sched_data;
>> +
>> + /* Move the longest unscheduled vGPU to just after the
>> + * current vGPU to keep the round-robin fair.
>> + */
>> + list_del_init(&next_vgpu_data->list);
>> + list_add(&next_vgpu_data->list, head);
>> + } else {
>> + /* Choose the vGPU which has timeslice left */
>> + next_vgpu = get_vgpu_timeslice_left(head, sched_data);
>> + }
>>
>> return next_vgpu;
>> }
>> @@ -181,6 +236,7 @@ static void tbs_sched_func(struct tbs_sched_data *sched_data)
>> {
>> struct intel_gvt *gvt = sched_data->gvt;
>> struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler;
>> + struct tbs_vgpu_data *vgpu_data;
>> struct intel_vgpu *vgpu = NULL;
>>
>> /* no vgpu or has already had a target */
>> @@ -192,6 +248,11 @@ static void tbs_sched_func(struct tbs_sched_data *sched_data)
>> if (vgpu) {
>> scheduler->next_vgpu = vgpu;
>> gvt_dbg_sched("pick next vgpu %d\n", vgpu->id);
>> +
>> + /* Move the last used vGPU to the tail of lru_list */
>> + vgpu_data = vgpu->sched_data;
>> + list_del_init(&vgpu_data->lru_list);
>> + list_add_tail(&vgpu_data->lru_list, &sched_data->lru_vgpu_head);
>> }
>> out:
>> if (scheduler->next_vgpu) {
>> @@ -235,6 +296,7 @@ static int tbs_sched_init(struct intel_gvt *gvt)
>> return -ENOMEM;
>>
>> INIT_LIST_HEAD(&data->runq_head);
>> + INIT_LIST_HEAD(&data->lru_vgpu_head);
>> hrtimer_init(&data->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
>> data->timer.function = tbs_timer_fn;
>> data->period = GVT_DEFAULT_TIME_SLICE;
>> @@ -267,6 +329,7 @@ static int tbs_sched_init_vgpu(struct intel_vgpu *vgpu)
>>
>> data->vgpu = vgpu;
>> INIT_LIST_HEAD(&data->list);
>> + INIT_LIST_HEAD(&data->lru_list);
>>
>> vgpu->sched_data = data;
>> return 0;
>> @@ -287,6 +350,7 @@ static void tbs_sched_start_schedule(struct intel_vgpu *vgpu)
>> return;
>>
>> list_add_tail(&vgpu_data->list, &sched_data->runq_head);
>> + list_add_tail(&vgpu_data->lru_list, &sched_data->lru_vgpu_head);
>>
>> vgpu->gvt->num_vgpu_sched++;
>> if (vgpu->gvt->num_vgpu_sched == 2)
>> @@ -303,6 +367,7 @@ static void tbs_sched_stop_schedule(struct intel_vgpu *vgpu)
>> return;
>>
>> list_del_init(&vgpu_data->list);
>> + list_del_init(&vgpu_data->lru_list);
>>
>> vgpu->gvt->num_vgpu_sched--;
>> if (vgpu->gvt->num_vgpu_sched == 1)
>> --
>> 2.7.4
>>