[PATCH] drm/i915/gvt: handle workload lifecycle properly

Wed Mar 1 10:04:20 UTC 2017

On 2017.02.27 17:34:51 +0800, Chuanxiao Dong wrote:
> Currently i915 has a request replay mechanism which can make sure
> the request can be replayed after a GPU reset. With this mechanism,
> gvt should wait until the GVT request seqno passed before complete
> the current workload. So that there should be a context switch interrupt
> come before gvt free the workload. In this way, workload lifecylce
> matches with the i915 request lifecycle. The workload can only be freed
> after the request is completed.
> 
> Signed-off-by: Chuanxiao Dong <chuanxiao.dong at intel.com>
> ---
>  drivers/gpu/drm/i915/gvt/scheduler.c | 49 ++++++++++++++++++++++++------------
>  1 file changed, 33 insertions(+), 16 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
> index e355a82..0967ca9 100644
> --- a/drivers/gpu/drm/i915/gvt/scheduler.c
> +++ b/drivers/gpu/drm/i915/gvt/scheduler.c
> @@ -151,6 +151,15 @@ static int shadow_context_status_change(struct notifier_block *nb,
>  	case INTEL_CONTEXT_SCHEDULE_OUT:
>  		intel_gvt_restore_render_mmio(workload->vgpu,
>  					      workload->ring_id);
> +		/* If the status is -EINPROGRESS means this workload
> +		 * doesn't meet any issue during dispatching so when
> +		 * get the SCHEDULE_OUT set the status to be zero for
> +		 * good. If the status is NOT -EINPROGRESS means there
> +		 * is something wrong happened during dispatching and
> +		 * the status should not be set to zero
> +		 */
> +		if (workload->status == -EINPROGRESS)
> +			workload->status = 0;
>  		atomic_set(&workload->shadow_ctx_active, 0);
>  		break;
>  	default:
> @@ -362,15 +371,23 @@ static void complete_current_workload(struct intel_gvt *gvt, int ring_id)
>  	workload = scheduler->current_workload[ring_id];
>  	vgpu = workload->vgpu;
>  
> -	if (!workload->status && !vgpu->resetting) {
> +	/* For the workload w/ request, needs to wait for the context
> +	 * switch to make sure request is completed.
> +	 * For the workload w/o request, directly complete the workload.
> +	 */
> +	if (workload->req) {
>  		wait_event(workload->shadow_ctx_status_wq,
>  			   !atomic_read(&workload->shadow_ctx_active));
>  
> -		update_guest_context(workload);
> +		i915_gem_request_put(fetch_and_zero(&workload->req));
>  
> -		for_each_set_bit(event, workload->pending_events,
> -				 INTEL_GVT_EVENT_MAX)
> -			intel_vgpu_trigger_virtual_event(vgpu, event);
> +		if (!workload->status && !vgpu->resetting) {
> +			update_guest_context(workload);
> +
> +			for_each_set_bit(event, workload->pending_events,
> +					 INTEL_GVT_EVENT_MAX)
> +				intel_vgpu_trigger_virtual_event(vgpu, event);
> +		}
>  	}
>  
>  	gvt_dbg_sched("ring id %d complete workload %p status %d\n",
> @@ -400,7 +417,6 @@ static int workload_thread(void *priv)
>  	int ring_id = p->ring_id;
>  	struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler;
>  	struct intel_vgpu_workload *workload = NULL;
> -	long lret;
>  	int ret;
>  	bool need_force_wake = IS_SKYLAKE(gvt->dev_priv);
>  	DEFINE_WAIT_FUNC(wait, woken_wake_function);
> @@ -449,23 +465,24 @@ static int workload_thread(void *priv)
>  
>  		gvt_dbg_sched("ring id %d wait workload %p\n",
>  				workload->ring_id, workload);
> -
> -		lret = i915_wait_request(workload->req,
> +retry:
> +		i915_wait_request(workload->req,
>  					 0, MAX_SCHEDULE_TIMEOUT);
> -		if (lret < 0) {
> -			workload->status = lret;
> -			gvt_err("fail to wait workload, skip\n");
> -		} else {
> -			workload->status = 0;
> +		/* I915 has replay mechanism and a request will be replayed
> +		 * if there is i915 reset. So the seqno will be updated anyway.
> +		 * If the seqno is not updated yet after waiting, which means
> +		 * the replay may still be in progress and we can wait again.
> +		 */
> +		if (!i915_gem_request_completed(workload->req)) {
> +			gvt_err("workload %p not completed, wait again\n",
> +					workload);

should this be error?

> +			goto retry;
>  		}

So looks we should actually need to wait context switch change notify
happened with correct workload status? And we need to check i915_wait_request()
return, as for normal state shouldn't need to check request complete again.

What if a bad guest workload that does cause gpu hang? Now gvt context is
not bannable but I think we should care for that case too.

>  
>  complete:
>  		gvt_dbg_sched("will complete workload %p, status: %d\n",
>  				workload, workload->status);
>  
> -		if (workload->req)
> -			i915_gem_request_put(fetch_and_zero(&workload->req));
> -
>  		complete_current_workload(gvt, ring_id);
>  
>  		if (need_force_wake)
> -- 
> 2.7.4
> _______________________________________________
> intel-gvt-dev mailing list
> intel-gvt-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gvt-dev

-- 
Open Source Technology Center, Intel ltd.

$gpg --keyserver wwwkeys.pgp.net --recv-keys 4D781827
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 163 bytes
Desc: not available
URL: <https://lists.freedesktop.org/archives/intel-gvt-dev/attachments/20170301/e5227529/attachment.sig>