[PATCH] drm/amd/display: Handle GPU reset for DC block

Kazlauskas, Nicholas nicholas.kazlauskas at amd.com
Wed May 20 15:46:21 UTC 2020


On 2020-05-20 11:29 a.m., Bhawanpreet Lakha wrote:
> [Why]
> Previously we used the s3 codepath for gpu reset. This can lead to issues in
> certain case where we end of waiting for fences which will never come (because
> parts of the hw are off due to gpu reset) and we end up waiting forever causing
> a deadlock.
> 
> [How]
> Handle GPU reset separately from normal s3 case. We essentially need to redo
> everything we do in s3, but avoid any drm calls.
> 
> For GPU reset case
> 
> suspend:
> 	-Acquire DC lock
> 	-Cache current dc_state
> 	-Commit 0 stream/planes to dc (this puts dc into a state where it can be
> 	 powered off)
> 	-Disable interrupts
> resume
> 	-Edit cached state to force full update
> 	-Commit cached state from suspend
> 	-Build stream and plane updates from the cached state
> 	-Commit stream/plane updates
> 	-Enable interrupts
> 	-Release DC lock

Some comments inline below, but mostly looks good.


> 
> Signed-off-by: Bhawanpreet Lakha <Bhawanpreet.Lakha at amd.com>
> ---
>   .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 175 +++++++++++++++++-
>   .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h |   1 +
>   2 files changed, 175 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> index 60fe64aef11b..46bb6e156f81 100644
> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> @@ -1521,10 +1521,109 @@ static int dm_hw_fini(void *handle)
>   	return 0;
>   }
>   
> +
> +static int dm_enable_vblank(struct drm_crtc *crtc);
> +static void dm_disable_vblank(struct drm_crtc *crtc);
> +
> +static void dm_gpureset_interrupt(struct amdgpu_device *adev,
> +				 struct dc_state *state, bool enable)
> +{

dm_gpureset_toggle_interrupts() might be more clear since this isn't an 
interrupt handler.

> +	enum dc_irq_source irq_source;
> +	struct amdgpu_crtc *acrtc;
> +	int rc = -EBUSY;
> +	int i = 0;
> +
> +	for (i = 0; i < state->stream_count; i++) {
> +		acrtc = get_crtc_by_otg_inst(
> +				adev, state->stream_status[i].primary_otg_inst);
> +
> +		if (acrtc && state->stream_status[i].plane_count != 0) {
> +			irq_source = IRQ_TYPE_PFLIP + acrtc->otg_inst;
> +			rc = dc_interrupt_set(adev->dm.dc, irq_source, enable) ? 0 : -EBUSY;
> +			DRM_DEBUG("crtc %d - vupdate irq %sabling: r=%d\n",
> +				  acrtc->crtc_id, enable ? "en" : "dis", rc);
> +			if (rc)
> +				DRM_WARN("Failed to %s pflip interrupts\n",
> +					 enable ? "enable" : "disable");
> +
> +			if (enable){

Style nitpick, should be if (enable) {

> +				rc = dm_enable_vblank(&acrtc->base);
> +				if (rc)
> +					DRM_WARN("Failed to enable vblank interrupts\n");
> +			} else

Let's keep the } else {

}

here since we're already using on the if above.

> +				dm_disable_vblank(&acrtc->base);
> +
> +		}
> +	}
> +
> +}
> +
> +enum dc_status amdgpu_dm_commit_zero_streams(struct dc *dc)
> +{
> +	struct dc_state *context = NULL;
> +	enum dc_status res = DC_ERROR_UNEXPECTED;
> +	int i;
> +	struct dc_stream_state *del_streams[MAX_PIPES] = { 0 };

Let's use memset for this rather than = { 0 }; , some compilers complain.

> +	int del_streams_count = 0;
> +
> +	context = dc_create_state(dc);
> +	if (context == NULL)
> +		goto context_alloc_fail;
> +
> +	dc_resource_state_copy_construct_current(dc, context);
> +
> +	/* First remove from context all streams */
> +	for (i = 0; i < context->stream_count; i++) {
> +		struct dc_stream_state *stream = context->streams[i];

Need an extra blank line here.

> +		del_streams[del_streams_count++] = stream;
> +	}
> +
> +	/* Remove all planes for removed streams and then remove the streams */
> +	for (i = 0; i < del_streams_count; i++) {
> +		if (!dc_rem_all_planes_for_stream(dc, del_streams[i], context)) {
> +			res = DC_FAIL_DETACH_SURFACES;
> +			goto fail;
> +		}
> +
> +		res = dc_remove_stream_from_ctx(dc, context, del_streams[i]);
> +		if (res != DC_OK)
> +			goto fail;
> +	}
> +
> +
> +	res = dc_validate_global_state(dc, context, false);
> +
> +	if (res != DC_OK) {
> +		DRM_ERROR("%s:resource validation failed, dc_status:%d\n", __func__, res);
> +		goto fail;
> +	}
> +
> +	res = dc_commit_state(dc, context);
> +
> +fail:
> +	dc_release_state(context);
> +
> +context_alloc_fail:
> +	return res;
> +}
> +
>   static int dm_suspend(void *handle)
>   {
>   	struct amdgpu_device *adev = handle;
>   	struct amdgpu_display_manager *dm = &adev->dm;
> +	int ret = 0;
> +
> +	if (adev->in_gpu_reset) {
> +		mutex_lock(&dm->dc_lock);
> +		dm->cached_dc_state = dc_copy_state(dm->dc->current_state);
> +
> +		dm_gpureset_interrupt(adev, dm->cached_dc_state, false);
> +		
> +		amdgpu_dm_commit_zero_streams(dm->dc);
> +
> +		amdgpu_dm_irq_suspend(adev);

Probably should have a blank line here for style.

> +		return ret;
> +	}
>   
>   	WARN_ON(adev->dm.cached_state);
>   	adev->dm.cached_state = drm_atomic_helper_suspend(adev->ddev);
> @@ -1640,6 +1739,46 @@ static void emulated_link_detect(struct dc_link *link)
>   
>   }
>   
> +static void dm_gpureset_commit_state(struct dc_state *dc_state,
> +				     struct amdgpu_display_manager *dm)
> +{
> +	struct {
> +		struct dc_surface_update surface_updates[MAX_SURFACES];
> +		struct dc_plane_info plane_infos[MAX_SURFACES];
> +		struct dc_scaling_info scaling_infos[MAX_SURFACES];
> +		struct dc_flip_addrs flip_addrs[MAX_SURFACES];
> +		struct dc_stream_update stream_update;
> +	} * bundle;
> +	int k, m;
> +
> +	bundle = kzalloc(sizeof(*bundle), GFP_KERNEL);
> +
> +	if (!bundle) {
> +		dm_error("Failed to allocate update bundle\n");
> +		goto cleanup;
> +	}
> +
> +	for (k = 0; k < dc_state->stream_count; k++) {
> +		bundle->stream_update.stream = dc_state->streams[k];
> +
> +		for (m = 0; m < dc_state->stream_status->plane_count; m++) {
> +			bundle->surface_updates[m].surface =
> +				dc_state->stream_status->plane_states[m];
> +			bundle->surface_updates[m].surface->force_full_update =
> +				true;
> +		}
> +		dc_commit_updates_for_stream(
> +			dm->dc, bundle->surface_updates,
> +			dc_state->stream_status->plane_count,
> +			dc_state->streams[k], &bundle->stream_update, dc_state);
> +	}
> +
> +cleanup:
> +	kfree(bundle);
> +
> +	return;
> +}
> +
>   static int dm_resume(void *handle)
>   {
>   	struct amdgpu_device *adev = handle;
> @@ -1656,8 +1795,42 @@ static int dm_resume(void *handle)
>   	struct dm_plane_state *dm_new_plane_state;
>   	struct dm_atomic_state *dm_state = to_dm_atomic_state(dm->atomic_obj.state);
>   	enum dc_connection_type new_connection_type = dc_connection_none;
> -	int i, r;
> +	struct dc_state *dc_state;
> +	int i, r, j;
> +
> +	if (adev->in_gpu_reset) {
> +		dc_state = dm->cached_dc_state;
>   
> +		r = dm_dmub_hw_init(adev);
> +		if (r)
> +			DRM_ERROR("DMUB interface failed to initialize: status=%d\n", r);
> +
> +		dc_set_power_state(dm->dc, DC_ACPI_CM_POWER_STATE_D0);
> +		dc_resume(dm->dc);
> +
> +		amdgpu_dm_irq_resume_early(adev);
> +
> +		for (i = 0; i < dc_state->stream_count; i++) {
> +			dc_state->streams[i]->mode_changed = true;
> +			for (j = 0; j < dc_state->stream_status->plane_count; j++) {
> +				dc_state->stream_status->plane_states[j]->update_flags.raw
> +					= 0xffffffff;
> +			}
> +		}
> +
> +		WARN_ON(!dc_commit_state(dm->dc, dc_state));
> +
> +		dm_gpureset_commit_state(dm->cached_dc_state, dm);
> +
> +		dm_gpureset_interrupt(adev, dm->cached_dc_state, true);
> +
> +		dm->cached_dc_state = NULL;

Shouldn't we free this?

Regards,
Nicholas Kazlauskas

> +		amdgpu_dm_irq_resume_late(adev);
> +
> +		mutex_unlock(&dm->dc_lock);
> +
> +		return 0;
> +	}
>   	/* Recreate dc_state - DC invalidates it when setting power state to S3. */
>   	dc_release_state(dm_state->context);
>   	dm_state->context = dc_create_state(dm->dc);
> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
> index 3f0c6298b588..40c912e0bf0c 100644
> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
> @@ -315,6 +315,7 @@ struct amdgpu_display_manager {
>   #endif
>   
>   	struct drm_atomic_state *cached_state;
> +	struct dc_state *cached_dc_state;
>   
>   	struct dm_comressor_info compressor;
>   
> 



More information about the amd-gfx mailing list