[PATCH 2/7] drm/amdgpu: Add ring reset callback for JPEG4_0_3

Christian König christian.koenig at amd.com
Fri Jan 31 16:26:14 UTC 2025


Am 31.01.25 um 17:23 schrieb Sathishkumar S:
> Add ring reset function callback for JPEG4_0_3 to
> recover from job timeouts without a full gpu reset.
>
> Signed-off-by: Sathishkumar S <sathishkumar.sundararaju at amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c | 60 ++++++++++++++++++++++--
>   1 file changed, 57 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
> index be0b3b4c8690..62d8628dccc5 100644
> --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
> +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
> @@ -204,9 +204,7 @@ static int jpeg_v4_0_3_sw_init(struct amdgpu_ip_block *ip_block)
>   	if (r)
>   		return r;
>   
> -	/* TODO: Add queue reset mask when FW fully supports it */
> -	adev->jpeg.supported_reset =
> -		amdgpu_get_soft_full_reset_mask(&adev->jpeg.inst[0].ring_dec[0]);
> +	adev->jpeg.supported_reset = AMDGPU_RESET_TYPE_PER_PIPE;
>   	r = amdgpu_jpeg_sysfs_reset_mask_init(adev);
>   	if (r)
>   		return r;
> @@ -231,6 +229,7 @@ static int jpeg_v4_0_3_sw_fini(struct amdgpu_ip_block *ip_block)
>   		return r;
>   
>   	amdgpu_jpeg_sysfs_reset_mask_fini(adev);
> +
>   	r = amdgpu_jpeg_sw_fini(adev);
>   
>   	return r;
> @@ -1099,6 +1098,60 @@ static int jpeg_v4_0_3_process_interrupt(struct amdgpu_device *adev,
>   	return 0;
>   }
>   
> +static int jpeg_v4_0_3_wait_for_idle_on_inst(struct amdgpu_ring *ring)
> +{
> +	struct amdgpu_device *adev = ring->adev;
> +	struct amdgpu_ring *r;
> +	int ret, j;
> +
> +	for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
> +		r = &adev->jpeg.inst[ring->me].ring_dec[j];
> +		r->sched.ready = false;
> +	}
> +	/* publish update */
> +	smp_rmb();

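Using smp_rmb() to publish the update is 100% incorrect. smp_rmb() is a read barrier: it only orders loads against other loads and does nothing to order the preceding stores to sched.ready. Publishing a store needs a write-side barrier (smp_wmb() or smp_store_release()) paired with a matching barrier on the reader side.

What exactly are you trying to do?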

Regards,
Christian.


> +	for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
> +		r = &adev->jpeg.inst[ring->me].ring_dec[j];
> +		if (r->pipe == j)
> +			continue;
> +		ret = SOC15_WAIT_ON_RREG_OFFSET(JPEG, GET_INST(JPEG, ring->me),
> +						regUVD_JRBC0_UVD_JRBC_STATUS,
> +						jpeg_v4_0_3_core_reg_offset(j),
> +						UVD_JRBC0_UVD_JRBC_STATUS__RB_JOB_DONE_MASK,
> +						UVD_JRBC0_UVD_JRBC_STATUS__RB_JOB_DONE_MASK);
> +		if (ret)
> +			return ret;
> +	}
> +
> +	return 0;
> +}
> +
> +static int jpeg_v4_0_3_ring_reset(struct amdgpu_ring *ring, unsigned int vmid)
> +{
> +	struct amdgpu_device *adev = ring->adev;
> +	struct amdgpu_ring *r;
> +	int ret, j;
> +
> +	if (amdgpu_sriov_vf(adev))
> +		return -EINVAL;
> +
> +	jpeg_v4_0_3_wait_for_idle_on_inst(ring);
> +	jpeg_v4_0_3_stop_inst(ring->adev, ring->me);
> +	jpeg_v4_0_3_start_inst(ring->adev, ring->me);
> +	for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
> +		r = &adev->jpeg.inst[ring->me].ring_dec[j];
> +		jpeg_v4_0_3_start_jrbc(r);
> +		ret = amdgpu_ring_test_helper(r);
> +		if (ret)
> +			return ret;
> +		r->sched.ready = true;
> +	}
> +	/* publish update */
> +	smp_rmb();
> +	dev_info(adev->dev, "Reset on %s succeeded\n", ring->sched.name);
> +	return 0;
> +}
> +
>   static const struct amd_ip_funcs jpeg_v4_0_3_ip_funcs = {
>   	.name = "jpeg_v4_0_3",
>   	.early_init = jpeg_v4_0_3_early_init,
> @@ -1145,6 +1198,7 @@ static const struct amdgpu_ring_funcs jpeg_v4_0_3_dec_ring_vm_funcs = {
>   	.emit_wreg = jpeg_v4_0_3_dec_ring_emit_wreg,
>   	.emit_reg_wait = jpeg_v4_0_3_dec_ring_emit_reg_wait,
>   	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
> +	.reset = jpeg_v4_0_3_ring_reset,
>   };
>   
>   static void jpeg_v4_0_3_set_dec_ring_funcs(struct amdgpu_device *adev)



More information about the amd-gfx mailing list