[PATCH] drm/amdgpu: Fix module unload hang by KIQ IRQ set

Yu, Xiangliang Xiangliang.Yu at amd.com
Fri Feb 17 06:15:06 UTC 2017


Hi Trigger,

Can you try moving "src->data = NULL" into the gfx_v8_0_kiq_set_interrupt_state function? I think that would be simpler and clearer.

Thanks!
Xiangliang Yu

> -----Original Message-----
> From: Trigger Huang [mailto:trigger.huang at amd.com]
> Sent: Thursday, February 16, 2017 6:48 PM
> To: amd-gfx at lists.freedesktop.org
> Cc: Liu, Monk <Monk.Liu at amd.com>; Yu, Xiangliang
> <Xiangliang.Yu at amd.com>; Huang, Trigger <Trigger.Huang at amd.com>
> Subject: [PATCH] drm/amdgpu: Fix module unload hang by KIQ IRQ set
> 
> In some cases, manually running insmod/rmmod on amdgpu is necessary. When
> unloading amdgpu, the KIQ IRQ enable/disable function will cause a system
> hang. The root cause is that, in the amdgpu_fini sequence, the sw_fini
> of IP block AMD_IP_BLOCK_TYPE_GFX is invoked earlier than that of
> AMD_IP_BLOCK_TYPE_IH, so continuing to use the variable freed by
> AMD_IP_BLOCK_TYPE_GFX causes a system hang.
> 
> Signed-off-by: Trigger Huang <trigger.huang at amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu.h   |  2 ++
>  drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 17 ++++++++---------
>  2 files changed, 10 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 1db2e7b..be43d09 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -793,6 +793,8 @@ struct amdgpu_kiq {
>  	struct amdgpu_bo	*eop_obj;
>  	struct amdgpu_ring	ring;
>  	struct amdgpu_irq_src	irq;
> +	u32			me;
> +	u32			pipe;
>  };
> 
>  /*
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> index 772c42b..04e2a5b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> @@ -1390,12 +1390,15 @@ static int gfx_v8_0_kiq_init_ring(struct
> amdgpu_device *adev,
>  	if (adev->gfx.mec2_fw) {
>  		ring->me = 2;
>  		ring->pipe = 0;
> +		adev->gfx.kiq.me = 2;
> +		adev->gfx.kiq.pipe = 0;
>  	} else {
>  		ring->me = 1;
>  		ring->pipe = 1;
> +		adev->gfx.kiq.me = 1;
> +		adev->gfx.kiq.pipe = 1;
>  	}
> 
> -	irq->data = ring;
>  	ring->queue = 0;
>  	sprintf(ring->name, "kiq %d.%d.%d", ring->me, ring->pipe, ring-
> >queue);
>  	r = amdgpu_ring_init(adev, ring, 1024, @@ -1410,7 +1413,6 @@ static
> void gfx_v8_0_kiq_free_ring(struct amdgpu_ring *ring,  {
>  	amdgpu_wb_free(ring->adev, ring->adev->virt.reg_val_offs);
>  	amdgpu_ring_fini(ring);
> -	irq->data = NULL;
>  }
> 
>  #define MEC_HPD_SIZE 2048
> @@ -6927,15 +6929,12 @@ static int
> gfx_v8_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
>  					    enum amdgpu_interrupt_state
> state)  {
>  	uint32_t tmp, target;
> -	struct amdgpu_ring *ring = (struct amdgpu_ring *)src->data;
> 
> -	BUG_ON(!ring || (ring->funcs->type != AMDGPU_RING_TYPE_KIQ));
> -
> -	if (ring->me == 1)
> +	if (adev->gfx.kiq.me == 1)
>  		target = mmCP_ME1_PIPE0_INT_CNTL;
>  	else
>  		target = mmCP_ME2_PIPE0_INT_CNTL;
> -	target += ring->pipe;
> +	target += adev->gfx.kiq.pipe;
> 
>  	switch (type) {
>  	case AMDGPU_CP_KIQ_IRQ_DRIVER0:
> @@ -6973,7 +6972,7 @@ static int gfx_v8_0_kiq_irq(struct amdgpu_device
> *adev,
>  			    struct amdgpu_iv_entry *entry)
>  {
>  	u8 me_id, pipe_id, queue_id;
> -	struct amdgpu_ring *ring = (struct amdgpu_ring *)source->data;
> +	struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);
> 
>  	BUG_ON(!ring || (ring->funcs->type != AMDGPU_RING_TYPE_KIQ));
> 
> @@ -7380,4 +7379,4 @@ static void
> gfx_v8_0_compute_mqd_soft_fini(struct amdgpu_device *adev)
> 
>  	ring = &adev->gfx.kiq.ring;
>  	amdgpu_bo_free_kernel(&ring->mqd_obj, &ring->mqd_gpu_addr,
> (void **)&ring->mqd_ptr);
> -}
> \ No newline at end of file
> +}
> --
> 2.7.4



More information about the amd-gfx mailing list