[PATCH v2 5/5] drm/amdgpu: add VCN booting with firmware loaded by PSP

Fri Aug 17 23:25:53 UTC 2018

ROCm CQE is seeing what looks like hangs during amdgpu initialization on
Raven and Vega20. Amdgpu basically stops printing messages while trying
to load VCN firmware. It never completes initialization, but there is no
obvious error message. These are the last messages from amdgpu in the log:

[    1.282661] [drm] Found VCN firmware Version: 1.24 Family ID: 18
[    1.282664] [drm] PSP loading VCN firmware
[    1.303164] [drm] reserve 0x400000 from 0xf400e00000 for PSP TMR SIZE

Any application trying to use /dev/dri/* hangs with a backtrace like the one below.

Was this change expected to affect Raven and Vega20? Was it tested
before being submitted? Do we need updated VCN firmware for it to work?

Thanks,
  Felix

[  363.352985] INFO: task gpu-manager:937 blocked for more than 120 seconds.
[  363.352995]       Not tainted 4.18.0-rc1-kfd-compute-roc-master-8912 #1
[  363.352999] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[  363.353004] gpu-manager     D    0   937      1 0x00000000
[  363.353008] Call Trace:
[  363.353018]  ? __schedule+0x3d9/0x8b0
[  363.353023]  schedule+0x32/0x80
[  363.353026]  schedule_preempt_disabled+0xa/0x10
[  363.353028]  __mutex_lock.isra.4+0x2ae/0x4e0
[  363.353031]  ? _cond_resched+0x16/0x40
[  363.353048]  ? drm_stub_open+0x2e/0x100 [drm]
[  363.353063]  drm_stub_open+0x2e/0x100 [drm]
[  363.353069]  chrdev_open+0xbe/0x1a0
[  363.353072]  ? cdev_put+0x20/0x20
[  363.353075]  do_dentry_open+0x1e2/0x300
[  363.353078]  path_openat+0x2b4/0x14b0
[  363.353082]  ? vsnprintf+0x230/0x4c0
[  363.353086]  ? __alloc_pages_nodemask+0x100/0x290
[  363.353088]  do_filp_open+0x99/0x110
[  363.353092]  ? generic_update_time+0x6a/0xc0
[  363.353094]  ? touch_atime+0xc1/0xd0
[  363.353096]  ? _cond_resched+0x16/0x40
[  363.353100]  ? do_sys_open+0x126/0x210
[  363.353102]  do_sys_open+0x126/0x210
[  363.353106]  do_syscall_64+0x4f/0x100
[  363.353110]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
[  363.353113] RIP: 0033:0x7f988f340040
[  363.353113] Code: Bad RIP value.
[  363.353120] RSP: 002b:00007ffecdefe618 EFLAGS: 00000246 ORIG_RAX: 0000000000000002
[  363.353123] RAX: ffffffffffffffda RBX: 0000000002337cd0 RCX: 00007f988f340040
[  363.353124] RDX: 00007ffecdefe67e RSI: 0000000000000002 RDI: 00007ffecdefe670
[  363.353125] RBP: 00007ffecdefe6a0 R08: 0000000000000000 R09: 000000000000000e
[  363.353126] R10: 000000000000069d R11: 0000000000000246 R12: 0000000000401b40
[  363.353127] R13: 00007ffecdefe910 R14: 0000000000000000 R15: 0000000000000000

On 2018-08-09 12:31 PM, James Zhu wrote:
> From: Likun Gao <Likun.Gao at amd.com>
>
> Set up PSP firmware loading for VCN, and make the VCN block
> boot from the TMR MC address.
>
> Signed-off-by: James Zhu <James.Zhu at amd.com>
> Reviewed-by: Alex Deucher <alexander.deucher at amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 17 +++++++++------
>  drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c   | 38 ++++++++++++++++++++++++++-------
>  2 files changed, 40 insertions(+), 15 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
> index 878f62c..77c192a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
> @@ -111,9 +111,10 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
>  			version_major, version_minor, family_id);
>  	}
>  
> -	bo_size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8)
> -		  +  AMDGPU_VCN_STACK_SIZE + AMDGPU_VCN_HEAP_SIZE
> +	bo_size = AMDGPU_VCN_STACK_SIZE + AMDGPU_VCN_HEAP_SIZE
>  		  +  AMDGPU_VCN_SESSION_SIZE * 40;
> +	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
> +		bo_size += AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);
>  	r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE,
>  				    AMDGPU_GEM_DOMAIN_VRAM, &adev->vcn.vcpu_bo,
>  				    &adev->vcn.gpu_addr, &adev->vcn.cpu_addr);
> @@ -189,11 +190,13 @@ int amdgpu_vcn_resume(struct amdgpu_device *adev)
>  		unsigned offset;
>  
>  		hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
> -		offset = le32_to_cpu(hdr->ucode_array_offset_bytes);
> -		memcpy_toio(adev->vcn.cpu_addr, adev->vcn.fw->data + offset,
> -			    le32_to_cpu(hdr->ucode_size_bytes));
> -		size -= le32_to_cpu(hdr->ucode_size_bytes);
> -		ptr += le32_to_cpu(hdr->ucode_size_bytes);
> +		if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
> +			offset = le32_to_cpu(hdr->ucode_array_offset_bytes);
> +			memcpy_toio(adev->vcn.cpu_addr, adev->vcn.fw->data + offset,
> +				    le32_to_cpu(hdr->ucode_size_bytes));
> +			size -= le32_to_cpu(hdr->ucode_size_bytes);
> +			ptr += le32_to_cpu(hdr->ucode_size_bytes);
> +		}
>  		memset_io(ptr, 0, size);
>  	}
>  
> diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
> index 2ce91a7..74c4ef4 100644
> --- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
> @@ -100,6 +100,16 @@ static int vcn_v1_0_sw_init(void *handle)
>  	if (r)
>  		return r;
>  
> +	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
> +		const struct common_firmware_header *hdr;
> +		hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
> +		adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].ucode_id = AMDGPU_UCODE_ID_VCN;
> +		adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].fw = adev->vcn.fw;
> +		adev->firmware.fw_size +=
> +			ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
> +		DRM_INFO("PSP loading VCN firmware\n");
> +	}
> +
>  	r = amdgpu_vcn_resume(adev);
>  	if (r)
>  		return r;
> @@ -265,26 +275,38 @@ static int vcn_v1_0_resume(void *handle)
>  static void vcn_v1_0_mc_resume(struct amdgpu_device *adev)
>  {
>  	uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4);
> -
> -	WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
> +	uint32_t offset;
> +
> +	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
> +		WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
> +			(adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_lo));
> +		WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
> +			(adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_hi));
> +		WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0, 0);
> +		offset = 0;
> +	} else {
> +		WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
>  			lower_32_bits(adev->vcn.gpu_addr));
> -	WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
> +		WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
>  			upper_32_bits(adev->vcn.gpu_addr));
> -	WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0,
> +		offset = size;
> +		WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0,
>  				AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
> +	}
> +
>  	WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE0, size);
>  
>  	WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW,
> -			lower_32_bits(adev->vcn.gpu_addr + size));
> +			lower_32_bits(adev->vcn.gpu_addr + offset));
>  	WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH,
> -			upper_32_bits(adev->vcn.gpu_addr + size));
> +			upper_32_bits(adev->vcn.gpu_addr + offset));
>  	WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET1, 0);
>  	WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_VCN_HEAP_SIZE);
>  
>  	WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW,
> -			lower_32_bits(adev->vcn.gpu_addr + size + AMDGPU_VCN_HEAP_SIZE));
> +			lower_32_bits(adev->vcn.gpu_addr + offset + AMDGPU_VCN_HEAP_SIZE));
>  	WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH,
> -			upper_32_bits(adev->vcn.gpu_addr + size + AMDGPU_VCN_HEAP_SIZE));
> +			upper_32_bits(adev->vcn.gpu_addr + offset + AMDGPU_VCN_HEAP_SIZE));
>  	WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET2, 0);
>  	WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE2,
>  			AMDGPU_VCN_STACK_SIZE + (AMDGPU_VCN_SESSION_SIZE * 40));