[PATCH] drm/amdgpu: fix missed gpu info firmware when cache firmware during S3
Christian König
deathsimple at vodafone.de
Tue Jun 6 08:00:29 UTC 2017
Hi Ray,
mhm, indeed a nice catch.
But why do we need to load the gpu info after resume in the first place?
I mean we already know what GPU we have, loading it again looks
superfluous to me.
Regards,
Christian.
Am 06.06.2017 um 08:24 schrieb Huang Rui:
> gpu_info firmware is released after data is used. But when system enters into
> suspend, upper class driver will cache all firmware names. At that time,
> gpu_info will be failing to load. It seems an upper class issue, that we should
> not release gpu_info firmware until device finished.
>
> [ 903.236589] cache_firmware: amdgpu/vega10_sdma1.bin
> [ 903.236590] fw_set_page_data: fw-amdgpu/vega10_sdma1.bin buf=ffff88041eee10c0 data=ffffc90002561000 size=17408
> [ 903.236591] cache_firmware: amdgpu/vega10_sdma1.bin ret=0
> [ 903.464160] __allocate_fw_buf: fw-amdgpu/vega10_gpu_info.bin buf=ffff88041eee2c00
> [ 903.471815] (NULL device *): loading /lib/firmware/updates/4.11.0-custom/amdgpu/vega10_gpu_info.bin failed with error -2
> [ 903.482870] (NULL device *): loading /lib/firmware/updates/amdgpu/vega10_gpu_info.bin failed with error -2
> [ 903.492716] (NULL device *): loading /lib/firmware/4.11.0-custom/amdgpu/vega10_gpu_info.bin failed with error -2
> [ 903.503156] (NULL device *): direct-loading amdgpu/vega10_gpu_info.bin
>
> Signed-off-by: Huang Rui <ray.huang at amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu.h | 3 +++
> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 18 ++++++++++--------
> 2 files changed, 13 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 8658643..54ee050 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -1272,6 +1272,9 @@ struct amdgpu_firmware {
> const struct amdgpu_psp_funcs *funcs;
> struct amdgpu_bo *rbuf;
> struct mutex mutex;
> +
> + /* gpu info firmware data pointer */
> + const struct firmware *fw;
> };
>
> /*
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index 6883fe1..af8f8b3 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -1426,12 +1426,13 @@ static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
>
> static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
> {
> - const struct firmware *fw;
> const char *chip_name;
> char fw_name[30];
> int err;
> const struct gpu_info_firmware_header_v1_0 *hdr;
>
> + adev->firmware.fw = NULL;
> +
> switch (adev->asic_type) {
> case CHIP_TOPAZ:
> case CHIP_TONGA:
> @@ -1466,14 +1467,14 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
> }
>
> snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
> - err = request_firmware(&fw, fw_name, adev->dev);
> + err = request_firmware(&adev->firmware.fw, fw_name, adev->dev);
> if (err) {
> dev_err(adev->dev,
> "Failed to load gpu_info firmware \"%s\"\n",
> fw_name);
> goto out;
> }
> - err = amdgpu_ucode_validate(fw);
> + err = amdgpu_ucode_validate(adev->firmware.fw);
> if (err) {
> dev_err(adev->dev,
> "Failed to validate gpu_info firmware \"%s\"\n",
> @@ -1481,14 +1482,14 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
> goto out;
> }
>
> - hdr = (const struct gpu_info_firmware_header_v1_0 *)fw->data;
> + hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.fw->data;
> amdgpu_ucode_print_gpu_info_hdr(&hdr->header);
>
> switch (hdr->version_major) {
> case 1:
> {
> const struct gpu_info_firmware_v1_0 *gpu_info_fw =
> - (const struct gpu_info_firmware_v1_0 *)(fw->data +
> + (const struct gpu_info_firmware_v1_0 *)(adev->firmware.fw->data +
> le32_to_cpu(hdr->header.ucode_array_offset_bytes));
>
> adev->gfx.config.max_shader_engines = gpu_info_fw->gc_num_se;
> @@ -1513,9 +1514,6 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
> goto out;
> }
> out:
> - release_firmware(fw);
> - fw = NULL;
> -
> return err;
> }
>
> @@ -2313,6 +2311,10 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
> amdgpu_fence_driver_fini(adev);
> amdgpu_fbdev_fini(adev);
> r = amdgpu_fini(adev);
> + if (adev->firmware.fw) {
> + release_firmware(adev->firmware.fw);
> + adev->firmware.fw = NULL;
> + }
> adev->accel_working = false;
> /* free i2c buses */
> if (!amdgpu_device_has_dc_support(adev))
More information about the amd-gfx
mailing list