[PATCH 06/18] drm/amdgpu/sriov: fix memory leak after gpu reset
Christian König
ckoenig.leichtzumerken at gmail.com
Mon Sep 18 09:12:47 UTC 2017
Am 18.09.2017 um 08:11 schrieb Monk Liu:
> Doing a GPU reset will rerun all hw_init and thus
> ucode_init_bo is invoked again, so we need to skip
> the fw_buf allocation during an SR-IOV GPU reset to avoid
> a memory leak.
>
> Change-Id: I31131eda1bd45ea2f5bdc50c5da5fc5a9fe9027d
> Signed-off-by: Monk Liu <Monk.Liu at amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu.h | 3 ++
> drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c | 64 +++++++++++++++----------------
> 2 files changed, 35 insertions(+), 32 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 6ff2959..3d0c633 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -1185,6 +1185,9 @@ struct amdgpu_firmware {
>
> /* gpu info firmware data pointer */
> const struct firmware *gpu_info_fw;
> +
> + void *fw_buf_ptr;
> + uint64_t fw_buf_mc;
> };
>
> /*
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
> index f306374..6564902 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
> @@ -360,8 +360,6 @@ static int amdgpu_ucode_patch_jt(struct amdgpu_firmware_info *ucode,
> int amdgpu_ucode_init_bo(struct amdgpu_device *adev)
> {
> struct amdgpu_bo **bo = &adev->firmware.fw_buf;
> - uint64_t fw_mc_addr;
> - void *fw_buf_ptr = NULL;
> uint64_t fw_offset = 0;
> int i, err;
> struct amdgpu_firmware_info *ucode = NULL;
> @@ -372,37 +370,39 @@ int amdgpu_ucode_init_bo(struct amdgpu_device *adev)
> return 0;
> }
>
> - err = amdgpu_bo_create(adev, adev->firmware.fw_size, PAGE_SIZE, true,
> - amdgpu_sriov_vf(adev) ? AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT,
> - AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
> - NULL, NULL, 0, bo);
> - if (err) {
> - dev_err(adev->dev, "(%d) Firmware buffer allocate failed\n", err);
> - goto failed;
> - }
> + if (!amdgpu_sriov_vf(adev) || !adev->in_sriov_reset) {
Instead of all this, it would be better to use amdgpu_bo_create_kernel();
it should already include most of the handling necessary here.
Christian.
> + err = amdgpu_bo_create(adev, adev->firmware.fw_size, PAGE_SIZE, true,
> + amdgpu_sriov_vf(adev) ? AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT,
> + AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
> + NULL, NULL, 0, bo);
> + if (err) {
> + dev_err(adev->dev, "(%d) Firmware buffer allocate failed\n", err);
> + goto failed;
> + }
>
> - err = amdgpu_bo_reserve(*bo, false);
> - if (err) {
> - dev_err(adev->dev, "(%d) Firmware buffer reserve failed\n", err);
> - goto failed_reserve;
> - }
> + err = amdgpu_bo_reserve(*bo, false);
> + if (err) {
> + dev_err(adev->dev, "(%d) Firmware buffer reserve failed\n", err);
> + goto failed_reserve;
> + }
>
> - err = amdgpu_bo_pin(*bo, amdgpu_sriov_vf(adev) ? AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT,
> - &fw_mc_addr);
> - if (err) {
> - dev_err(adev->dev, "(%d) Firmware buffer pin failed\n", err);
> - goto failed_pin;
> - }
> + err = amdgpu_bo_pin(*bo, amdgpu_sriov_vf(adev) ? AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT,
> + &adev->firmware.fw_buf_mc);
> + if (err) {
> + dev_err(adev->dev, "(%d) Firmware buffer pin failed\n", err);
> + goto failed_pin;
> + }
>
> - err = amdgpu_bo_kmap(*bo, &fw_buf_ptr);
> - if (err) {
> - dev_err(adev->dev, "(%d) Firmware buffer kmap failed\n", err);
> - goto failed_kmap;
> - }
> + err = amdgpu_bo_kmap(*bo, &adev->firmware.fw_buf_ptr);
> + if (err) {
> + dev_err(adev->dev, "(%d) Firmware buffer kmap failed\n", err);
> + goto failed_kmap;
> + }
>
> - amdgpu_bo_unreserve(*bo);
> + amdgpu_bo_unreserve(*bo);
> + }
>
> - memset(fw_buf_ptr, 0, adev->firmware.fw_size);
> + memset(adev->firmware.fw_buf_ptr, 0, adev->firmware.fw_size);
>
> /*
> * if SMU loaded firmware, it needn't add SMC, UVD, and VCE
> @@ -421,14 +421,14 @@ int amdgpu_ucode_init_bo(struct amdgpu_device *adev)
> ucode = &adev->firmware.ucode[i];
> if (ucode->fw) {
> header = (const struct common_firmware_header *)ucode->fw->data;
> - amdgpu_ucode_init_single_fw(adev, ucode, fw_mc_addr + fw_offset,
> - (void *)((uint8_t *)fw_buf_ptr + fw_offset));
> + amdgpu_ucode_init_single_fw(adev, ucode, adev->firmware.fw_buf_mc + fw_offset,
> + adev->firmware.fw_buf_ptr + fw_offset);
> if (i == AMDGPU_UCODE_ID_CP_MEC1 &&
> adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
> const struct gfx_firmware_header_v1_0 *cp_hdr;
> cp_hdr = (const struct gfx_firmware_header_v1_0 *)ucode->fw->data;
> - amdgpu_ucode_patch_jt(ucode, fw_mc_addr + fw_offset,
> - fw_buf_ptr + fw_offset);
> + amdgpu_ucode_patch_jt(ucode, adev->firmware.fw_buf_mc + fw_offset,
> + adev->firmware.fw_buf_ptr + fw_offset);
> fw_offset += ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);
> }
> fw_offset += ALIGN(ucode->ucode_size, PAGE_SIZE);
More information about the amd-gfx mailing list