[PATCH v2] drm/amdgpu: Add -ENOMEM error handling when there is no memory

Alex Deucher alexdeucher at gmail.com
Mon Jul 24 13:22:05 UTC 2023


On Sun, Jul 23, 2023 at 8:53 AM Srinivasan Shanmugam
<srinivasan.shanmugam at amd.com> wrote:
>
> Return -ENOMEM when there is not enough memory to satisfy a dynamic allocation
>
> Cc: Christian König <christian.koenig at amd.com>
> Cc: Alex Deucher <alexander.deucher at amd.com>
> Signed-off-by: Srinivasan Shanmugam <srinivasan.shanmugam at amd.com>

Reviewed-by: Alex Deucher <alexander.deucher at amd.com>

> ---
>
> v2:
>  - Return -ENOMEM when allocating the MQD backup fails
>    while creating the MQD for each KCQ
>
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c  | 17 ++++++++++-------
>  drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c |  1 +
>  drivers/gpu/drm/amd/amdgpu/mes_v10_1.c   |  4 +++-
>  drivers/gpu/drm/amd/amdgpu/mes_v11_0.c   |  4 +++-
>  4 files changed, 17 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> index 6639fde5dd5c..c76b6bfc4dab 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> @@ -110,9 +110,9 @@ bool amdgpu_gfx_is_me_queue_enabled(struct amdgpu_device *adev,
>   * The bitmask of CUs to be disabled in the shader array determined by se and
>   * sh is stored in mask[se * max_sh + sh].
>   */
> -void amdgpu_gfx_parse_disable_cu(unsigned *mask, unsigned max_se, unsigned max_sh)
> +void amdgpu_gfx_parse_disable_cu(unsigned int *mask, unsigned int max_se, unsigned int max_sh)
>  {
> -       unsigned se, sh, cu;
> +       unsigned int se, sh, cu;
>         const char *p;
>
>         memset(mask, 0, sizeof(*mask) * max_se * max_sh);
> @@ -124,6 +124,7 @@ void amdgpu_gfx_parse_disable_cu(unsigned *mask, unsigned max_se, unsigned max_s
>         for (;;) {
>                 char *next;
>                 int ret = sscanf(p, "%u.%u.%u", &se, &sh, &cu);
> +
>                 if (ret < 3) {
>                         DRM_ERROR("amdgpu: could not parse disable_cu\n");
>                         return;
> @@ -349,7 +350,7 @@ void amdgpu_gfx_kiq_fini(struct amdgpu_device *adev, int xcc_id)
>  }
>
>  int amdgpu_gfx_kiq_init(struct amdgpu_device *adev,
> -                       unsigned hpd_size, int xcc_id)
> +                       unsigned int hpd_size, int xcc_id)
>  {
>         int r;
>         u32 *hpd;
> @@ -376,7 +377,7 @@ int amdgpu_gfx_kiq_init(struct amdgpu_device *adev,
>
>  /* create MQD for each compute/gfx queue */
>  int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev,
> -                          unsigned mqd_size, int xcc_id)
> +                          unsigned int mqd_size, int xcc_id)
>  {
>         int r, i, j;
>         struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
> @@ -454,8 +455,10 @@ int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev,
>                         ring->mqd_size = mqd_size;
>                         /* prepare MQD backup */
>                         adev->gfx.mec.mqd_backup[j] = kmalloc(mqd_size, GFP_KERNEL);
> -                       if (!adev->gfx.mec.mqd_backup[j])
> +                       if (!adev->gfx.mec.mqd_backup[j]) {
>                                 dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
> +                               return -ENOMEM;
> +                       }
>                 }
>         }
>
> @@ -1286,11 +1289,11 @@ static ssize_t amdgpu_gfx_get_available_compute_partition(struct device *dev,
>         return sysfs_emit(buf, "%s\n", supported_partition);
>  }
>
> -static DEVICE_ATTR(current_compute_partition, S_IRUGO | S_IWUSR,
> +static DEVICE_ATTR(current_compute_partition, 0644,
>                    amdgpu_gfx_get_current_compute_partition,
>                    amdgpu_gfx_set_compute_partition);
>
> -static DEVICE_ATTR(available_compute_partition, S_IRUGO,
> +static DEVICE_ATTR(available_compute_partition, 0444,
>                    amdgpu_gfx_get_available_compute_partition, NULL);
>
>  int amdgpu_gfx_sysfs_init(struct amdgpu_device *adev)
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
> index 03dc59cbe8aa..7e91b24784e5 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
> @@ -500,6 +500,7 @@ struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev)
>         hive = kzalloc(sizeof(*hive), GFP_KERNEL);
>         if (!hive) {
>                 dev_err(adev->dev, "XGMI: allocation failed\n");
> +               ret = -ENOMEM;
>                 hive = NULL;
>                 goto pro_end;
>         }
> diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c b/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c
> index 36a123e6c8ee..eb06d749876f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c
> +++ b/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c
> @@ -909,10 +909,12 @@ static int mes_v10_1_mqd_sw_init(struct amdgpu_device *adev,
>
>         /* prepare MQD backup */
>         adev->mes.mqd_backup[pipe] = kmalloc(mqd_size, GFP_KERNEL);
> -       if (!adev->mes.mqd_backup[pipe])
> +       if (!adev->mes.mqd_backup[pipe]) {
>                 dev_warn(adev->dev,
>                          "no memory to create MQD backup for ring %s\n",
>                          ring->name);
> +               return -ENOMEM;
> +       }
>
>         return 0;
>  }
> diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
> index 8e67e965f7ea..c8be534cc60d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
> @@ -1021,10 +1021,12 @@ static int mes_v11_0_mqd_sw_init(struct amdgpu_device *adev,
>
>         /* prepare MQD backup */
>         adev->mes.mqd_backup[pipe] = kmalloc(mqd_size, GFP_KERNEL);
> -       if (!adev->mes.mqd_backup[pipe])
> +       if (!adev->mes.mqd_backup[pipe]) {
>                 dev_warn(adev->dev,
>                          "no memory to create MQD backup for ring %s\n",
>                          ring->name);
> +               return -ENOMEM;
> +       }
>
>         return 0;
>  }
> --
> 2.25.1
>


More information about the amd-gfx mailing list