[bug report] drm/amdgpu/mes: use ring for kernel queue submission

Dan Carpenter dan.carpenter at oracle.com
Wed Oct 26 09:35:58 UTC 2022


Hello Jack Xiao,

The patch d0c423b64765: "drm/amdgpu/mes: use ring for kernel queue
submission" from Mar 27, 2020, leads to the following Smatch static
checker warning:

	drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c:1056 amdgpu_mes_add_ring()
	error: format string overflow. buf_size: 16 length: 38 [user data]

drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
    980 int amdgpu_mes_add_ring(struct amdgpu_device *adev, int gang_id,
    981                         int queue_type, int idx,
    982                         struct amdgpu_mes_ctx_data *ctx_data,
    983                         struct amdgpu_ring **out)
    984 {
    985         struct amdgpu_ring *ring;
    986         struct amdgpu_mes_gang *gang;
    987         struct amdgpu_mes_queue_properties qprops = {0};
    988         int r, queue_id, pasid;
    989 
    990         /*
    991          * Avoid taking any other locks under MES lock to avoid circular
    992          * lock dependencies.
    993          */
    994         amdgpu_mes_lock(&adev->mes);
    995         gang = idr_find(&adev->mes.gang_id_idr, gang_id);
    996         if (!gang) {
    997                 DRM_ERROR("gang id %d doesn't exist\n", gang_id);
    998                 amdgpu_mes_unlock(&adev->mes);
    999                 return -EINVAL;
    1000         }
    1001         pasid = gang->process->pasid;
    1002 
    1003         ring = kzalloc(sizeof(struct amdgpu_ring), GFP_KERNEL);
    1004         if (!ring) {
    1005                 amdgpu_mes_unlock(&adev->mes);
    1006                 return -ENOMEM;
    1007         }
    1008 
    1009         ring->ring_obj = NULL;
    1010         ring->use_doorbell = true;
    1011         ring->is_mes_queue = true;
    1012         ring->mes_ctx = ctx_data;
    1013         ring->idx = idx;
    1014         ring->no_scheduler = true;
    1015 
    1016         if (queue_type == AMDGPU_RING_TYPE_COMPUTE) {
    1017                 int offset = offsetof(struct amdgpu_mes_ctx_meta_data,
    1018                                       compute[ring->idx].mec_hpd);
    1019                 ring->eop_gpu_addr =
    1020                         amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
    1021         }
    1022 
    1023         switch (queue_type) {
    1024         case AMDGPU_RING_TYPE_GFX:
    1025                 ring->funcs = adev->gfx.gfx_ring[0].funcs;
    1026                 break;
    1027         case AMDGPU_RING_TYPE_COMPUTE:
    1028                 ring->funcs = adev->gfx.compute_ring[0].funcs;
    1029                 break;
    1030         case AMDGPU_RING_TYPE_SDMA:
    1031                 ring->funcs = adev->sdma.instance[0].ring.funcs;
    1032                 break;
    1033         default:
    1034                 BUG();
    1035         }
    1036 
    1037         r = amdgpu_ring_init(adev, ring, 1024, NULL, 0,
    1038                              AMDGPU_RING_PRIO_DEFAULT, NULL);
    1039         if (r)
    1040                 goto clean_up_memory;
    1041 
    1042         amdgpu_mes_ring_to_queue_props(adev, ring, &qprops);
    1043 
    1044         dma_fence_wait(gang->process->vm->last_update, false);
    1045         dma_fence_wait(ctx_data->meta_data_va->last_pt_update, false);
    1046         amdgpu_mes_unlock(&adev->mes);
    1047 
    1048         r = amdgpu_mes_add_hw_queue(adev, gang_id, &qprops, &queue_id);
    1049         if (r)
    1050                 goto clean_up_ring;
    1051 
    1052         ring->hw_queue_id = queue_id;
    1053         ring->doorbell_index = qprops.doorbell_off;
    1054 
    1055         if (queue_type == AMDGPU_RING_TYPE_GFX)
--> 1056                 sprintf(ring->name, "gfx_%d.%d.%d", pasid, gang_id, queue_id);

I'm not sure why this is warning now instead of in 2020.  But the bug is
definitely real.  "gang_id" is capped at INT_MAX so that can overflow
already even if the values of "pasid" and "queue_id" are zero.

Using snprintf() is safer but also probably the buffer should be larger.

    1057         else if (queue_type == AMDGPU_RING_TYPE_COMPUTE)
    1058                 sprintf(ring->name, "compute_%d.%d.%d", pasid, gang_id,
    1059                         queue_id);
    1060         else if (queue_type == AMDGPU_RING_TYPE_SDMA)
    1061                 sprintf(ring->name, "sdma_%d.%d.%d", pasid, gang_id,
    1062                         queue_id);
    1063         else
    1064                 BUG();
    1065 
    1066         *out = ring;
    1067         return 0;
    1068 
    1069 clean_up_ring:
    1070         amdgpu_ring_fini(ring);
    1071 clean_up_memory:
    1072         kfree(ring);
    1073         amdgpu_mes_unlock(&adev->mes);
    1074         return r;
    1075 }

regards,
dan carpenter


More information about the amd-gfx mailing list