[PATCH] drm/amd/amdgpu: Fix double unlock in amdgpu_mes_add_ring
Xiao, Jack
Jack.Xiao at amd.com
Wed Oct 9 06:10:56 UTC 2024
[AMD Official Use Only - AMD Internal Distribution Only]
Reviewed-by: Jack Xiao <Jack.Xiao at amd.com>
Regards,
Jack
-----Original Message-----
From: SHANMUGAM, SRINIVASAN <SRINIVASAN.SHANMUGAM at amd.com>
Sent: Tuesday, October 8, 2024 9:34 PM
To: Koenig, Christian <Christian.Koenig at amd.com>; Deucher, Alexander <Alexander.Deucher at amd.com>
Cc: amd-gfx at lists.freedesktop.org; SHANMUGAM, SRINIVASAN <SRINIVASAN.SHANMUGAM at amd.com>; Zhang, Hawking <Hawking.Zhang at amd.com>; Xiao, Jack <Jack.Xiao at amd.com>
Subject: [PATCH] drm/amd/amdgpu: Fix double unlock in amdgpu_mes_add_ring
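This patch addresses a double unlock issue in the amdgpu_mes_add_ring function. When amdgpu_mes_add_hw_queue() fails, the mutex has already been released, yet the clean_up_ring error path falls through to clean_up_memory and unlocks it a second time, which could lead to undefined behavior.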
The fix ensures that the mutex is unlocked only once before jumping to the clean_up_memory label. The unlock operation is moved to just before the goto statement within the conditional block that checks the return value of amdgpu_ring_init. This prevents the second unlock attempt after the clean_up_memory label, which is no longer necessary as the mutex is already unlocked by this point in the code flow.
This change resolves the potential double unlock and maintains the correct mutex handling throughout the function.
Fixes the issue below:
Commit d0c423b64765 ("drm/amdgpu/mes: use ring for kernel queue submission") leads to the following Smatch static checker warning:
drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c:1240 amdgpu_mes_add_ring()
warn: double unlock '&adev->mes.mutex_hidden' (orig line 1213)
drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
1143 int amdgpu_mes_add_ring(struct amdgpu_device *adev, int gang_id,
1144 int queue_type, int idx,
1145 struct amdgpu_mes_ctx_data *ctx_data,
1146 struct amdgpu_ring **out)
1147 {
1148 struct amdgpu_ring *ring;
1149 struct amdgpu_mes_gang *gang;
1150 struct amdgpu_mes_queue_properties qprops = {0};
1151 int r, queue_id, pasid;
1152
1153 /*
1154 * Avoid taking any other locks under MES lock to avoid circular
1155 * lock dependencies.
1156 */
1157 amdgpu_mes_lock(&adev->mes);
1158 gang = idr_find(&adev->mes.gang_id_idr, gang_id);
1159 if (!gang) {
1160 DRM_ERROR("gang id %d doesn't exist\n", gang_id);
1161 amdgpu_mes_unlock(&adev->mes);
1162 return -EINVAL;
1163 }
1164 pasid = gang->process->pasid;
1165
1166 ring = kzalloc(sizeof(struct amdgpu_ring), GFP_KERNEL);
1167 if (!ring) {
1168 amdgpu_mes_unlock(&adev->mes);
1169 return -ENOMEM;
1170 }
1171
1172 ring->ring_obj = NULL;
1173 ring->use_doorbell = true;
1174 ring->is_mes_queue = true;
1175 ring->mes_ctx = ctx_data;
1176 ring->idx = idx;
1177 ring->no_scheduler = true;
1178
1179 if (queue_type == AMDGPU_RING_TYPE_COMPUTE) {
1180 int offset = offsetof(struct amdgpu_mes_ctx_meta_data,
1181 compute[ring->idx].mec_hpd);
1182 ring->eop_gpu_addr =
1183 amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
1184 }
1185
1186 switch (queue_type) {
1187 case AMDGPU_RING_TYPE_GFX:
1188 ring->funcs = adev->gfx.gfx_ring[0].funcs;
1189 ring->me = adev->gfx.gfx_ring[0].me;
1190 ring->pipe = adev->gfx.gfx_ring[0].pipe;
1191 break;
1192 case AMDGPU_RING_TYPE_COMPUTE:
1193 ring->funcs = adev->gfx.compute_ring[0].funcs;
1194 ring->me = adev->gfx.compute_ring[0].me;
1195 ring->pipe = adev->gfx.compute_ring[0].pipe;
1196 break;
1197 case AMDGPU_RING_TYPE_SDMA:
1198 ring->funcs = adev->sdma.instance[0].ring.funcs;
1199 break;
1200 default:
1201 BUG();
1202 }
1203
1204 r = amdgpu_ring_init(adev, ring, 1024, NULL, 0,
1205 AMDGPU_RING_PRIO_DEFAULT, NULL);
1206 if (r)
1207 goto clean_up_memory;
1208
1209 amdgpu_mes_ring_to_queue_props(adev, ring, &qprops);
1210
1211 dma_fence_wait(gang->process->vm->last_update, false);
1212 dma_fence_wait(ctx_data->meta_data_va->last_pt_update, false);
1213 amdgpu_mes_unlock(&adev->mes);
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
1214
1215 r = amdgpu_mes_add_hw_queue(adev, gang_id, &qprops, &queue_id);
1216 if (r)
1217 goto clean_up_ring;
^^^^^^^^^^^^^^^^^^
1218
1219 ring->hw_queue_id = queue_id;
1220 ring->doorbell_index = qprops.doorbell_off;
1221
1222 if (queue_type == AMDGPU_RING_TYPE_GFX)
1223 sprintf(ring->name, "gfx_%d.%d.%d", pasid, gang_id, queue_id);
1224 else if (queue_type == AMDGPU_RING_TYPE_COMPUTE)
1225 sprintf(ring->name, "compute_%d.%d.%d", pasid, gang_id,
1226 queue_id);
1227 else if (queue_type == AMDGPU_RING_TYPE_SDMA)
1228 sprintf(ring->name, "sdma_%d.%d.%d", pasid, gang_id,
1229 queue_id);
1230 else
1231 BUG();
1232
1233 *out = ring;
1234 return 0;
1235
1236 clean_up_ring:
1237 amdgpu_ring_fini(ring);
1238 clean_up_memory:
1239 kfree(ring);
--> 1240 amdgpu_mes_unlock(&adev->mes);
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
1241 return r;
1242 }
Fixes: d0c423b64765 ("drm/amdgpu/mes: use ring for kernel queue submission")
Cc: Christian König <christian.koenig at amd.com>
Cc: Alex Deucher <alexander.deucher at amd.com>
Cc: Hawking Zhang <Hawking.Zhang at amd.com>
Suggested-by: Jack Xiao <Jack.Xiao at amd.com>
Reported-by: Dan Carpenter <dan.carpenter at linaro.org>
Signed-off-by: Srinivasan Shanmugam <srinivasan.shanmugam at amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
index 984bff25cfca..83d0f731fb65 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
@@ -1200,8 +1200,10 @@ int amdgpu_mes_add_ring(struct amdgpu_device *adev, int gang_id,
r = amdgpu_ring_init(adev, ring, 1024, NULL, 0,
AMDGPU_RING_PRIO_DEFAULT, NULL);
- if (r)
+ if (r) {
+ amdgpu_mes_unlock(&adev->mes);
goto clean_up_memory;
+ }
amdgpu_mes_ring_to_queue_props(adev, ring, &qprops);
@@ -1234,7 +1236,6 @@ int amdgpu_mes_add_ring(struct amdgpu_device *adev, int gang_id,
amdgpu_ring_fini(ring);
clean_up_memory:
kfree(ring);
- amdgpu_mes_unlock(&adev->mes);
return r;
}
--
2.34.1
More information about the amd-gfx
mailing list