[Mesa-dev] [PATCH 2/6] radv/winsys: remove the max IBs per submit limit for the fallback path

Samuel Pitoiset samuel.pitoiset at gmail.com
Wed Jul 4 13:06:49 UTC 2018


The chained submission is the fastest path and it should now
be used more often than before.

This breaks some CTS tests when HiZ is enabled; flushing DB_META
at the end of cmdbufs fixes the regressions, so I guess there is
a missing flush somewhere.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
---
 src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c | 103 ++++++++++--------
 1 file changed, 55 insertions(+), 48 deletions(-)

diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
index 848e81924f..d99fffd431 100644
--- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
+++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
@@ -886,66 +886,73 @@ static int radv_amdgpu_winsys_cs_submit_fallback(struct radeon_winsys_ctx *_ctx,
 	struct radv_amdgpu_ctx *ctx = radv_amdgpu_ctx(_ctx);
 	struct radv_amdgpu_fence *fence = (struct radv_amdgpu_fence *)_fence;
 	amdgpu_bo_list_handle bo_list;
-	struct amdgpu_cs_request request;
-	bool emit_signal_sem = sem_info->cs_emit_signal;
+	struct amdgpu_cs_request request = {};
+	struct amdgpu_cs_ib_info *ibs;
+	struct radv_amdgpu_cs *cs0;
+	unsigned number_of_ibs;
+
 	assert(cs_count);
+	cs0 = radv_amdgpu_cs(cs_array[0]);
 
-	for (unsigned i = 0; i < cs_count;) {
-		struct radv_amdgpu_cs *cs0 = radv_amdgpu_cs(cs_array[i]);
-		struct amdgpu_cs_ib_info ibs[AMDGPU_CS_MAX_IBS_PER_SUBMIT];
-		struct radeon_cmdbuf *preamble_cs = i ? continue_preamble_cs : initial_preamble_cs;
-		unsigned cnt = MIN2(AMDGPU_CS_MAX_IBS_PER_SUBMIT - !!preamble_cs,
-		                    cs_count - i);
+	/* Compute the number of IBs for this submit. */
+	number_of_ibs = cs_count + !!initial_preamble_cs;
 
-		memset(&request, 0, sizeof(request));
+	/* Create a buffer object list. */
+	r = radv_amdgpu_create_bo_list(cs0->ws, &cs_array[0], cs_count, NULL, 0,
+				       initial_preamble_cs, radv_bo_list,
+				       &bo_list);
+	if (r) {
+		fprintf(stderr, "amdgpu: buffer list creation failed "
+				"for the fallback submission (%d)\n", r);
+		return r;
+	}
 
-		r = radv_amdgpu_create_bo_list(cs0->ws, &cs_array[i], cnt, NULL, 0,
-		                               preamble_cs, radv_bo_list, &bo_list);
-		if (r) {
-			fprintf(stderr, "amdgpu: buffer list creation failed "
-					"for the fallback submission (%d)\n", r);
-			return r;
-		}
+	ibs = malloc(number_of_ibs * sizeof(*ibs));
+	if (!ibs) {
+		if (bo_list)
+			amdgpu_bo_list_destroy(bo_list);
+		return -ENOMEM;
+	}
 
-		request.ip_type = cs0->hw_ip;
-		request.ring = queue_idx;
-		request.resources = bo_list;
-		request.number_of_ibs = cnt + !!preamble_cs;
-		request.ibs = ibs;
-		request.fence_info = radv_set_cs_fence(ctx, cs0->hw_ip, queue_idx);
+	/* Configure the CS request. */
+	if (initial_preamble_cs)
+		ibs[0] = radv_amdgpu_cs(initial_preamble_cs)->ib;
 
-		if (preamble_cs) {
-			ibs[0] = radv_amdgpu_cs(preamble_cs)->ib;
-		}
+	for (unsigned i = 0; i < cs_count; i++) {
+		struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[i]);
 
-		for (unsigned j = 0; j < cnt; ++j) {
-			struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[i + j]);
-			ibs[j + !!preamble_cs] = cs->ib;
+		ibs[i + !!initial_preamble_cs] = cs->ib;
 
-			if (cs->is_chained) {
-				*cs->ib_size_ptr -= 4;
-				cs->is_chained = false;
-			}
+		if (cs->is_chained) {
+			*cs->ib_size_ptr -= 4;
+			cs->is_chained = false;
 		}
+	}
 
-		sem_info->cs_emit_signal = (i == cs_count - cnt) ? emit_signal_sem : false;
-		r = radv_amdgpu_cs_submit(ctx, &request, sem_info);
-		if (r) {
-			if (r == -ENOMEM)
-				fprintf(stderr, "amdgpu: Not enough memory for command submission.\n");
-			else
-				fprintf(stderr, "amdgpu: The CS has been rejected, "
-						"see dmesg for more information.\n");
-		}
+	request.ip_type = cs0->hw_ip;
+	request.ring = queue_idx;
+	request.resources = bo_list;
+	request.number_of_ibs = number_of_ibs;
+	request.ibs = ibs;
+	request.fence_info = radv_set_cs_fence(ctx, cs0->hw_ip, queue_idx);
 
-		if (bo_list)
-			amdgpu_bo_list_destroy(bo_list);
+	/* Submit the CS. */
+	r = radv_amdgpu_cs_submit(ctx, &request, sem_info);
+	if (r) {
+		if (r == -ENOMEM)
+			fprintf(stderr, "amdgpu: Not enough memory for command submission.\n");
+		else
+			fprintf(stderr, "amdgpu: The CS has been rejected, "
+					"see dmesg for more information.\n");
+	}
 
-		if (r)
-			return r;
+	if (bo_list)
+		amdgpu_bo_list_destroy(bo_list);
+	free(ibs);
+
+	if (r)
+		return r;
 
-		i += cnt;
-	}
 	if (fence)
 		radv_amdgpu_request_to_fence(ctx, fence, &request);
 
@@ -1151,7 +1158,7 @@ static int radv_amdgpu_winsys_cs_submit(struct radeon_winsys_ctx *_ctx,
 	if (!cs->ws->use_ib_bos) {
 		ret = radv_amdgpu_winsys_cs_submit_sysmem(_ctx, queue_idx, sem_info, bo_list, cs_array,
 							   cs_count, initial_preamble_cs, continue_preamble_cs, _fence);
-	} else if (can_patch && cs_count > AMDGPU_CS_MAX_IBS_PER_SUBMIT && cs->ws->batchchain) {
+	} else if (can_patch && cs->ws->batchchain) {
 		ret = radv_amdgpu_winsys_cs_submit_chained(_ctx, queue_idx, sem_info, bo_list, cs_array,
 							    cs_count, initial_preamble_cs, continue_preamble_cs, _fence);
 	} else {
-- 
2.18.0



More information about the mesa-dev mailing list