[Mesa-dev] [PATCH 1/4] radv/amdgpu: Support a preamble CS.

Bas Nieuwenhuizen bas at basnieuwenhuizen.nl
Sun Jan 29 16:40:01 UTC 2017


Signed-off-by: Bas Nieuwenhuizen <basni at google.com>
---
 src/amd/vulkan/radv_device.c                  |  6 ++-
 src/amd/vulkan/radv_radeon_winsys.h           |  1 +
 src/amd/vulkan/radv_wsi.c                     |  2 +-
 src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c | 62 +++++++++++++++++++++------
 4 files changed, 56 insertions(+), 15 deletions(-)

diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index 1505498c4bf..ad83f9f4eb1 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -1021,7 +1021,8 @@ VkResult radv_QueueSubmit(
 			if (queue->device->trace_bo)
 				*queue->device->trace_id_ptr = 0;
 
-			ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, cs_array + j, advance,
+			ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, cs_array + j,
+							advance, NULL,
 							(struct radeon_winsys_sem **)pSubmits[i].pWaitSemaphores,
 							b ? pSubmits[i].waitSemaphoreCount : 0,
 							(struct radeon_winsys_sem **)pSubmits[i].pSignalSemaphores,
@@ -1052,7 +1053,8 @@ VkResult radv_QueueSubmit(
 		if (!submitCount)
 			ret = queue->device->ws->cs_submit(ctx, queue->queue_idx,
 							   &queue->device->empty_cs[queue->queue_family_index],
-							   1, NULL, 0, NULL, 0, false, base_fence);
+							   1, NULL, NULL, 0, NULL, 0,
+							   false, base_fence);
 
 		fence->submitted = true;
 	}
diff --git a/src/amd/vulkan/radv_radeon_winsys.h b/src/amd/vulkan/radv_radeon_winsys.h
index a0b5092e300..bdb14395d0a 100644
--- a/src/amd/vulkan/radv_radeon_winsys.h
+++ b/src/amd/vulkan/radv_radeon_winsys.h
@@ -305,6 +305,7 @@ struct radeon_winsys {
 			 int queue_index,
 			 struct radeon_winsys_cs **cs_array,
 			 unsigned cs_count,
+			 struct radeon_winsys_cs *preamble_cs,
 			 struct radeon_winsys_sem **wait_sem,
 			 unsigned wait_sem_count,
 			 struct radeon_winsys_sem **signal_sem,
diff --git a/src/amd/vulkan/radv_wsi.c b/src/amd/vulkan/radv_wsi.c
index 2f45961cf8c..9c9e1bb0a8d 100644
--- a/src/amd/vulkan/radv_wsi.c
+++ b/src/amd/vulkan/radv_wsi.c
@@ -367,7 +367,7 @@ VkResult radv_QueuePresentKHR(
 		struct radeon_winsys_ctx *ctx = queue->hw_ctx;
 		queue->device->ws->cs_submit(ctx, queue->queue_idx,
 					     &queue->device->empty_cs[queue->queue_family_index],
-					     1,
+					     1, NULL,
 					     (struct radeon_winsys_sem **)pPresentInfo->pWaitSemaphores,
 					     pPresentInfo->waitSemaphoreCount, NULL, 0, false, base_fence);
 		fence->submitted = true;
diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
index f7707f6c793..b58f5db0622 100644
--- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
+++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
@@ -422,6 +422,7 @@ static int radv_amdgpu_create_bo_list(struct radv_amdgpu_winsys *ws,
 				      struct radeon_winsys_cs **cs_array,
 				      unsigned count,
 				      struct radv_amdgpu_winsys_bo *extra_bo,
+				      struct radeon_winsys_cs *extra_cs,
 				      amdgpu_bo_list_handle *bo_list)
 {
 	int r;
@@ -448,7 +449,7 @@ static int radv_amdgpu_create_bo_list(struct radv_amdgpu_winsys *ws,
 					  bo_list);
 		free(handles);
 		pthread_mutex_unlock(&ws->global_bo_list_lock);
-	} else if (count == 1 && !extra_bo) {
+	} else if (count == 1 && !extra_bo && !extra_cs) {
 		struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs*)cs_array[0];
 		r = amdgpu_bo_list_create(ws->dev, cs->num_buffers, cs->handles,
 					  cs->priorities, bo_list);
@@ -460,6 +461,10 @@ static int radv_amdgpu_create_bo_list(struct radv_amdgpu_winsys *ws,
 			total_buffer_count += cs->num_buffers;
 		}
 
+		if (extra_cs) {
+			total_buffer_count += ((struct radv_amdgpu_cs*)extra_cs)->num_buffers;
+		}
+
 		amdgpu_bo_handle *handles = malloc(sizeof(amdgpu_bo_handle) * total_buffer_count);
 		uint8_t *priorities = malloc(sizeof(uint8_t) * total_buffer_count);
 		if (!handles || !priorities) {
@@ -473,8 +478,14 @@ static int radv_amdgpu_create_bo_list(struct radv_amdgpu_winsys *ws,
 			priorities[0] = 8;
 		}
 
-		for (unsigned i = 0; i < count; ++i) {
-			struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs*)cs_array[i];
+		for (unsigned i = 0; i < count + !!extra_cs; ++i) {
+			struct radv_amdgpu_cs *cs;
+
+			if (i == count)
+				cs = (struct radv_amdgpu_cs*)extra_cs;
+			else
+				cs = (struct radv_amdgpu_cs*)cs_array[i];
+
 			for (unsigned j = 0; j < cs->num_buffers; ++j) {
 				bool found = false;
 				for (unsigned k = 0; k < unique_bo_count; ++k) {
@@ -514,6 +525,7 @@ static int radv_amdgpu_winsys_cs_submit_chained(struct radeon_winsys_ctx *_ctx,
 						int queue_idx,
 						struct radeon_winsys_cs **cs_array,
 						unsigned cs_count,
+						struct radeon_winsys_cs *preamble_cs,
 						struct radeon_winsys_fence *_fence)
 {
 	int r;
@@ -522,6 +534,7 @@ static int radv_amdgpu_winsys_cs_submit_chained(struct radeon_winsys_ctx *_ctx,
 	struct radv_amdgpu_cs *cs0 = radv_amdgpu_cs(cs_array[0]);
 	amdgpu_bo_list_handle bo_list;
 	struct amdgpu_cs_request request = {0};
+	struct amdgpu_cs_ib_info ibs[2];
 
 	for (unsigned i = cs_count; i--;) {
 		struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[i]);
@@ -545,7 +558,7 @@ static int radv_amdgpu_winsys_cs_submit_chained(struct radeon_winsys_ctx *_ctx,
 		}
 	}
 
-	r = radv_amdgpu_create_bo_list(cs0->ws, cs_array, cs_count, NULL, &bo_list);
+	r = radv_amdgpu_create_bo_list(cs0->ws, cs_array, cs_count, NULL, preamble_cs, &bo_list);
 	if (r) {
 		fprintf(stderr, "amdgpu: Failed to created the BO list for submission\n");
 		return r;
@@ -557,6 +570,13 @@ static int radv_amdgpu_winsys_cs_submit_chained(struct radeon_winsys_ctx *_ctx,
 	request.ibs = &cs0->ib;
 	request.resources = bo_list;
 
+	if (preamble_cs) {
+		request.ibs = ibs;
+		request.number_of_ibs = 2;
+		ibs[1] = cs0->ib;
+		ibs[0] = ((struct radv_amdgpu_cs*)preamble_cs)->ib;
+	}
+
 	r = amdgpu_cs_submit(ctx->ctx, 0, &request, 1);
 	if (r) {
 		if (r == -ENOMEM)
@@ -580,6 +600,7 @@ static int radv_amdgpu_winsys_cs_submit_fallback(struct radeon_winsys_ctx *_ctx,
 						 int queue_idx,
 						 struct radeon_winsys_cs **cs_array,
 						 unsigned cs_count,
+						 struct radeon_winsys_cs *preamble_cs,
 						 struct radeon_winsys_fence *_fence)
 {
 	int r;
@@ -593,11 +614,13 @@ static int radv_amdgpu_winsys_cs_submit_fallback(struct radeon_winsys_ctx *_ctx,
 	for (unsigned i = 0; i < cs_count;) {
 		struct radv_amdgpu_cs *cs0 = radv_amdgpu_cs(cs_array[i]);
 		struct amdgpu_cs_ib_info ibs[AMDGPU_CS_MAX_IBS_PER_SUBMIT];
-		unsigned cnt = MIN2(AMDGPU_CS_MAX_IBS_PER_SUBMIT, cs_count - i);
+		unsigned cnt = MIN2(AMDGPU_CS_MAX_IBS_PER_SUBMIT - !!preamble_cs,
+		                    cs_count - i);
 
 		memset(&request, 0, sizeof(request));
 
-		r = radv_amdgpu_create_bo_list(cs0->ws, &cs_array[i], cnt, NULL, &bo_list);
+		r = radv_amdgpu_create_bo_list(cs0->ws, &cs_array[i], cnt, NULL,
+		                               preamble_cs, &bo_list);
 		if (r) {
 			fprintf(stderr, "amdgpu: Failed to created the BO list for submission\n");
 			return r;
@@ -606,12 +629,16 @@ static int radv_amdgpu_winsys_cs_submit_fallback(struct radeon_winsys_ctx *_ctx,
 		request.ip_type = cs0->hw_ip;
 		request.ring = queue_idx;
 		request.resources = bo_list;
-		request.number_of_ibs = cnt;
+		request.number_of_ibs = cnt + !!preamble_cs;
 		request.ibs = ibs;
 
+		if (preamble_cs) {
+			ibs[0] = radv_amdgpu_cs(preamble_cs)->ib;
+		}
+
 		for (unsigned j = 0; j < cnt; ++j) {
 			struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[i + j]);
-			ibs[j] = cs->ib;
+			ibs[j + !!preamble_cs] = cs->ib;
 
 			if (cs->is_chained) {
 				*cs->ib_size_ptr -= 4;
@@ -647,6 +674,7 @@ static int radv_amdgpu_winsys_cs_submit_sysmem(struct radeon_winsys_ctx *_ctx,
 					       int queue_idx,
 					       struct radeon_winsys_cs **cs_array,
 					       unsigned cs_count,
+					       struct radeon_winsys_cs *preamble_cs,
 					       struct radeon_winsys_fence *_fence)
 {
 	int r;
@@ -670,6 +698,9 @@ static int radv_amdgpu_winsys_cs_submit_sysmem(struct radeon_winsys_ctx *_ctx,
 		unsigned cnt = 0;
 		unsigned size = 0;
 
+		if (preamble_cs)
+			size += preamble_cs->cdw;
+
 		while (i + cnt < cs_count && 0xffff8 - size >= radv_amdgpu_cs(cs_array[i + cnt])->base.cdw) {
 			size += radv_amdgpu_cs(cs_array[i + cnt])->base.cdw;
 			++cnt;
@@ -680,6 +711,11 @@ static int radv_amdgpu_winsys_cs_submit_sysmem(struct radeon_winsys_ctx *_ctx,
 		bo = ws->buffer_create(ws, 4 * size, 4096, RADEON_DOMAIN_GTT, RADEON_FLAG_CPU_ACCESS);
 		ptr = ws->buffer_map(bo);
 
+		if (preamble_cs) {
+			memcpy(ptr, preamble_cs->buf, preamble_cs->cdw * 4);
+			ptr += preamble_cs->cdw;
+		}
+
 		for (unsigned j = 0; j < cnt; ++j) {
 			struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[i + j]);
 			memcpy(ptr, cs->base.buf, 4 * cs->base.cdw);
@@ -696,7 +732,8 @@ static int radv_amdgpu_winsys_cs_submit_sysmem(struct radeon_winsys_ctx *_ctx,
 
 
 		r = radv_amdgpu_create_bo_list(cs0->ws, &cs_array[i], cnt,
-		                               (struct radv_amdgpu_winsys_bo*)bo, &bo_list);
+		                               (struct radv_amdgpu_winsys_bo*)bo,
+		                               preamble_cs, &bo_list);
 		if (r) {
 			fprintf(stderr, "amdgpu: Failed to created the BO list for submission\n");
 			return r;
@@ -740,6 +777,7 @@ static int radv_amdgpu_winsys_cs_submit(struct radeon_winsys_ctx *_ctx,
 					int queue_idx,
 					struct radeon_winsys_cs **cs_array,
 					unsigned cs_count,
+					struct radeon_winsys_cs *preamble_cs,
 					struct radeon_winsys_sem **wait_sem,
 					unsigned wait_sem_count,
 					struct radeon_winsys_sem **signal_sem,
@@ -759,13 +797,13 @@ static int radv_amdgpu_winsys_cs_submit(struct radeon_winsys_ctx *_ctx,
 	}
 	if (!cs->ws->use_ib_bos) {
 		ret = radv_amdgpu_winsys_cs_submit_sysmem(_ctx, queue_idx, cs_array,
-							   cs_count, _fence);
+							   cs_count, preamble_cs, _fence);
 	} else if (can_patch && cs_count > AMDGPU_CS_MAX_IBS_PER_SUBMIT && false) {
 		ret = radv_amdgpu_winsys_cs_submit_chained(_ctx, queue_idx, cs_array,
-							    cs_count, _fence);
+							    cs_count, preamble_cs, _fence);
 	} else {
 		ret = radv_amdgpu_winsys_cs_submit_fallback(_ctx, queue_idx, cs_array,
-							     cs_count, _fence);
+							     cs_count, preamble_cs, _fence);
 	}
 
 	for (i = 0; i < signal_sem_count; i++) {
-- 
2.11.0



More information about the mesa-dev mailing list