[Mesa-dev] [PATCH] [rfc] radv: predicate cmask eliminate when using DCC.

Dave Airlie airlied at gmail.com
Tue Jun 13 00:28:05 UTC 2017


From: Dave Airlie <airlied at redhat.com>

When using DCC, some clear values don't require a cmask eliminate
step. This patch adds support for black and black with alpha 1;
there are other such values, but I don't have access to a comprehensive list.

This works by setting the cmask eliminate predicate when doing the
fast clear and, later, when doing the cmask elimination, making sure
that the draws are predicated.

This increases the fps on Sascha Willems deferred from 580fps to
670fps on a Tonga PRO card.

Signed-off-by: Dave Airlie <airlied at redhat.com>
---
 src/amd/vulkan/radv_cmd_buffer.c      | 33 +++++++++++++++++++++++++++++++--
 src/amd/vulkan/radv_image.c           | 13 +++++++++----
 src/amd/vulkan/radv_meta_clear.c      | 18 ++++++++++++++++++
 src/amd/vulkan/radv_meta_fast_clear.c | 22 ++++++++++++++++++++++
 src/amd/vulkan/radv_private.h         |  6 ++++++
 src/amd/vulkan/si_cmd_buffer.c        | 13 +++++++++++++
 6 files changed, 99 insertions(+), 6 deletions(-)

diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index 9f36d98..a33d776 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -1117,6 +1117,35 @@ radv_load_depth_clear_regs(struct radv_cmd_buffer *cmd_buffer,
 	radeon_emit(cmd_buffer->cs, 0);
 }
 
+/*
+ * With DCC some clear colors don't require a CMASK elimination before the
+ * image is used as a texture. This writes a 64-bit predicate value into the
+ * image's BO (at dcc_pred_offset) telling whether the eliminate is required.
+ */
+void
+radv_set_dcc_need_cmask_elim_pred(struct radv_cmd_buffer *cmd_buffer,
+				  struct radv_image *image,
+				  bool value)
+{
+	uint64_t pred_val = value;
+	uint64_t va = cmd_buffer->device->ws->buffer_get_va(image->bo);
+	va += image->offset + image->dcc_pred_offset;
+
+	if (!image->surface.dcc_size) /* image has no DCC data: emit nothing */
+		return;
+
+	cmd_buffer->device->ws->cs_add_buffer(cmd_buffer->cs, image->bo, 8);
+
+	radeon_emit(cmd_buffer->cs, PKT3(PKT3_WRITE_DATA, 4, 0));
+	radeon_emit(cmd_buffer->cs, S_370_DST_SEL(V_370_MEM_ASYNC) |
+				    S_370_WR_CONFIRM(1) |
+				    S_370_ENGINE_SEL(V_370_PFP));
+	radeon_emit(cmd_buffer->cs, va);
+	radeon_emit(cmd_buffer->cs, va >> 32);
+	radeon_emit(cmd_buffer->cs, pred_val); /* predicate emitted as two dwords */
+	radeon_emit(cmd_buffer->cs, pred_val >> 32);
+}
+
 void
 radv_set_color_clear_regs(struct radv_cmd_buffer *cmd_buffer,
 			  struct radv_image *image,
@@ -2639,10 +2668,10 @@ void radv_CmdDraw(
 	if (cmd_buffer->state.pipeline->graphics.vtx_emit_num == 3)
 		radeon_emit(cmd_buffer->cs, 0);
 
-	radeon_emit(cmd_buffer->cs, PKT3(PKT3_NUM_INSTANCES, 0, 0));
+	radeon_emit(cmd_buffer->cs, PKT3(PKT3_NUM_INSTANCES, 0, cmd_buffer->state.predicating));
 	radeon_emit(cmd_buffer->cs, instanceCount);
 
-	radeon_emit(cmd_buffer->cs, PKT3(PKT3_DRAW_INDEX_AUTO, 1, 0));
+	radeon_emit(cmd_buffer->cs, PKT3(PKT3_DRAW_INDEX_AUTO, 1, cmd_buffer->state.predicating));
 	radeon_emit(cmd_buffer->cs, vertexCount);
 	radeon_emit(cmd_buffer->cs, V_0287F0_DI_SRC_SEL_AUTO_INDEX |
 		    S_0287F0_USE_OPAQUE(0));
diff --git a/src/amd/vulkan/radv_image.c b/src/amd/vulkan/radv_image.c
index 91c7e5f..c21cd69 100644
--- a/src/amd/vulkan/radv_image.c
+++ b/src/amd/vulkan/radv_image.c
@@ -705,12 +705,16 @@ static void
 radv_image_alloc_cmask(struct radv_device *device,
 		       struct radv_image *image)
 {
+	uint32_t clear_value_size = 0;
 	radv_image_get_cmask_info(device, image, &image->cmask);
 
 	image->cmask.offset = align64(image->size, image->cmask.alignment);
 	/* + 8 for storing the clear values */
-	image->clear_value_offset = image->cmask.offset + image->cmask.size;
-	image->size = image->cmask.offset + image->cmask.size + 8;
+	if (!image->clear_value_offset) {
+		image->clear_value_offset = image->cmask.offset + image->cmask.size;
+		clear_value_size = 8;
+	}
+	image->size = image->cmask.offset + image->cmask.size + clear_value_size;
 	image->alignment = MAX2(image->alignment, image->cmask.alignment);
 }
 
@@ -719,9 +723,10 @@ radv_image_alloc_dcc(struct radv_device *device,
 		       struct radv_image *image)
 {
 	image->dcc_offset = align64(image->size, image->surface.dcc_alignment);
-	/* + 8 for storing the clear values */
+	/* + 16 for storing the clear values + dcc pred */
 	image->clear_value_offset = image->dcc_offset + image->surface.dcc_size;
-	image->size = image->dcc_offset + image->surface.dcc_size + 8;
+	image->dcc_pred_offset = image->clear_value_offset + 8;
+	image->size = image->dcc_offset + image->surface.dcc_size + 16;
 	image->alignment = MAX2(image->alignment, image->surface.dcc_alignment);
 }
 
diff --git a/src/amd/vulkan/radv_meta_clear.c b/src/amd/vulkan/radv_meta_clear.c
index d7e7c5b..a69c268 100644
--- a/src/amd/vulkan/radv_meta_clear.c
+++ b/src/amd/vulkan/radv_meta_clear.c
@@ -858,6 +858,22 @@ fail:
 	return res;
 }
 
+static bool dcc_requires_cmask_eliminate(VkClearColorValue *clear_value)
+{
+	static const VkClearColorValue zero = {0};
+	static const VkClearColorValue zero_alpha_1 = { .float32 = { 0.0, 0.0, 0.0, 1.0 } };
+
+	/* all-zero clear color: no cmask eliminate needed */
+	if (!memcmp(clear_value, &zero, sizeof(*clear_value)))
+	    return false;
+
+	/* float32 (0, 0, 0, 1), compared bytewise - NOTE(review): confirm this is right for integer formats */
+	if (!memcmp(clear_value, &zero_alpha_1, sizeof(*clear_value)))
+	    return false;
+
+	return true; /* every other clear value is treated as needing the eliminate */
+}
+
 static bool
 emit_fast_color_clear(struct radv_cmd_buffer *cmd_buffer,
 		      const VkClearAttachment *clear_att,
@@ -937,6 +953,8 @@ emit_fast_color_clear(struct radv_cmd_buffer *cmd_buffer,
 		radv_fill_buffer(cmd_buffer, iview->image->bo,
 				 iview->image->offset + iview->image->dcc_offset,
 				 iview->image->surface.dcc_size, 0x20202020);
+		radv_set_dcc_need_cmask_elim_pred(cmd_buffer, iview->image,
+						  dcc_requires_cmask_eliminate(&clear_value));
 	} else {
 		radv_fill_buffer(cmd_buffer, iview->image->bo,
 				 iview->image->offset + iview->image->cmask.offset,
diff --git a/src/amd/vulkan/radv_meta_fast_clear.c b/src/amd/vulkan/radv_meta_fast_clear.c
index c93901e..8cd2579 100644
--- a/src/amd/vulkan/radv_meta_fast_clear.c
+++ b/src/amd/vulkan/radv_meta_fast_clear.c
@@ -334,6 +334,20 @@ emit_fast_clear_flush(struct radv_cmd_buffer *cmd_buffer,
 					 RADV_CMD_FLAG_FLUSH_AND_INV_CB_META);
 }
 
+static void
+radv_emit_image_set_pred(struct radv_cmd_buffer *cmd_buffer,
+			 struct radv_image *image, bool value)
+{
+	uint64_t va = 0; /* stays 0 when value is false */
+
+	if (value) { /* address of the image's DCC predicate slot inside its BO */
+		va = cmd_buffer->device->ws->buffer_get_va(image->bo) + image->offset;
+		va += image->dcc_pred_offset;
+	}
+
+	si_emit_set_pred(cmd_buffer, va); /* emits the SET_PREDICATION packet for va */
+}
+
 /**
  */
 void
@@ -351,6 +365,10 @@ radv_fast_clear_flush_image_inplace(struct radv_cmd_buffer *cmd_buffer,
 	radv_meta_save_pass(&saved_pass_state, cmd_buffer);
 	radv_meta_save_graphics_reset_vport_scissor_novertex(&saved_state, cmd_buffer);
 
+	if (image->surface.dcc_size) {
+		radv_emit_image_set_pred(cmd_buffer, image, true);
+		cmd_buffer->state.predicating = true;
+	}
 	for (uint32_t layer = 0; layer < layer_count; ++layer) {
 		struct radv_image_view iview;
 
@@ -414,6 +432,10 @@ radv_fast_clear_flush_image_inplace(struct radv_cmd_buffer *cmd_buffer,
 					&cmd_buffer->pool->alloc);
 
 	}
+	if (image->surface.dcc_size) {
+		cmd_buffer->state.predicating = false;
+		radv_emit_image_set_pred(cmd_buffer, image, false);
+	}
 	radv_meta_restore(&saved_state, cmd_buffer);
 	radv_meta_restore_pass(&saved_pass_state, cmd_buffer);
 }
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index 87cb0a6..a36ba07 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -776,6 +776,7 @@ struct radv_cmd_state {
 	uint32_t                                      descriptors_dirty;
 	uint32_t                                      trace_id;
 	uint32_t                                      last_ia_multi_vgt_param;
+	bool predicating;
 };
 
 struct radv_cmd_pool {
@@ -865,6 +866,7 @@ void si_cs_emit_cache_flush(struct radeon_winsys_cs *cs,
 			    bool is_mec,
 			    enum radv_cmd_flush_bits flush_bits);
 void si_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer);
+void si_emit_set_pred(struct radv_cmd_buffer *cmd_buffer, uint64_t va);
 void si_cp_dma_buffer_copy(struct radv_cmd_buffer *cmd_buffer,
 			   uint64_t src_va, uint64_t dest_va,
 			   uint64_t size);
@@ -907,6 +909,9 @@ void radv_set_color_clear_regs(struct radv_cmd_buffer *cmd_buffer,
 			       struct radv_image *image,
 			       int idx,
 			       uint32_t color_values[2]);
+void radv_set_dcc_need_cmask_elim_pred(struct radv_cmd_buffer *cmd_buffer,
+				       struct radv_image *image,
+				       bool value);
 void radv_fill_buffer(struct radv_cmd_buffer *cmd_buffer,
 		      struct radeon_winsys_bo *bo,
 		      uint64_t offset, uint64_t size, uint32_t value);
@@ -1209,6 +1214,7 @@ struct radv_image {
 	struct radv_fmask_info fmask;
 	struct radv_cmask_info cmask;
 	uint32_t clear_value_offset;
+	uint32_t dcc_pred_offset;
 };
 
 /* Whether the image has a htile that is known consistent with the contents of
diff --git a/src/amd/vulkan/si_cmd_buffer.c b/src/amd/vulkan/si_cmd_buffer.c
index 962b76f..f84443b 100644
--- a/src/amd/vulkan/si_cmd_buffer.c
+++ b/src/amd/vulkan/si_cmd_buffer.c
@@ -1112,6 +1112,19 @@ si_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer)
 	cmd_buffer->state.flush_bits = 0;
 }
 
+void
+si_emit_set_pred(struct radv_cmd_buffer *cmd_buffer, uint64_t va)
+{
+	uint32_t val = 0; /* va == 0 emits an all-zero payload; presumably turns predication off - confirm */
+
+	if (va) /* pack address bits 39:32 together with the BOOL64 op and DRAW_VISIBLE flag */
+		val = (((va >> 32) & 0xff) |
+		       PRED_OP(PREDICATION_OP_BOOL64)|
+		       PREDICATION_DRAW_VISIBLE);
+	radeon_emit(cmd_buffer->cs, PKT3(PKT3_SET_PREDICATION, 1, 0));
+	radeon_emit(cmd_buffer->cs, va);
+	radeon_emit(cmd_buffer->cs, val);
+}
 
 /* Set this if you want the 3D engine to wait until CP DMA is done.
  * It should be set on the last CP DMA packet. */
-- 
2.9.4



More information about the mesa-dev mailing list