[igt-dev] [PATCH i-g-t v9 3/4] lib/rendercopy: Add support for Clear Color

Mika Kahola mika.kahola at intel.com
Thu Nov 12 13:28:36 UTC 2020


Modify rendercopy to support CCS Clear Color feature.

Signed-off-by: Mika Kahola <mika.kahola at intel.com>
---
 lib/gen8_render.h       |   1 +
 lib/gen9_render.h       |   6 +-
 lib/intel_batchbuffer.c |  10 +++
 lib/intel_batchbuffer.h |   6 ++
 lib/rendercopy.h        |   4 ++
 lib/rendercopy_gen9.c   | 146 ++++++++++++++++++++++++++++------------
 6 files changed, 128 insertions(+), 45 deletions(-)

diff --git a/lib/gen8_render.h b/lib/gen8_render.h
index 31dc01bc..1b0f527e 100644
--- a/lib/gen8_render.h
+++ b/lib/gen8_render.h
@@ -26,6 +26,7 @@
 
 # define GEN8_VS_FLOATING_POINT_MODE_ALTERNATE          (1 << 16)
 
+#define GEN8_3DSTATE_FAST_CLEAR_ENABLE		(1 << 8)
 #define GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP	\
 						GEN4_3D(3, 0, 0x21)
 #define GEN8_3DSTATE_PS_BLEND			GEN4_3D(3, 0, 0x4d)
diff --git a/lib/gen9_render.h b/lib/gen9_render.h
index 6274e902..cf5bd2d9 100644
--- a/lib/gen9_render.h
+++ b/lib/gen9_render.h
@@ -127,7 +127,11 @@ struct gen9_surface_state {
 	} ss9;
 
 	struct {
-		uint32_t aux_base_addr;
+		uint32_t aux_base_addr:20;
+		uint32_t procedual_texture:1;
+		uint32_t clearvalue_addr_enable:1;
+		uint32_t quilt_height:5;
+		uint32_t quilt_width:5;
 	} ss10;
 
 	struct {
diff --git a/lib/intel_batchbuffer.c b/lib/intel_batchbuffer.c
index 7b4cfb0d..f249a454 100644
--- a/lib/intel_batchbuffer.c
+++ b/lib/intel_batchbuffer.c
@@ -1096,6 +1096,16 @@ igt_vebox_copyfunc_t igt_get_vebox_copyfunc(int devid)
 	return copy;
 }
 
+igt_render_clearfunc_t igt_get_render_clearfunc(int devid)
+{
+	igt_render_clearfunc_t clear = NULL;
+
+	if (IS_GEN12(devid))
+		clear = gen12_render_clearfunc;
+
+	return clear;
+}
+
 /**
  * igt_get_media_fillfunc:
  * @devid: pci device id
diff --git a/lib/intel_batchbuffer.h b/lib/intel_batchbuffer.h
index ab1b0c28..5d996ddf 100644
--- a/lib/intel_batchbuffer.h
+++ b/lib/intel_batchbuffer.h
@@ -374,6 +374,12 @@ typedef void (*igt_vebox_copyfunc_t)(struct intel_bb *ibb,
 
 igt_vebox_copyfunc_t igt_get_vebox_copyfunc(int devid);
 
+typedef void (*igt_render_clearfunc_t)(struct intel_bb *ibb,
+				       struct intel_buf *dst, unsigned int dst_x, unsigned int dst_y,
+				       unsigned int width, unsigned int height,
+				       float cc_color[4]);
+igt_render_clearfunc_t igt_get_render_clearfunc(int devid);
+
 /**
  * igt_fillfunc_t:
  * @i915: drm fd
diff --git a/lib/rendercopy.h b/lib/rendercopy.h
index 7d5f0802..dd2e1c43 100644
--- a/lib/rendercopy.h
+++ b/lib/rendercopy.h
@@ -23,6 +23,10 @@ static inline void emit_vertex_normalized(struct intel_bb *ibb,
 	intel_bb_out(ibb, u.ui);
 }
 
+void gen12_render_clearfunc(struct intel_bb *ibb,
+			    struct intel_buf *dst, unsigned int dst_x, unsigned int dst_y,
+                            unsigned int width, unsigned int height,
+			    float clear_color[4]);
 void gen12_render_copyfunc(struct intel_bb *ibb,
 			   struct intel_buf *src, uint32_t src_x, uint32_t src_y,
 			   uint32_t width, uint32_t height,
diff --git a/lib/rendercopy_gen9.c b/lib/rendercopy_gen9.c
index ef6855c9..73272085 100644
--- a/lib/rendercopy_gen9.c
+++ b/lib/rendercopy_gen9.c
@@ -188,20 +188,12 @@ gen8_bind_buf(struct intel_bb *ibb, const struct intel_buf *buf, int is_dst) {
 							   buf->ccs[0].offset,
 							   intel_bb_offset(ibb) + 4 * 10,
 							   buf->addr.offset);
-		ss->ss10.aux_base_addr = (address + buf->ccs[0].offset);
-		ss->ss11.aux_base_addr_hi = (address + buf->ccs[0].offset) >> 32;
-	}
-
-	if (buf->cc.offset) {
-		igt_assert(buf->compression == I915_COMPRESSION_RENDER);
+		if (buf->cc.offset) {
+			ss->ss10.aux_base_addr = (address + buf->ccs[0].offset);
+			ss->ss10.clearvalue_addr_enable = 1;
+			ss->ss11.aux_base_addr_hi = (address + buf->ccs[0].offset) >> 32;
 
-		address = intel_bb_offset_reloc_with_delta(ibb, buf->handle,
-							   read_domain, write_domain,
-							   buf->cc.offset,
-							   intel_bb_offset(ibb) + 4 * 12,
-							   buf->addr.offset);
-		ss->ss12.clear_address = address + buf->cc.offset;
-		ss->ss13.clear_address_hi = (address + buf->cc.offset) >> 32;
+		}
 	}
 
 	return intel_bb_ptr_add_return_prev_offset(ibb, sizeof(*ss));
@@ -218,7 +210,9 @@ gen8_bind_surfaces(struct intel_bb *ibb,
 	binding_table_offset = intel_bb_ptr_add_return_prev_offset(ibb, 32);
 
 	binding_table[0] = gen8_bind_buf(ibb, dst, 1);
-	binding_table[1] = gen8_bind_buf(ibb, src, 0);
+
+	if (src != NULL)
+		binding_table[1] = gen8_bind_buf(ibb, src, 1);
 
 	return binding_table_offset;
 }
@@ -274,16 +268,39 @@ gen7_fill_vertex_buffer_data(struct intel_bb *ibb,
 	offset = intel_bb_offset(ibb);
 
 	emit_vertex_2s(ibb, dst_x + width, dst_y + height);
-	emit_vertex_normalized(ibb, src_x + width, intel_buf_width(src));
-	emit_vertex_normalized(ibb, src_y + height, intel_buf_height(src));
+
+	if (src == NULL) {
+		dst_x /= 64;
+		dst_y /= 16;
+	}
+
+	if (src != NULL) {
+		emit_vertex_normalized(ibb, src_x + width, intel_buf_width(src));
+		emit_vertex_normalized(ibb, src_y + height, intel_buf_height(src));
+	} else {
+		emit_vertex_normalized(ibb, 0, 0);
+		emit_vertex_normalized(ibb, 0, 0);
+	}
 
 	emit_vertex_2s(ibb, dst_x, dst_y + height);
-	emit_vertex_normalized(ibb, src_x, intel_buf_width(src));
-	emit_vertex_normalized(ibb, src_y + height, intel_buf_height(src));
+
+	if (src != NULL) {
+		emit_vertex_normalized(ibb, src_x, intel_buf_width(src));
+		emit_vertex_normalized(ibb, src_y + height, intel_buf_height(src));
+	} else {
+		emit_vertex_normalized(ibb, 0, 0);
+		emit_vertex_normalized(ibb, 0, 0);
+	}
 
 	emit_vertex_2s(ibb, dst_x, dst_y);
-	emit_vertex_normalized(ibb, src_x, intel_buf_width(src));
-	emit_vertex_normalized(ibb, src_y, intel_buf_height(src));
+
+	if (src != NULL) {
+		emit_vertex_normalized(ibb, src_x, intel_buf_width(src));
+		emit_vertex_normalized(ibb, src_y, intel_buf_height(src));
+	} else {
+		emit_vertex_normalized(ibb, 0, 0);
+		emit_vertex_normalized(ibb, 0, 0);
+	}
 
 	return offset;
 }
@@ -729,7 +746,7 @@ gen8_emit_sf(struct intel_bb *ibb)
 }
 
 static void
-gen8_emit_ps(struct intel_bb *ibb, uint32_t kernel) {
+gen8_emit_ps(struct intel_bb *ibb, uint32_t kernel, bool fast_clear) {
 	const int max_threads = 63;
 
 	intel_bb_out(ibb, GEN6_3DSTATE_WM | (2 - 2));
@@ -753,10 +770,16 @@ gen8_emit_ps(struct intel_bb *ibb, uint32_t kernel) {
 	intel_bb_out(ibb, GEN7_3DSTATE_PS | (12-2));
 	intel_bb_out(ibb, kernel);
 	intel_bb_out(ibb, 0); /* kernel hi */
-	intel_bb_out(ibb, 1 << GEN6_3DSTATE_WM_SAMPLER_COUNT_SHIFT |
-		     2 << GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT);
+
+	if (fast_clear)
+		intel_bb_out(ibb, 1);
+	else
+		intel_bb_out(ibb, 1 << GEN6_3DSTATE_WM_SAMPLER_COUNT_SHIFT |
+			     2 << GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT);
+
 	intel_bb_out(ibb, 0); /* scratch space stuff */
 	intel_bb_out(ibb, 0); /* scratch hi */
+
 	intel_bb_out(ibb, (max_threads - 1) << GEN8_3DSTATE_PS_MAX_THREADS_SHIFT |
 		     GEN6_3DSTATE_WM_16_DISPATCH_ENABLE);
 	intel_bb_out(ibb, 6 << GEN6_3DSTATE_WM_DISPATCH_START_GRF_0_SHIFT);
@@ -765,6 +788,9 @@ gen8_emit_ps(struct intel_bb *ibb, uint32_t kernel) {
 	intel_bb_out(ibb, 0); // kernel 2
 	intel_bb_out(ibb, 0); /* kernel 2 hi */
 
+	if (fast_clear)
+		intel_bb_out(ibb, GEN8_3DSTATE_FAST_CLEAR_ENABLE);
+
 	intel_bb_out(ibb, GEN8_3DSTATE_PS_BLEND | (2 - 2));
 	intel_bb_out(ibb, GEN8_PS_BLEND_HAS_WRITEABLE_RT);
 
@@ -876,27 +902,31 @@ static void gen8_emit_primitive(struct intel_bb *ibb, uint32_t offset)
 #define BATCH_STATE_SPLIT 2048
 
 static
-void _gen9_render_copyfunc(struct intel_bb *ibb,
-			   struct intel_buf *src,
-			   unsigned int src_x, unsigned int src_y,
-			   unsigned int width, unsigned int height,
-			   struct intel_buf *dst,
-			   unsigned int dst_x, unsigned int dst_y,
-			   struct intel_buf *aux_pgtable_buf,
-			   const uint32_t ps_kernel[][4],
-			   uint32_t ps_kernel_size)
+void _gen9_render_op(struct intel_bb *ibb,
+		     struct intel_buf *src,
+		     unsigned int src_x, unsigned int src_y,
+		     unsigned int width, unsigned int height,
+		     struct intel_buf *dst,
+		     unsigned int dst_x, unsigned int dst_y,
+		     struct intel_buf *aux_pgtable_buf,
+		     const uint32_t ps_kernel[][4],
+                     uint32_t ps_kernel_size)
 {
 	uint32_t ps_sampler_state, ps_kernel_off, ps_binding_table;
 	uint32_t scissor_state;
 	uint32_t vertex_buffer;
 	uint32_t aux_pgtable_state;
+	bool fast_clear = src != NULL;
 
-	igt_assert(src->bpp == dst->bpp);
+	if (src != NULL)
+		igt_assert(src->bpp == dst->bpp);
 
 	intel_bb_flush_render(ibb);
 
 	intel_bb_add_intel_buf(ibb, dst, true);
-	intel_bb_add_intel_buf(ibb, src, false);
+
+	if (!fast_clear)
+		intel_bb_add_intel_buf(ibb, src, false);
 
 	intel_bb_ptr_set(ibb, BATCH_STATE_SPLIT);
 
@@ -949,11 +979,13 @@ void _gen9_render_copyfunc(struct intel_bb *ibb,
 	intel_bb_out(ibb, 0);
 	intel_bb_out(ibb, 0);
 
+	gen8_emit_ps(ibb, ps_kernel_off, fast_clear);
+
 	gen7_emit_clip(ibb);
 
 	gen8_emit_sf(ibb);
 
-	gen8_emit_ps(ibb, ps_kernel_off);
+	gen8_emit_ps(ibb, ps_kernel_off, fast_clear);
 
 	intel_bb_out(ibb, GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS);
 	intel_bb_out(ibb, ps_binding_table);
@@ -991,9 +1023,9 @@ void gen9_render_copyfunc(struct intel_bb *ibb,
 			  unsigned int dst_x, unsigned int dst_y)
 
 {
-	_gen9_render_copyfunc(ibb, src, src_x, src_y,
-			  width, height, dst, dst_x, dst_y, NULL,
-			  ps_kernel_gen9, sizeof(ps_kernel_gen9));
+	_gen9_render_op(ibb, src, src_x, src_y,
+                        width, height, dst, dst_x, dst_y, NULL,
+			ps_kernel_gen9, sizeof(ps_kernel_gen9));
 }
 
 void gen11_render_copyfunc(struct intel_bb *ibb,
@@ -1003,9 +1035,9 @@ void gen11_render_copyfunc(struct intel_bb *ibb,
 			   struct intel_buf *dst,
 			   unsigned int dst_x, unsigned int dst_y)
 {
-	_gen9_render_copyfunc(ibb, src, src_x, src_y,
-			  width, height, dst, dst_x, dst_y, NULL,
-			  ps_kernel_gen11, sizeof(ps_kernel_gen11));
+	_gen9_render_op(ibb, src, src_x, src_y,
+		        width, height, dst, dst_x, dst_y, NULL,
+			ps_kernel_gen11, sizeof(ps_kernel_gen11));
 }
 
 void gen12_render_copyfunc(struct intel_bb *ibb,
@@ -1019,11 +1051,37 @@ void gen12_render_copyfunc(struct intel_bb *ibb,
 
 	gen12_aux_pgtable_init(&pgtable_info, ibb, src, dst);
 
-	_gen9_render_copyfunc(ibb, src, src_x, src_y,
-			  width, height, dst, dst_x, dst_y,
-			  pgtable_info.pgtable_buf,
+	_gen9_render_op(ibb, src, src_x, src_y,
+			width, height, dst, dst_x, dst_y,
+			pgtable_info.pgtable_buf,
 			  gen12_render_copy,
 			  sizeof(gen12_render_copy));
 
 	gen12_aux_pgtable_cleanup(ibb, &pgtable_info);
 }
+
+void gen12_render_clearfunc(struct intel_bb *ibb,
+			    struct intel_buf *dst,
+			    unsigned int dst_x, unsigned int dst_y,
+			    unsigned int width, unsigned int height,
+			    float clear_color[4])
+{
+	struct aux_pgtable_info pgtable_info = { };
+
+	gen12_aux_pgtable_init(&pgtable_info, ibb, NULL, dst);
+
+	/* BSpec 21136 */
+	intel_bb_ptr_set(ibb, dst->cc.offset);
+	intel_bb_out(ibb, clear_color[0]);
+	intel_bb_out(ibb, clear_color[2]);
+	intel_bb_out(ibb, clear_color[1]);
+	intel_bb_out(ibb, clear_color[4]);
+
+	_gen9_render_op(ibb, NULL, 0, 0,
+			width, height, dst, dst_x, dst_y,
+			pgtable_info.pgtable_buf,
+			gen12_render_copy,
+			sizeof(gen12_render_copy));
+
+	gen12_aux_pgtable_cleanup(ibb, &pgtable_info);
+}
-- 
2.25.1



More information about the igt-dev mailing list