[igt-dev] [PATCH i-g-t v10 2/4] lib/rendercopy: Enable fast clear

Imre Deak imre.deak at intel.com
Fri Nov 20 11:43:49 UTC 2020


On Fri, Nov 20, 2020 at 11:36:44AM +0200, Mika Kahola wrote:
> Enable fast clear rendering on rendercopy function.
> 
> v2: Modify _gen9_render_copyfunc to support fast clear (Matt)
>     Enable fast clear bit on 3D sequence (Matt)
>     Add helper function to figure out clear color modifier (Matt)
> v3: Remove unrelated line additions/removes
> v4: Fast clear with color (Imre)
> v5: Write raw 32-bit color values to register (Imre)
>     Require 32-bit color format
> v6: Rebase to use batchbuffer without libdrm dependency
> v7: Enable clear color (Nanley)
> v8: Various cleanups (Imre)
>     Modificate buffer creation (Imre)
> v9: Renaming of render_copyfunc() to render_op() (Imre)
>     Remove igt_render_clearfunc variable (Imre)
> v10: Dst buffer width division by 64 pixels and height by 16 lines (Imre)
>      Reorder ss10 bit fields (Imre)
>      Relocate buffer with clear color value enabled (Imre)
>      Set fast clear enable bit in correct dword (Imre)
> 
> Signed-off-by: Mika Kahola <mika.kahola at intel.com>
> ---
>  lib/gen8_render.h       |   1 +
>  lib/gen9_render.h       |   6 +-
>  lib/igt_fb.c            |  20 +++--
>  lib/igt_fb.h            |   3 +
>  lib/intel_batchbuffer.c |   5 ++
>  lib/intel_batchbuffer.h |   6 ++
>  lib/rendercopy.h        |   4 +
>  lib/rendercopy_gen9.c   | 167 ++++++++++++++++++++++++++++------------
>  8 files changed, 156 insertions(+), 56 deletions(-)
> 
> diff --git a/lib/gen8_render.h b/lib/gen8_render.h
> index 31dc01bc..1b0f527e 100644
> --- a/lib/gen8_render.h
> +++ b/lib/gen8_render.h
> @@ -26,6 +26,7 @@
>  
>  # define GEN8_VS_FLOATING_POINT_MODE_ALTERNATE          (1 << 16)
>  
> +#define GEN8_3DSTATE_FAST_CLEAR_ENABLE		(1 << 8)
>  #define GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP	\
>  						GEN4_3D(3, 0, 0x21)
>  #define GEN8_3DSTATE_PS_BLEND			GEN4_3D(3, 0, 0x4d)
> diff --git a/lib/gen9_render.h b/lib/gen9_render.h
> index 6274e902..0bd797f9 100644
> --- a/lib/gen9_render.h
> +++ b/lib/gen9_render.h
> @@ -127,7 +127,11 @@ struct gen9_surface_state {
>  	} ss9;
>  
>  	struct {
> -		uint32_t aux_base_addr;
> +		uint32_t quilt_width:5;
> +		uint32_t quilt_height:5;
> +		uint32_t clearvalue_addr_enable:1;
> +		uint32_t procedual_texture:1;

PT would need a "Only on TGL+" comment

> +		uint32_t aux_base_addr:20;
>  	} ss10;
>  
>  	struct {
> diff --git a/lib/igt_fb.c b/lib/igt_fb.c
> index 43f8c475..422a9e06 100644
> --- a/lib/igt_fb.c
> +++ b/lib/igt_fb.c
> @@ -2141,9 +2141,10 @@ static int yuv_semiplanar_bpp(uint32_t drm_format)
>  	}
>  }
>  
> -static struct intel_buf *create_buf(struct fb_blit_upload *blit,
> -				   const struct igt_fb *fb,
> -				   const char *name)
> +struct intel_buf *
> +igt_fb_create_intel_buf(int fd, struct buf_ops *bops,
> +                        const struct igt_fb *fb,
> +                        const char *name)
>  {
>  	struct intel_buf *buf;
>  	uint32_t bo_name, handle, compression;
> @@ -2169,10 +2170,10 @@ static struct intel_buf *create_buf(struct fb_blit_upload *blit,
>  		compression = I915_COMPRESSION_NONE;
>  	}
>  
> -	bo_name = gem_flink(blit->fd, fb->gem_handle);
> -	handle = gem_open(blit->fd, bo_name);
> +	bo_name = gem_flink(fd, fb->gem_handle);
> +	handle = gem_open(fd, bo_name);
>  
> -	buf = intel_buf_create_using_handle(blit->bops, handle,
> +	buf = intel_buf_create_using_handle(bops, handle,
>  					    fb->width, fb->height,
>  					    fb->plane_bpp[0], 0,
>  					    igt_fb_mod_to_tiling(fb->modifier),
> @@ -2213,6 +2214,13 @@ static struct intel_buf *create_buf(struct fb_blit_upload *blit,
>  	return buf;
>  }
>  
> +static struct intel_buf *create_buf(struct fb_blit_upload *blit,
> +				   const struct igt_fb *fb,
> +				   const char *name)
> +{
> +	return igt_fb_create_intel_buf(blit->fd, blit->bops, fb, name);
> +}
> +
>  static void fini_buf(struct intel_buf *buf)
>  {
>  	intel_buf_destroy(buf);
> diff --git a/lib/igt_fb.h b/lib/igt_fb.h
> index b36db965..bc5b8fa0 100644
> --- a/lib/igt_fb.h
> +++ b/lib/igt_fb.h
> @@ -39,6 +39,7 @@
>  
>  #include "igt_color_encoding.h"
>  #include "igt_debugfs.h"
> +#include "intel_bufops.h"

struct intel_buf; is enough here to avoid adding a header dependency

>  
>  /*
>   * Internal format to denote a buffer compatible with pixman's
> @@ -129,6 +130,8 @@ igt_create_fb_with_bo_size(int fd, int width, int height,
>  			   enum igt_color_range color_range,
>  			   struct igt_fb *fb, uint64_t bo_size,
>  			   unsigned bo_stride);
> +struct intel_buf *igt_fb_create_intel_buf(int fd, struct buf_ops *bops,
> +					  const struct igt_fb *fb, const char *name);
>  unsigned int igt_create_fb(int fd, int width, int height, uint32_t format,
>  			   uint64_t modifier, struct igt_fb *fb);
>  unsigned int igt_create_color_fb(int fd, int width, int height,
> diff --git a/lib/intel_batchbuffer.c b/lib/intel_batchbuffer.c
> index 7b4cfb0d..faec8531 100644
> --- a/lib/intel_batchbuffer.c
> +++ b/lib/intel_batchbuffer.c
> @@ -1096,6 +1096,11 @@ igt_vebox_copyfunc_t igt_get_vebox_copyfunc(int devid)
>  	return copy;
>  }
>  
> +igt_render_clearfunc_t igt_get_render_clearfunc(int devid)
> +{
> +	return IS_GEN12(devid) ? gen12_render_clearfunc : NULL;
> +}
> +
>  /**
>   * igt_get_media_fillfunc:
>   * @devid: pci device id
> diff --git a/lib/intel_batchbuffer.h b/lib/intel_batchbuffer.h
> index ab1b0c28..250566da 100644
> --- a/lib/intel_batchbuffer.h
> +++ b/lib/intel_batchbuffer.h
> @@ -374,6 +374,12 @@ typedef void (*igt_vebox_copyfunc_t)(struct intel_bb *ibb,
>  
>  igt_vebox_copyfunc_t igt_get_vebox_copyfunc(int devid);
>  
> +typedef void (*igt_render_clearfunc_t)(struct intel_bb *ibb,
> +				       struct intel_buf *dst, unsigned int dst_x, unsigned int dst_y,
> +				       unsigned int width, unsigned int height,
> +				       const float cc_color[4]);
> +igt_render_clearfunc_t igt_get_render_clearfunc(int devid);
> +
>  /**
>   * igt_fillfunc_t:
>   * @i915: drm fd
> diff --git a/lib/rendercopy.h b/lib/rendercopy.h
> index 7d5f0802..d2d9c586 100644
> --- a/lib/rendercopy.h
> +++ b/lib/rendercopy.h
> @@ -23,6 +23,10 @@ static inline void emit_vertex_normalized(struct intel_bb *ibb,
>  	intel_bb_out(ibb, u.ui);
>  }
>  
> +void gen12_render_clearfunc(struct intel_bb *ibb,
> +			    struct intel_buf *dst, unsigned int dst_x, unsigned int dst_y,
> +			    unsigned int width, unsigned int height,
> +			    const float clear_color[4]);
>  void gen12_render_copyfunc(struct intel_bb *ibb,
>  			   struct intel_buf *src, uint32_t src_x, uint32_t src_y,
>  			   uint32_t width, uint32_t height,
> diff --git a/lib/rendercopy_gen9.c b/lib/rendercopy_gen9.c
> index ef6855c9..95563066 100644
> --- a/lib/rendercopy_gen9.c
> +++ b/lib/rendercopy_gen9.c
> @@ -185,23 +185,25 @@ gen8_bind_buf(struct intel_bb *ibb, const struct intel_buf *buf, int is_dst) {
>  
>  		address = intel_bb_offset_reloc_with_delta(ibb, buf->handle,
>  							   read_domain, write_domain,
> -							   buf->ccs[0].offset,
> +							   (buf->cc.offset ? (1 << 10) : 0) | buf->ccs[0].offset,
>  							   intel_bb_offset(ibb) + 4 * 10,
>  							   buf->addr.offset);
> -		ss->ss10.aux_base_addr = (address + buf->ccs[0].offset);
> +		ss->ss10.aux_base_addr = (address + buf->ccs[0].offset) >> 12;
>  		ss->ss11.aux_base_addr_hi = (address + buf->ccs[0].offset) >> 32;
> -	}
>  
> -	if (buf->cc.offset) {
> -		igt_assert(buf->compression == I915_COMPRESSION_RENDER);
> +		if (buf->cc.offset) {
> +			igt_assert(buf->compression == I915_COMPRESSION_RENDER);
>  
> -		address = intel_bb_offset_reloc_with_delta(ibb, buf->handle,
> -							   read_domain, write_domain,
> -							   buf->cc.offset,
> -							   intel_bb_offset(ibb) + 4 * 12,
> -							   buf->addr.offset);
> -		ss->ss12.clear_address = address + buf->cc.offset;
> -		ss->ss13.clear_address_hi = (address + buf->cc.offset) >> 32;
> +			ss->ss10.clearvalue_addr_enable = 1;
> +
> +			address = intel_bb_offset_reloc_with_delta(ibb, buf->handle,
> +								   read_domain, write_domain,
> +								   buf->cc.offset,
> +								   intel_bb_offset(ibb) + 4 * 12,
> +								   buf->addr.offset);
> +			ss->ss12.clear_address = address + buf->cc.offset;
> +			ss->ss13.clear_address_hi = (address + buf->cc.offset) >> 32;
> +		}
>  	}
>  
>  	return intel_bb_ptr_add_return_prev_offset(ibb, sizeof(*ss));
> @@ -218,7 +220,9 @@ gen8_bind_surfaces(struct intel_bb *ibb,
>  	binding_table_offset = intel_bb_ptr_add_return_prev_offset(ibb, 32);
>  
>  	binding_table[0] = gen8_bind_buf(ibb, dst, 1);
> -	binding_table[1] = gen8_bind_buf(ibb, src, 0);
> +
> +	if (src != NULL)
> +		binding_table[1] = gen8_bind_buf(ibb, src, 0);
>  
>  	return binding_table_offset;
>  }
> @@ -273,17 +277,37 @@ gen7_fill_vertex_buffer_data(struct intel_bb *ibb,
>  	intel_bb_ptr_align(ibb, 8);
>  	offset = intel_bb_offset(ibb);
>  
> -	emit_vertex_2s(ibb, dst_x + width, dst_y + height);
> -	emit_vertex_normalized(ibb, src_x + width, intel_buf_width(src));
> -	emit_vertex_normalized(ibb, src_y + height, intel_buf_height(src));
> +	if (src != NULL) {
> +		emit_vertex_2s(ibb, dst_x + width, dst_y + height);
> +
> +		emit_vertex_normalized(ibb, src_x + width, intel_buf_width(src));
> +		emit_vertex_normalized(ibb, src_y + height, intel_buf_height(src));
>  
> -	emit_vertex_2s(ibb, dst_x, dst_y + height);
> -	emit_vertex_normalized(ibb, src_x, intel_buf_width(src));
> -	emit_vertex_normalized(ibb, src_y + height, intel_buf_height(src));
> +		emit_vertex_2s(ibb, dst_x, dst_y + height);
>  
> -	emit_vertex_2s(ibb, dst_x, dst_y);
> -	emit_vertex_normalized(ibb, src_x, intel_buf_width(src));
> -	emit_vertex_normalized(ibb, src_y, intel_buf_height(src));
> +		emit_vertex_normalized(ibb, src_x, intel_buf_width(src));
> +		emit_vertex_normalized(ibb, src_y + height, intel_buf_height(src));
> +
> +		emit_vertex_2s(ibb, dst_x, dst_y);
> +
> +		emit_vertex_normalized(ibb, src_x, intel_buf_width(src));
> +		emit_vertex_normalized(ibb, src_y, intel_buf_height(src));
> +	} else {
> +		emit_vertex_2s(ibb, (dst_x + width)/64, DIV_ROUND_UP(dst_y + height, 16));

				    DIV_ROUND_UP(dst_x + width, 64),
				    DIV_ROUND_UP(dst_y + height, 16)

as start coords need to be rounded down and end coords up
				

> +
> +		emit_vertex_normalized(ibb, 0, 0);
> +		emit_vertex_normalized(ibb, 0, 0);
> +
> +		emit_vertex_2s(ibb, dst_x/64, DIV_ROUND_UP(dst_y + height, 16));
> +
> +		emit_vertex_normalized(ibb, 0, 0);
> +		emit_vertex_normalized(ibb, 0, 0);
> +
> +		emit_vertex_2s(ibb, dst_x/64, dst_y/16);
> +
> +		emit_vertex_normalized(ibb, 0, 0);
> +		emit_vertex_normalized(ibb, 0, 0);
> +	}
>  
>  	return offset;
>  }
> @@ -729,7 +753,7 @@ gen8_emit_sf(struct intel_bb *ibb)
>  }
>  
>  static void
> -gen8_emit_ps(struct intel_bb *ibb, uint32_t kernel) {
> +gen8_emit_ps(struct intel_bb *ibb, uint32_t kernel, bool fast_clear) {
>  	const int max_threads = 63;
>  
>  	intel_bb_out(ibb, GEN6_3DSTATE_WM | (2 - 2));
> @@ -753,12 +777,19 @@ gen8_emit_ps(struct intel_bb *ibb, uint32_t kernel) {
>  	intel_bb_out(ibb, GEN7_3DSTATE_PS | (12-2));
>  	intel_bb_out(ibb, kernel);
>  	intel_bb_out(ibb, 0); /* kernel hi */
> -	intel_bb_out(ibb, 1 << GEN6_3DSTATE_WM_SAMPLER_COUNT_SHIFT |
> -		     2 << GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT);
> +
> +	if (fast_clear)
> +		intel_bb_out(ibb, 1 <<  GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT);
> +	else
> +		intel_bb_out(ibb, 1 << GEN6_3DSTATE_WM_SAMPLER_COUNT_SHIFT |
> +		             2 << GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT);
> +
>  	intel_bb_out(ibb, 0); /* scratch space stuff */
>  	intel_bb_out(ibb, 0); /* scratch hi */
> +
>  	intel_bb_out(ibb, (max_threads - 1) << GEN8_3DSTATE_PS_MAX_THREADS_SHIFT |
> -		     GEN6_3DSTATE_WM_16_DISPATCH_ENABLE);
> +	             GEN6_3DSTATE_WM_16_DISPATCH_ENABLE |
> +	             (fast_clear ? GEN8_3DSTATE_FAST_CLEAR_ENABLE : 0));
>  	intel_bb_out(ibb, 6 << GEN6_3DSTATE_WM_DISPATCH_START_GRF_0_SHIFT);
>  	intel_bb_out(ibb, 0); // kernel 1
>  	intel_bb_out(ibb, 0); /* kernel 1 hi */
> @@ -876,27 +907,32 @@ static void gen8_emit_primitive(struct intel_bb *ibb, uint32_t offset)
>  #define BATCH_STATE_SPLIT 2048
>  
>  static
> -void _gen9_render_copyfunc(struct intel_bb *ibb,
> -			   struct intel_buf *src,
> -			   unsigned int src_x, unsigned int src_y,
> -			   unsigned int width, unsigned int height,
> -			   struct intel_buf *dst,
> -			   unsigned int dst_x, unsigned int dst_y,
> -			   struct intel_buf *aux_pgtable_buf,
> -			   const uint32_t ps_kernel[][4],
> -			   uint32_t ps_kernel_size)
> +void _gen9_render_op(struct intel_bb *ibb,
> +		     struct intel_buf *src,
> +		     unsigned int src_x, unsigned int src_y,
> +		     unsigned int width, unsigned int height,
> +		     struct intel_buf *dst,
> +		     unsigned int dst_x, unsigned int dst_y,
> +		     struct intel_buf *aux_pgtable_buf,
> +		     const float clear_color[4],
> +		     const uint32_t ps_kernel[][4],
> +		     uint32_t ps_kernel_size)
>  {
>  	uint32_t ps_sampler_state, ps_kernel_off, ps_binding_table;
>  	uint32_t scissor_state;
>  	uint32_t vertex_buffer;
>  	uint32_t aux_pgtable_state;
> +	bool fast_clear = !src;
>  
> -	igt_assert(src->bpp == dst->bpp);
> +	if (!fast_clear)
> +		igt_assert(src->bpp == dst->bpp);
>  
>  	intel_bb_flush_render(ibb);
>  
>  	intel_bb_add_intel_buf(ibb, dst, true);
> -	intel_bb_add_intel_buf(ibb, src, false);
> +
> +	if (!fast_clear)
> +		intel_bb_add_intel_buf(ibb, src, false);
>  
>  	intel_bb_ptr_set(ibb, BATCH_STATE_SPLIT);
>  
> @@ -924,6 +960,18 @@ void _gen9_render_copyfunc(struct intel_bb *ibb,
>  
>  	gen12_emit_aux_pgtable_state(ibb, aux_pgtable_state, true);
>  
> +	if (fast_clear) {
> +		for (int i = 0; i < 4; i++) {
> +			intel_bb_out(ibb, MI_STORE_DWORD_IMM);
> +			intel_bb_emit_reloc(ibb, dst->handle,
> +					    I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
> +                                            dst->cc.offset + i*sizeof(float),
> +					    dst->addr.offset);
> +			intel_bb_out(ibb, *(uint32_t*)&clear_color[i]);
> +               }
> +       }

some w/s and indentation to be fixed above

Looks ok with the above things fixed:
Reviewed-by: Imre Deak <imre.deak at intel.com>

> +
> +
>  	gen8_emit_sip(ibb);
>  
>  	gen7_emit_push_constants(ibb);
> @@ -953,7 +1001,7 @@ void _gen9_render_copyfunc(struct intel_bb *ibb,
>  
>  	gen8_emit_sf(ibb);
>  
> -	gen8_emit_ps(ibb, ps_kernel_off);
> +	gen8_emit_ps(ibb, ps_kernel_off, fast_clear);
>  
>  	intel_bb_out(ibb, GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS);
>  	intel_bb_out(ibb, ps_binding_table);
> @@ -991,9 +1039,9 @@ void gen9_render_copyfunc(struct intel_bb *ibb,
>  			  unsigned int dst_x, unsigned int dst_y)
>  
>  {
> -	_gen9_render_copyfunc(ibb, src, src_x, src_y,
> -			  width, height, dst, dst_x, dst_y, NULL,
> -			  ps_kernel_gen9, sizeof(ps_kernel_gen9));
> +	_gen9_render_op(ibb, src, src_x, src_y,
> +		        width, height, dst, dst_x, dst_y, NULL, NULL,
> +		        ps_kernel_gen9, sizeof(ps_kernel_gen9));
>  }
>  
>  void gen11_render_copyfunc(struct intel_bb *ibb,
> @@ -1003,9 +1051,9 @@ void gen11_render_copyfunc(struct intel_bb *ibb,
>  			   struct intel_buf *dst,
>  			   unsigned int dst_x, unsigned int dst_y)
>  {
> -	_gen9_render_copyfunc(ibb, src, src_x, src_y,
> -			  width, height, dst, dst_x, dst_y, NULL,
> -			  ps_kernel_gen11, sizeof(ps_kernel_gen11));
> +	_gen9_render_op(ibb, src, src_x, src_y,
> +		        width, height, dst, dst_x, dst_y, NULL, NULL,
> +		        ps_kernel_gen11, sizeof(ps_kernel_gen11));
>  }
>  
>  void gen12_render_copyfunc(struct intel_bb *ibb,
> @@ -1019,11 +1067,32 @@ void gen12_render_copyfunc(struct intel_bb *ibb,
>  
>  	gen12_aux_pgtable_init(&pgtable_info, ibb, src, dst);
>  
> -	_gen9_render_copyfunc(ibb, src, src_x, src_y,
> -			  width, height, dst, dst_x, dst_y,
> -			  pgtable_info.pgtable_buf,
> -			  gen12_render_copy,
> -			  sizeof(gen12_render_copy));
> +	_gen9_render_op(ibb, src, src_x, src_y,
> +		        width, height, dst, dst_x, dst_y,
> +		        pgtable_info.pgtable_buf,
> +		        NULL,
> +		        gen12_render_copy,
> +		        sizeof(gen12_render_copy));
> +
> +	gen12_aux_pgtable_cleanup(ibb, &pgtable_info);
> +}
> +
> +void gen12_render_clearfunc(struct intel_bb *ibb,
> +			    struct intel_buf *dst,
> +			    unsigned int dst_x, unsigned int dst_y,
> +			    unsigned int width, unsigned int height,
> +			    const float clear_color[4])
> +{
> +	struct aux_pgtable_info pgtable_info = { };
> +
> +	gen12_aux_pgtable_init(&pgtable_info, ibb, NULL, dst);
> +
> +	_gen9_render_op(ibb, NULL, 0, 0,
> +		        width, height, dst, dst_x, dst_y,
> +		        pgtable_info.pgtable_buf,
> +		        clear_color,
> +		        gen12_render_copy,
> +		        sizeof(gen12_render_copy));
>  
>  	gen12_aux_pgtable_cleanup(ibb, &pgtable_info);
>  }
> -- 
> 2.25.1
> 


More information about the igt-dev mailing list