[igt-dev] [PATCH i-g-t 1/2] lib/rendercopy: Enable render target Fast Clear for GEN12

Wed Jan 15 15:33:36 UTC 2020

On Wed, Jan 15, 2020 at 04:38:12PM +0200, Mika Kahola wrote:
> To test Clear Color plane content, we need to enable
> render target for fast clear.
> 
> Signed-off-by: Mika Kahola <mika.kahola at intel.com>
> ---
>  lib/gen12_render.h      |   9 ++
>  lib/igt_fb.c            |  13 +++
>  lib/intel_batchbuffer.c |  10 +++
>  lib/intel_batchbuffer.h |   1 +
>  lib/rendercopy.h        |   5 ++
>  lib/rendercopy_gen9.c   | 191 +++++++++++++++++++++++++++++++++++++++-
>  6 files changed, 228 insertions(+), 1 deletion(-)
>  create mode 100644 lib/gen12_render.h
> 
> diff --git a/lib/gen12_render.h b/lib/gen12_render.h
> new file mode 100644
> index 00000000..e70f4c25
> --- /dev/null
> +++ b/lib/gen12_render.h
> @@ -0,0 +1,9 @@
> +#ifndef GEN12_RENDER_H
> +#define GEN12_RENDER_H
> +
> +#include "gen9_render.h"
> +
> +#define GEN12_PS_FAST_CLEAR_ENABLE                     (1 << 8)
> +#define GEN12_PS_FAST_CLEAR_RESOLVE                    (2 << 6)
> +
> +#endif
> diff --git a/lib/igt_fb.c b/lib/igt_fb.c
> index c81b9de8..332f98d8 100644
> --- a/lib/igt_fb.c
> +++ b/lib/igt_fb.c
> @@ -480,6 +480,11 @@ static bool is_gen12_mc_ccs_modifier(uint64_t modifier)
>  	return modifier == LOCAL_I915_FORMAT_MOD_Y_TILED_GEN12_MC_CCS;
>  }
>  
> +static bool is_gen12_cc_ccs_modifier(uint64_t modifier)
> +{
> +	return modifier == LOCAL_I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS_CC;
> +}
> +
>  static bool is_gen12_ccs_modifier(uint64_t modifier)
>  {
>  	return is_gen12_mc_ccs_modifier(modifier) ||
> @@ -2091,6 +2096,12 @@ static bool use_vebox_copy(const struct igt_fb *src_fb,
>  	       igt_format_is_yuv(dst_fb->drm_format);
>  }
>  
> +static bool use_clear_color_copy(const struct igt_fb *src_fb,
> +				 const struct igt_fb *dst_fb)
> +{
> +	return is_gen12_cc_ccs_modifier(dst_fb->modifier);
> +}
> +
>  /**
>   * copy_with_engine:
>   * @blit: context for the copy operation
> @@ -2115,6 +2126,8 @@ static void copy_with_engine(struct fb_blit_upload *blit,
>  
>  	if (use_vebox_copy(src_fb, dst_fb))
>  		vebox_copy = igt_get_vebox_copyfunc(intel_get_drm_devid(blit->fd));
> +	else if (use_clear_color_copy(src_fb, dst_fb))
> +		render_copy = igt_get_render_cc_copyfunc(intel_get_drm_devid(blit->fd));

We'd need a separate clear function that only clears the FB with a given
color. This is what we need for an RC-CC test in kms_ccs.

For a render copy test, we need to add support to the existing render
copy function to read a fast-color-cleared FB. A test for this can then
be added to gem_render_copy.

>  	else
>  		render_copy = igt_get_render_copyfunc(intel_get_drm_devid(blit->fd));
>  
> diff --git a/lib/intel_batchbuffer.c b/lib/intel_batchbuffer.c
> index 3dc89024..227e854c 100644
> --- a/lib/intel_batchbuffer.c
> +++ b/lib/intel_batchbuffer.c
> @@ -851,6 +851,16 @@ igt_render_copyfunc_t igt_get_render_copyfunc(int devid)
>  	return copy;
>  }
>  
> +igt_render_copyfunc_t igt_get_render_cc_copyfunc(int devid)
> +{
> +	igt_render_copyfunc_t copy = NULL;
> +
> +	if (IS_GEN12(devid))
> +		copy = gen12_render_cc_copyfunc;
> +
> +	return copy;
> +}
> +
>  igt_vebox_copyfunc_t igt_get_vebox_copyfunc(int devid)
>  {
>  	igt_vebox_copyfunc_t copy = NULL;
> diff --git a/lib/intel_batchbuffer.h b/lib/intel_batchbuffer.h
> index fd7ef03f..d979c9f2 100644
> --- a/lib/intel_batchbuffer.h
> +++ b/lib/intel_batchbuffer.h
> @@ -320,6 +320,7 @@ typedef void (*igt_render_copyfunc_t)(struct intel_batchbuffer *batch,
>  				      const struct igt_buf *dst, unsigned dst_x, unsigned dst_y);
>  
>  igt_render_copyfunc_t igt_get_render_copyfunc(int devid);
> +igt_render_copyfunc_t igt_get_render_cc_copyfunc(int devid);
>  
>  
>  /**
> diff --git a/lib/rendercopy.h b/lib/rendercopy.h
> index e0577cac..17f5fa94 100644
> --- a/lib/rendercopy.h
> +++ b/lib/rendercopy.h
> @@ -23,6 +23,11 @@ static inline void emit_vertex_normalized(struct intel_batchbuffer *batch,
>  	OUT_BATCH(u.ui);
>  }
>  
> +void gen12_render_cc_copyfunc(struct intel_batchbuffer *batch,
> +			      drm_intel_context * context,
> +			      const struct igt_buf *src, unsigned int src_x, unsigned int src_y,
> +			      unsigned int width, unsigned int height,
> +			      const struct igt_buf *dst, unsigned int dst_x, unsigned int dst_y);
>  void gen12_render_copyfunc(struct intel_batchbuffer *batch,
>  			   drm_intel_context *context,
>  			   const struct igt_buf *src, unsigned src_x, unsigned src_y,
> diff --git a/lib/rendercopy_gen9.c b/lib/rendercopy_gen9.c
> index 835c8d80..af95d2ee 100644
> --- a/lib/rendercopy_gen9.c
> +++ b/lib/rendercopy_gen9.c
> @@ -20,7 +20,7 @@
>  #include "intel_batchbuffer.h"
>  #include "intel_io.h"
>  #include "rendercopy.h"
> -#include "gen9_render.h"
> +#include "gen12_render.h"
>  #include "intel_reg.h"
>  #include "igt_aux.h"
>  
> @@ -958,6 +958,54 @@ static void gen8_emit_primitive(struct intel_batchbuffer *batch, uint32_t offset
>  	OUT_BATCH(0);	/* index buffer offset, ignored */
>  }
>  
> +static void
> +gen12_emit_ps_cc(struct intel_batchbuffer *batch, uint32_t kernel)
> +{
> +	const int max_threads = 63;
> +
> +	OUT_BATCH(GEN6_3DSTATE_WM | (2 - 2));
> +	OUT_BATCH(/* XXX: I don't understand the BARYCENTRIC stuff, but it
> +		   * appears we need it to put our setup data in the place we
> +		   * expect (g6, see below) */
> +		  GEN8_3DSTATE_PS_PERSPECTIVE_PIXEL_BARYCENTRIC);
> +
> +	OUT_BATCH(GEN6_3DSTATE_CONSTANT_PS | (11-2));
> +	OUT_BATCH(0);
> +	OUT_BATCH(0);
> +	OUT_BATCH(0);
> +	OUT_BATCH(0);
> +	OUT_BATCH(0);
> +	OUT_BATCH(0);
> +	OUT_BATCH(0);
> +	OUT_BATCH(0);
> +	OUT_BATCH(0);
> +	OUT_BATCH(0);
> +
> +	OUT_BATCH(GEN7_3DSTATE_PS | (12-2));
> +	OUT_BATCH(kernel);

A shader kernel isn't used for a fast clear. It would be good to share
more code with gen12_emit_ps().

> +	OUT_BATCH(0); /* kernel hi */
> +	OUT_BATCH(1 << GEN6_3DSTATE_WM_SAMPLER_COUNT_SHIFT |
> +		  2 << GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT);
> +	OUT_BATCH(0); /* scratch space stuff */
> +	OUT_BATCH(0); /* scratch hi */
> +	OUT_BATCH((max_threads - 1) << GEN8_3DSTATE_PS_MAX_THREADS_SHIFT |
> +		  GEN6_3DSTATE_WM_16_DISPATCH_ENABLE);
> +	OUT_BATCH(6 << GEN6_3DSTATE_WM_DISPATCH_START_GRF_0_SHIFT);
> +	OUT_BATCH(0); // kernel 1
> +	OUT_BATCH(0); /* kernel 1 hi */
> +	OUT_BATCH(0); // kernel 2
> +	OUT_BATCH(0); /* kernel 2 hi */
> +	OUT_BATCH(GEN12_PS_FAST_CLEAR_ENABLE);
> +	OUT_BATCH(GEN12_PS_FAST_CLEAR_RESOLVE);
> +
> +	OUT_BATCH(GEN8_3DSTATE_PS_BLEND | (2 - 2));
> +	OUT_BATCH(GEN8_PS_BLEND_HAS_WRITEABLE_RT);
> +
> +	OUT_BATCH(GEN8_3DSTATE_PS_EXTRA | (2 - 2));
> +	OUT_BATCH(GEN8_PSX_PIXEL_SHADER_VALID | GEN8_PSX_ATTRIBUTE_ENABLE);
> +
> +}
> +
>  /* The general rule is if it's named gen6 it is directly copied from
>   * gen6_render_copyfunc.
>   *
> @@ -990,6 +1038,127 @@ static void gen8_emit_primitive(struct intel_batchbuffer *batch, uint32_t offset
>  
>  #define BATCH_STATE_SPLIT 2048
>  
> +static
> +void _gen12_render_cc_copyfunc(struct intel_batchbuffer *batch,
> +			       drm_intel_context *context,
> +			       const struct igt_buf *src, unsigned int src_x,
> +			       unsigned int src_y, unsigned int width, unsigned int height,
> +			       const struct igt_buf *dst, unsigned int dst_x,
> +			       unsigned int dst_y,
> +			       drm_intel_bo *aux_pgtable_bo,
> +			       const uint32_t ps_kernel[][4],
> +			       uint32_t ps_kernel_size)

It would be good to share more code with gen12_render_copyfunc(). Also,
the clear color must be passed in and programmed somewhere, maybe in the
clear color fields of the gen9_surface_state struct.

> +{
> +	uint32_t ps_sampler_state, ps_kernel_off, ps_binding_table;
> +	uint32_t scissor_state;
> +	uint32_t vertex_buffer;
> +	uint32_t batch_end;
> +	uint32_t aux_pgtable_state;
> +
> +	igt_assert(src->bpp == dst->bpp);
> +	intel_batchbuffer_flush_with_context(batch, context);
> +
> +	intel_batchbuffer_align(batch, 8);
> +
> +	batch->ptr = &batch->buffer[BATCH_STATE_SPLIT];
> +
> +	annotation_init(&aub_annotations);
> +
> +	ps_binding_table  = gen8_bind_surfaces(batch, src, dst);
> +	ps_sampler_state  = gen8_create_sampler(batch);
> +	ps_kernel_off = gen8_fill_ps(batch, ps_kernel, ps_kernel_size);
> +	vertex_buffer = gen7_fill_vertex_buffer_data(batch, src,
> +						     src_x, src_y,
> +						     dst_x, dst_y,
> +						     width, height);
> +	cc.cc_state = gen6_create_cc_state(batch);
> +	cc.blend_state = gen8_create_blend_state(batch);
> +	viewport.cc_state = gen6_create_cc_viewport(batch);
> +	viewport.sf_clip_state = gen7_create_sf_clip_viewport(batch);
> +	scissor_state = gen6_create_scissor_rect(batch);
> +
> +	aux_pgtable_state = gen12_create_aux_pgtable_state(batch,
> +							   aux_pgtable_bo);
> +
> +	/* TODO: there is other state which isn't set up */
> +
> +	assert(batch->ptr < &batch->buffer[4095]);
> +
> +	batch->ptr = batch->buffer;
> +
> +	/* Start emitting the commands. The order roughly follows the mesa blorp
> +	 * order */
> +	OUT_BATCH(G4X_PIPELINE_SELECT | PIPELINE_SELECT_3D |
> +				GEN9_PIPELINE_SELECTION_MASK);
> +
> +	gen12_emit_aux_pgtable_state(batch, aux_pgtable_state, true);
> +
> +	gen8_emit_sip(batch);
> +
> +	gen7_emit_push_constants(batch);
> +
> +	gen9_emit_state_base_address(batch);
> +
> +	OUT_BATCH(GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC);
> +	OUT_BATCH(viewport.cc_state);
> +	OUT_BATCH(GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP);
> +	OUT_BATCH(viewport.sf_clip_state);
> +
> +	gen7_emit_urb(batch);
> +
> +	gen8_emit_cc(batch);
> +
> +	gen8_emit_multisample(batch);
> +
> +	gen8_emit_null_state(batch);
> +
> +	OUT_BATCH(GEN7_3DSTATE_STREAMOUT | (5 - 2));
> +	OUT_BATCH(0);
> +	OUT_BATCH(0);
> +	OUT_BATCH(0);
> +	OUT_BATCH(0);
> +
> +	gen7_emit_clip(batch);
> +
> +	gen8_emit_sf(batch);
> +
> +	gen12_emit_ps_cc(batch, ps_kernel_off);
> +
> +	OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS);
> +	OUT_BATCH(ps_binding_table);
> +
> +	OUT_BATCH(GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS);
> +	OUT_BATCH(ps_sampler_state);
> +
> +	OUT_BATCH(GEN8_3DSTATE_SCISSOR_STATE_POINTERS);
> +	OUT_BATCH(scissor_state);
> +
> +	gen9_emit_depth(batch);
> +
> +	gen7_emit_clear(batch);
> +
> +	gen6_emit_drawing_rectangle(batch, dst);
> +
> +	gen7_emit_vertex_buffer(batch, vertex_buffer);
> +	gen6_emit_vertex_elements(batch);
> +
> +	gen8_emit_vf_topology(batch);
> +	gen8_emit_primitive(batch, vertex_buffer);
> +
> +	OUT_BATCH(MI_BATCH_BUFFER_END);
> +
> +	batch_end = intel_batchbuffer_align(batch, 8);
> +	assert(batch_end < BATCH_STATE_SPLIT);
> +	annotation_add_batch(&aub_annotations, batch_end);
> +
> +	dump_batch(batch);
> +
> +	annotation_flush(&aub_annotations, batch);
> +
> +	gen6_render_flush(batch, context, batch_end);
> +	intel_batchbuffer_reset(batch);
> +}
> +
>  static
>  void _gen9_render_copyfunc(struct intel_batchbuffer *batch,
>  			  drm_intel_context *context,
> @@ -1154,3 +1323,23 @@ void gen12_render_copyfunc(struct intel_batchbuffer *batch,
>  
>  	gen12_aux_pgtable_cleanup(&pgtable_info);
>  }
> +
> +void gen12_render_cc_copyfunc(struct intel_batchbuffer *batch,
> +			      drm_intel_context *context,
> +			      const struct igt_buf *src, unsigned int src_x, unsigned int src_y,
> +			      unsigned int width, unsigned int height,
> +			      const struct igt_buf *dst, unsigned int dst_x, unsigned int dst_y)
> +
> +{
> +	struct aux_pgtable_info pgtable_info = { };
> +
> +	gen12_aux_pgtable_init(&pgtable_info, batch->bufmgr, src, dst);
> +
> +	_gen12_render_cc_copyfunc(batch, context, src, src_x, src_y,
> +				  width, height, dst, dst_x, dst_y,
> +				  pgtable_info.pgtable_bo,
> +				  gen12_render_copy,
> +				  sizeof(gen12_render_copy));
> +
> +	gen12_aux_pgtable_cleanup(&pgtable_info);
> +}
> -- 
> 2.17.1
>