[Mesa-dev] [PATCH] radv: fixup IA_MULTI_VGT_PARAM handling.

Bas Nieuwenhuizen bas at basnieuwenhuizen.nl
Tue Feb 14 20:15:25 UTC 2017



On Tue, Feb 14, 2017, at 21:03, Dave Airlie wrote:
> From: Dave Airlie <airlied at redhat.com>
> 
> This ports the remains of the workarounds from radeonsi for
> the non-TESS cases. It should provide equivalent workarounds
> for Hawaii and Bonaire.
> 
> Signed-off-by: Dave Airlie <airlied at redhat.com>
> ---
>  src/amd/vulkan/radv_cmd_buffer.c | 23 +++++++-----
>  src/amd/vulkan/radv_pipeline.c   | 22 +++++++++++-
>  src/amd/vulkan/radv_private.h    | 12 +++++--
>  src/amd/vulkan/si_cmd_buffer.c   | 78
>  +++++++++++++++++++++++++++++-----------
>  4 files changed, 104 insertions(+), 31 deletions(-)
> 
> diff --git a/src/amd/vulkan/radv_cmd_buffer.c
> b/src/amd/vulkan/radv_cmd_buffer.c
> index 8f2e984..19d9ef8 100644
> --- a/src/amd/vulkan/radv_cmd_buffer.c
> +++ b/src/amd/vulkan/radv_cmd_buffer.c
> @@ -1267,7 +1267,7 @@ radv_flush_constants(struct radv_cmd_buffer
> *cmd_buffer,
>  }
>  
>  static void
> -radv_cmd_buffer_flush_state(struct radv_cmd_buffer *cmd_buffer)
> +radv_cmd_buffer_flush_state(struct radv_cmd_buffer *cmd_buffer, bool
> instanced_or_indirect_draw, uint32_t count)
>  {

Please rename count to vertex_count, or to something else that at least
says what it counts. With that fixed, this patch is

Reviewed-by: Bas Nieuwenhuizen <bas at basnieuwenhuizen.nl>

>  	struct radv_pipeline *pipeline = cmd_buffer->state.pipeline;
>  	struct radv_device *device = cmd_buffer->device;
> @@ -1332,6 +1332,15 @@ radv_cmd_buffer_flush_state(struct radv_cmd_buffer
> *cmd_buffer)
>  	if (cmd_buffer->state.dirty & (RADV_CMD_DIRTY_DYNAMIC_SCISSOR))
>  		radv_emit_scissor(cmd_buffer);
>  
> +       ia_multi_vgt_param = si_get_ia_multi_vgt_param(cmd_buffer,
> instanced_or_indirect_draw, count);
> +       if (cmd_buffer->state.last_ia_multi_vgt_param !=
> ia_multi_vgt_param) {
> +               if
> (cmd_buffer->device->physical_device->rad_info.chip_class >= CIK)
> +                       radeon_set_context_reg_idx(cmd_buffer->cs,
> R_028AA8_IA_MULTI_VGT_PARAM, 1, ia_multi_vgt_param);
> +               else
> +                       radeon_set_context_reg(cmd_buffer->cs,
> R_028AA8_IA_MULTI_VGT_PARAM, ia_multi_vgt_param);
> +               cmd_buffer->state.last_ia_multi_vgt_param =
> ia_multi_vgt_param;
> +       }
> +
>  	if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_PIPELINE) {
>  		uint32_t stages = 0;
>  
> @@ -1341,15 +1350,12 @@ radv_cmd_buffer_flush_state(struct
> radv_cmd_buffer *cmd_buffer)
>  				S_028B54_VS_EN(V_028B54_VS_STAGE_COPY_SHADER);
>  
>  		radeon_set_context_reg(cmd_buffer->cs, R_028B54_VGT_SHADER_STAGES_EN, stages);
> -               ia_multi_vgt_param =
> si_get_ia_multi_vgt_param(cmd_buffer);
>  
>  		if (cmd_buffer->device->physical_device->rad_info.chip_class >= CIK) {
> -                       radeon_set_context_reg_idx(cmd_buffer->cs,
> R_028AA8_IA_MULTI_VGT_PARAM, 1, ia_multi_vgt_param);
>  			radeon_set_context_reg_idx(cmd_buffer->cs, R_028B58_VGT_LS_HS_CONFIG, 2, ls_hs_config);
>  			radeon_set_uconfig_reg_idx(cmd_buffer->cs, R_030908_VGT_PRIMITIVE_TYPE, 1, cmd_buffer->state.pipeline->graphics.prim);
>  		} else {
>  			radeon_set_config_reg(cmd_buffer->cs, R_008958_VGT_PRIMITIVE_TYPE, cmd_buffer->state.pipeline->graphics.prim);
> -                       radeon_set_context_reg(cmd_buffer->cs,
> R_028AA8_IA_MULTI_VGT_PARAM, ia_multi_vgt_param);
>  			radeon_set_context_reg(cmd_buffer->cs, R_028B58_VGT_LS_HS_CONFIG, ls_hs_config);
>  		}
>  		radeon_set_context_reg(cmd_buffer->cs, R_028A6C_VGT_GS_OUT_PRIM_TYPE, cmd_buffer->state.pipeline->graphics.gs_out);
> @@ -2188,7 +2194,8 @@ void radv_CmdDraw(
>  	uint32_t                                    firstInstance)
>  {
>  	RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
> -       radv_cmd_buffer_flush_state(cmd_buffer);
> +
> +       radv_cmd_buffer_flush_state(cmd_buffer, (instanceCount > 1),
> vertexCount);
>  
>  	MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 10);
>  
> @@ -2239,7 +2246,7 @@ void radv_CmdDrawIndexed(
>  	uint32_t index_max_size = (cmd_buffer->state.index_buffer->size - cmd_buffer->state.index_offset) / index_size;
>  	uint64_t index_va;
>  
> -       radv_cmd_buffer_flush_state(cmd_buffer);
> +       radv_cmd_buffer_flush_state(cmd_buffer, (instanceCount > 1),
> indexCount);
>  	radv_emit_primitive_reset_index(cmd_buffer);
>  
>  	MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 15);
> @@ -2337,7 +2344,7 @@ radv_cmd_draw_indirect_count(VkCommandBuffer       
>                      command
>                               uint32_t                                   
>                               stride)
>  {
>  	RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
> -       radv_cmd_buffer_flush_state(cmd_buffer);
> +       radv_cmd_buffer_flush_state(cmd_buffer, true, 0);
>  
>  	MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws,
>  							   cmd_buffer->cs, 14);
> @@ -2362,7 +2369,7 @@ radv_cmd_draw_indexed_indirect_count(
>  	int index_size = cmd_buffer->state.index_type ? 4 : 2;
>  	uint32_t index_max_size = (cmd_buffer->state.index_buffer->size - cmd_buffer->state.index_offset) / index_size;
>  	uint64_t index_va;
> -       radv_cmd_buffer_flush_state(cmd_buffer);
> +       radv_cmd_buffer_flush_state(cmd_buffer, true, 0);
>  	radv_emit_primitive_reset_index(cmd_buffer);
>  
>  	index_va = cmd_buffer->device->ws->buffer_get_va(cmd_buffer->state.index_buffer->bo);
> diff --git a/src/amd/vulkan/radv_pipeline.c
> b/src/amd/vulkan/radv_pipeline.c
> index 53f06ac..cbd846a 100644
> --- a/src/amd/vulkan/radv_pipeline.c
> +++ b/src/amd/vulkan/radv_pipeline.c
> @@ -1483,6 +1483,24 @@ calculate_gs_ring_sizes(struct radv_pipeline
> *pipeline)
>  	pipeline->graphics.gsvs_ring_size = MIN2(gsvs_ring_size, max_size);
>  }
>  
> +static const struct radv_prim_vertex_count prim_size_table[] = {
> +       [V_008958_DI_PT_NONE] = {0, 0},
> +       [V_008958_DI_PT_POINTLIST] = {1, 1},
> +       [V_008958_DI_PT_LINELIST] = {2, 2},
> +       [V_008958_DI_PT_LINESTRIP] = {2, 1},
> +       [V_008958_DI_PT_TRILIST] = {3, 3},
> +       [V_008958_DI_PT_TRIFAN] = {3, 1},
> +       [V_008958_DI_PT_TRISTRIP] = {3, 1},
> +       [V_008958_DI_PT_LINELIST_ADJ] = {4, 4},
> +       [V_008958_DI_PT_LINESTRIP_ADJ] = {4, 1},
> +       [V_008958_DI_PT_TRILIST_ADJ] = {6, 6},
> +       [V_008958_DI_PT_TRISTRIP_ADJ] = {6, 2},
> +       [V_008958_DI_PT_RECTLIST] = {3, 3},
> +       [V_008958_DI_PT_LINELOOP] = {2, 1},
> +       [V_008958_DI_PT_POLYGON] = {3, 1},
> +       [V_008958_DI_PT_2D_TRI_STRIP] = {0, 0},
> +};
> +
>  VkResult
>  radv_pipeline_init(struct radv_pipeline *pipeline,
>  		   struct radv_device *device,
> @@ -1581,7 +1599,9 @@ radv_pipeline_init(struct radv_pipeline *pipeline,
>  		pipeline->graphics.gs_out = V_028A6C_OUTPRIM_TYPE_TRISTRIP;
>  	}
>  	pipeline->graphics.prim_restart_enable = !!pCreateInfo->pInputAssemblyState->primitiveRestartEnable;
> -
> +       /* prim vertex count will need TESS changes */
> +       pipeline->graphics.prim_vertex_count =
> prim_size_table[pipeline->graphics.prim];
> +       
>  	const VkPipelineVertexInputStateCreateInfo *vi_info =
>  		pCreateInfo->pVertexInputState;
>  	for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) {
> diff --git a/src/amd/vulkan/radv_private.h
> b/src/amd/vulkan/radv_private.h
> index dcccd94..7b1d8fb 100644
> --- a/src/amd/vulkan/radv_private.h
> +++ b/src/amd/vulkan/radv_private.h
> @@ -686,8 +686,8 @@ struct radv_attachment_state {
>  
>  struct radv_cmd_state {
>  	uint32_t                                      vb_dirty;
> -       bool                                         
> vertex_descriptors_dirty;
>  	radv_cmd_dirty_mask_t                         dirty;
> +       bool                                         
> vertex_descriptors_dirty;
>  
>  	struct radv_pipeline *                        pipeline;
>  	struct radv_pipeline *                        emitted_pipeline;
> @@ -710,6 +710,7 @@ struct radv_cmd_state {
>  	float					     offset_scale;
>  	uint32_t                                      descriptors_dirty;
>  	uint32_t                                      trace_id;
> +       uint32_t                                     
> last_ia_multi_vgt_param;
>  };
>  
>  struct radv_cmd_pool {
> @@ -771,7 +772,8 @@ void si_write_viewport(struct radeon_winsys_cs *cs,
> int first_vp,
>  		       int count, const VkViewport *viewports);
>  void si_write_scissors(struct radeon_winsys_cs *cs, int first,
>  		       int count, const VkRect2D *scissors);
> -uint32_t si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer);
> +uint32_t si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer,
> +                                  bool instanced_or_indirect_draw,
> uint32_t draw_vertex_count);
>  void si_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer);
>  void si_cp_dma_buffer_copy(struct radv_cmd_buffer *cmd_buffer,
>  			   uint64_t src_va, uint64_t dest_va,
> @@ -925,6 +927,11 @@ struct radv_multisample_state {
>  	unsigned num_samples;
>  };
>  
> +struct radv_prim_vertex_count {
> +       uint8_t min;
> +       uint8_t incr;
> +};
> +
>  struct radv_pipeline {
>  	struct radv_device *                          device;
>  	uint32_t                                     dynamic_state_mask;
> @@ -956,6 +963,7 @@ struct radv_pipeline {
>  			bool prim_restart_enable;
>  			unsigned esgs_ring_size;
>  			unsigned gsvs_ring_size;
> +                       struct radv_prim_vertex_count prim_vertex_count;
>  		} graphics;
>  	};
>  
> diff --git a/src/amd/vulkan/si_cmd_buffer.c
> b/src/amd/vulkan/si_cmd_buffer.c
> index 7d81c2b..e20e3bd 100644
> --- a/src/amd/vulkan/si_cmd_buffer.c
> +++ b/src/amd/vulkan/si_cmd_buffer.c
> @@ -565,8 +565,25 @@ si_write_scissors(struct radeon_winsys_cs *cs, int
> first,
>  	}
>  }
>  
> +static inline unsigned
> +radv_prims_for_vertices(struct radv_prim_vertex_count *info, unsigned
> num)
> +{
> +       if (num == 0)
> +               return 0;
> +
> +       if (info->incr == 0)
> +               return 0;
> +
> +       if (num < info->min)
> +               return 0;
> +
> +       return 1 + ((num - info->min) / info->incr);
> +}
> +
>  uint32_t
> -si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer)
> +si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer,
> +                         bool instanced_or_indirect_draw,
> +                         uint32_t draw_vertex_count)
>  {
>  	enum chip_class chip_class = cmd_buffer->device->physical_device->rad_info.chip_class;
>  	enum radeon_family family = cmd_buffer->device->physical_device->rad_info.family;
> @@ -580,10 +597,14 @@ si_get_ia_multi_vgt_param(struct radv_cmd_buffer
> *cmd_buffer)
>  	bool ia_switch_on_eoi = false;
>  	bool partial_vs_wave = false;
>  	bool partial_es_wave = false;
> +       uint32_t num_prims =
> radv_prims_for_vertices(&cmd_buffer->state.pipeline->graphics.prim_vertex_count,
> draw_vertex_count);
> +       bool multi_instances_smaller_than_primgroup;
>  
>  	if (radv_pipeline_has_gs(cmd_buffer->state.pipeline))
>  		primgroup_size = 64;  /* recommended with a GS */
>  
> +       multi_instances_smaller_than_primgroup =
> (instanced_or_indirect_draw ||
> +                                                 num_prims <
> primgroup_size);
>  	/* TODO TES */
>  
>  	/* TODO linestipple */
> @@ -596,12 +617,30 @@ si_get_ia_multi_vgt_param(struct radv_cmd_buffer
> *cmd_buffer)
>  		    prim == V_008958_DI_PT_POLYGON ||
>  		    prim == V_008958_DI_PT_LINELOOP ||
>  		    prim == V_008958_DI_PT_TRIFAN ||
> -                   prim == V_008958_DI_PT_TRISTRIP_ADJ)
> -                       //          info->primitive_restart ||
> -                       //          info->count_from_stream_output)
> +                   prim == V_008958_DI_PT_TRISTRIP_ADJ ||
> +                  
> (cmd_buffer->state.pipeline->graphics.prim_restart_enable &&
> +                    (family < CHIP_POLARIS10 ||
> +                     (prim != V_008958_DI_PT_POINTLIST &&
> +                     prim != V_008958_DI_PT_LINESTRIP &&
> +                      prim != V_008958_DI_PT_TRISTRIP))))
>  			wd_switch_on_eop = true;
>  
> -               /* TODO HAWAII */
> +               /* Hawaii hangs if instancing is enabled and
> WD_SWITCH_ON_EOP is 0.
> +                * We don't know that for indirect drawing, so treat it
> as
> +                * always problematic. */
> +               if (family == CHIP_HAWAII &&
> +                   instanced_or_indirect_draw)
> +                       wd_switch_on_eop = true;
> +
> +               /* Performance recommendation for 4 SE Gfx7-8 parts if
> +                * instances are smaller than a primgroup.
> +                * Assume indirect draws always use small instances.
> +                * This is needed for good VS wave utilization.
> +                */
> +               if (chip_class <= VI &&
> +                   info->max_se == 4 &&
> +                   multi_instances_smaller_than_primgroup)
> +                       wd_switch_on_eop = true;
>  
>  		/* Required on CIK and later. */
>  		if (info->max_se > 2 && !wd_switch_on_eop)
> @@ -614,12 +653,11 @@ si_get_ia_multi_vgt_param(struct radv_cmd_buffer
> *cmd_buffer)
>  		      (radv_pipeline_has_gs(cmd_buffer->state.pipeline) || max_primgroup_in_wave != 2))))
>  			partial_vs_wave = true;
>  
> -#if 0
>  		/* Instancing bug on Bonaire. */
>  		if (family == CHIP_BONAIRE && ia_switch_on_eoi &&
> -                   (info->indirect || info->instance_count > 1))
> +                   instanced_or_indirect_draw)
>  			partial_vs_wave = true;
> -#endif
> +
>  		/* If the WD switch is false, the IA switch must be false too. */
>  		assert(wd_switch_on_eop || !ia_switch_on_eop);
>  	}
> @@ -627,19 +665,19 @@ si_get_ia_multi_vgt_param(struct radv_cmd_buffer
> *cmd_buffer)
>  	if (ia_switch_on_eoi)
>  		partial_es_wave = true;
>  
> -       /* GS requirement. */
> -       if (SI_GS_PER_ES / primgroup_size >=
> cmd_buffer->device->gs_table_depth - 3)
> -               partial_es_wave = true;
> +       if (radv_pipeline_has_gs(cmd_buffer->state.pipeline)) {
> +               /* GS requirement. */
> +               if (SI_GS_PER_ES / primgroup_size >=
> cmd_buffer->device->gs_table_depth - 3)
> +                       partial_es_wave = true;
> +
> +               /* Hw bug with single-primitive instances and
> SWITCH_ON_EOI
> +                * on multi-SE chips. */
> +               if (info->max_se >= 2 && ia_switch_on_eoi &&
> +                   (instanced_or_indirect_draw &&
> +                    num_prims <= 1))
> +                       cmd_buffer->state.flush_bits |=
> RADV_CMD_FLAG_VGT_FLUSH;
> +       }
>  
> -       /* Hw bug with single-primitive instances and SWITCH_ON_EOI
> -        * on multi-SE chips. */
> -#if 0
> -       if (info->max_se >= 2 && ia_switch_on_eoi &&
> -           (info->indirect ||
> -            (info->instance_count > 1 &&
> -             si_num_prims_for_vertices(info) <= 1)))
> -               sctx->b.flags |= SI_CONTEXT_VGT_FLUSH;
> -#endif
>  	return S_028AA8_SWITCH_ON_EOP(ia_switch_on_eop) |
>  		S_028AA8_SWITCH_ON_EOI(ia_switch_on_eoi) |
>  		S_028AA8_PARTIAL_VS_WAVE_ON(partial_vs_wave) |
> -- 
> 2.7.4
> 
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev


More information about the mesa-dev mailing list