[Mesa-dev] [PATCH 01/13] radeonsi: remove 8 bytes from si_shader_key by flattening opt.hw_vs
Nicolai Hähnle
nhaehnle at gmail.com
Mon Jun 12 09:43:58 UTC 2017
Eh. It makes sense, but I really, really wish there was some attribute
with which we could make the compiler automatically do this kind of packing
for us. Anyway... apart from a minor nitpick on patch 7, the series is
Reviewed-by: Nicolai Hähnle <nicolai.haehnle at amd.com>
On 10.06.2017 18:39, Marek Olšák wrote:
> From: Marek Olšák <marek.olsak at amd.com>
>
> ---
> src/gallium/drivers/radeonsi/si_shader.c | 10 +++++-----
> src/gallium/drivers/radeonsi/si_shader.h | 7 +++----
> src/gallium/drivers/radeonsi/si_state.c | 2 +-
> src/gallium/drivers/radeonsi/si_state_shaders.c | 12 ++++++------
> 4 files changed, 15 insertions(+), 16 deletions(-)
>
> diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
> index 2c92269..a6b7e5e 100644
> --- a/src/gallium/drivers/radeonsi/si_shader.c
> +++ b/src/gallium/drivers/radeonsi/si_shader.c
> @@ -2282,21 +2282,21 @@ static void si_llvm_export_vs(struct lp_build_tgsi_context *bld_base,
> case TGSI_SEMANTIC_PSIZE:
> case TGSI_SEMANTIC_CLIPVERTEX:
> case TGSI_SEMANTIC_EDGEFLAG:
> break;
> case TGSI_SEMANTIC_GENERIC:
> /* don't process indices the function can't handle */
> if (semantic_index >= SI_MAX_IO_GENERIC)
> break;
> /* fall through */
> default:
> - if (shader->key.opt.hw_vs.kill_outputs &
> + if (shader->key.opt.kill_outputs &
> (1ull << si_shader_io_get_unique_index(semantic_name, semantic_index)))
> export_param = false;
> }
>
> if (outputs[i].vertex_stream[0] != 0 &&
> outputs[i].vertex_stream[1] != 0 &&
> outputs[i].vertex_stream[2] != 0 &&
> outputs[i].vertex_stream[3] != 0)
> export_param = false;
>
> @@ -2314,28 +2314,28 @@ handle_semantic:
> semantic_name = TGSI_SEMANTIC_GENERIC;
> goto handle_semantic;
> case TGSI_SEMANTIC_VIEWPORT_INDEX:
> viewport_index_value = outputs[i].values[0];
> semantic_name = TGSI_SEMANTIC_GENERIC;
> goto handle_semantic;
> case TGSI_SEMANTIC_POSITION:
> target = V_008DFC_SQ_EXP_POS;
> break;
> case TGSI_SEMANTIC_CLIPDIST:
> - if (shader->key.opt.hw_vs.clip_disable) {
> + if (shader->key.opt.clip_disable) {
> semantic_name = TGSI_SEMANTIC_GENERIC;
> goto handle_semantic;
> }
> target = V_008DFC_SQ_EXP_POS + 2 + semantic_index;
> break;
> case TGSI_SEMANTIC_CLIPVERTEX:
> - if (shader->key.opt.hw_vs.clip_disable)
> + if (shader->key.opt.clip_disable)
> continue;
> si_llvm_emit_clipvertex(bld_base, pos_args, outputs[i].values);
> continue;
> case TGSI_SEMANTIC_COLOR:
> case TGSI_SEMANTIC_BCOLOR:
> case TGSI_SEMANTIC_PRIMID:
> case TGSI_SEMANTIC_FOG:
> case TGSI_SEMANTIC_TEXCOORD:
> case TGSI_SEMANTIC_GENERIC:
> if (!export_param)
> @@ -5328,22 +5328,22 @@ static void si_dump_shader_key(unsigned processor, const struct si_shader *shade
> break;
>
> default:
> assert(0);
> }
>
> if ((processor == PIPE_SHADER_GEOMETRY ||
> processor == PIPE_SHADER_TESS_EVAL ||
> processor == PIPE_SHADER_VERTEX) &&
> !key->as_es && !key->as_ls) {
> - fprintf(f, " opt.hw_vs.kill_outputs = 0x%"PRIx64"\n", key->opt.hw_vs.kill_outputs);
> - fprintf(f, " opt.hw_vs.clip_disable = %u\n", key->opt.hw_vs.clip_disable);
> + fprintf(f, " opt.kill_outputs = 0x%"PRIx64"\n", key->opt.kill_outputs);
> + fprintf(f, " opt.clip_disable = %u\n", key->opt.clip_disable);
> }
> }
>
> static void si_init_shader_ctx(struct si_shader_context *ctx,
> struct si_screen *sscreen,
> LLVMTargetMachineRef tm)
> {
> struct lp_build_tgsi_context *bld_base;
>
> si_llvm_context_init(ctx, sscreen, tm);
> diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
> index 7c04b7e..de520a2 100644
> --- a/src/gallium/drivers/radeonsi/si_shader.h
> +++ b/src/gallium/drivers/radeonsi/si_shader.h
> @@ -493,24 +493,23 @@ struct si_shader_key {
>
> union {
> uint64_t ff_tcs_inputs_to_copy; /* for fixed-func TCS */
> /* When PS needs PrimID and GS is disabled. */
> unsigned vs_export_prim_id:1;
> } u;
> } mono;
>
> /* Optimization flags for asynchronous compilation only. */
> struct {
> - struct {
> - uint64_t kill_outputs; /* "get_unique_index" bits */
> - unsigned clip_disable:1;
> - } hw_vs; /* HW VS (it can be VS, TES, GS) */
> + /* For HW VS (it can be VS, TES, GS) */
> + uint64_t kill_outputs; /* "get_unique_index" bits */
> + unsigned clip_disable:1;
>
> /* For shaders where monolithic variants have better code.
> *
> * This is a flag that has no effect on code generation,
> * but forces monolithic shaders to be used as soon as
> * possible, because it's in the "opt" group.
> */
> unsigned prefer_mono:1;
> } opt;
> };
> diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
> index a8255f2..27a88a8 100644
> --- a/src/gallium/drivers/radeonsi/si_state.c
> +++ b/src/gallium/drivers/radeonsi/si_state.c
> @@ -688,21 +688,21 @@ static void si_emit_clip_regs(struct si_context *sctx, struct r600_atom *atom)
> struct si_shader_selector *vs_sel = vs->selector;
> struct tgsi_shader_info *info = &vs_sel->info;
> struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
> unsigned window_space =
> info->properties[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION];
> unsigned clipdist_mask = vs_sel->clipdist_mask;
> unsigned ucp_mask = clipdist_mask ? 0 : rs->clip_plane_enable & SIX_BITS;
> unsigned culldist_mask = vs_sel->culldist_mask;
> unsigned total_mask;
>
> - if (vs->key.opt.hw_vs.clip_disable) {
> + if (vs->key.opt.clip_disable) {
> assert(!info->culldist_writemask);
> clipdist_mask = 0;
> culldist_mask = 0;
> }
> total_mask = clipdist_mask | culldist_mask;
>
> /* Clip distances on points have no effect, so need to be implemented
> * as cull distances. This applies for the clipvertex case as well.
> *
> * Setting this for primitives other than points should have no adverse
> diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
> index 677a6de..07e6a42 100644
> --- a/src/gallium/drivers/radeonsi/si_state_shaders.c
> +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
> @@ -1196,21 +1196,21 @@ static void si_shader_selector_key_vs(struct si_context *sctx,
>
> memcpy(key->mono.vs_fix_fetch, sctx->vertex_elements->fix_fetch, count);
> }
>
> static void si_shader_selector_key_hw_vs(struct si_context *sctx,
> struct si_shader_selector *vs,
> struct si_shader_key *key)
> {
> struct si_shader_selector *ps = sctx->ps_shader.cso;
>
> - key->opt.hw_vs.clip_disable =
> + key->opt.clip_disable =
> sctx->queued.named.rasterizer->clip_plane_enable == 0 &&
> (vs->info.clipdist_writemask ||
> vs->info.writes_clipvertex) &&
> !vs->info.culldist_writemask;
>
> /* Find out if PS is disabled. */
> bool ps_disabled = true;
> if (ps) {
> bool ps_modifies_zs = ps->info.uses_kill ||
> ps->info.writes_z ||
> @@ -1236,21 +1236,21 @@ static void si_shader_selector_key_hw_vs(struct si_context *sctx,
> /* ignore POSITION, PSIZE */
> outputs_written &= ~((1ull << si_shader_io_get_unique_index(TGSI_SEMANTIC_POSITION, 0) |
> (1ull << si_shader_io_get_unique_index(TGSI_SEMANTIC_PSIZE, 0))));
>
> if (!ps_disabled) {
> inputs_read = ps->inputs_read;
> }
>
> uint64_t linked = outputs_written & inputs_read;
>
> - key->opt.hw_vs.kill_outputs = ~linked & outputs_written;
> + key->opt.kill_outputs = ~linked & outputs_written;
> }
>
> /* Compute the key for the hw shader variant */
> static inline void si_shader_selector_key(struct pipe_context *ctx,
> struct si_shader_selector *sel,
> struct si_shader_key *key)
> {
> struct si_context *sctx = (struct si_context *)ctx;
>
> memset(key, 0, sizeof(*key));
> @@ -2188,22 +2188,22 @@ static void si_update_clip_regs(struct si_context *sctx,
> {
> if (next_hw_vs &&
> (!old_hw_vs ||
> old_hw_vs->info.properties[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION] !=
> next_hw_vs->info.properties[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION] ||
> old_hw_vs->pa_cl_vs_out_cntl != next_hw_vs->pa_cl_vs_out_cntl ||
> old_hw_vs->clipdist_mask != next_hw_vs->clipdist_mask ||
> old_hw_vs->culldist_mask != next_hw_vs->culldist_mask ||
> !old_hw_vs_variant ||
> !next_hw_vs_variant ||
> - old_hw_vs_variant->key.opt.hw_vs.clip_disable !=
> - next_hw_vs_variant->key.opt.hw_vs.clip_disable))
> + old_hw_vs_variant->key.opt.clip_disable !=
> + next_hw_vs_variant->key.opt.clip_disable))
> si_mark_atom_dirty(sctx, &sctx->clip_regs);
> }
>
> static void si_bind_vs_shader(struct pipe_context *ctx, void *state)
> {
> struct si_context *sctx = (struct si_context *)ctx;
> struct si_shader_selector *old_hw_vs = si_get_vs(sctx)->cso;
> struct si_shader *old_hw_vs_variant = si_get_vs_state(sctx);
> struct si_shader_selector *sel = state;
>
> @@ -3087,21 +3087,21 @@ static void si_update_vgt_shader_config(struct si_context *sctx)
> }
> si_pm4_bind_state(sctx, vgt_shader_config, *pm4);
> }
>
> bool si_update_shaders(struct si_context *sctx)
> {
> struct pipe_context *ctx = (struct pipe_context*)sctx;
> struct si_compiler_ctx_state compiler_state;
> struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
> struct si_shader *old_vs = si_get_vs_state(sctx);
> - bool old_clip_disable = old_vs ? old_vs->key.opt.hw_vs.clip_disable : false;
> + bool old_clip_disable = old_vs ? old_vs->key.opt.clip_disable : false;
> struct si_shader *old_ps = sctx->ps_shader.current;
> unsigned old_spi_shader_col_format =
> old_ps ? old_ps->key.part.ps.epilog.spi_shader_col_format : 0;
> int r;
>
> compiler_state.tm = sctx->tm;
> compiler_state.debug = sctx->b.debug;
> compiler_state.is_debug_context = sctx->is_debug;
>
> /* Update stages before GS. */
> @@ -3192,21 +3192,21 @@ bool si_update_shaders(struct si_context *sctx)
> if (!si_update_gs_ring_buffers(sctx))
> return false;
> } else {
> si_pm4_bind_state(sctx, gs, NULL);
> if (sctx->b.chip_class <= VI)
> si_pm4_bind_state(sctx, es, NULL);
> }
>
> si_update_vgt_shader_config(sctx);
>
> - if (old_clip_disable != si_get_vs_state(sctx)->key.opt.hw_vs.clip_disable)
> + if (old_clip_disable != si_get_vs_state(sctx)->key.opt.clip_disable)
> si_mark_atom_dirty(sctx, &sctx->clip_regs);
>
> if (sctx->ps_shader.cso) {
> unsigned db_shader_control;
>
> r = si_shader_select(ctx, &sctx->ps_shader, &compiler_state);
> if (r)
> return false;
> si_pm4_bind_state(sctx, ps, sctx->ps_shader.current->pm4);
>
>
--
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.
More information about the mesa-dev
mailing list