Mesa (main): radeonsi: gather pipe_stream_output_info from NIR intrinsics
GitLab Mirror
gitlab-mirror at kemper.freedesktop.org
Fri Apr 22 22:47:27 UTC 2022
Module: Mesa
Branch: main
Commit: b57a163b7da52c92eac07147f6e0a61ee27a6da1
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=b57a163b7da52c92eac07147f6e0a61ee27a6da1
Author: Marek Olšák <marek.olsak at amd.com>
Date: Sun Dec 19 20:10:03 2021 -0500
radeonsi: gather pipe_stream_output_info from NIR intrinsics
This stops using pipe_stream_output_info from create_*s_state context functions
because NIR contains everything and can do more advanced shader linking
this way.
Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer at amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14414>
---
src/gallium/drivers/radeonsi/gfx10_shader_ngg.c | 4 +--
src/gallium/drivers/radeonsi/si_pipe.h | 2 +-
src/gallium/drivers/radeonsi/si_shader.c | 9 +++--
src/gallium/drivers/radeonsi/si_shader.h | 4 +--
src/gallium/drivers/radeonsi/si_shader_info.c | 11 ++++++
src/gallium/drivers/radeonsi/si_shader_llvm_gs.c | 3 +-
src/gallium/drivers/radeonsi/si_state_shaders.cpp | 41 +++++++++--------------
src/gallium/drivers/radeonsi/si_state_streamout.c | 2 +-
8 files changed, 41 insertions(+), 35 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c
index 02545073f64..6e2e5cff573 100644
--- a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c
+++ b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c
@@ -606,7 +606,7 @@ static unsigned ngg_nogs_vertex_size(struct si_shader *shader)
/* The edgeflag is always stored in the last element that's also
* used for padding to reduce LDS bank conflicts. */
- if (shader->selector->so.num_outputs)
+ if (shader->selector->info.enabled_streamout_buffer_mask)
lds_vertex_size = 4 * shader->selector->info.num_outputs + 1;
if (gfx10_ngg_writes_user_edgeflags(shader))
lds_vertex_size = MAX2(lds_vertex_size, 1);
@@ -2169,7 +2169,7 @@ unsigned gfx10_ngg_get_scratch_dw_size(struct si_shader *shader)
{
const struct si_shader_selector *sel = shader->selector;
- if (sel->info.stage == MESA_SHADER_GEOMETRY && sel->so.num_outputs)
+ if (sel->info.stage == MESA_SHADER_GEOMETRY && sel->info.enabled_streamout_buffer_mask)
return 44;
return 8;
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 38cdc43c75e..50d8a4cc999 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -814,7 +814,7 @@ struct si_streamout {
/* External state which comes from the vertex shader,
* it must be set explicitly when binding a shader. */
- uint16_t *stride_in_dw;
+ uint8_t *stride_in_dw;
unsigned enabled_stream_buffers_mask; /* stream0 buffers0-3 in 4 LSB */
/* The state of VGT_STRMOUT_BUFFER_(CONFIG|EN). */
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 4a2960bf01b..4e0784880ed 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -27,6 +27,7 @@
#include "nir.h"
#include "nir_builder.h"
#include "nir_serialize.h"
+#include "nir/nir_helpers.h"
#include "si_pipe.h"
#include "si_shader_internal.h"
#include "sid.h"
@@ -1587,7 +1588,9 @@ bool si_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *compi
bool free_nir;
struct nir_shader *nir = si_get_nir_shader(sel, &shader->key, &free_nir);
- struct pipe_stream_output_info so = sel->so;
+ struct pipe_stream_output_info so = {};
+ if (sel->info.enabled_streamout_buffer_mask)
+ nir_gather_stream_output_info(nir, &so);
/* Dump NIR before doing NIR->LLVM conversion in case the
* conversion fails. */
@@ -1616,7 +1619,7 @@ bool si_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *compi
/* The GS copy shader is compiled next. */
if (sel->info.stage == MESA_SHADER_GEOMETRY && !shader->key.ge.as_ngg) {
- shader->gs_copy_shader = si_generate_gs_copy_shader(sscreen, compiler, sel, debug);
+ shader->gs_copy_shader = si_generate_gs_copy_shader(sscreen, compiler, sel, &so, debug);
if (!shader->gs_copy_shader) {
fprintf(stderr, "radeonsi: can't create GS copy shader\n");
return false;
@@ -2312,7 +2315,7 @@ bool si_create_shader_variant(struct si_screen *sscreen, struct ac_llvm_compiler
shader->uses_vs_state_outprim = sscreen->use_ngg &&
/* Only used by streamout in vertex shaders. */
sel->info.stage == MESA_SHADER_VERTEX &&
- sel->so.num_outputs;
+ sel->info.enabled_streamout_buffer_mask;
if (sel->info.stage == MESA_SHADER_VERTEX) {
shader->uses_base_instance = sel->info.uses_base_instance ||
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index 577e822bc0c..9e193d4b0c8 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -367,6 +367,7 @@ struct si_shader_info {
int constbuf0_num_slots;
ubyte num_stream_output_components[4];
+ uint16_t enabled_streamout_buffer_mask;
uint num_memory_stores;
@@ -459,7 +460,6 @@ struct si_shader_selector {
void *nir_binary;
unsigned nir_size;
- struct pipe_stream_output_info so;
struct si_shader_info info;
enum pipe_shader_type pipe_shader_type;
@@ -486,7 +486,6 @@ struct si_shader_selector {
uint16_t gsvs_vertex_size;
ubyte gs_input_verts_per_prim;
unsigned max_gsvs_emit_size;
- uint16_t enabled_streamout_buffer_mask;
bool tess_turns_off_ngg;
/* PS parameters. */
@@ -959,6 +958,7 @@ void si_nir_scan_shader(const struct nir_shader *nir, struct si_shader_info *inf
struct si_shader *si_generate_gs_copy_shader(struct si_screen *sscreen,
struct ac_llvm_compiler *compiler,
struct si_shader_selector *gs_selector,
+ const struct pipe_stream_output_info *so,
struct util_debug_callback *debug);
/* si_shader_nir.c */
diff --git a/src/gallium/drivers/radeonsi/si_shader_info.c b/src/gallium/drivers/radeonsi/si_shader_info.c
index 26c309b2dca..b9a5c9a8d46 100644
--- a/src/gallium/drivers/radeonsi/si_shader_info.c
+++ b/src/gallium/drivers/radeonsi/si_shader_info.c
@@ -325,6 +325,7 @@ static void scan_io_usage(struct si_shader_info *info, nir_intrinsic_instr *intr
(nir_intrinsic_component(intr) * 2);
unsigned new_mask = mask & ~info->output_usagemask[loc];
+ /* Iterate over all components. */
for (unsigned i = 0; i < 4; i++) {
unsigned stream = (gs_streams >> (i * 2)) & 0x3;
@@ -332,6 +333,16 @@ static void scan_io_usage(struct si_shader_info *info, nir_intrinsic_instr *intr
info->output_streams[loc] |= stream << (i * 2);
info->num_stream_output_components[stream]++;
}
+
+ if (nir_intrinsic_has_io_xfb(intr)) {
+ nir_io_xfb xfb = i < 2 ? nir_intrinsic_io_xfb(intr) :
+ nir_intrinsic_io_xfb2(intr);
+ if (xfb.out[i % 2].num_components) {
+ unsigned stream = (gs_streams >> (i * 2)) & 0x3;
+ info->enabled_streamout_buffer_mask |=
+ BITFIELD_BIT(stream * 4 + xfb.out[i % 2].buffer);
+ }
+ }
}
if (nir_intrinsic_has_src_type(intr))
diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c b/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c
index 9cfca4dc5dc..6a570ceff14 100644
--- a/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c
+++ b/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c
@@ -422,6 +422,7 @@ void si_preload_gs_rings(struct si_shader_context *ctx)
struct si_shader *si_generate_gs_copy_shader(struct si_screen *sscreen,
struct ac_llvm_compiler *compiler,
struct si_shader_selector *gs_selector,
+ const struct pipe_stream_output_info *so,
struct util_debug_callback *debug)
{
struct si_shader_context ctx;
@@ -446,7 +447,7 @@ struct si_shader *si_generate_gs_copy_shader(struct si_screen *sscreen,
si_llvm_context_init(&ctx, sscreen, compiler, shader->wave_size);
ctx.shader = shader;
ctx.stage = MESA_SHADER_VERTEX;
- ctx.so = gs_selector->so;
+ ctx.so = *so;
builder = ctx.ac.builder;
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.cpp b/src/gallium/drivers/radeonsi/si_state_shaders.cpp
index 7575d1631eb..0ee4bf523da 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.cpp
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.cpp
@@ -203,9 +203,6 @@ void si_get_ir_cache_key(struct si_shader_selector *sel, bool ngg, bool es,
_mesa_sha1_init(&ctx);
_mesa_sha1_update(&ctx, &shader_variant_flags, 4);
_mesa_sha1_update(&ctx, ir_binary, ir_size);
- if (sel->info.stage == MESA_SHADER_VERTEX || sel->info.stage == MESA_SHADER_TESS_EVAL ||
- sel->info.stage == MESA_SHADER_GEOMETRY)
- _mesa_sha1_update(&ctx, &sel->so, sizeof(sel->so));
_mesa_sha1_final(&ctx, ir_sha1_cache_key);
if (ir_binary == blob.data)
@@ -1512,7 +1509,7 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader
}
shader->ctx_reg.ngg.vgt_stages.u.ngg = 1;
- shader->ctx_reg.ngg.vgt_stages.u.streamout = gs_sel->so.num_outputs;
+ shader->ctx_reg.ngg.vgt_stages.u.streamout = !!gs_sel->info.enabled_streamout_buffer_mask;
shader->ctx_reg.ngg.vgt_stages.u.ngg_passthrough = gfx10_is_ngg_passthrough(shader);
shader->ctx_reg.ngg.vgt_stages.u.gs_wave32 = shader->wave_size == 32;
}
@@ -1702,11 +1699,11 @@ static void si_shader_vs(struct si_screen *sscreen, struct si_shader *shader,
rsrc1 |= S_00B128_SGPRS((shader->config.num_sgprs - 1) / 8);
if (!sscreen->use_ngg_streamout) {
- rsrc2 |= S_00B12C_SO_BASE0_EN(!!shader->selector->so.stride[0]) |
- S_00B12C_SO_BASE1_EN(!!shader->selector->so.stride[1]) |
- S_00B12C_SO_BASE2_EN(!!shader->selector->so.stride[2]) |
- S_00B12C_SO_BASE3_EN(!!shader->selector->so.stride[3]) |
- S_00B12C_SO_EN(!!shader->selector->so.num_outputs);
+ rsrc2 |= S_00B12C_SO_BASE0_EN(!!shader->selector->info.base.xfb_stride[0]) |
+ S_00B12C_SO_BASE1_EN(!!shader->selector->info.base.xfb_stride[1]) |
+ S_00B12C_SO_BASE2_EN(!!shader->selector->info.base.xfb_stride[2]) |
+ S_00B12C_SO_BASE3_EN(!!shader->selector->info.base.xfb_stride[3]) |
+ S_00B12C_SO_EN(!!info->enabled_streamout_buffer_mask);
}
si_pm4_set_reg(pm4, R_00B128_SPI_SHADER_PGM_RSRC1_VS, rsrc1);
@@ -2783,7 +2780,7 @@ int si_shader_select(struct pipe_context *ctx, struct si_shader_ctx_state *state
}
}
-static void si_parse_next_shader_property(const struct si_shader_info *info, bool streamout,
+static void si_parse_next_shader_property(const struct si_shader_info *info,
union si_shader_key *key)
{
gl_shader_stage next_shader = info->base.next_stage;
@@ -2804,7 +2801,7 @@ static void si_parse_next_shader_property(const struct si_shader_info *info, boo
* assume that it's a HW LS. (the next shader is TCS)
* This heuristic is needed for separate shader objects.
*/
- if (!info->writes_position && !streamout)
+ if (!info->writes_position && !info->enabled_streamout_buffer_mask)
key->ge.as_ls = 1;
}
break;
@@ -2874,10 +2871,11 @@ static void si_init_shader_selector_async(void *job, void *gdata, int thread_ind
shader->selector = sel;
shader->is_monolithic = false;
- si_parse_next_shader_property(&sel->info, sel->so.num_outputs != 0, &shader->key);
+ si_parse_next_shader_property(&sel->info, &shader->key);
if (sel->info.stage <= MESA_SHADER_GEOMETRY &&
- sscreen->use_ngg && (!sel->so.num_outputs || sscreen->use_ngg_streamout) &&
+ sscreen->use_ngg && (!sel->info.enabled_streamout_buffer_mask ||
+ sscreen->use_ngg_streamout) &&
((sel->info.stage == MESA_SHADER_VERTEX && !shader->key.ge.as_ls) ||
sel->info.stage == MESA_SHADER_TESS_EVAL || sel->info.stage == MESA_SHADER_GEOMETRY))
shader->key.ge.as_ngg = 1;
@@ -3035,8 +3033,6 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
sel->compiler_ctx_state.debug = sctx->debug;
sel->compiler_ctx_state.is_debug_context = sctx->is_debug;
- sel->so = state->stream_output;
-
if (state->type == PIPE_SHADER_IR_TGSI) {
sel->nir = tgsi_to_nir(state->tokens, ctx->screen, true);
} else {
@@ -3057,12 +3053,6 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
si_get_active_slot_masks(&sel->info, &sel->active_const_and_shader_buffers,
&sel->active_samplers_and_images);
- /* Record which streamout buffers are enabled. */
- for (unsigned i = 0; i < sel->so.num_outputs; i++) {
- sel->enabled_streamout_buffer_mask |= (1 << sel->so.output[i].output_buffer)
- << (sel->so.output[i].stream * 4);
- }
-
sel->num_vs_inputs =
sel->info.stage == MESA_SHADER_VERTEX && !sel->info.base.vs.blit_sgprs_amd
? sel->info.num_inputs
@@ -3197,7 +3187,7 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
!sel->info.writes_viewport_index && /* cull only against viewport 0 */
!sel->info.base.writes_memory &&
/* NGG GS supports culling with streamout because it culls after streamout. */
- (sel->info.stage == MESA_SHADER_GEOMETRY || !sel->so.num_outputs) &&
+ (sel->info.stage == MESA_SHADER_GEOMETRY || !sel->info.enabled_streamout_buffer_mask) &&
(sel->info.stage != MESA_SHADER_GEOMETRY || sel->info.num_stream_output_components[0]) &&
(sel->info.stage != MESA_SHADER_VERTEX ||
(!sel->info.base.vs.blit_sgprs_amd &&
@@ -3312,8 +3302,8 @@ static void si_update_streamout_state(struct si_context *sctx)
if (!shader_with_so)
return;
- sctx->streamout.enabled_stream_buffers_mask = shader_with_so->enabled_streamout_buffer_mask;
- sctx->streamout.stride_in_dw = shader_with_so->so.stride;
+ sctx->streamout.enabled_stream_buffers_mask = shader_with_so->info.enabled_streamout_buffer_mask;
+ sctx->streamout.stride_in_dw = shader_with_so->info.base.xfb_stride;
}
static void si_update_clip_regs(struct si_context *sctx, struct si_shader_selector *old_hw_vs,
@@ -3440,7 +3430,8 @@ bool si_update_ngg(struct si_context *sctx)
} else if (!sctx->screen->use_ngg_streamout) {
struct si_shader_selector *last = si_get_vs(sctx)->cso;
- if ((last && last->so.num_outputs) || sctx->streamout.prims_gen_query_enabled)
+ if ((last && last->info.enabled_streamout_buffer_mask) ||
+ sctx->streamout.prims_gen_query_enabled)
new_ngg = false;
}
diff --git a/src/gallium/drivers/radeonsi/si_state_streamout.c b/src/gallium/drivers/radeonsi/si_state_streamout.c
index b5557610947..0406c7d0198 100644
--- a/src/gallium/drivers/radeonsi/si_state_streamout.c
+++ b/src/gallium/drivers/radeonsi/si_state_streamout.c
@@ -308,7 +308,7 @@ static void si_emit_streamout_begin(struct si_context *sctx)
{
struct radeon_cmdbuf *cs = &sctx->gfx_cs;
struct si_streamout_target **t = sctx->streamout.targets;
- uint16_t *stride_in_dw = sctx->streamout.stride_in_dw;
+ uint8_t *stride_in_dw = sctx->streamout.stride_in_dw;
unsigned i;
si_flush_vgt_streamout(sctx);
More information about the mesa-commit
mailing list