[Mesa-dev] [PATCH v2 23/25] radeonsi: use a single descriptor for the GSVS ring
Nicolai Hähnle
nhaehnle at gmail.com
Tue Dec 6 10:48:34 UTC 2016
From: Nicolai Hähnle <nicolai.haehnle at amd.com>
We can hardcode all of the fields for swizzling in the geometry shader.
The advantage is that we use fewer descriptor slots and we no longer have to
update any of the (ring) descriptors when the geometry shader changes.
---
src/gallium/drivers/radeonsi/si_pipe.h | 1 -
src/gallium/drivers/radeonsi/si_shader.c | 73 ++++++++++++++++++++++---
src/gallium/drivers/radeonsi/si_state.h | 6 +-
src/gallium/drivers/radeonsi/si_state_shaders.c | 37 +------------
4 files changed, 67 insertions(+), 50 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 42cbecb..2409c85 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -315,21 +315,20 @@ struct si_context {
int last_primitive_restart_en;
int last_restart_index;
int last_gs_out_prim;
int last_prim;
int last_multi_vgt_param;
int last_rast_prim;
unsigned last_sc_line_stipple;
int last_vtx_reuse_depth;
int current_rast_prim; /* primitive type after TES, GS */
bool gs_tri_strip_adj_fix;
- unsigned last_gsvs_itemsize;
/* Scratch buffer */
struct r600_resource *scratch_buffer;
bool emit_scratch_reloc;
unsigned scratch_waves;
unsigned spi_tmpring_size;
struct r600_resource *compute_scratch_buffer;
/* Emitted derived tessellation state. */
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index e3ed0d3..b22248e 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -5690,51 +5690,108 @@ static void create_function(struct si_shader_context *ctx)
}
/**
* Load ESGS and GSVS ring buffer resource descriptors and save the variables
* for later use.
*/
static void preload_ring_buffers(struct si_shader_context *ctx)
{
struct gallivm_state *gallivm =
ctx->soa.bld_base.base.gallivm;
+ LLVMBuilderRef builder = gallivm->builder;
LLVMValueRef buf_ptr = LLVMGetParam(ctx->main_fn,
SI_PARAM_RW_BUFFERS);
if ((ctx->type == PIPE_SHADER_VERTEX &&
ctx->shader->key.as_es) ||
(ctx->type == PIPE_SHADER_TESS_EVAL &&
ctx->shader->key.as_es) ||
ctx->type == PIPE_SHADER_GEOMETRY) {
unsigned ring =
ctx->type == PIPE_SHADER_GEOMETRY ? SI_GS_RING_ESGS
: SI_ES_RING_ESGS;
LLVMValueRef offset = lp_build_const_int32(gallivm, ring);
ctx->esgs_ring =
build_indexed_load_const(ctx, buf_ptr, offset);
}
if (ctx->shader->is_gs_copy_shader) {
- LLVMValueRef offset = lp_build_const_int32(gallivm, SI_VS_RING_GSVS);
+ LLVMValueRef offset = lp_build_const_int32(gallivm, SI_RING_GSVS);
ctx->gsvs_ring[0] =
build_indexed_load_const(ctx, buf_ptr, offset);
- }
- if (ctx->type == PIPE_SHADER_GEOMETRY) {
- int i;
- for (i = 0; i < 4; i++) {
- LLVMValueRef offset = lp_build_const_int32(gallivm, SI_GS_RING_GSVS0 + i);
+ } else if (ctx->type == PIPE_SHADER_GEOMETRY) {
+ struct lp_build_context *uint = &ctx->soa.bld_base.uint_bld;
+ LLVMValueRef offset = lp_build_const_int32(gallivm, SI_RING_GSVS);
+ LLVMValueRef base_ring;
+
+ base_ring = build_indexed_load_const(ctx, buf_ptr, offset);
+
+ /* The conceptual layout of the GSVS ring is
+ * v0c0 .. vLv0 v0c1 .. vLc1 ..
+ * but the real memory layout is swizzled across
+ * threads:
+ * t0v0c0 .. t15v0c0 t0v1c0 .. t15v1c0 ... t15vLcL
+ * t16v0c0 ..
+ * Override the buffer descriptor accordingly.
+ */
+ LLVMTypeRef v2i64 = LLVMVectorType(ctx->i64, 2);
+ unsigned max_gsvs_emit_size = ctx->shader->selector->max_gsvs_emit_size;
+ unsigned num_records;
+
+ num_records = 64;
+ if (ctx->screen->b.chip_class >= VI)
+ num_records *= max_gsvs_emit_size;
+
+ for (unsigned stream = 0; stream < 4; ++stream) {
+ LLVMValueRef ring, tmp;
+
+ if (!ctx->shader->selector->info.num_stream_output_components[stream])
+ continue;
- ctx->gsvs_ring[i] =
- build_indexed_load_const(ctx, buf_ptr, offset);
+ /* Limit on the stride field for <= CIK. */
+ assert(max_gsvs_emit_size < (1 << 14));
+
+ ring = LLVMBuildBitCast(builder, base_ring, v2i64, "");
+ tmp = LLVMBuildExtractElement(builder, ring, uint->zero, "");
+ tmp = LLVMBuildAdd(builder, tmp,
+ LLVMConstInt(ctx->i64,
+ max_gsvs_emit_size * 64 * stream, 0), "");
+ ring = LLVMBuildInsertElement(builder, ring, tmp, uint->zero, "");
+ ring = LLVMBuildBitCast(builder, ring, ctx->v4i32, "");
+ tmp = LLVMBuildExtractElement(builder, ring, uint->one, "");
+ tmp = LLVMBuildOr(builder, tmp,
+ LLVMConstInt(ctx->i32,
+ S_008F04_STRIDE(max_gsvs_emit_size) |
+ S_008F04_SWIZZLE_ENABLE(1), 0), "");
+ ring = LLVMBuildInsertElement(builder, ring, tmp, uint->one, "");
+ ring = LLVMBuildInsertElement(builder, ring,
+ LLVMConstInt(ctx->i32, num_records, 0),
+ LLVMConstInt(ctx->i32, 2, 0), "");
+ ring = LLVMBuildInsertElement(builder, ring,
+ LLVMConstInt(ctx->i32,
+ S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
+ S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
+ S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
+ S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
+ S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
+ S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
+ S_008F0C_ELEMENT_SIZE(1) | /* element_size = 4 (bytes) */
+ S_008F0C_INDEX_STRIDE(1) | /* index_stride = 16 (elements) */
+ S_008F0C_ADD_TID_ENABLE(1),
+ 0),
+ LLVMConstInt(ctx->i32, 3, 0), "");
+ ring = LLVMBuildBitCast(builder, ring, ctx->v16i8, "");
+
+ ctx->gsvs_ring[stream] = ring;
}
}
}
static void si_llvm_emit_polygon_stipple(struct si_shader_context *ctx,
LLVMValueRef param_rw_buffers,
unsigned param_pos_fixed_pt)
{
struct lp_build_tgsi_context *bld_base =
&ctx->soa.bld_base;
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index 3a9f0cf..9ea52ea 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -160,25 +160,21 @@ struct si_shader_data {
};
/* Private read-write buffer slots. */
enum {
SI_HS_RING_TESS_FACTOR,
SI_HS_RING_TESS_OFFCHIP,
SI_ES_RING_ESGS,
SI_GS_RING_ESGS,
- SI_GS_RING_GSVS0,
- SI_GS_RING_GSVS1,
- SI_GS_RING_GSVS2,
- SI_GS_RING_GSVS3,
- SI_VS_RING_GSVS,
+ SI_RING_GSVS,
SI_VS_STREAMOUT_BUF0,
SI_VS_STREAMOUT_BUF1,
SI_VS_STREAMOUT_BUF2,
SI_VS_STREAMOUT_BUF3,
SI_HS_CONST_DEFAULT_TESS_LEVELS,
SI_VS_CONST_CLIP_PLANES,
SI_PS_CONST_POLY_STIPPLE,
SI_PS_CONST_SAMPLE_POSITIONS,
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index ea71569..1e9f5f0 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -2032,61 +2032,28 @@ static bool si_update_gs_ring_buffers(struct si_context *sctx)
/* Set ring bindings. */
if (sctx->esgs_ring) {
si_set_ring_buffer(&sctx->b.b, SI_ES_RING_ESGS,
sctx->esgs_ring, 0, sctx->esgs_ring->width0,
true, true, 4, 64, 0);
si_set_ring_buffer(&sctx->b.b, SI_GS_RING_ESGS,
sctx->esgs_ring, 0, sctx->esgs_ring->width0,
false, false, 0, 0, 0);
}
if (sctx->gsvs_ring) {
- si_set_ring_buffer(&sctx->b.b, SI_VS_RING_GSVS,
+ si_set_ring_buffer(&sctx->b.b, SI_RING_GSVS,
sctx->gsvs_ring, 0, sctx->gsvs_ring->width0,
false, false, 0, 0, 0);
-
- /* Also update SI_GS_RING_GSVSi descriptors. */
- sctx->last_gsvs_itemsize = 0;
}
return true;
}
-static void si_update_gsvs_ring_bindings(struct si_context *sctx)
-{
- unsigned gsvs_itemsize = sctx->gs_shader.cso->max_gsvs_emit_size;
- uint64_t offset;
-
- if (!sctx->gsvs_ring || gsvs_itemsize == sctx->last_gsvs_itemsize)
- return;
-
- sctx->last_gsvs_itemsize = gsvs_itemsize;
-
- si_set_ring_buffer(&sctx->b.b, SI_GS_RING_GSVS0,
- sctx->gsvs_ring, gsvs_itemsize,
- 64, true, true, 4, 16, 0);
-
- offset = gsvs_itemsize * 64;
- si_set_ring_buffer(&sctx->b.b, SI_GS_RING_GSVS1,
- sctx->gsvs_ring, gsvs_itemsize,
- 64, true, true, 4, 16, offset);
-
- offset = (gsvs_itemsize * 2) * 64;
- si_set_ring_buffer(&sctx->b.b, SI_GS_RING_GSVS2,
- sctx->gsvs_ring, gsvs_itemsize,
- 64, true, true, 4, 16, offset);
-
- offset = (gsvs_itemsize * 3) * 64;
- si_set_ring_buffer(&sctx->b.b, SI_GS_RING_GSVS3,
- sctx->gsvs_ring, gsvs_itemsize,
- 64, true, true, 4, 16, offset);
-}
-
/**
* @returns 1 if \p sel has been updated to use a new scratch buffer
* 0 if not
* < 0 if there was a failure
*/
static int si_update_scratch_buffer(struct si_context *sctx,
struct si_shader *shader)
{
uint64_t scratch_va = sctx->scratch_buffer->gpu_address;
int r;
@@ -2462,22 +2429,20 @@ bool si_update_shaders(struct si_context *sctx)
if (sctx->gs_shader.cso) {
r = si_shader_select(ctx, &sctx->gs_shader);
if (r)
return false;
si_pm4_bind_state(sctx, gs, sctx->gs_shader.current->pm4);
si_pm4_bind_state(sctx, vs, sctx->gs_shader.cso->gs_copy_shader->pm4);
si_update_so(sctx, sctx->gs_shader.cso);
if (!si_update_gs_ring_buffers(sctx))
return false;
-
- si_update_gsvs_ring_bindings(sctx);
} else {
si_pm4_bind_state(sctx, gs, NULL);
si_pm4_bind_state(sctx, es, NULL);
}
si_update_vgt_shader_config(sctx);
if (sctx->ps_shader.cso) {
unsigned db_shader_control;
--
2.7.4
More information about the mesa-dev
mailing list