Mesa (main): radeonsi: Change GS vertex offset arguments to use gs_vtx_offset array.
GitLab Mirror
gitlab-mirror at kemper.freedesktop.org
Thu Aug 26 05:41:27 UTC 2021
Module: Mesa
Branch: main
Commit: 346eb08a3d2561ecc28f11606976342615eaaa44
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=346eb08a3d2561ecc28f11606976342615eaaa44
Author: Timur Kristóf <timur.kristof at gmail.com>
Date: Tue Aug 24 00:27:58 2021 +0200
radeonsi: Change GS vertex offset arguments to use gs_vtx_offset array.
This makes RadeonSI behave like RADV, and hence makes it possible to
use the NIR-based ESGS I/O lowering and NGG lowering.
Signed-off-by: Timur Kristóf <timur.kristof at gmail.com>
Reviewed-by: Marek Olšák <marek.olsak at amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12511>
---
src/gallium/drivers/radeonsi/gfx10_shader_ngg.c | 31 ++++++++++-------------
src/gallium/drivers/radeonsi/si_shader.c | 6 ++---
src/gallium/drivers/radeonsi/si_shader_internal.h | 4 ---
src/gallium/drivers/radeonsi/si_shader_llvm_gs.c | 23 ++++-------------
4 files changed, 21 insertions(+), 43 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c
index edfe0ee3645..a8a4e94a092 100644
--- a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c
+++ b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c
@@ -153,7 +153,7 @@ void gfx10_ngg_build_export_prim(struct si_shader_context *ctx, LLVMValueRef use
if (prim_passthrough)
prim.passthrough = prim_passthrough;
else
- prim.passthrough = ac_get_arg(&ctx->ac, ctx->gs_vtx01_offset);
+ prim.passthrough = ac_get_arg(&ctx->ac, ctx->args.gs_vtx_offset[0]);
/* This is only used with NGG culling, which returns the NGG
* passthrough prim export encoding.
@@ -190,9 +190,8 @@ void gfx10_ngg_build_export_prim(struct si_shader_context *ctx, LLVMValueRef use
ngg_get_vertices_per_prim(ctx, &prim.num_vertices);
prim.isnull = ctx->ac.i1false;
- prim.index[0] = si_unpack_param(ctx, ctx->gs_vtx01_offset, 0, 16);
- prim.index[1] = si_unpack_param(ctx, ctx->gs_vtx01_offset, 16, 16);
- prim.index[2] = si_unpack_param(ctx, ctx->gs_vtx23_offset, 0, 16);
+ for (unsigned i = 0; i < 3; ++i)
+ prim.index[i] = si_unpack_param(ctx, ctx->args.gs_vtx_offset[i / 2], (i & 1) * 16, 16);
for (unsigned i = 0; i < prim.num_vertices; ++i) {
prim.edgeflag[i] = ngg_get_initial_edgeflag(ctx, i);
@@ -908,13 +907,11 @@ void gfx10_emit_ngg_culling_epilogue(struct ac_shader_abi *abi, unsigned max_out
/* For the GS fast launch, the VS prolog simply puts the Vertex IDs
* into these VGPRs.
*/
- vtxindex[0] = ac_get_arg(&ctx->ac, ctx->gs_vtx01_offset);
- vtxindex[1] = ac_get_arg(&ctx->ac, ctx->gs_vtx23_offset);
- vtxindex[2] = ac_get_arg(&ctx->ac, ctx->gs_vtx45_offset);
+ for (unsigned i = 0; i < 3; ++i)
+ vtxindex[i] = ac_get_arg(&ctx->ac, ctx->args.gs_vtx_offset[i]);
} else {
- vtxindex[0] = si_unpack_param(ctx, ctx->gs_vtx01_offset, 0, 16);
- vtxindex[1] = si_unpack_param(ctx, ctx->gs_vtx01_offset, 16, 16);
- vtxindex[2] = si_unpack_param(ctx, ctx->gs_vtx23_offset, 0, 16);
+ for (unsigned i = 0; i < 3; ++i)
+ vtxindex[i] = si_unpack_param(ctx, ctx->args.gs_vtx_offset[i / 2], (i & 1) * 16, 16);
};
LLVMValueRef gs_vtxptr[] = {
ngg_nogs_vertex_ptr(ctx, vtxindex[0]),
@@ -1237,11 +1234,11 @@ void gfx10_emit_ngg_culling_epilogue(struct ac_shader_abi *abi, unsigned max_out
val = LLVMBuildLoad(builder, new_vgpr0, "");
ret = LLVMBuildInsertValue(builder, ret, ac_to_float(&ctx->ac, val), vgpr++, "");
- vgpr++; /* gs_vtx23_offset */
+ vgpr++; /* gs_vtx_offset[1] = offsets of vertices 2-3 */
ret = si_insert_input_ret_float(ctx, ret, ctx->args.gs_prim_id, vgpr++);
ret = si_insert_input_ret_float(ctx, ret, ctx->args.gs_invocation_id, vgpr++);
- vgpr++; /* gs_vtx45_offset */
+ vgpr++; /* gs_vtx_offset[2] = offsets of vertices 4-5 */
/* Set the input VPGRs to the corresponding LDS addresses where the VGPR values are
* stored. The VS prolog will load them.
@@ -1340,13 +1337,11 @@ void gfx10_emit_ngg_epilogue(struct ac_shader_abi *abi, unsigned max_outputs, LL
LLVMValueRef vtxindex[3];
if (ctx->shader->key.opt.ngg_culling) {
- vtxindex[0] = si_unpack_param(ctx, ctx->gs_vtx01_offset, 0, 9);
- vtxindex[1] = si_unpack_param(ctx, ctx->gs_vtx01_offset, 10, 9);
- vtxindex[2] = si_unpack_param(ctx, ctx->gs_vtx01_offset, 20, 9);
+ for (unsigned i = 0; i < 3; ++i)
+ vtxindex[i] = si_unpack_param(ctx, ctx->args.gs_vtx_offset[0], 10 * i, 9);
} else {
- vtxindex[0] = si_unpack_param(ctx, ctx->gs_vtx01_offset, 0, 16);
- vtxindex[1] = si_unpack_param(ctx, ctx->gs_vtx01_offset, 16, 16);
- vtxindex[2] = si_unpack_param(ctx, ctx->gs_vtx23_offset, 0, 16);
+ for (unsigned i = 0; i < 3; ++i)
+ vtxindex[i] = si_unpack_param(ctx, ctx->args.gs_vtx_offset[i / 2], (i & 1) * 16, 16);
}
/* Determine the number of vertices per primitive. */
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 0cdd3a9f539..30203c011da 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -553,11 +553,11 @@ void si_init_shader_args(struct si_shader_context *ctx, bool ngg_cull_shader)
declare_vb_descriptor_input_sgprs(ctx);
/* VGPRs (first GS, then VS/TES) */
- ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->gs_vtx01_offset);
- ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->gs_vtx23_offset);
+ ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->args.gs_vtx_offset[0]);
+ ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->args.gs_vtx_offset[1]);
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->args.gs_prim_id);
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->args.gs_invocation_id);
- ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->gs_vtx45_offset);
+ ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->args.gs_vtx_offset[2]);
if (ctx->stage == MESA_SHADER_VERTEX) {
declare_vs_input_vgprs(ctx, &num_prolog_vgprs);
diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h b/src/gallium/drivers/radeonsi/si_shader_internal.h
index 46d8e69b985..3b56b8f6338 100644
--- a/src/gallium/drivers/radeonsi/si_shader_internal.h
+++ b/src/gallium/drivers/radeonsi/si_shader_internal.h
@@ -134,10 +134,6 @@ struct si_shader_context {
/* API TES */
struct ac_arg tes_offchip_addr;
- /* API GS */
- struct ac_arg gs_vtx01_offset; /* in dwords (GFX9) */
- struct ac_arg gs_vtx23_offset; /* in dwords (GFX9) */
- struct ac_arg gs_vtx45_offset; /* in dwords (GFX9) */
/* PS */
struct ac_arg pos_fixed_pt;
/* CS */
diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c b/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c
index 67d7150d694..da74f7f6cf8 100644
--- a/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c
+++ b/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c
@@ -57,21 +57,8 @@ static LLVMValueRef si_llvm_load_input_gs(struct ac_shader_abi *abi, unsigned in
/* GFX9 has the ESGS ring in LDS. */
if (ctx->screen->info.chip_class >= GFX9) {
unsigned index = vtx_offset_param;
-
- switch (index / 2) {
- case 0:
- vtx_offset = si_unpack_param(ctx, ctx->gs_vtx01_offset, index % 2 ? 16 : 0, 16);
- break;
- case 1:
- vtx_offset = si_unpack_param(ctx, ctx->gs_vtx23_offset, index % 2 ? 16 : 0, 16);
- break;
- case 2:
- vtx_offset = si_unpack_param(ctx, ctx->gs_vtx45_offset, index % 2 ? 16 : 0, 16);
- break;
- default:
- assert(0);
- return NULL;
- }
+ vtx_offset =
+ si_unpack_param(ctx, ctx->args.gs_vtx_offset[index / 2], (index & 1) * 16, 16);
unsigned offset = param * 4 + swizzle;
vtx_offset =
@@ -137,11 +124,11 @@ static void si_set_es_return_value_for_gs(struct si_shader_context *ctx)
unsigned vgpr = 8 + SI_NUM_VS_STATE_RESOURCE_SGPRS;
- ret = si_insert_input_ret_float(ctx, ret, ctx->gs_vtx01_offset, vgpr++);
- ret = si_insert_input_ret_float(ctx, ret, ctx->gs_vtx23_offset, vgpr++);
+ ret = si_insert_input_ret_float(ctx, ret, ctx->args.gs_vtx_offset[0], vgpr++);
+ ret = si_insert_input_ret_float(ctx, ret, ctx->args.gs_vtx_offset[1], vgpr++);
ret = si_insert_input_ret_float(ctx, ret, ctx->args.gs_prim_id, vgpr++);
ret = si_insert_input_ret_float(ctx, ret, ctx->args.gs_invocation_id, vgpr++);
- ret = si_insert_input_ret_float(ctx, ret, ctx->gs_vtx45_offset, vgpr++);
+ ret = si_insert_input_ret_float(ctx, ret, ctx->args.gs_vtx_offset[2], vgpr++);
ctx->return_value = ret;
}
More information about the mesa-commit mailing list