[Mesa-dev] [PATCH] ac/nir: replace SI.buffer.load.dword with amdgcn.buffer.load
Samuel Pitoiset
samuel.pitoiset at gmail.com
Thu Feb 1 15:37:15 UTC 2018
The old one generates useless instructions in there, found while
comparing geometry shaders between RadeonSI and RADV.
This improves all Vulkan demos that use geometry shaders, +4%
for deferredshadows, +9% for viewportarray, +7% for
geometryshader on Polaris10.
This seems to also improve DOW3 a little bit (+1%).
Signed-off-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
---
src/amd/common/ac_nir_to_llvm.c | 52 ++++++++++++++++-------------------------
1 file changed, 20 insertions(+), 32 deletions(-)
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 5c5594956b..f89012bfe9 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -3063,7 +3063,6 @@ load_gs_input(struct ac_shader_abi *abi,
{
struct nir_to_llvm_context *ctx = nir_to_llvm_context_from_abi(abi);
LLVMValueRef vtx_offset;
- LLVMValueRef args[9];
unsigned param, vtx_offset_param;
LLVMValueRef value[4], result;
@@ -3081,20 +3080,16 @@ load_gs_input(struct ac_shader_abi *abi,
LLVMConstInt(ctx->ac.i32, param * 4 + i + const_index, 0), "");
value[i] = ac_lds_load(&ctx->ac, dw_addr);
} else {
- args[0] = ctx->esgs_ring;
- args[1] = vtx_offset;
- args[2] = LLVMConstInt(ctx->ac.i32, (param * 4 + i + const_index) * 256, false);
- args[3] = ctx->ac.i32_0;
- args[4] = ctx->ac.i32_1; /* OFFEN */
- args[5] = ctx->ac.i32_0; /* IDXEN */
- args[6] = ctx->ac.i32_1; /* GLC */
- args[7] = ctx->ac.i32_0; /* SLC */
- args[8] = ctx->ac.i32_0; /* TFE */
-
- value[i] = ac_build_intrinsic(&ctx->ac, "llvm.SI.buffer.load.dword.i32.i32",
- ctx->ac.i32, args, 9,
- AC_FUNC_ATTR_READONLY |
- AC_FUNC_ATTR_LEGACY);
+ LLVMValueRef soffset =
+ LLVMConstInt(ctx->ac.i32,
+ (param * 4 + i + const_index) * 256,
+ false);
+
+ value[i] = ac_build_buffer_load(&ctx->ac,
+ ctx->esgs_ring, 1,
+ ctx->ac.i32_0,
+ vtx_offset, soffset,
+ 0, 1, 0, true, false);
}
}
result = ac_build_varying_gather_values(&ctx->ac, value, num_components, component);
@@ -7213,16 +7208,9 @@ void ac_compile_nir_shader(LLVMTargetMachineRef tm,
static void
ac_gs_copy_shader_emit(struct nir_to_llvm_context *ctx)
{
- LLVMValueRef args[9];
- args[0] = ctx->gsvs_ring;
- args[1] = LLVMBuildMul(ctx->builder, ctx->abi.vertex_id, LLVMConstInt(ctx->ac.i32, 4, false), "");
- args[3] = ctx->ac.i32_0;
- args[4] = ctx->ac.i32_1; /* OFFEN */
- args[5] = ctx->ac.i32_0; /* IDXEN */
- args[6] = ctx->ac.i32_1; /* GLC */
- args[7] = ctx->ac.i32_1; /* SLC */
- args[8] = ctx->ac.i32_0; /* TFE */
-
+ LLVMValueRef vtx_offset =
+ LLVMBuildMul(ctx->builder, ctx->abi.vertex_id,
+ LLVMConstInt(ctx->ac.i32, 4, false), "");
int idx = 0;
for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) {
@@ -7240,16 +7228,16 @@ ac_gs_copy_shader_emit(struct nir_to_llvm_context *ctx)
}
for (unsigned j = 0; j < length; j++) {
- LLVMValueRef value;
- args[2] = LLVMConstInt(ctx->ac.i32,
+ LLVMValueRef value, soffset;
+
+ soffset = LLVMConstInt(ctx->ac.i32,
(slot * 4 + j) *
ctx->gs_max_out_vertices * 16 * 4, false);
- value = ac_build_intrinsic(&ctx->ac,
- "llvm.SI.buffer.load.dword.i32.i32",
- ctx->ac.i32, args, 9,
- AC_FUNC_ATTR_READONLY |
- AC_FUNC_ATTR_LEGACY);
+ value = ac_build_buffer_load(&ctx->ac, ctx->gsvs_ring,
+ 1, ctx->ac.i32_0,
+ vtx_offset, soffset,
+ 0, 1, 1, true, false);
LLVMBuildStore(ctx->builder,
ac_to_float(&ctx->ac, value), ctx->nir->outputs[radeon_llvm_reg_index_soa(i, j)]);
--
2.16.1
More information about the mesa-dev
mailing list