[Mesa-dev] [PATCH 4/6] ac/nir: rewrite shared variable handling
Connor Abbott
connora at valvesoftware.com
Thu Jul 6 19:48:45 UTC 2017
From: Connor Abbott <cwabbott0 at gmail.com>
As with local variables, translate shared variables directly from NIR to
LLVM instead of lowering them to an array and then back to a variable.
This should fix indirect dereferences, make shared variables more tightly
packed, and make LLVM's alias analysis more precise.
---
src/amd/common/ac_nir_to_llvm.c | 116 ++++++++--------------------------------
1 file changed, 23 insertions(+), 93 deletions(-)
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index f42d214..743cc1d 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -155,7 +155,6 @@ struct nir_to_llvm_context {
LLVMValueRef inputs[RADEON_LLVM_MAX_INPUTS * 4];
LLVMValueRef outputs[RADEON_LLVM_MAX_OUTPUTS * 4];
- LLVMValueRef shared_memory;
uint64_t input_mask;
uint64_t output_mask;
uint8_t num_output_clips;
@@ -386,23 +385,6 @@ static LLVMTypeRef const_array(LLVMTypeRef elem_type, int num_elements)
CONST_ADDR_SPACE);
}
-static LLVMValueRef get_shared_memory_ptr(struct nir_to_llvm_context *ctx,
- int idx,
- LLVMTypeRef type)
-{
- LLVMValueRef offset;
- LLVMValueRef ptr;
- int addr_space;
-
- offset = LLVMConstInt(ctx->i32, idx * 16, false);
-
- ptr = ctx->shared_memory;
- ptr = LLVMBuildGEP(ctx->builder, ptr, &offset, 1, "");
- addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
- ptr = LLVMBuildBitCast(ctx->builder, ptr, LLVMPointerType(type, addr_space), "");
- return ptr;
-}
-
static LLVMTypeRef to_integer_type_scalar(struct ac_llvm_context *ctx, LLVMTypeRef t)
{
if (t == ctx->f16 || t == ctx->i16)
@@ -2986,7 +2968,8 @@ static LLVMValueRef visit_load_var(struct nir_to_llvm_context *ctx,
values[chan] = ctx->inputs[idx + chan + const_index * 4];
}
break;
- case nir_var_local: {
+ case nir_var_local:
+ case nir_var_shared: {
LLVMValueRef address = build_gep_for_deref(ctx,
instr->variables[0]);
LLVMValueRef val = LLVMBuildLoad(ctx->builder, address, "");
@@ -3016,23 +2999,6 @@ static LLVMValueRef visit_load_var(struct nir_to_llvm_context *ctx,
}
}
break;
- case nir_var_shared: {
- LLVMValueRef ptr = get_shared_memory_ptr(ctx, idx, ctx->i32);
- LLVMValueRef derived_ptr;
-
- if (indir_index)
- indir_index = LLVMBuildMul(ctx->builder, indir_index, LLVMConstInt(ctx->i32, 4, false), "");
-
- for (unsigned chan = 0; chan < ve; chan++) {
- LLVMValueRef index = LLVMConstInt(ctx->i32, chan, false);
- if (indir_index)
- index = LLVMBuildAdd(ctx->builder, index, indir_index, "");
- derived_ptr = LLVMBuildGEP(ctx->builder, ptr, &index, 1, "");
-
- values[chan] = LLVMBuildLoad(ctx->builder, derived_ptr, "");
- }
- break;
- }
default:
unreachable("unhandle variable mode");
}
@@ -3107,7 +3073,8 @@ visit_store_var(struct nir_to_llvm_context *ctx,
}
}
break;
- case nir_var_local: {
+ case nir_var_local:
+ case nir_var_shared: {
int writemask = instr->const_index[0];
LLVMValueRef address = build_gep_for_deref(ctx,
instr->variables[0]);
@@ -3137,28 +3104,6 @@ visit_store_var(struct nir_to_llvm_context *ctx,
}
break;
}
- case nir_var_shared: {
- LLVMValueRef ptr = get_shared_memory_ptr(ctx, idx, ctx->i32);
-
- if (indir_index)
- indir_index = LLVMBuildMul(ctx->builder, indir_index, LLVMConstInt(ctx->i32, 4, false), "");
-
- for (unsigned chan = 0; chan < 8; chan++) {
- if (!(writemask & (1 << chan)))
- continue;
- LLVMValueRef index = LLVMConstInt(ctx->i32, chan, false);
- LLVMValueRef derived_ptr;
-
- if (indir_index)
- index = LLVMBuildAdd(ctx->builder, index, indir_index, "");
-
- value = llvm_extract_elem(ctx, src, chan);
- derived_ptr = LLVMBuildGEP(ctx->builder, ptr, &index, 1, "");
- LLVMBuildStore(ctx->builder,
- to_integer(&ctx->ac, value), derived_ptr);
- }
- break;
- }
default:
break;
}
@@ -3637,9 +3582,8 @@ static LLVMValueRef visit_var_atomic(struct nir_to_llvm_context *ctx,
const nir_intrinsic_instr *instr)
{
LLVMValueRef ptr, result;
- int idx = instr->variables[0]->var->data.driver_location;
LLVMValueRef src = get_src(ctx, instr->src[0]);
- ptr = get_shared_memory_ptr(ctx, idx, ctx->i32);
+ ptr = build_gep_for_deref(ctx, instr->variables[0]);
if (instr->intrinsic == nir_intrinsic_var_atomic_comp_swap) {
LLVMValueRef src1 = get_src(ctx, instr->src[1]);
@@ -5114,6 +5058,20 @@ setup_locals(struct nir_to_llvm_context *ctx,
}
}
+static void
+setup_shared(struct nir_to_llvm_context *ctx,
+ struct nir_shader *nir)
+{
+ nir_foreach_variable(variable, &nir->shared) {
+ LLVMValueRef shared =
+ LLVMAddGlobalInAddressSpace(
+ ctx->module, glsl_to_llvm_type(ctx, variable->type),
+ variable->name ? variable->name : "",
+ LOCAL_ADDR_SPACE);
+ _mesa_hash_table_insert(ctx->vars, variable, shared);
+ }
+}
+
static LLVMValueRef
emit_float_saturate(struct ac_llvm_context *ctx, LLVMValueRef v, float lo, float hi)
{
@@ -5907,15 +5865,6 @@ handle_shader_outputs_post(struct nir_to_llvm_context *ctx)
}
}
-static void
-handle_shared_compute_var(struct nir_to_llvm_context *ctx,
- struct nir_variable *variable, uint32_t *offset, int idx)
-{
- unsigned size = glsl_count_attribute_slots(variable->type, false);
- variable->data.driver_location = *offset;
- *offset += size;
-}
-
static void ac_llvm_finalize_module(struct nir_to_llvm_context * ctx)
{
LLVMPassManagerRef passmgr;
@@ -6072,29 +6021,7 @@ LLVMModuleRef ac_translate_nir_to_llvm(LLVMTargetMachineRef tm,
create_function(&ctx);
- if (nir->stage == MESA_SHADER_COMPUTE) {
- int num_shared = 0;
- nir_foreach_variable(variable, &nir->shared)
- num_shared++;
- if (num_shared) {
- int idx = 0;
- uint32_t shared_size = 0;
- LLVMValueRef var;
- LLVMTypeRef i8p = LLVMPointerType(ctx.i8, LOCAL_ADDR_SPACE);
- nir_foreach_variable(variable, &nir->shared) {
- handle_shared_compute_var(&ctx, variable, &shared_size, idx);
- idx++;
- }
-
- shared_size *= 16;
- var = LLVMAddGlobalInAddressSpace(ctx.module,
- LLVMArrayType(ctx.i8, shared_size),
- "compute_lds",
- LOCAL_ADDR_SPACE);
- LLVMSetAlignment(var, 4);
- ctx.shared_memory = LLVMBuildBitCast(ctx.builder, var, i8p, "");
- }
- } else if (nir->stage == MESA_SHADER_GEOMETRY) {
+ if (nir->stage == MESA_SHADER_GEOMETRY) {
ctx.gs_next_vertex = ac_build_alloca(&ctx, ctx.i32, "gs_next_vertex");
ctx.gs_max_out_vertices = nir->info.gs.vertices_out;
@@ -6127,6 +6054,9 @@ LLVMModuleRef ac_translate_nir_to_llvm(LLVMTargetMachineRef tm,
setup_locals(&ctx, func);
+ if (nir->stage == MESA_SHADER_COMPUTE)
+ setup_shared(&ctx, nir);
+
visit_cf_list(&ctx, &func->impl->body);
phi_post_pass(&ctx);
--
2.9.4
More information about the mesa-dev mailing list