[Mesa-dev] [PATCH] ac/nir: rewrite shared variable handling (v2)

Nicolai Hähnle nhaehnle at gmail.com
Sun Jul 16 18:39:07 UTC 2017


On 14.07.2017 00:39, Connor Abbott wrote:
> From: Connor Abbott <cwabbott0 at gmail.com>
> 
> Translate the NIR variables directly to LLVM instead of lowering to a
> TGSI-style giant array of vec4s and then back to a variable. This
> should fix indirect dereferences, pack shared variables more tightly,
> and make LLVM's alias analysis more precise. It should also fix an
> upcoming Feral title, whose compute shader was failing to compile
> because the extra padding made us run out of LDS space.
> 
> v2: Combine the previous two patches into one, and only use this for
> shared variables until LLVM becomes smarter.
> 
> Cc: Alex Smith <asmith at feralinteractive.com>
> Reviewed-by: Bas Nieuwenhuizen <bas at basnieuwenhuizen.nl>

Reviewed-by: Nicolai Hähnle <nicolai.haehnle at amd.com>


> ---
> Alex: I made this by squashing together two patches from my series, so it
> should still work, but I'd still like to get your Tested-by before I push to
> make sure I didn't screw something up.
> 
>   src/amd/common/ac_nir_to_llvm.c | 245 ++++++++++++++++++++++++++--------------
>   1 file changed, 158 insertions(+), 87 deletions(-)
> 
> diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
> index 9223310..9a69066 100644
> --- a/src/amd/common/ac_nir_to_llvm.c
> +++ b/src/amd/common/ac_nir_to_llvm.c
> @@ -65,6 +65,7 @@ struct nir_to_llvm_context {
>   
>   	struct hash_table *defs;
>   	struct hash_table *phis;
> +	struct hash_table *vars;
>   
>   	LLVMValueRef descriptor_sets[AC_UD_MAX_SETS];
>   	LLVMValueRef ring_offsets;
> @@ -154,7 +155,6 @@ struct nir_to_llvm_context {
>   	LLVMValueRef inputs[RADEON_LLVM_MAX_INPUTS * 4];
>   	LLVMValueRef outputs[RADEON_LLVM_MAX_OUTPUTS * 4];
>   
> -	LLVMValueRef shared_memory;
>   	uint64_t input_mask;
>   	uint64_t output_mask;
>   	int num_locals;
> @@ -387,23 +387,6 @@ static LLVMTypeRef const_array(LLVMTypeRef elem_type, int num_elements)
>   	                       CONST_ADDR_SPACE);
>   }
>   
> -static LLVMValueRef get_shared_memory_ptr(struct nir_to_llvm_context *ctx,
> -					  int idx,
> -					  LLVMTypeRef type)
> -{
> -	LLVMValueRef offset;
> -	LLVMValueRef ptr;
> -	int addr_space;
> -
> -	offset = LLVMConstInt(ctx->i32, idx * 16, false);
> -
> -	ptr = ctx->shared_memory;
> -	ptr = LLVMBuildGEP(ctx->builder, ptr, &offset, 1, "");
> -	addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
> -	ptr = LLVMBuildBitCast(ctx->builder, ptr, LLVMPointerType(type, addr_space), "");
> -	return ptr;
> -}
> -
>   static LLVMTypeRef to_integer_type_scalar(struct ac_llvm_context *ctx, LLVMTypeRef t)
>   {
>   	if (t == ctx->f16 || t == ctx->i16)
> @@ -2905,6 +2888,45 @@ load_gs_input(struct nir_to_llvm_context *ctx,
>   	return result;
>   }
>   
> +static LLVMValueRef
> +build_gep_for_deref(struct nir_to_llvm_context *ctx,
> +		    nir_deref_var *deref)
> +{
> +	struct hash_entry *entry = _mesa_hash_table_search(ctx->vars, deref->var);
> +	assert(entry->data);
> +	LLVMValueRef val = entry->data;
> +	nir_deref *tail = deref->deref.child;
> +	while (tail != NULL) {
> +		LLVMValueRef offset;
> +		switch (tail->deref_type) {
> +		case nir_deref_type_array: {
> +			nir_deref_array *array = nir_deref_as_array(tail);
> +			offset = LLVMConstInt(ctx->i32, array->base_offset, 0);
> +			if (array->deref_array_type ==
> +			    nir_deref_array_type_indirect) {
> +				offset = LLVMBuildAdd(ctx->builder, offset,
> +						      get_src(ctx,
> +							      array->indirect),
> +						      "");
> +			}
> +			break;
> +		}
> +		case nir_deref_type_struct: {
> +			nir_deref_struct *deref_struct =
> +				nir_deref_as_struct(tail);
> +			offset = LLVMConstInt(ctx->i32,
> +					      deref_struct->index, 0);
> +			break;
> +		}
> +		default:
> +			unreachable("bad deref type");
> +		}
> +		val = ac_build_gep0(&ctx->ac, val, offset);
> +		tail = tail->child;
> +	}
> +	return val;
> +}
> +
>   static LLVMValueRef visit_load_var(struct nir_to_llvm_context *ctx,
>   				   nir_intrinsic_instr *instr)
>   {
> @@ -2966,6 +2988,14 @@ static LLVMValueRef visit_load_var(struct nir_to_llvm_context *ctx,
>   			}
>   		}
>   		break;
> +	case nir_var_shared: {
> +		LLVMValueRef address = build_gep_for_deref(ctx,
> +							   instr->variables[0]);
> +		LLVMValueRef val = LLVMBuildLoad(ctx->builder, address, "");
> +		return LLVMBuildBitCast(ctx->builder, val,
> +					get_def_type(ctx, &instr->dest.ssa),
> +					"");
> +	}
>   	case nir_var_shader_out:
>   		if (ctx->stage == MESA_SHADER_TESS_CTRL)
>   			return load_tcs_output(ctx, instr);
> @@ -2988,23 +3018,6 @@ static LLVMValueRef visit_load_var(struct nir_to_llvm_context *ctx,
>   			}
>   		}
>   		break;
> -	case nir_var_shared: {
> -		LLVMValueRef ptr = get_shared_memory_ptr(ctx, idx, ctx->i32);
> -		LLVMValueRef derived_ptr;
> -
> -		if (indir_index)
> -			indir_index = LLVMBuildMul(ctx->builder, indir_index, LLVMConstInt(ctx->i32, 4, false), "");
> -
> -		for (unsigned chan = 0; chan < ve; chan++) {
> -			LLVMValueRef index = LLVMConstInt(ctx->i32, chan, false);
> -			if (indir_index)
> -				index = LLVMBuildAdd(ctx->builder, index, indir_index, "");
> -			derived_ptr = LLVMBuildGEP(ctx->builder, ptr, &index, 1, "");
> -
> -			values[chan] = LLVMBuildLoad(ctx->builder, derived_ptr, "");
> -		}
> -		break;
> -	}
>   	default:
>   		unreachable("unhandle variable mode");
>   	}
> @@ -3105,24 +3118,32 @@ visit_store_var(struct nir_to_llvm_context *ctx,
>   		}
>   		break;
>   	case nir_var_shared: {
> -		LLVMValueRef ptr = get_shared_memory_ptr(ctx, idx, ctx->i32);
> -
> -		if (indir_index)
> -			indir_index = LLVMBuildMul(ctx->builder, indir_index, LLVMConstInt(ctx->i32, 4, false), "");
> -
> -		for (unsigned chan = 0; chan < 8; chan++) {
> -			if (!(writemask & (1 << chan)))
> -				continue;
> -			LLVMValueRef index = LLVMConstInt(ctx->i32, chan, false);
> -			LLVMValueRef derived_ptr;
> -
> -			if (indir_index)
> -				index = LLVMBuildAdd(ctx->builder, index, indir_index, "");
> -
> -			value = llvm_extract_elem(ctx, src, chan);
> -			derived_ptr = LLVMBuildGEP(ctx->builder, ptr, &index, 1, "");
> -			LLVMBuildStore(ctx->builder,
> -			               to_integer(&ctx->ac, value), derived_ptr);
> +		int writemask = instr->const_index[0];
> +		LLVMValueRef address = build_gep_for_deref(ctx,
> +							   instr->variables[0]);
> +		LLVMValueRef val = get_src(ctx, instr->src[0]);
> +		unsigned components =
> +			glsl_get_vector_elements(
> +			   nir_deref_tail(&instr->variables[0]->deref)->type);
> +		if (writemask == (1 << components) - 1) {
> +			val = LLVMBuildBitCast(
> +			   ctx->builder, val,
> +			   LLVMGetElementType(LLVMTypeOf(address)), "");
> +			LLVMBuildStore(ctx->builder, val, address);
> +		} else {
> +			for (unsigned chan = 0; chan < 4; chan++) {
> +				if (!(writemask & (1 << chan)))
> +					continue;
> +				LLVMValueRef ptr =
> +					LLVMBuildStructGEP(ctx->builder,
> +							   address, chan, "");
> +				LLVMValueRef src = llvm_extract_elem(ctx, val,
> +								     chan);
> +				src = LLVMBuildBitCast(
> +				   ctx->builder, src,
> +				   LLVMGetElementType(LLVMTypeOf(ptr)), "");
> +				LLVMBuildStore(ctx->builder, src, ptr);
> +			}
>   		}
>   		break;
>   	}
> @@ -3604,9 +3625,8 @@ static LLVMValueRef visit_var_atomic(struct nir_to_llvm_context *ctx,
>   				     const nir_intrinsic_instr *instr)
>   {
>   	LLVMValueRef ptr, result;
> -	int idx = instr->variables[0]->var->data.driver_location;
>   	LLVMValueRef src = get_src(ctx, instr->src[0]);
> -	ptr = get_shared_memory_ptr(ctx, idx, ctx->i32);
> +	ptr = build_gep_for_deref(ctx, instr->variables[0]);
>   
>   	if (instr->intrinsic == nir_intrinsic_var_atomic_comp_swap) {
>   		LLVMValueRef src1 = get_src(ctx, instr->src[1]);
> @@ -5005,6 +5025,68 @@ handle_shader_output_decl(struct nir_to_llvm_context *ctx,
>   	ctx->output_mask |= mask_attribs;
>   }
>   
> +static LLVMTypeRef
> +glsl_base_to_llvm_type(struct nir_to_llvm_context *ctx,
> +		       enum glsl_base_type type)
> +{
> +	switch (type) {
> +	case GLSL_TYPE_INT:
> +	case GLSL_TYPE_UINT:
> +	case GLSL_TYPE_BOOL:
> +	case GLSL_TYPE_SUBROUTINE:
> +		return ctx->i32;
> +	case GLSL_TYPE_FLOAT: /* TODO handle mediump */
> +		return ctx->f32;
> +	case GLSL_TYPE_INT64:
> +	case GLSL_TYPE_UINT64:
> +		return ctx->i64;
> +	case GLSL_TYPE_DOUBLE:
> +		return ctx->f64;
> +	default:
> +		unreachable("unknown GLSL type");
> +	}
> +}
> +
> +static LLVMTypeRef
> +glsl_to_llvm_type(struct nir_to_llvm_context *ctx,
> +		  const struct glsl_type *type)
> +{
> +	if (glsl_type_is_scalar(type)) {
> +		return glsl_base_to_llvm_type(ctx, glsl_get_base_type(type));
> +	}
> +
> +	if (glsl_type_is_vector(type)) {
> +		return LLVMVectorType(
> +		   glsl_base_to_llvm_type(ctx, glsl_get_base_type(type)),
> +		   glsl_get_vector_elements(type));
> +	}
> +
> +	if (glsl_type_is_matrix(type)) {
> +		return LLVMArrayType(
> +		   glsl_to_llvm_type(ctx, glsl_get_column_type(type)),
> +		   glsl_get_matrix_columns(type));
> +	}
> +
> +	if (glsl_type_is_array(type)) {
> +		return LLVMArrayType(
> +		   glsl_to_llvm_type(ctx, glsl_get_array_element(type)),
> +		   glsl_get_length(type));
> +	}
> +
> +	assert(glsl_type_is_struct(type));
> +
> +	LLVMTypeRef member_types[glsl_get_length(type)];
> +
> +	for (unsigned i = 0; i < glsl_get_length(type); i++) {
> +		member_types[i] =
> +			glsl_to_llvm_type(ctx,
> +					  glsl_get_struct_field(type, i));
> +	}
> +
> +	return LLVMStructTypeInContext(ctx->context, member_types,
> +				       glsl_get_length(type), false);
> +}
> +
>   static void
>   setup_locals(struct nir_to_llvm_context *ctx,
>   	     struct nir_function *func)
> @@ -5028,6 +5110,20 @@ setup_locals(struct nir_to_llvm_context *ctx,
>   	}
>   }
>   
> +static void
> +setup_shared(struct nir_to_llvm_context *ctx,
> +	     struct nir_shader *nir)
> +{
> +	nir_foreach_variable(variable, &nir->shared) {
> +		LLVMValueRef shared =
> +			LLVMAddGlobalInAddressSpace(
> +			   ctx->module, glsl_to_llvm_type(ctx, variable->type),
> +			   variable->name ? variable->name : "",
> +			   LOCAL_ADDR_SPACE);
> +		_mesa_hash_table_insert(ctx->vars, variable, shared);
> +	}
> +}
> +
>   static LLVMValueRef
>   emit_float_saturate(struct ac_llvm_context *ctx, LLVMValueRef v, float lo, float hi)
>   {
> @@ -5820,15 +5916,6 @@ handle_shader_outputs_post(struct nir_to_llvm_context *ctx)
>   	}
>   }
>   
> -static void
> -handle_shared_compute_var(struct nir_to_llvm_context *ctx,
> -			  struct nir_variable *variable, uint32_t *offset, int idx)
> -{
> -	unsigned size = glsl_count_attribute_slots(variable->type, false);
> -	variable->data.driver_location = *offset;
> -	*offset += size;
> -}
> -
>   static void ac_llvm_finalize_module(struct nir_to_llvm_context * ctx)
>   {
>   	LLVMPassManagerRef passmgr;
> @@ -5985,29 +6072,7 @@ LLVMModuleRef ac_translate_nir_to_llvm(LLVMTargetMachineRef tm,
>   
>   	create_function(&ctx);
>   
> -	if (nir->stage == MESA_SHADER_COMPUTE) {
> -		int num_shared = 0;
> -		nir_foreach_variable(variable, &nir->shared)
> -			num_shared++;
> -		if (num_shared) {
> -			int idx = 0;
> -			uint32_t shared_size = 0;
> -			LLVMValueRef var;
> -			LLVMTypeRef i8p = LLVMPointerType(ctx.i8, LOCAL_ADDR_SPACE);
> -			nir_foreach_variable(variable, &nir->shared) {
> -				handle_shared_compute_var(&ctx, variable, &shared_size, idx);
> -				idx++;
> -			}
> -
> -			shared_size *= 16;
> -			var = LLVMAddGlobalInAddressSpace(ctx.module,
> -							  LLVMArrayType(ctx.i8, shared_size),
> -							  "compute_lds",
> -							  LOCAL_ADDR_SPACE);
> -			LLVMSetAlignment(var, 4);
> -			ctx.shared_memory = LLVMBuildBitCast(ctx.builder, var, i8p, "");
> -		}
> -	} else if (nir->stage == MESA_SHADER_GEOMETRY) {
> +	if (nir->stage == MESA_SHADER_GEOMETRY) {
>   		ctx.gs_next_vertex = ac_build_alloca(&ctx, ctx.i32, "gs_next_vertex");
>   
>   		ctx.gs_max_out_vertices = nir->info.gs.vertices_out;
> @@ -6033,11 +6098,16 @@ LLVMModuleRef ac_translate_nir_to_llvm(LLVMTargetMachineRef tm,
>   	                                   _mesa_key_pointer_equal);
>   	ctx.phis = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
>   	                                   _mesa_key_pointer_equal);
> +	ctx.vars = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
> +	                                     _mesa_key_pointer_equal);
>   
>   	func = (struct nir_function *)exec_list_get_head(&nir->functions);
>   
>   	setup_locals(&ctx, func);
>   
> +	if (nir->stage == MESA_SHADER_COMPUTE)
> +		setup_shared(&ctx, nir);
> +
>   	visit_cf_list(&ctx, &func->impl->body);
>   	phi_post_pass(&ctx);
>   
> @@ -6050,6 +6120,7 @@ LLVMModuleRef ac_translate_nir_to_llvm(LLVMTargetMachineRef tm,
>   	free(ctx.locals);
>   	ralloc_free(ctx.defs);
>   	ralloc_free(ctx.phis);
> +	ralloc_free(ctx.vars);
>   
>   	if (nir->stage == MESA_SHADER_GEOMETRY) {
>   		unsigned addclip = ctx.num_output_clips + ctx.num_output_culls > 4;
> 


-- 
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.


More information about the mesa-dev mailing list