[Mesa-dev] [PATCH 08/18] ac: move some helpers to ac_llvm_build.c

Tue Dec 12 09:50:52 UTC 2017

On 11.12.2017 03:43, Timothy Arceri wrote:
> We will call these from the radeonsi NIR backend.
> ---
>   src/amd/common/ac_llvm_build.c  | 24 +++++++++++++++++
>   src/amd/common/ac_llvm_build.h  |  8 ++++++
>   src/amd/common/ac_nir_to_llvm.c | 58 +++++++++++++----------------------------
>   3 files changed, 50 insertions(+), 40 deletions(-)
> 
> diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
> index b2bf1bf7b51..faa08b6301c 100644
> --- a/src/amd/common/ac_llvm_build.c
> +++ b/src/amd/common/ac_llvm_build.c
> @@ -91,20 +91,44 @@ ac_llvm_context_init(struct ac_llvm_context *ctx, LLVMContextRef context,
>   
>   	args[0] = LLVMConstReal(ctx->f32, 2.5);
>   	ctx->fpmath_md_2p5_ulp = LLVMMDNodeInContext(ctx->context, args, 1);
>   
>   	ctx->uniform_md_kind = LLVMGetMDKindIDInContext(ctx->context,
>   							"amdgpu.uniform", 14);
>   
>   	ctx->empty_md = LLVMMDNodeInContext(ctx->context, NULL, 0);
>   }
>   
> +int
> +ac_get_llvm_num_components(LLVMValueRef value)
> +{
> +	LLVMTypeRef type = LLVMTypeOf(value);
> +	unsigned num_components = LLVMGetTypeKind(type) == LLVMVectorTypeKind
> +	                              ? LLVMGetVectorSize(type)
> +	                              : 1;
> +	return num_components;
> +}
> +
> +LLVMValueRef
> +ac_llvm_extract_elem(struct ac_llvm_context *ac,
> +		     LLVMValueRef value,
> +		     int index)
> +{
> +	int count = ac_get_llvm_num_components(value);
> +
> +	if (count == 1)
> +		return value;

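I realize you're just moving code around here, but I think it'd be more 
logical if this were

    if (LLVMGetTypeKind(LLVMTypeOf(value)) != LLVMVectorTypeKind) {
       assert(index == 0);
       return value;
    }

i.e. test directly for the non-vector case, and assert that only element 0 
is ever requested from a scalar.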

Cheers,
Nicolai


> +
> +	return LLVMBuildExtractElement(ac->builder, value,
> +				       LLVMConstInt(ac->i32, index, false), "");
> +}
> +
>   unsigned
>   ac_get_type_size(LLVMTypeRef type)
>   {
>   	LLVMTypeKind kind = LLVMGetTypeKind(type);
>   
>   	switch (kind) {
>   	case LLVMIntegerTypeKind:
>   		return LLVMGetIntTypeWidth(type) / 8;
>   	case LLVMFloatTypeKind:
>   		return 4;
> diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
> index 655dc1dcc86..c14b0d9f019 100644
> --- a/src/amd/common/ac_llvm_build.h
> +++ b/src/amd/common/ac_llvm_build.h
> @@ -75,20 +75,28 @@ struct ac_llvm_context {
>   
>   	enum chip_class chip_class;
>   
>   	LLVMValueRef lds;
>   };
>   
>   void
>   ac_llvm_context_init(struct ac_llvm_context *ctx, LLVMContextRef context,
>   		     enum chip_class chip_class);
>   
> +int
> +ac_get_llvm_num_components(LLVMValueRef value);
> +
> +LLVMValueRef
> +ac_llvm_extract_elem(struct ac_llvm_context *ac,
> +		     LLVMValueRef value,
> +		     int index);
> +
>   unsigned ac_get_type_size(LLVMTypeRef type);
>   
>   LLVMTypeRef ac_to_integer_type(struct ac_llvm_context *ctx, LLVMTypeRef t);
>   LLVMValueRef ac_to_integer(struct ac_llvm_context *ctx, LLVMValueRef v);
>   LLVMTypeRef ac_to_float_type(struct ac_llvm_context *ctx, LLVMTypeRef t);
>   LLVMValueRef ac_to_float(struct ac_llvm_context *ctx, LLVMValueRef v);
>   
>   LLVMValueRef
>   ac_build_intrinsic(struct ac_llvm_context *ctx, const char *name,
>   		   LLVMTypeRef return_type, LLVMValueRef *params,
> diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
> index ad2c2336ee8..28edf819c58 100644
> --- a/src/amd/common/ac_nir_to_llvm.c
> +++ b/src/amd/common/ac_nir_to_llvm.c
> @@ -995,46 +995,24 @@ static void create_function(struct nir_to_llvm_context *ctx,
>   			set_userdata_location_shader(ctx, AC_UD_PS_SAMPLE_POS_OFFSET, &user_sgpr_idx, 1);
>   		}
>   		break;
>   	default:
>   		unreachable("Shader stage not implemented");
>   	}
>   
>   	ctx->shader_info->num_user_sgprs = user_sgpr_idx;
>   }
>   
> -static int get_llvm_num_components(LLVMValueRef value)
> -{
> -	LLVMTypeRef type = LLVMTypeOf(value);
> -	unsigned num_components = LLVMGetTypeKind(type) == LLVMVectorTypeKind
> -	                              ? LLVMGetVectorSize(type)
> -	                              : 1;
> -	return num_components;
> -}
> -
> -static LLVMValueRef llvm_extract_elem(struct ac_llvm_context *ac,
> -				      LLVMValueRef value,
> -				      int index)
> -{
> -	int count = get_llvm_num_components(value);
> -
> -	if (count == 1)
> -		return value;
> -
> -	return LLVMBuildExtractElement(ac->builder, value,
> -				       LLVMConstInt(ac->i32, index, false), "");
> -}
> -
>   static LLVMValueRef trim_vector(struct ac_llvm_context *ctx,
>                                   LLVMValueRef value, unsigned count)
>   {
> -	unsigned num_components = get_llvm_num_components(value);
> +	unsigned num_components = ac_get_llvm_num_components(value);
>   	if (count == num_components)
>   		return value;
>   
>   	LLVMValueRef masks[] = {
>   	    LLVMConstInt(ctx->i32, 0, false), LLVMConstInt(ctx->i32, 1, false),
>   	    LLVMConstInt(ctx->i32, 2, false), LLVMConstInt(ctx->i32, 3, false)};
>   
>   	if (count == 1)
>   		return LLVMBuildExtractElement(ctx->builder, value, masks[0],
>   		                               "");
> @@ -2309,21 +2287,21 @@ static void visit_store_ssbo(struct ac_nir_context *ctx,
>   						      ctx->ac.i32_0, "");
>   
>   			tmp = LLVMBuildExtractElement(ctx->ac.builder,
>   						      base_data, LLVMConstInt(ctx->ac.i32, start + 1, false), "");
>   			data = LLVMBuildInsertElement(ctx->ac.builder, data, tmp,
>   						      ctx->ac.i32_1, "");
>   			store_name = "llvm.amdgcn.buffer.store.v2f32";
>   
>   		} else {
>   			assert(count == 1);
> -			if (get_llvm_num_components(base_data) > 1)
> +			if (ac_get_llvm_num_components(base_data) > 1)
>   				data = LLVMBuildExtractElement(ctx->ac.builder, base_data,
>   							       LLVMConstInt(ctx->ac.i32, start, false), "");
>   			else
>   				data = base_data;
>   			store_name = "llvm.amdgcn.buffer.store.f32";
>   		}
>   
>   		offset = base_offset;
>   		if (start != 0) {
>   			offset = LLVMBuildAdd(ctx->ac.builder, offset, LLVMConstInt(ctx->ac.i32, start * 4, false), "");
> @@ -2336,23 +2314,23 @@ static void visit_store_ssbo(struct ac_nir_context *ctx,
>   }
>   
>   static LLVMValueRef visit_atomic_ssbo(struct ac_nir_context *ctx,
>                                         const nir_intrinsic_instr *instr)
>   {
>   	const char *name;
>   	LLVMValueRef params[6];
>   	int arg_count = 0;
>   
>   	if (instr->intrinsic == nir_intrinsic_ssbo_atomic_comp_swap) {
> -		params[arg_count++] = llvm_extract_elem(&ctx->ac, get_src(ctx, instr->src[3]), 0);
> +		params[arg_count++] = ac_llvm_extract_elem(&ctx->ac, get_src(ctx, instr->src[3]), 0);
>   	}
> -	params[arg_count++] = llvm_extract_elem(&ctx->ac, get_src(ctx, instr->src[2]), 0);
> +	params[arg_count++] = ac_llvm_extract_elem(&ctx->ac, get_src(ctx, instr->src[2]), 0);
>   	params[arg_count++] = ctx->abi->load_ssbo(ctx->abi,
>   						 get_src(ctx, instr->src[0]),
>   						 true);
>   	params[arg_count++] = LLVMConstInt(ctx->ac.i32, 0, false); /* vindex */
>   	params[arg_count++] = get_src(ctx, instr->src[1]);      /* voffset */
>   	params[arg_count++] = LLVMConstInt(ctx->ac.i1, 0, false);  /* slc */
>   
>   	switch (instr->intrinsic) {
>   	case nir_intrinsic_ssbo_atomic_add:
>   		name = "llvm.amdgcn.buffer.atomic.add";
> @@ -2816,21 +2794,21 @@ store_tcs_output(struct ac_shader_abi *abi,
>   
>   	bool is_tess_factor = false;
>   	if (location == VARYING_SLOT_TESS_LEVEL_INNER ||
>   	    location == VARYING_SLOT_TESS_LEVEL_OUTER)
>   		is_tess_factor = true;
>   
>   	unsigned base = is_compact ? const_index : 0;
>   	for (unsigned chan = 0; chan < 8; chan++) {
>   		if (!(writemask & (1 << chan)))
>   			continue;
> -		LLVMValueRef value = llvm_extract_elem(&ctx->ac, src, chan - component);
> +		LLVMValueRef value = ac_llvm_extract_elem(&ctx->ac, src, chan - component);
>   
>   		if (store_lds || is_tess_factor)
>   			ac_lds_store(&ctx->ac, dw_addr, value);
>   
>   		if (!is_tess_factor && writemask != 0xF)
>   			ac_build_buffer_store_dword(&ctx->ac, ctx->hs_ring_tess_offchip, value, 1,
>   						    buf_addr, ctx->oc_lds,
>   						    4 * (base + chan), 1, 0, true, false);
>   
>   		dw_addr = LLVMBuildAdd(ctx->builder, dw_addr,
> @@ -3112,21 +3090,21 @@ visit_store_var(struct ac_nir_context *ctx,
>   	int writemask = instr->const_index[0] << comp;
>   	LLVMValueRef indir_index;
>   	unsigned const_index;
>   	get_deref_offset(ctx, instr->variables[0], false,
>   		         NULL, NULL, &const_index, &indir_index);
>   
>   	if (get_elem_bits(&ctx->ac, LLVMTypeOf(src)) == 64) {
>   		int old_writemask = writemask;
>   
>   		src = LLVMBuildBitCast(ctx->ac.builder, src,
> -		                       LLVMVectorType(ctx->ac.f32, get_llvm_num_components(src) * 2),
> +		                       LLVMVectorType(ctx->ac.f32, ac_get_llvm_num_components(src) * 2),
>   		                       "");
>   
>   		writemask = 0;
>   		for (unsigned chan = 0; chan < 4; chan++) {
>   			if (old_writemask & (1 << chan))
>   				writemask |= 3u << (2 * chan);
>   		}
>   	}
>   
>   	switch (instr->variables[0]->var->data.mode) {
> @@ -3150,21 +3128,21 @@ visit_store_var(struct ac_nir_context *ctx,
>   						    const_index, location, driver_location,
>   						    src, comp, is_patch, is_compact, writemask);
>   			return;
>   		}
>   
>   		for (unsigned chan = 0; chan < 8; chan++) {
>   			int stride = 4;
>   			if (!(writemask & (1 << chan)))
>   				continue;
>   
> -			value = llvm_extract_elem(&ctx->ac, src, chan - comp);
> +			value = ac_llvm_extract_elem(&ctx->ac, src, chan - comp);
>   
>   			if (instr->variables[0]->var->data.compact)
>   				stride = 1;
>   			if (indir_index) {
>   				unsigned count = glsl_count_attribute_slots(
>   						instr->variables[0]->var->type, false);
>   				count -= chan / 4;
>   				LLVMValueRef tmp_vec = ac_build_gather_values_extended(
>   						&ctx->ac, ctx->outputs + idx + chan, count,
>   						stride, true, true);
> @@ -3179,21 +3157,21 @@ visit_store_var(struct ac_nir_context *ctx,
>   
>   				LLVMBuildStore(ctx->ac.builder, value, temp_ptr);
>   			}
>   		}
>   		break;
>   	case nir_var_local:
>   		for (unsigned chan = 0; chan < 8; chan++) {
>   			if (!(writemask & (1 << chan)))
>   				continue;
>   
> -			value = llvm_extract_elem(&ctx->ac, src, chan);
> +			value = ac_llvm_extract_elem(&ctx->ac, src, chan);
>   			if (indir_index) {
>   				unsigned count = glsl_count_attribute_slots(
>   					instr->variables[0]->var->type, false);
>   				count -= chan / 4;
>   				LLVMValueRef tmp_vec = ac_build_gather_values_extended(
>   					&ctx->ac, ctx->locals + idx + chan, count,
>   					4, true, true);
>   
>   				tmp_vec = LLVMBuildInsertElement(ctx->ac.builder, tmp_vec,
>   								 value, indir_index, "");
> @@ -3219,22 +3197,22 @@ visit_store_var(struct ac_nir_context *ctx,
>   			   ctx->ac.builder, val,
>   			   LLVMGetElementType(LLVMTypeOf(address)), "");
>   			LLVMBuildStore(ctx->ac.builder, val, address);
>   		} else {
>   			for (unsigned chan = 0; chan < 4; chan++) {
>   				if (!(writemask & (1 << chan)))
>   					continue;
>   				LLVMValueRef ptr =
>   					LLVMBuildStructGEP(ctx->ac.builder,
>   							   address, chan, "");
> -				LLVMValueRef src = llvm_extract_elem(&ctx->ac, val,
> -								     chan);
> +				LLVMValueRef src = ac_llvm_extract_elem(&ctx->ac, val,
> +									chan);
>   				src = LLVMBuildBitCast(
>   				   ctx->ac.builder, src,
>   				   LLVMGetElementType(LLVMTypeOf(ptr)), "");
>   				LLVMBuildStore(ctx->ac.builder, src, ptr);
>   			}
>   		}
>   		break;
>   	}
>   	default:
>   		break;
> @@ -3352,21 +3330,21 @@ static LLVMValueRef get_image_coords(struct ac_nir_context *ctx,
>   	if(instr->variables[0]->deref.child)
>   		type = instr->variables[0]->deref.child->type;
>   
>   	LLVMValueRef src0 = get_src(ctx, instr->src[0]);
>   	LLVMValueRef coords[4];
>   	LLVMValueRef masks[] = {
>   		LLVMConstInt(ctx->ac.i32, 0, false), LLVMConstInt(ctx->ac.i32, 1, false),
>   		LLVMConstInt(ctx->ac.i32, 2, false), LLVMConstInt(ctx->ac.i32, 3, false),
>   	};
>   	LLVMValueRef res;
> -	LLVMValueRef sample_index = llvm_extract_elem(&ctx->ac, get_src(ctx, instr->src[1]), 0);
> +	LLVMValueRef sample_index = ac_llvm_extract_elem(&ctx->ac, get_src(ctx, instr->src[1]), 0);
>   
>   	int count;
>   	enum glsl_sampler_dim dim = glsl_get_sampler_dim(type);
>   	bool is_array = glsl_sampler_type_is_array(type);
>   	bool add_frag_pos = (dim == GLSL_SAMPLER_DIM_SUBPASS ||
>   			     dim == GLSL_SAMPLER_DIM_SUBPASS_MS);
>   	bool is_ms = (dim == GLSL_SAMPLER_DIM_MS ||
>   		      dim == GLSL_SAMPLER_DIM_SUBPASS_MS);
>   	bool gfx9_1d = ctx->ac.chip_class >= GFX9 && dim == GLSL_SAMPLER_DIM_1D;
>   	count = image_type_to_components_count(dim, is_array);
> @@ -3399,21 +3377,21 @@ static LLVMValueRef get_image_coords(struct ac_nir_context *ctx,
>   	if (count == 1 && !gfx9_1d) {
>   		if (instr->src[0].ssa->num_components)
>   			res = LLVMBuildExtractElement(ctx->ac.builder, src0, masks[0], "");
>   		else
>   			res = src0;
>   	} else {
>   		int chan;
>   		if (is_ms)
>   			count--;
>   		for (chan = 0; chan < count; ++chan) {
> -			coords[chan] = llvm_extract_elem(&ctx->ac, src0, chan);
> +			coords[chan] = ac_llvm_extract_elem(&ctx->ac, src0, chan);
>   		}
>   		if (add_frag_pos) {
>   			for (chan = 0; chan < 2; ++chan)
>   				coords[chan] = LLVMBuildAdd(ctx->ac.builder, coords[chan], LLVMBuildFPToUI(ctx->ac.builder, ctx->abi->frag_pos[chan],
>   						ctx->ac.i32, ""), "");
>   			coords[2] = ac_to_integer(&ctx->ac, ctx->abi->inputs[radeon_llvm_reg_index_soa(VARYING_SLOT_LAYER, 0)]);
>   			count++;
>   		}
>   
>   		if (gfx9_1d) {
> @@ -4590,50 +4568,50 @@ static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr)
>   		samples = LLVMBuildShl(ctx->ac.builder, ctx->ac.i32_1,
>   				       samples, "");
>   		samples = LLVMBuildSelect(ctx->ac.builder, is_msaa, samples,
>   					  ctx->ac.i32_1, "");
>   		result = samples;
>   		goto write_result;
>   	}
>   
>   	if (coord)
>   		for (chan = 0; chan < instr->coord_components; chan++)
> -			coords[chan] = llvm_extract_elem(&ctx->ac, coord, chan);
> +			coords[chan] = ac_llvm_extract_elem(&ctx->ac, coord, chan);
>   
>   	if (offsets && instr->op != nir_texop_txf) {
>   		LLVMValueRef offset[3], pack;
>   		for (chan = 0; chan < 3; ++chan)
>   			offset[chan] = ctx->ac.i32_0;
>   
>   		args.offset = true;
> -		for (chan = 0; chan < get_llvm_num_components(offsets); chan++) {
> -			offset[chan] = llvm_extract_elem(&ctx->ac, offsets, chan);
> +		for (chan = 0; chan < ac_get_llvm_num_components(offsets); chan++) {
> +			offset[chan] = ac_llvm_extract_elem(&ctx->ac, offsets, chan);
>   			offset[chan] = LLVMBuildAnd(ctx->ac.builder, offset[chan],
>   						    LLVMConstInt(ctx->ac.i32, 0x3f, false), "");
>   			if (chan)
>   				offset[chan] = LLVMBuildShl(ctx->ac.builder, offset[chan],
>   							    LLVMConstInt(ctx->ac.i32, chan * 8, false), "");
>   		}
>   		pack = LLVMBuildOr(ctx->ac.builder, offset[0], offset[1], "");
>   		pack = LLVMBuildOr(ctx->ac.builder, pack, offset[2], "");
>   		address[count++] = pack;
>   
>   	}
>   	/* pack LOD bias value */
>   	if (instr->op == nir_texop_txb && bias) {
>   		address[count++] = bias;
>   	}
>   
>   	/* Pack depth comparison value */
>   	if (instr->is_shadow && comparator) {
>   		LLVMValueRef z = ac_to_float(&ctx->ac,
> -		                             llvm_extract_elem(&ctx->ac, comparator, 0));
> +		                             ac_llvm_extract_elem(&ctx->ac, comparator, 0));
>   
>   		/* TC-compatible HTILE on radeonsi promotes Z16 and Z24 to Z32_FLOAT,
>   		 * so the depth comparison value isn't clamped for Z16 and
>   		 * Z24 anymore. Do it manually here.
>   		 *
>   		 * It's unnecessary if the original texture format was
>   		 * Z32_FLOAT, but we don't know that here.
>   		 */
>   		if (ctx->ac.chip_class == VI && ctx->abi->clamp_shadow_reference)
>   			z = ac_build_clamp(&ctx->ac, z);
> @@ -4663,22 +4641,22 @@ static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr)
>   				num_dest_deriv_channels = 2;
>   				num_deriv_comp = 2;
>   			} else {
>   				num_dest_deriv_channels = 1;
>   				num_deriv_comp = 1;
>   			}
>   			break;
>   		}
>   
>   		for (unsigned i = 0; i < num_src_deriv_channels; i++) {
> -			derivs[i] = ac_to_float(&ctx->ac, llvm_extract_elem(&ctx->ac, ddx, i));
> -			derivs[num_dest_deriv_channels + i] = ac_to_float(&ctx->ac, llvm_extract_elem(&ctx->ac, ddy, i));
> +			derivs[i] = ac_to_float(&ctx->ac, ac_llvm_extract_elem(&ctx->ac, ddx, i));
> +			derivs[num_dest_deriv_channels + i] = ac_to_float(&ctx->ac, ac_llvm_extract_elem(&ctx->ac, ddy, i));
>   		}
>   		for (unsigned i = num_src_deriv_channels; i < num_dest_deriv_channels; i++) {
>   			derivs[i] = ctx->ac.f32_0;
>   			derivs[num_dest_deriv_channels + i] = ctx->ac.f32_0;
>   		}
>   	}
>   
>   	if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE && coord) {
>   		for (chan = 0; chan < instr->coord_components; chan++)
>   			coords[chan] = ac_to_float(&ctx->ac, coords[chan]);
> 


-- 
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.