[Mesa-dev] [PATCH v2 07/20] ac: move some helpers to ac_llvm_build.c

Fri Dec 15 04:34:02 UTC 2017

This one does not apply any longer after Samuel's commit
amd/common: add ac_build_waitcnt()
#225b19880204024a805cc54b1001d09ef3b58054

For your motivation:
I've tested V1 and V2 of the whole series (before the latest master 
commits) and could run _all_ my 'normal' stuff.

Even UH runs with GREAT tess speed without any hiccups.

GREAT stuff!

V1 + V2 have my Tested-by (tb) even before Nicolai formulated his comments.

BTW There are many commits waiting...;-)

Cheers,
Dieter

Am 13.12.2017 08:52, schrieb Timothy Arceri:
> We will call these from the radeonsi NIR backend.
> 
> Reviewed-by: Nicolai Hähnle <nicolai.haehnle at amd.com>
> ---
>  src/amd/common/ac_llvm_build.c  | 24 +++++++++++++++++
>  src/amd/common/ac_llvm_build.h  |  8 ++++++
>  src/amd/common/ac_nir_to_llvm.c | 58 
> +++++++++++++----------------------------
>  3 files changed, 50 insertions(+), 40 deletions(-)
> 
> diff --git a/src/amd/common/ac_llvm_build.c 
> b/src/amd/common/ac_llvm_build.c
> index b2bf1bf7b51..faa08b6301c 100644
> --- a/src/amd/common/ac_llvm_build.c
> +++ b/src/amd/common/ac_llvm_build.c
> @@ -91,20 +91,44 @@ ac_llvm_context_init(struct ac_llvm_context *ctx,
> LLVMContextRef context,
> 
>  	args[0] = LLVMConstReal(ctx->f32, 2.5);
>  	ctx->fpmath_md_2p5_ulp = LLVMMDNodeInContext(ctx->context, args, 1);
> 
>  	ctx->uniform_md_kind = LLVMGetMDKindIDInContext(ctx->context,
>  							"amdgpu.uniform", 14);
> 
>  	ctx->empty_md = LLVMMDNodeInContext(ctx->context, NULL, 0);
>  }
> 
> +int
> +ac_get_llvm_num_components(LLVMValueRef value)
> +{
> +	LLVMTypeRef type = LLVMTypeOf(value);
> +	unsigned num_components = LLVMGetTypeKind(type) == LLVMVectorTypeKind
> +	                              ? LLVMGetVectorSize(type)
> +	                              : 1;
> +	return num_components;
> +}
> +
> +LLVMValueRef
> +ac_llvm_extract_elem(struct ac_llvm_context *ac,
> +		     LLVMValueRef value,
> +		     int index)
> +{
> +	int count = ac_get_llvm_num_components(value);
> +
> +	if (count == 1)
> +		return value;
> +
> +	return LLVMBuildExtractElement(ac->builder, value,
> +				       LLVMConstInt(ac->i32, index, false), "");
> +}
> +
>  unsigned
>  ac_get_type_size(LLVMTypeRef type)
>  {
>  	LLVMTypeKind kind = LLVMGetTypeKind(type);
> 
>  	switch (kind) {
>  	case LLVMIntegerTypeKind:
>  		return LLVMGetIntTypeWidth(type) / 8;
>  	case LLVMFloatTypeKind:
>  		return 4;
> diff --git a/src/amd/common/ac_llvm_build.h 
> b/src/amd/common/ac_llvm_build.h
> index 655dc1dcc86..c14b0d9f019 100644
> --- a/src/amd/common/ac_llvm_build.h
> +++ b/src/amd/common/ac_llvm_build.h
> @@ -75,20 +75,28 @@ struct ac_llvm_context {
> 
>  	enum chip_class chip_class;
> 
>  	LLVMValueRef lds;
>  };
> 
>  void
>  ac_llvm_context_init(struct ac_llvm_context *ctx, LLVMContextRef 
> context,
>  		     enum chip_class chip_class);
> 
> +int
> +ac_get_llvm_num_components(LLVMValueRef value);
> +
> +LLVMValueRef
> +ac_llvm_extract_elem(struct ac_llvm_context *ac,
> +		     LLVMValueRef value,
> +		     int index);
> +
>  unsigned ac_get_type_size(LLVMTypeRef type);
> 
>  LLVMTypeRef ac_to_integer_type(struct ac_llvm_context *ctx, 
> LLVMTypeRef t);
>  LLVMValueRef ac_to_integer(struct ac_llvm_context *ctx, LLVMValueRef 
> v);
>  LLVMTypeRef ac_to_float_type(struct ac_llvm_context *ctx, LLVMTypeRef 
> t);
>  LLVMValueRef ac_to_float(struct ac_llvm_context *ctx, LLVMValueRef v);
> 
>  LLVMValueRef
>  ac_build_intrinsic(struct ac_llvm_context *ctx, const char *name,
>  		   LLVMTypeRef return_type, LLVMValueRef *params,
> diff --git a/src/amd/common/ac_nir_to_llvm.c 
> b/src/amd/common/ac_nir_to_llvm.c
> index 6f84604d54a..6060df75314 100644
> --- a/src/amd/common/ac_nir_to_llvm.c
> +++ b/src/amd/common/ac_nir_to_llvm.c
> @@ -995,46 +995,24 @@ static void create_function(struct
> nir_to_llvm_context *ctx,
>  			set_userdata_location_shader(ctx, AC_UD_PS_SAMPLE_POS_OFFSET,
> &user_sgpr_idx, 1);
>  		}
>  		break;
>  	default:
>  		unreachable("Shader stage not implemented");
>  	}
> 
>  	ctx->shader_info->num_user_sgprs = user_sgpr_idx;
>  }
> 
> -static int get_llvm_num_components(LLVMValueRef value)
> -{
> -	LLVMTypeRef type = LLVMTypeOf(value);
> -	unsigned num_components = LLVMGetTypeKind(type) == LLVMVectorTypeKind
> -	                              ? LLVMGetVectorSize(type)
> -	                              : 1;
> -	return num_components;
> -}
> -
> -static LLVMValueRef llvm_extract_elem(struct ac_llvm_context *ac,
> -				      LLVMValueRef value,
> -				      int index)
> -{
> -	int count = get_llvm_num_components(value);
> -
> -	if (count == 1)
> -		return value;
> -
> -	return LLVMBuildExtractElement(ac->builder, value,
> -				       LLVMConstInt(ac->i32, index, false), "");
> -}
> -
>  static LLVMValueRef trim_vector(struct ac_llvm_context *ctx,
>                                  LLVMValueRef value, unsigned count)
>  {
> -	unsigned num_components = get_llvm_num_components(value);
> +	unsigned num_components = ac_get_llvm_num_components(value);
>  	if (count == num_components)
>  		return value;
> 
>  	LLVMValueRef masks[] = {
>  	    LLVMConstInt(ctx->i32, 0, false), LLVMConstInt(ctx->i32, 1, 
> false),
>  	    LLVMConstInt(ctx->i32, 2, false), LLVMConstInt(ctx->i32, 3, 
> false)};
> 
>  	if (count == 1)
>  		return LLVMBuildExtractElement(ctx->builder, value, masks[0],
>  		                               "");
> @@ -2321,21 +2299,21 @@ static void visit_store_ssbo(struct 
> ac_nir_context *ctx,
>  						      ctx->ac.i32_0, "");
> 
>  			tmp = LLVMBuildExtractElement(ctx->ac.builder,
>  						      base_data, LLVMConstInt(ctx->ac.i32, start + 1, false), 
> "");
>  			data = LLVMBuildInsertElement(ctx->ac.builder, data, tmp,
>  						      ctx->ac.i32_1, "");
>  			store_name = "llvm.amdgcn.buffer.store.v2f32";
> 
>  		} else {
>  			assert(count == 1);
> -			if (get_llvm_num_components(base_data) > 1)
> +			if (ac_get_llvm_num_components(base_data) > 1)
>  				data = LLVMBuildExtractElement(ctx->ac.builder, base_data,
>  							       LLVMConstInt(ctx->ac.i32, start, false), "");
>  			else
>  				data = base_data;
>  			store_name = "llvm.amdgcn.buffer.store.f32";
>  		}
> 
>  		offset = base_offset;
>  		if (start != 0) {
>  			offset = LLVMBuildAdd(ctx->ac.builder, offset,
> LLVMConstInt(ctx->ac.i32, start * 4, false), "");
> @@ -2348,23 +2326,23 @@ static void visit_store_ssbo(struct 
> ac_nir_context *ctx,
>  }
> 
>  static LLVMValueRef visit_atomic_ssbo(struct ac_nir_context *ctx,
>                                        const nir_intrinsic_instr 
> *instr)
>  {
>  	const char *name;
>  	LLVMValueRef params[6];
>  	int arg_count = 0;
> 
>  	if (instr->intrinsic == nir_intrinsic_ssbo_atomic_comp_swap) {
> -		params[arg_count++] = llvm_extract_elem(&ctx->ac, get_src(ctx,
> instr->src[3]), 0);
> +		params[arg_count++] = ac_llvm_extract_elem(&ctx->ac, get_src(ctx,
> instr->src[3]), 0);
>  	}
> -	params[arg_count++] = llvm_extract_elem(&ctx->ac, get_src(ctx,
> instr->src[2]), 0);
> +	params[arg_count++] = ac_llvm_extract_elem(&ctx->ac, get_src(ctx,
> instr->src[2]), 0);
>  	params[arg_count++] = ctx->abi->load_ssbo(ctx->abi,
>  						 get_src(ctx, instr->src[0]),
>  						 true);
>  	params[arg_count++] = LLVMConstInt(ctx->ac.i32, 0, false); /* vindex 
> */
>  	params[arg_count++] = get_src(ctx, instr->src[1]);      /* voffset */
>  	params[arg_count++] = LLVMConstInt(ctx->ac.i1, 0, false);  /* slc */
> 
>  	switch (instr->intrinsic) {
>  	case nir_intrinsic_ssbo_atomic_add:
>  		name = "llvm.amdgcn.buffer.atomic.add";
> @@ -2827,21 +2805,21 @@ store_tcs_output(struct ac_shader_abi *abi,
> 
>  	bool is_tess_factor = false;
>  	if (location == VARYING_SLOT_TESS_LEVEL_INNER ||
>  	    location == VARYING_SLOT_TESS_LEVEL_OUTER)
>  		is_tess_factor = true;
> 
>  	unsigned base = is_compact ? const_index : 0;
>  	for (unsigned chan = 0; chan < 8; chan++) {
>  		if (!(writemask & (1 << chan)))
>  			continue;
> -		LLVMValueRef value = llvm_extract_elem(&ctx->ac, src, chan - 
> component);
> +		LLVMValueRef value = ac_llvm_extract_elem(&ctx->ac, src, chan - 
> component);
> 
>  		if (store_lds || is_tess_factor)
>  			ac_lds_store(&ctx->ac, dw_addr, value);
> 
>  		if (!is_tess_factor && writemask != 0xF)
>  			ac_build_buffer_store_dword(&ctx->ac, ctx->hs_ring_tess_offchip, 
> value, 1,
>  						    buf_addr, ctx->oc_lds,
>  						    4 * (base + chan), 1, 0, true, false);
> 
>  		dw_addr = LLVMBuildAdd(ctx->builder, dw_addr,
> @@ -3119,21 +3097,21 @@ visit_store_var(struct ac_nir_context *ctx,
>  	int writemask = instr->const_index[0] << comp;
>  	LLVMValueRef indir_index;
>  	unsigned const_index;
>  	get_deref_offset(ctx, instr->variables[0], false,
>  		         NULL, NULL, &const_index, &indir_index);
> 
>  	if (get_elem_bits(&ctx->ac, LLVMTypeOf(src)) == 64) {
>  		int old_writemask = writemask;
> 
>  		src = LLVMBuildBitCast(ctx->ac.builder, src,
> -		                       LLVMVectorType(ctx->ac.f32,
> get_llvm_num_components(src) * 2),
> +		                       LLVMVectorType(ctx->ac.f32,
> ac_get_llvm_num_components(src) * 2),
>  		                       "");
> 
>  		writemask = 0;
>  		for (unsigned chan = 0; chan < 4; chan++) {
>  			if (old_writemask & (1 << chan))
>  				writemask |= 3u << (2 * chan);
>  		}
>  	}
> 
>  	switch (instr->variables[0]->var->data.mode) {
> @@ -3157,21 +3135,21 @@ visit_store_var(struct ac_nir_context *ctx,
>  						    const_index, location, driver_location,
>  						    src, comp, is_patch, is_compact, writemask);
>  			return;
>  		}
> 
>  		for (unsigned chan = 0; chan < 8; chan++) {
>  			int stride = 4;
>  			if (!(writemask & (1 << chan)))
>  				continue;
> 
> -			value = llvm_extract_elem(&ctx->ac, src, chan - comp);
> +			value = ac_llvm_extract_elem(&ctx->ac, src, chan - comp);
> 
>  			if (instr->variables[0]->var->data.compact)
>  				stride = 1;
>  			if (indir_index) {
>  				unsigned count = glsl_count_attribute_slots(
>  						instr->variables[0]->var->type, false);
>  				count -= chan / 4;
>  				LLVMValueRef tmp_vec = ac_build_gather_values_extended(
>  						&ctx->ac, ctx->outputs + idx + chan, count,
>  						stride, true, true);
> @@ -3186,21 +3164,21 @@ visit_store_var(struct ac_nir_context *ctx,
> 
>  				LLVMBuildStore(ctx->ac.builder, value, temp_ptr);
>  			}
>  		}
>  		break;
>  	case nir_var_local:
>  		for (unsigned chan = 0; chan < 8; chan++) {
>  			if (!(writemask & (1 << chan)))
>  				continue;
> 
> -			value = llvm_extract_elem(&ctx->ac, src, chan);
> +			value = ac_llvm_extract_elem(&ctx->ac, src, chan);
>  			if (indir_index) {
>  				unsigned count = glsl_count_attribute_slots(
>  					instr->variables[0]->var->type, false);
>  				count -= chan / 4;
>  				LLVMValueRef tmp_vec = ac_build_gather_values_extended(
>  					&ctx->ac, ctx->locals + idx + chan, count,
>  					4, true, true);
> 
>  				tmp_vec = LLVMBuildInsertElement(ctx->ac.builder, tmp_vec,
>  								 value, indir_index, "");
> @@ -3226,22 +3204,22 @@ visit_store_var(struct ac_nir_context *ctx,
>  			   ctx->ac.builder, val,
>  			   LLVMGetElementType(LLVMTypeOf(address)), "");
>  			LLVMBuildStore(ctx->ac.builder, val, address);
>  		} else {
>  			for (unsigned chan = 0; chan < 4; chan++) {
>  				if (!(writemask & (1 << chan)))
>  					continue;
>  				LLVMValueRef ptr =
>  					LLVMBuildStructGEP(ctx->ac.builder,
>  							   address, chan, "");
> -				LLVMValueRef src = llvm_extract_elem(&ctx->ac, val,
> -								     chan);
> +				LLVMValueRef src = ac_llvm_extract_elem(&ctx->ac, val,
> +									chan);
>  				src = LLVMBuildBitCast(
>  				   ctx->ac.builder, src,
>  				   LLVMGetElementType(LLVMTypeOf(ptr)), "");
>  				LLVMBuildStore(ctx->ac.builder, src, ptr);
>  			}
>  		}
>  		break;
>  	}
>  	default:
>  		break;
> @@ -3359,21 +3337,21 @@ static LLVMValueRef get_image_coords(struct
> ac_nir_context *ctx,
>  	if(instr->variables[0]->deref.child)
>  		type = instr->variables[0]->deref.child->type;
> 
>  	LLVMValueRef src0 = get_src(ctx, instr->src[0]);
>  	LLVMValueRef coords[4];
>  	LLVMValueRef masks[] = {
>  		LLVMConstInt(ctx->ac.i32, 0, false), LLVMConstInt(ctx->ac.i32, 1, 
> false),
>  		LLVMConstInt(ctx->ac.i32, 2, false), LLVMConstInt(ctx->ac.i32, 3, 
> false),
>  	};
>  	LLVMValueRef res;
> -	LLVMValueRef sample_index = llvm_extract_elem(&ctx->ac, get_src(ctx,
> instr->src[1]), 0);
> +	LLVMValueRef sample_index = ac_llvm_extract_elem(&ctx->ac,
> get_src(ctx, instr->src[1]), 0);
> 
>  	int count;
>  	enum glsl_sampler_dim dim = glsl_get_sampler_dim(type);
>  	bool is_array = glsl_sampler_type_is_array(type);
>  	bool add_frag_pos = (dim == GLSL_SAMPLER_DIM_SUBPASS ||
>  			     dim == GLSL_SAMPLER_DIM_SUBPASS_MS);
>  	bool is_ms = (dim == GLSL_SAMPLER_DIM_MS ||
>  		      dim == GLSL_SAMPLER_DIM_SUBPASS_MS);
>  	bool gfx9_1d = ctx->ac.chip_class >= GFX9 && dim == 
> GLSL_SAMPLER_DIM_1D;
>  	count = image_type_to_components_count(dim, is_array);
> @@ -3406,21 +3384,21 @@ static LLVMValueRef get_image_coords(struct
> ac_nir_context *ctx,
>  	if (count == 1 && !gfx9_1d) {
>  		if (instr->src[0].ssa->num_components)
>  			res = LLVMBuildExtractElement(ctx->ac.builder, src0, masks[0], "");
>  		else
>  			res = src0;
>  	} else {
>  		int chan;
>  		if (is_ms)
>  			count--;
>  		for (chan = 0; chan < count; ++chan) {
> -			coords[chan] = llvm_extract_elem(&ctx->ac, src0, chan);
> +			coords[chan] = ac_llvm_extract_elem(&ctx->ac, src0, chan);
>  		}
>  		if (add_frag_pos) {
>  			for (chan = 0; chan < 2; ++chan)
>  				coords[chan] = LLVMBuildAdd(ctx->ac.builder, coords[chan],
> LLVMBuildFPToUI(ctx->ac.builder, ctx->abi->frag_pos[chan],
>  						ctx->ac.i32, ""), "");
>  			coords[2] = ac_to_integer(&ctx->ac,
> ctx->abi->inputs[radeon_llvm_reg_index_soa(VARYING_SLOT_LAYER, 0)]);
>  			count++;
>  		}
> 
>  		if (gfx9_1d) {
> @@ -4607,50 +4585,50 @@ static void visit_tex(struct ac_nir_context
> *ctx, nir_tex_instr *instr)
>  		samples = LLVMBuildShl(ctx->ac.builder, ctx->ac.i32_1,
>  				       samples, "");
>  		samples = LLVMBuildSelect(ctx->ac.builder, is_msaa, samples,
>  					  ctx->ac.i32_1, "");
>  		result = samples;
>  		goto write_result;
>  	}
> 
>  	if (coord)
>  		for (chan = 0; chan < instr->coord_components; chan++)
> -			coords[chan] = llvm_extract_elem(&ctx->ac, coord, chan);
> +			coords[chan] = ac_llvm_extract_elem(&ctx->ac, coord, chan);
> 
>  	if (offsets && instr->op != nir_texop_txf) {
>  		LLVMValueRef offset[3], pack;
>  		for (chan = 0; chan < 3; ++chan)
>  			offset[chan] = ctx->ac.i32_0;
> 
>  		args.offset = true;
> -		for (chan = 0; chan < get_llvm_num_components(offsets); chan++) {
> -			offset[chan] = llvm_extract_elem(&ctx->ac, offsets, chan);
> +		for (chan = 0; chan < ac_get_llvm_num_components(offsets); chan++) {
> +			offset[chan] = ac_llvm_extract_elem(&ctx->ac, offsets, chan);
>  			offset[chan] = LLVMBuildAnd(ctx->ac.builder, offset[chan],
>  						    LLVMConstInt(ctx->ac.i32, 0x3f, false), "");
>  			if (chan)
>  				offset[chan] = LLVMBuildShl(ctx->ac.builder, offset[chan],
>  							    LLVMConstInt(ctx->ac.i32, chan * 8, false), "");
>  		}
>  		pack = LLVMBuildOr(ctx->ac.builder, offset[0], offset[1], "");
>  		pack = LLVMBuildOr(ctx->ac.builder, pack, offset[2], "");
>  		address[count++] = pack;
> 
>  	}
>  	/* pack LOD bias value */
>  	if (instr->op == nir_texop_txb && bias) {
>  		address[count++] = bias;
>  	}
> 
>  	/* Pack depth comparison value */
>  	if (instr->is_shadow && comparator) {
>  		LLVMValueRef z = ac_to_float(&ctx->ac,
> -		                             llvm_extract_elem(&ctx->ac, comparator, 
> 0));
> +		                             ac_llvm_extract_elem(&ctx->ac, 
> comparator, 0));
> 
>  		/* TC-compatible HTILE on radeonsi promotes Z16 and Z24 to 
> Z32_FLOAT,
>  		 * so the depth comparison value isn't clamped for Z16 and
>  		 * Z24 anymore. Do it manually here.
>  		 *
>  		 * It's unnecessary if the original texture format was
>  		 * Z32_FLOAT, but we don't know that here.
>  		 */
>  		if (ctx->ac.chip_class == VI && ctx->abi->clamp_shadow_reference)
>  			z = ac_build_clamp(&ctx->ac, z);
> @@ -4680,22 +4658,22 @@ static void visit_tex(struct ac_nir_context
> *ctx, nir_tex_instr *instr)
>  				num_dest_deriv_channels = 2;
>  				num_deriv_comp = 2;
>  			} else {
>  				num_dest_deriv_channels = 1;
>  				num_deriv_comp = 1;
>  			}
>  			break;
>  		}
> 
>  		for (unsigned i = 0; i < num_src_deriv_channels; i++) {
> -			derivs[i] = ac_to_float(&ctx->ac, llvm_extract_elem(&ctx->ac, ddx, 
> i));
> -			derivs[num_dest_deriv_channels + i] = ac_to_float(&ctx->ac,
> llvm_extract_elem(&ctx->ac, ddy, i));
> +			derivs[i] = ac_to_float(&ctx->ac, ac_llvm_extract_elem(&ctx->ac, 
> ddx, i));
> +			derivs[num_dest_deriv_channels + i] = ac_to_float(&ctx->ac,
> ac_llvm_extract_elem(&ctx->ac, ddy, i));
>  		}
>  		for (unsigned i = num_src_deriv_channels; i < 
> num_dest_deriv_channels; i++) {
>  			derivs[i] = ctx->ac.f32_0;
>  			derivs[num_dest_deriv_channels + i] = ctx->ac.f32_0;
>  		}
>  	}
> 
>  	if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE && coord) {
>  		for (chan = 0; chan < instr->coord_components; chan++)
>  			coords[chan] = ac_to_float(&ctx->ac, coords[chan]);