[Mesa-dev] [PATCH v2 07/20] ac: move some helpers to ac_llvm_build.c
Dieter Nützel
Dieter at nuetzel-hh.de
Fri Dec 15 04:34:02 UTC 2017
This one does not apply any longer after Samuel's commit
amd/common: add ac_build_waitcnt()
#225b19880204024a805cc54b1001d09ef3b58054
For your motivation:
I've tested V1 and V2 of the whole series (before the latest master
commits) and could run _all_ my 'normal' stuff.
Even UH runs with GREAT tess speed without any hiccups.
GREAT stuff!
V1 + V2 have my tb even before Nicolai formulated his comments.
BTW There are many commits waiting...;-)
Cheers,
Dieter
Am 13.12.2017 08:52, schrieb Timothy Arceri:
> We will call these from the radeonsi NIR backend.
>
> Reviewed-by: Nicolai Hähnle <nicolai.haehnle at amd.com>
> ---
> src/amd/common/ac_llvm_build.c | 24 +++++++++++++++++
> src/amd/common/ac_llvm_build.h | 8 ++++++
> src/amd/common/ac_nir_to_llvm.c | 58
> +++++++++++++----------------------------
> 3 files changed, 50 insertions(+), 40 deletions(-)
>
> diff --git a/src/amd/common/ac_llvm_build.c
> b/src/amd/common/ac_llvm_build.c
> index b2bf1bf7b51..faa08b6301c 100644
> --- a/src/amd/common/ac_llvm_build.c
> +++ b/src/amd/common/ac_llvm_build.c
> @@ -91,20 +91,44 @@ ac_llvm_context_init(struct ac_llvm_context *ctx,
> LLVMContextRef context,
>
> args[0] = LLVMConstReal(ctx->f32, 2.5);
> ctx->fpmath_md_2p5_ulp = LLVMMDNodeInContext(ctx->context, args, 1);
>
> ctx->uniform_md_kind = LLVMGetMDKindIDInContext(ctx->context,
> "amdgpu.uniform", 14);
>
> ctx->empty_md = LLVMMDNodeInContext(ctx->context, NULL, 0);
> }
>
> +int
> +ac_get_llvm_num_components(LLVMValueRef value)
> +{
> + LLVMTypeRef type = LLVMTypeOf(value);
> + unsigned num_components = LLVMGetTypeKind(type) == LLVMVectorTypeKind
> + ? LLVMGetVectorSize(type)
> + : 1;
> + return num_components;
> +}
> +
> +LLVMValueRef
> +ac_llvm_extract_elem(struct ac_llvm_context *ac,
> + LLVMValueRef value,
> + int index)
> +{
> + int count = ac_get_llvm_num_components(value);
> +
> + if (count == 1)
> + return value;
> +
> + return LLVMBuildExtractElement(ac->builder, value,
> + LLVMConstInt(ac->i32, index, false), "");
> +}
> +
> unsigned
> ac_get_type_size(LLVMTypeRef type)
> {
> LLVMTypeKind kind = LLVMGetTypeKind(type);
>
> switch (kind) {
> case LLVMIntegerTypeKind:
> return LLVMGetIntTypeWidth(type) / 8;
> case LLVMFloatTypeKind:
> return 4;
> diff --git a/src/amd/common/ac_llvm_build.h
> b/src/amd/common/ac_llvm_build.h
> index 655dc1dcc86..c14b0d9f019 100644
> --- a/src/amd/common/ac_llvm_build.h
> +++ b/src/amd/common/ac_llvm_build.h
> @@ -75,20 +75,28 @@ struct ac_llvm_context {
>
> enum chip_class chip_class;
>
> LLVMValueRef lds;
> };
>
> void
> ac_llvm_context_init(struct ac_llvm_context *ctx, LLVMContextRef
> context,
> enum chip_class chip_class);
>
> +int
> +ac_get_llvm_num_components(LLVMValueRef value);
> +
> +LLVMValueRef
> +ac_llvm_extract_elem(struct ac_llvm_context *ac,
> + LLVMValueRef value,
> + int index);
> +
> unsigned ac_get_type_size(LLVMTypeRef type);
>
> LLVMTypeRef ac_to_integer_type(struct ac_llvm_context *ctx,
> LLVMTypeRef t);
> LLVMValueRef ac_to_integer(struct ac_llvm_context *ctx, LLVMValueRef
> v);
> LLVMTypeRef ac_to_float_type(struct ac_llvm_context *ctx, LLVMTypeRef
> t);
> LLVMValueRef ac_to_float(struct ac_llvm_context *ctx, LLVMValueRef v);
>
> LLVMValueRef
> ac_build_intrinsic(struct ac_llvm_context *ctx, const char *name,
> LLVMTypeRef return_type, LLVMValueRef *params,
> diff --git a/src/amd/common/ac_nir_to_llvm.c
> b/src/amd/common/ac_nir_to_llvm.c
> index 6f84604d54a..6060df75314 100644
> --- a/src/amd/common/ac_nir_to_llvm.c
> +++ b/src/amd/common/ac_nir_to_llvm.c
> @@ -995,46 +995,24 @@ static void create_function(struct
> nir_to_llvm_context *ctx,
> set_userdata_location_shader(ctx, AC_UD_PS_SAMPLE_POS_OFFSET,
> &user_sgpr_idx, 1);
> }
> break;
> default:
> unreachable("Shader stage not implemented");
> }
>
> ctx->shader_info->num_user_sgprs = user_sgpr_idx;
> }
>
> -static int get_llvm_num_components(LLVMValueRef value)
> -{
> - LLVMTypeRef type = LLVMTypeOf(value);
> - unsigned num_components = LLVMGetTypeKind(type) == LLVMVectorTypeKind
> - ? LLVMGetVectorSize(type)
> - : 1;
> - return num_components;
> -}
> -
> -static LLVMValueRef llvm_extract_elem(struct ac_llvm_context *ac,
> - LLVMValueRef value,
> - int index)
> -{
> - int count = get_llvm_num_components(value);
> -
> - if (count == 1)
> - return value;
> -
> - return LLVMBuildExtractElement(ac->builder, value,
> - LLVMConstInt(ac->i32, index, false), "");
> -}
> -
> static LLVMValueRef trim_vector(struct ac_llvm_context *ctx,
> LLVMValueRef value, unsigned count)
> {
> - unsigned num_components = get_llvm_num_components(value);
> + unsigned num_components = ac_get_llvm_num_components(value);
> if (count == num_components)
> return value;
>
> LLVMValueRef masks[] = {
> LLVMConstInt(ctx->i32, 0, false), LLVMConstInt(ctx->i32, 1,
> false),
> LLVMConstInt(ctx->i32, 2, false), LLVMConstInt(ctx->i32, 3,
> false)};
>
> if (count == 1)
> return LLVMBuildExtractElement(ctx->builder, value, masks[0],
> "");
> @@ -2321,21 +2299,21 @@ static void visit_store_ssbo(struct
> ac_nir_context *ctx,
> ctx->ac.i32_0, "");
>
> tmp = LLVMBuildExtractElement(ctx->ac.builder,
> base_data, LLVMConstInt(ctx->ac.i32, start + 1, false),
> "");
> data = LLVMBuildInsertElement(ctx->ac.builder, data, tmp,
> ctx->ac.i32_1, "");
> store_name = "llvm.amdgcn.buffer.store.v2f32";
>
> } else {
> assert(count == 1);
> - if (get_llvm_num_components(base_data) > 1)
> + if (ac_get_llvm_num_components(base_data) > 1)
> data = LLVMBuildExtractElement(ctx->ac.builder, base_data,
> LLVMConstInt(ctx->ac.i32, start, false), "");
> else
> data = base_data;
> store_name = "llvm.amdgcn.buffer.store.f32";
> }
>
> offset = base_offset;
> if (start != 0) {
> offset = LLVMBuildAdd(ctx->ac.builder, offset,
> LLVMConstInt(ctx->ac.i32, start * 4, false), "");
> @@ -2348,23 +2326,23 @@ static void visit_store_ssbo(struct
> ac_nir_context *ctx,
> }
>
> static LLVMValueRef visit_atomic_ssbo(struct ac_nir_context *ctx,
> const nir_intrinsic_instr
> *instr)
> {
> const char *name;
> LLVMValueRef params[6];
> int arg_count = 0;
>
> if (instr->intrinsic == nir_intrinsic_ssbo_atomic_comp_swap) {
> - params[arg_count++] = llvm_extract_elem(&ctx->ac, get_src(ctx,
> instr->src[3]), 0);
> + params[arg_count++] = ac_llvm_extract_elem(&ctx->ac, get_src(ctx,
> instr->src[3]), 0);
> }
> - params[arg_count++] = llvm_extract_elem(&ctx->ac, get_src(ctx,
> instr->src[2]), 0);
> + params[arg_count++] = ac_llvm_extract_elem(&ctx->ac, get_src(ctx,
> instr->src[2]), 0);
> params[arg_count++] = ctx->abi->load_ssbo(ctx->abi,
> get_src(ctx, instr->src[0]),
> true);
> params[arg_count++] = LLVMConstInt(ctx->ac.i32, 0, false); /* vindex
> */
> params[arg_count++] = get_src(ctx, instr->src[1]); /* voffset */
> params[arg_count++] = LLVMConstInt(ctx->ac.i1, 0, false); /* slc */
>
> switch (instr->intrinsic) {
> case nir_intrinsic_ssbo_atomic_add:
> name = "llvm.amdgcn.buffer.atomic.add";
> @@ -2827,21 +2805,21 @@ store_tcs_output(struct ac_shader_abi *abi,
>
> bool is_tess_factor = false;
> if (location == VARYING_SLOT_TESS_LEVEL_INNER ||
> location == VARYING_SLOT_TESS_LEVEL_OUTER)
> is_tess_factor = true;
>
> unsigned base = is_compact ? const_index : 0;
> for (unsigned chan = 0; chan < 8; chan++) {
> if (!(writemask & (1 << chan)))
> continue;
> - LLVMValueRef value = llvm_extract_elem(&ctx->ac, src, chan -
> component);
> + LLVMValueRef value = ac_llvm_extract_elem(&ctx->ac, src, chan -
> component);
>
> if (store_lds || is_tess_factor)
> ac_lds_store(&ctx->ac, dw_addr, value);
>
> if (!is_tess_factor && writemask != 0xF)
> ac_build_buffer_store_dword(&ctx->ac, ctx->hs_ring_tess_offchip,
> value, 1,
> buf_addr, ctx->oc_lds,
> 4 * (base + chan), 1, 0, true, false);
>
> dw_addr = LLVMBuildAdd(ctx->builder, dw_addr,
> @@ -3119,21 +3097,21 @@ visit_store_var(struct ac_nir_context *ctx,
> int writemask = instr->const_index[0] << comp;
> LLVMValueRef indir_index;
> unsigned const_index;
> get_deref_offset(ctx, instr->variables[0], false,
> NULL, NULL, &const_index, &indir_index);
>
> if (get_elem_bits(&ctx->ac, LLVMTypeOf(src)) == 64) {
> int old_writemask = writemask;
>
> src = LLVMBuildBitCast(ctx->ac.builder, src,
> - LLVMVectorType(ctx->ac.f32,
> get_llvm_num_components(src) * 2),
> + LLVMVectorType(ctx->ac.f32,
> ac_get_llvm_num_components(src) * 2),
> "");
>
> writemask = 0;
> for (unsigned chan = 0; chan < 4; chan++) {
> if (old_writemask & (1 << chan))
> writemask |= 3u << (2 * chan);
> }
> }
>
> switch (instr->variables[0]->var->data.mode) {
> @@ -3157,21 +3135,21 @@ visit_store_var(struct ac_nir_context *ctx,
> const_index, location, driver_location,
> src, comp, is_patch, is_compact, writemask);
> return;
> }
>
> for (unsigned chan = 0; chan < 8; chan++) {
> int stride = 4;
> if (!(writemask & (1 << chan)))
> continue;
>
> - value = llvm_extract_elem(&ctx->ac, src, chan - comp);
> + value = ac_llvm_extract_elem(&ctx->ac, src, chan - comp);
>
> if (instr->variables[0]->var->data.compact)
> stride = 1;
> if (indir_index) {
> unsigned count = glsl_count_attribute_slots(
> instr->variables[0]->var->type, false);
> count -= chan / 4;
> LLVMValueRef tmp_vec = ac_build_gather_values_extended(
> &ctx->ac, ctx->outputs + idx + chan, count,
> stride, true, true);
> @@ -3186,21 +3164,21 @@ visit_store_var(struct ac_nir_context *ctx,
>
> LLVMBuildStore(ctx->ac.builder, value, temp_ptr);
> }
> }
> break;
> case nir_var_local:
> for (unsigned chan = 0; chan < 8; chan++) {
> if (!(writemask & (1 << chan)))
> continue;
>
> - value = llvm_extract_elem(&ctx->ac, src, chan);
> + value = ac_llvm_extract_elem(&ctx->ac, src, chan);
> if (indir_index) {
> unsigned count = glsl_count_attribute_slots(
> instr->variables[0]->var->type, false);
> count -= chan / 4;
> LLVMValueRef tmp_vec = ac_build_gather_values_extended(
> &ctx->ac, ctx->locals + idx + chan, count,
> 4, true, true);
>
> tmp_vec = LLVMBuildInsertElement(ctx->ac.builder, tmp_vec,
> value, indir_index, "");
> @@ -3226,22 +3204,22 @@ visit_store_var(struct ac_nir_context *ctx,
> ctx->ac.builder, val,
> LLVMGetElementType(LLVMTypeOf(address)), "");
> LLVMBuildStore(ctx->ac.builder, val, address);
> } else {
> for (unsigned chan = 0; chan < 4; chan++) {
> if (!(writemask & (1 << chan)))
> continue;
> LLVMValueRef ptr =
> LLVMBuildStructGEP(ctx->ac.builder,
> address, chan, "");
> - LLVMValueRef src = llvm_extract_elem(&ctx->ac, val,
> - chan);
> + LLVMValueRef src = ac_llvm_extract_elem(&ctx->ac, val,
> + chan);
> src = LLVMBuildBitCast(
> ctx->ac.builder, src,
> LLVMGetElementType(LLVMTypeOf(ptr)), "");
> LLVMBuildStore(ctx->ac.builder, src, ptr);
> }
> }
> break;
> }
> default:
> break;
> @@ -3359,21 +3337,21 @@ static LLVMValueRef get_image_coords(struct
> ac_nir_context *ctx,
> if(instr->variables[0]->deref.child)
> type = instr->variables[0]->deref.child->type;
>
> LLVMValueRef src0 = get_src(ctx, instr->src[0]);
> LLVMValueRef coords[4];
> LLVMValueRef masks[] = {
> LLVMConstInt(ctx->ac.i32, 0, false), LLVMConstInt(ctx->ac.i32, 1,
> false),
> LLVMConstInt(ctx->ac.i32, 2, false), LLVMConstInt(ctx->ac.i32, 3,
> false),
> };
> LLVMValueRef res;
> - LLVMValueRef sample_index = llvm_extract_elem(&ctx->ac, get_src(ctx,
> instr->src[1]), 0);
> + LLVMValueRef sample_index = ac_llvm_extract_elem(&ctx->ac,
> get_src(ctx, instr->src[1]), 0);
>
> int count;
> enum glsl_sampler_dim dim = glsl_get_sampler_dim(type);
> bool is_array = glsl_sampler_type_is_array(type);
> bool add_frag_pos = (dim == GLSL_SAMPLER_DIM_SUBPASS ||
> dim == GLSL_SAMPLER_DIM_SUBPASS_MS);
> bool is_ms = (dim == GLSL_SAMPLER_DIM_MS ||
> dim == GLSL_SAMPLER_DIM_SUBPASS_MS);
> bool gfx9_1d = ctx->ac.chip_class >= GFX9 && dim ==
> GLSL_SAMPLER_DIM_1D;
> count = image_type_to_components_count(dim, is_array);
> @@ -3406,21 +3384,21 @@ static LLVMValueRef get_image_coords(struct
> ac_nir_context *ctx,
> if (count == 1 && !gfx9_1d) {
> if (instr->src[0].ssa->num_components)
> res = LLVMBuildExtractElement(ctx->ac.builder, src0, masks[0], "");
> else
> res = src0;
> } else {
> int chan;
> if (is_ms)
> count--;
> for (chan = 0; chan < count; ++chan) {
> - coords[chan] = llvm_extract_elem(&ctx->ac, src0, chan);
> + coords[chan] = ac_llvm_extract_elem(&ctx->ac, src0, chan);
> }
> if (add_frag_pos) {
> for (chan = 0; chan < 2; ++chan)
> coords[chan] = LLVMBuildAdd(ctx->ac.builder, coords[chan],
> LLVMBuildFPToUI(ctx->ac.builder, ctx->abi->frag_pos[chan],
> ctx->ac.i32, ""), "");
> coords[2] = ac_to_integer(&ctx->ac,
> ctx->abi->inputs[radeon_llvm_reg_index_soa(VARYING_SLOT_LAYER, 0)]);
> count++;
> }
>
> if (gfx9_1d) {
> @@ -4607,50 +4585,50 @@ static void visit_tex(struct ac_nir_context
> *ctx, nir_tex_instr *instr)
> samples = LLVMBuildShl(ctx->ac.builder, ctx->ac.i32_1,
> samples, "");
> samples = LLVMBuildSelect(ctx->ac.builder, is_msaa, samples,
> ctx->ac.i32_1, "");
> result = samples;
> goto write_result;
> }
>
> if (coord)
> for (chan = 0; chan < instr->coord_components; chan++)
> - coords[chan] = llvm_extract_elem(&ctx->ac, coord, chan);
> + coords[chan] = ac_llvm_extract_elem(&ctx->ac, coord, chan);
>
> if (offsets && instr->op != nir_texop_txf) {
> LLVMValueRef offset[3], pack;
> for (chan = 0; chan < 3; ++chan)
> offset[chan] = ctx->ac.i32_0;
>
> args.offset = true;
> - for (chan = 0; chan < get_llvm_num_components(offsets); chan++) {
> - offset[chan] = llvm_extract_elem(&ctx->ac, offsets, chan);
> + for (chan = 0; chan < ac_get_llvm_num_components(offsets); chan++) {
> + offset[chan] = ac_llvm_extract_elem(&ctx->ac, offsets, chan);
> offset[chan] = LLVMBuildAnd(ctx->ac.builder, offset[chan],
> LLVMConstInt(ctx->ac.i32, 0x3f, false), "");
> if (chan)
> offset[chan] = LLVMBuildShl(ctx->ac.builder, offset[chan],
> LLVMConstInt(ctx->ac.i32, chan * 8, false), "");
> }
> pack = LLVMBuildOr(ctx->ac.builder, offset[0], offset[1], "");
> pack = LLVMBuildOr(ctx->ac.builder, pack, offset[2], "");
> address[count++] = pack;
>
> }
> /* pack LOD bias value */
> if (instr->op == nir_texop_txb && bias) {
> address[count++] = bias;
> }
>
> /* Pack depth comparison value */
> if (instr->is_shadow && comparator) {
> LLVMValueRef z = ac_to_float(&ctx->ac,
> - llvm_extract_elem(&ctx->ac, comparator,
> 0));
> + ac_llvm_extract_elem(&ctx->ac,
> comparator, 0));
>
> /* TC-compatible HTILE on radeonsi promotes Z16 and Z24 to
> Z32_FLOAT,
> * so the depth comparison value isn't clamped for Z16 and
> * Z24 anymore. Do it manually here.
> *
> * It's unnecessary if the original texture format was
> * Z32_FLOAT, but we don't know that here.
> */
> if (ctx->ac.chip_class == VI && ctx->abi->clamp_shadow_reference)
> z = ac_build_clamp(&ctx->ac, z);
> @@ -4680,22 +4658,22 @@ static void visit_tex(struct ac_nir_context
> *ctx, nir_tex_instr *instr)
> num_dest_deriv_channels = 2;
> num_deriv_comp = 2;
> } else {
> num_dest_deriv_channels = 1;
> num_deriv_comp = 1;
> }
> break;
> }
>
> for (unsigned i = 0; i < num_src_deriv_channels; i++) {
> - derivs[i] = ac_to_float(&ctx->ac, llvm_extract_elem(&ctx->ac, ddx,
> i));
> - derivs[num_dest_deriv_channels + i] = ac_to_float(&ctx->ac,
> llvm_extract_elem(&ctx->ac, ddy, i));
> + derivs[i] = ac_to_float(&ctx->ac, ac_llvm_extract_elem(&ctx->ac,
> ddx, i));
> + derivs[num_dest_deriv_channels + i] = ac_to_float(&ctx->ac,
> ac_llvm_extract_elem(&ctx->ac, ddy, i));
> }
> for (unsigned i = num_src_deriv_channels; i <
> num_dest_deriv_channels; i++) {
> derivs[i] = ctx->ac.f32_0;
> derivs[num_dest_deriv_channels + i] = ctx->ac.f32_0;
> }
> }
>
> if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE && coord) {
> for (chan = 0; chan < instr->coord_components; chan++)
> coords[chan] = ac_to_float(&ctx->ac, coords[chan]);
More information about the mesa-dev
mailing list