[Mesa-dev] [PATCH 6/7] radeonsi: move building llvm.SI.load.const into ac_build_buffer_load
Nicolai Hähnle
nhaehnle at gmail.com
Fri May 26 10:04:24 UTC 2017
On 25.05.2017 19:04, Marek Olšák wrote:
> From: Marek Olšák <marek.olsak at amd.com>
>
> ---
> src/amd/common/ac_llvm_build.c | 46 +++++++++++++++++++++++---------
> src/amd/common/ac_llvm_build.h | 3 ++-
> src/amd/common/ac_nir_to_llvm.c | 2 +-
> src/gallium/drivers/radeonsi/si_shader.c | 23 +++++++---------
> 4 files changed, 47 insertions(+), 27 deletions(-)
>
> diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
> index 3df9f53..237e929 100644
> --- a/src/amd/common/ac_llvm_build.c
> +++ b/src/amd/common/ac_llvm_build.c
> @@ -626,47 +626,69 @@ ac_build_buffer_store_dword(struct ac_llvm_context *ctx,
> LLVMValueRef
> ac_build_buffer_load(struct ac_llvm_context *ctx,
> LLVMValueRef rsrc,
> int num_channels,
> LLVMValueRef vindex,
> LLVMValueRef voffset,
> LLVMValueRef soffset,
> unsigned inst_offset,
> unsigned glc,
> unsigned slc,
> - bool can_speculate)
> + bool can_speculate,
> + bool allow_smem)
> {
> + LLVMValueRef offset = LLVMConstInt(ctx->i32, inst_offset, 0);
> + if (voffset)
> + offset = LLVMBuildAdd(ctx->builder, offset, voffset, "");
> + if (soffset)
> + offset = LLVMBuildAdd(ctx->builder, offset, soffset, "");
> +
> + /* TODO: VI and later generations can use SMEM with GLC=1.*/
> + if (allow_smem && !glc && !slc) {
This still needs to depend on can_speculate.
At least, looking at patch #7, if allow_smem were enabled for shader
buffer loads, we'd still only want to really use SMEM for read_only
buffers. Unless the plan is to put the responsibility for setting
allow_smem in the caller?
Cheers,
Nicolai
> + assert(vindex == NULL);
> +
> + LLVMValueRef result[4];
> +
> + for (int i = 0; i < num_channels; i++) {
> + if (i) {
> + offset = LLVMBuildAdd(ctx->builder, offset,
> + LLVMConstInt(ctx->i32, 4, 0), "");
> + }
> + LLVMValueRef args[2] = {rsrc, offset};
> + result[i] = ac_build_intrinsic(ctx, "llvm.SI.load.const.v4i32",
> + ctx->f32, args, 2,
> + AC_FUNC_ATTR_READNONE |
> + AC_FUNC_ATTR_LEGACY);
> + }
> + if (num_channels == 1)
> + return result[0];
> +
> + if (num_channels == 3)
> + result[num_channels++] = LLVMGetUndef(ctx->f32);
> + return ac_build_gather_values(ctx, result, num_channels);
> + }
> +
> unsigned func = CLAMP(num_channels, 1, 3) - 1;
>
> LLVMValueRef args[] = {
> LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, ""),
> vindex ? vindex : LLVMConstInt(ctx->i32, 0, 0),
> - LLVMConstInt(ctx->i32, inst_offset, 0),
> + offset,
> LLVMConstInt(ctx->i1, glc, 0),
> LLVMConstInt(ctx->i1, slc, 0)
> };
>
> LLVMTypeRef types[] = {ctx->f32, LLVMVectorType(ctx->f32, 2),
> ctx->v4f32};
> const char *type_names[] = {"f32", "v2f32", "v4f32"};
> char name[256];
>
> - if (voffset) {
> - args[2] = LLVMBuildAdd(ctx->builder, args[2], voffset,
> - "");
> - }
> -
> - if (soffset) {
> - args[2] = LLVMBuildAdd(ctx->builder, args[2], soffset,
> - "");
> - }
> -
> snprintf(name, sizeof(name), "llvm.amdgcn.buffer.load.%s",
> type_names[func]);
>
> return ac_build_intrinsic(ctx, name, types[func], args,
> ARRAY_SIZE(args),
> /* READNONE means writes can't affect it, while
> * READONLY means that writes can affect it. */
> can_speculate && HAVE_LLVM >= 0x0400 ?
> AC_FUNC_ATTR_READNONE :
> AC_FUNC_ATTR_READONLY);
> diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
> index c1b5f3d..ebb78fb 100644
> --- a/src/amd/common/ac_llvm_build.h
> +++ b/src/amd/common/ac_llvm_build.h
> @@ -136,21 +136,22 @@ ac_build_buffer_store_dword(struct ac_llvm_context *ctx,
> LLVMValueRef
> ac_build_buffer_load(struct ac_llvm_context *ctx,
> LLVMValueRef rsrc,
> int num_channels,
> LLVMValueRef vindex,
> LLVMValueRef voffset,
> LLVMValueRef soffset,
> unsigned inst_offset,
> unsigned glc,
> unsigned slc,
> - bool can_speculate);
> + bool can_speculate,
> + bool allow_smem);
>
> LLVMValueRef ac_build_buffer_load_format(struct ac_llvm_context *ctx,
> LLVMValueRef rsrc,
> LLVMValueRef vindex,
> LLVMValueRef voffset,
> bool can_speculate);
>
> LLVMValueRef
> ac_get_thread_id(struct ac_llvm_context *ctx);
>
> diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
> index 8ae0a75..28ba47d 100644
> --- a/src/amd/common/ac_nir_to_llvm.c
> +++ b/src/amd/common/ac_nir_to_llvm.c
> @@ -2816,21 +2816,21 @@ load_tes_input(struct nir_to_llvm_context *ctx,
> param = shader_io_get_unique_index(instr->variables[0]->var->data.location);
> if (instr->variables[0]->var->data.location == VARYING_SLOT_CLIP_DIST0 &&
> is_compact && const_index > 3) {
> const_index -= 3;
> param++;
> }
> buf_addr = get_tcs_tes_buffer_address_params(ctx, param, const_index,
> is_compact, vertex_index, indir_index);
>
> result = ac_build_buffer_load(&ctx->ac, ctx->hs_ring_tess_offchip, instr->num_components, NULL,
> - buf_addr, ctx->oc_lds, is_compact ? (4 * const_index) : 0, 1, 0, true);
> + buf_addr, ctx->oc_lds, is_compact ? (4 * const_index) : 0, 1, 0, true, false);
> result = trim_vector(ctx, result, instr->num_components);
> result = LLVMBuildBitCast(ctx->builder, result, get_def_type(ctx, &instr->dest.ssa), "");
> return result;
> }
>
> static LLVMValueRef
> load_gs_input(struct nir_to_llvm_context *ctx,
> nir_intrinsic_instr *instr)
> {
> LLVMValueRef indir_index, vtx_offset;
> diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
> index fe0cd4a..c2b3cae 100644
> --- a/src/gallium/drivers/radeonsi/si_shader.c
> +++ b/src/gallium/drivers/radeonsi/si_shader.c
> @@ -826,39 +826,39 @@ static LLVMValueRef buffer_load(struct lp_build_tgsi_context *bld_base,
> LLVMValueRef base, bool can_speculate)
> {
> struct si_shader_context *ctx = si_shader_context(bld_base);
> struct gallivm_state *gallivm = &ctx->gallivm;
> LLVMValueRef value, value2;
> LLVMTypeRef llvm_type = tgsi2llvmtype(bld_base, type);
> LLVMTypeRef vec_type = LLVMVectorType(llvm_type, 4);
>
> if (swizzle == ~0) {
> value = ac_build_buffer_load(&ctx->ac, buffer, 4, NULL, base, offset,
> - 0, 1, 0, can_speculate);
> + 0, 1, 0, can_speculate, false);
>
> return LLVMBuildBitCast(gallivm->builder, value, vec_type, "");
> }
>
> if (!tgsi_type_is_64bit(type)) {
> value = ac_build_buffer_load(&ctx->ac, buffer, 4, NULL, base, offset,
> - 0, 1, 0, can_speculate);
> + 0, 1, 0, can_speculate, false);
>
> value = LLVMBuildBitCast(gallivm->builder, value, vec_type, "");
> return LLVMBuildExtractElement(gallivm->builder, value,
> LLVMConstInt(ctx->i32, swizzle, 0), "");
> }
>
> value = ac_build_buffer_load(&ctx->ac, buffer, 1, NULL, base, offset,
> - swizzle * 4, 1, 0, can_speculate);
> + swizzle * 4, 1, 0, can_speculate, false);
>
> value2 = ac_build_buffer_load(&ctx->ac, buffer, 1, NULL, base, offset,
> - swizzle * 4 + 4, 1, 0, can_speculate);
> + swizzle * 4 + 4, 1, 0, can_speculate, false);
>
> return si_llvm_emit_fetch_64bit(bld_base, type, value, value2);
> }
>
> /**
> * Load from LDS.
> *
> * \param type output value type
> * \param swizzle offset (typically 0..3); it can be ~0, which loads a vec4
> * \param dw_addr address in dwords
> @@ -1147,28 +1147,28 @@ static LLVMValueRef fetch_input_gs(
> vtx_offset_param += ctx->param_gs_vtx2_offset - 2;
> }
> vtx_offset = lp_build_mul_imm(uint,
> LLVMGetParam(ctx->main_fn,
> vtx_offset_param),
> 4);
>
> soffset = LLVMConstInt(ctx->i32, (param * 4 + swizzle) * 256, 0);
>
> value = ac_build_buffer_load(&ctx->ac, ctx->esgs_ring, 1, ctx->i32_0,
> - vtx_offset, soffset, 0, 1, 0, true);
> + vtx_offset, soffset, 0, 1, 0, true, false);
> if (tgsi_type_is_64bit(type)) {
> LLVMValueRef value2;
> soffset = LLVMConstInt(ctx->i32, (param * 4 + swizzle + 1) * 256, 0);
>
> value2 = ac_build_buffer_load(&ctx->ac, ctx->esgs_ring, 1,
> ctx->i32_0, vtx_offset, soffset,
> - 0, 1, 0, true);
> + 0, 1, 0, true, false);
> return si_llvm_emit_fetch_64bit(bld_base, type,
> value, value2);
> }
> return LLVMBuildBitCast(gallivm->builder,
> value,
> tgsi2llvmtype(bld_base, type), "");
> }
>
> static int lookup_interp_param_index(unsigned interpolate, unsigned location)
> {
> @@ -1382,26 +1382,22 @@ static LLVMValueRef get_sample_id(struct si_shader_context *ctx)
> }
>
>
> /**
> * Load a dword from a constant buffer.
> */
> static LLVMValueRef buffer_load_const(struct si_shader_context *ctx,
> LLVMValueRef resource,
> LLVMValueRef offset)
> {
> - LLVMBuilderRef builder = ctx->gallivm.builder;
> - LLVMValueRef args[2] = {resource, offset};
> -
> - return lp_build_intrinsic(builder, "llvm.SI.load.const.v4i32", ctx->f32, args, 2,
> - LP_FUNC_ATTR_READNONE |
> - LP_FUNC_ATTR_LEGACY);
> + return ac_build_buffer_load(&ctx->ac, resource, 1, NULL, offset, NULL,
> + 0, 0, 0, true, true);
> }
>
> static LLVMValueRef load_sample_position(struct si_shader_context *ctx, LLVMValueRef sample_id)
> {
> struct lp_build_context *uint_bld = &ctx->bld_base.uint_bld;
> struct gallivm_state *gallivm = &ctx->gallivm;
> LLVMBuilderRef builder = gallivm->builder;
> LLVMValueRef desc = LLVMGetParam(ctx->main_fn, ctx->param_rw_buffers);
> LLVMValueRef buf_index = LLVMConstInt(ctx->i32, SI_PS_CONST_SAMPLE_POSITIONS, 0);
> LLVMValueRef resource = ac_build_indexed_load_const(&ctx->ac, desc, buf_index);
> @@ -5201,21 +5197,22 @@ si_generate_gs_copy_shader(struct si_screen *sscreen,
> }
>
> LLVMValueRef soffset = LLVMConstInt(ctx.i32,
> offset * gs_selector->gs_max_out_vertices * 16 * 4, 0);
> offset++;
>
> outputs[i].values[chan] =
> ac_build_buffer_load(&ctx.ac,
> ctx.gsvs_ring[0], 1,
> ctx.i32_0, voffset,
> - soffset, 0, 1, 1, true);
> + soffset, 0, 1, 1,
> + true, false);
> }
> }
>
> /* Streamout and exports. */
> if (gs_selector->so.num_outputs) {
> si_llvm_emit_streamout(&ctx, outputs,
> gsinfo->num_outputs,
> stream);
> }
>
>
--
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.
More information about the mesa-dev
mailing list