[Mesa-dev] [PATCH 10/10] radeonsi: don't fetch 8 dwords for samplerBuffer and imageBuffer
Nicolai Hähnle
nhaehnle at gmail.com
Tue Nov 29 19:45:56 UTC 2016
One minor comment on patch #8; apart from that, the series is:
Reviewed-by: Nicolai Hähnle <nicolai.haehnle at amd.com>
On 28.11.2016 12:17, Marek Olšák wrote:
> From: Marek Olšák <marek.olsak at amd.com>
>
> The compiler doesn't shrink s_load_dwordx8, so we always wasted 4 SGPRs.
> Also, the extraction of the descriptor created some really ugly asm code
> with lots of VALU bitwise ops and v_readfirstlane.
>
> Totals from *affected* shaders:
> SGPRS: 13880 -> 13253 (-4.52 %)
> VGPRS: 15200 -> 15088 (-0.74 %)
> Code Size: 499864 -> 459816 (-8.01 %) bytes
> Max Waves: 1554 -> 1564 (0.64 %)
> ---
> src/gallium/drivers/radeonsi/si_shader.c | 94 +++++++++++++++-----------------
> 1 file changed, 43 insertions(+), 51 deletions(-)
>
> diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
> index f4c6e9c..bb57e78 100644
> --- a/src/gallium/drivers/radeonsi/si_shader.c
> +++ b/src/gallium/drivers/radeonsi/si_shader.c
> @@ -3107,30 +3107,30 @@ static void si_llvm_return_fs_outputs(struct lp_build_tgsi_context *bld_base)
> */
> static LLVMValueRef get_buffer_size(
> struct lp_build_tgsi_context *bld_base,
> LLVMValueRef descriptor)
> {
> struct si_shader_context *ctx = si_shader_context(bld_base);
> struct gallivm_state *gallivm = bld_base->base.gallivm;
> LLVMBuilderRef builder = gallivm->builder;
> LLVMValueRef size =
> LLVMBuildExtractElement(builder, descriptor,
> - lp_build_const_int32(gallivm, 6), "");
> + lp_build_const_int32(gallivm, 2), "");
>
> if (ctx->screen->b.chip_class >= VI) {
> /* On VI, the descriptor contains the size in bytes,
> * but TXQ must return the size in elements.
> * The stride is always non-zero for resources using TXQ.
> */
> LLVMValueRef stride =
> LLVMBuildExtractElement(builder, descriptor,
> - lp_build_const_int32(gallivm, 5), "");
> + lp_build_const_int32(gallivm, 1), "");
> stride = LLVMBuildLShr(builder, stride,
> lp_build_const_int32(gallivm, 16), "");
> stride = LLVMBuildAnd(builder, stride,
> lp_build_const_int32(gallivm, 0x3FFF), "");
>
> size = LLVMBuildUDiv(builder, size, stride, "");
> }
>
> return size;
> }
> @@ -3271,20 +3271,26 @@ static LLVMValueRef force_dcc_off(struct si_shader_context *ctx,
> LLVMValueRef i32_6 = LLVMConstInt(ctx->i32, 6, 0);
> LLVMValueRef i32_C = LLVMConstInt(ctx->i32, C_008F28_COMPRESSION_EN, 0);
> LLVMValueRef tmp;
>
> tmp = LLVMBuildExtractElement(builder, rsrc, i32_6, "");
> tmp = LLVMBuildAnd(builder, tmp, i32_C, "");
> return LLVMBuildInsertElement(builder, rsrc, tmp, i32_6, "");
> }
> }
>
> +static LLVMTypeRef const_array(LLVMTypeRef elem_type, int num_elements)
> +{
> + return LLVMPointerType(LLVMArrayType(elem_type, num_elements),
> + CONST_ADDR_SPACE);
> +}
> +
> /**
> * Load the resource descriptor for \p image.
> */
> static void
> image_fetch_rsrc(
> struct lp_build_tgsi_context *bld_base,
> const struct tgsi_full_src_register *image,
> bool is_store, unsigned target,
> LLVMValueRef *rsrc)
> {
> @@ -3312,20 +3318,33 @@ image_fetch_rsrc(
> * and if the index used to select an individual element is
> * negative or greater than or equal to the size of the
> * array, the results of the operation are undefined but may
> * not lead to termination.
> */
> index = get_bounded_indirect_index(ctx, &image->Indirect,
> image->Register.Index,
> SI_NUM_IMAGES);
> }
>
> + if (target == TGSI_TEXTURE_BUFFER) {
> + LLVMBuilderRef builder = ctx->gallivm.builder;
> +
> + rsrc_ptr = LLVMBuildPointerCast(builder, rsrc_ptr,
> + const_array(ctx->v4i32, 0), "");
> + index = LLVMBuildMul(builder, index,
> + LLVMConstInt(ctx->i32, 2, 0), "");
> + index = LLVMBuildAdd(builder, index,
> + LLVMConstInt(ctx->i32, 1, 0), "");
> + *rsrc = build_indexed_load_const(ctx, rsrc_ptr, index);
> + return;
> + }
> +
> tmp = build_indexed_load_const(ctx, rsrc_ptr, index);
> if (dcc_off)
> tmp = force_dcc_off(ctx, tmp);
> *rsrc = tmp;
> }
>
> static LLVMValueRef image_fetch_coords(
> struct lp_build_tgsi_context *bld_base,
> const struct tgsi_full_instruction *inst,
> unsigned src)
> @@ -3387,39 +3406,20 @@ static void image_append_args(
> }
>
> /* HAVE_LLVM >= 0x0400 */
> emit_data->args[emit_data->arg_count++] = glc;
> emit_data->args[emit_data->arg_count++] = slc;
> emit_data->args[emit_data->arg_count++] = lwe;
> emit_data->args[emit_data->arg_count++] = da;
> }
>
> /**
> - * Given a 256 bit resource, extract the top half (which stores the buffer
> - * resource in the case of textures and images).
> - */
> -static LLVMValueRef extract_rsrc_top_half(
> - struct si_shader_context *ctx,
> - LLVMValueRef rsrc)
> -{
> - struct gallivm_state *gallivm = &ctx->gallivm;
> - struct lp_build_tgsi_context *bld_base = &ctx->soa.bld_base;
> - LLVMTypeRef v2i128 = LLVMVectorType(ctx->i128, 2);
> -
> - rsrc = LLVMBuildBitCast(gallivm->builder, rsrc, v2i128, "");
> - rsrc = LLVMBuildExtractElement(gallivm->builder, rsrc, bld_base->uint_bld.one, "");
> - rsrc = LLVMBuildBitCast(gallivm->builder, rsrc, ctx->v4i32, "");
> -
> - return rsrc;
> -}
> -
> -/**
> * Append the resource and indexing arguments for buffer intrinsics.
> *
> * \param rsrc the v4i32 buffer resource
> * \param index index into the buffer (stride-based)
> * \param offset byte offset into the buffer
> */
> static void buffer_append_args(
> struct si_shader_context *ctx,
> struct lp_build_emit_data *emit_data,
> LLVMValueRef rsrc,
> @@ -3466,21 +3466,20 @@ static void load_fetch_args(
>
> buffer_append_args(ctx, emit_data, rsrc, bld_base->uint_bld.zero,
> offset, false);
> } else if (inst->Src[0].Register.File == TGSI_FILE_IMAGE) {
> LLVMValueRef coords;
>
> image_fetch_rsrc(bld_base, &inst->Src[0], false, target, &rsrc);
> coords = image_fetch_coords(bld_base, inst, 1);
>
> if (target == TGSI_TEXTURE_BUFFER) {
> - rsrc = extract_rsrc_top_half(ctx, rsrc);
> buffer_append_args(ctx, emit_data, rsrc, coords,
> bld_base->uint_bld.zero, false);
> } else {
> emit_data->args[0] = coords;
> emit_data->args[1] = rsrc;
> emit_data->args[2] = lp_build_const_int32(gallivm, 15); /* dmask */
> emit_data->arg_count = 3;
>
> image_append_args(ctx, emit_data, target, false);
> }
> @@ -3674,22 +3673,20 @@ static void store_fetch_args(
> buffer_append_args(ctx, emit_data, rsrc, bld_base->uint_bld.zero,
> offset, false);
> } else if (inst->Dst[0].Register.File == TGSI_FILE_IMAGE) {
> unsigned target = inst->Memory.Texture;
> LLVMValueRef coords;
>
> coords = image_fetch_coords(bld_base, inst, 0);
>
> if (target == TGSI_TEXTURE_BUFFER) {
> image_fetch_rsrc(bld_base, &memory, true, target, &rsrc);
> -
> - rsrc = extract_rsrc_top_half(ctx, rsrc);
> buffer_append_args(ctx, emit_data, rsrc, coords,
> bld_base->uint_bld.zero, false);
> } else {
> emit_data->args[1] = coords;
> image_fetch_rsrc(bld_base, &memory, true, target,
> &emit_data->args[2]);
> emit_data->args[3] = lp_build_const_int32(gallivm, 15); /* dmask */
> emit_data->arg_count = 4;
>
> image_append_args(ctx, emit_data, target, false);
> @@ -3878,21 +3875,20 @@ static void atomic_fetch_args(
> buffer_append_args(ctx, emit_data, rsrc, bld_base->uint_bld.zero,
> offset, true);
> } else if (inst->Src[0].Register.File == TGSI_FILE_IMAGE) {
> unsigned target = inst->Memory.Texture;
> LLVMValueRef coords;
>
> image_fetch_rsrc(bld_base, &inst->Src[0], true, target, &rsrc);
> coords = image_fetch_coords(bld_base, inst, 1);
>
> if (target == TGSI_TEXTURE_BUFFER) {
> - rsrc = extract_rsrc_top_half(ctx, rsrc);
> buffer_append_args(ctx, emit_data, rsrc, coords,
> bld_base->uint_bld.zero, true);
> } else {
> emit_data->args[emit_data->arg_count++] = coords;
> emit_data->args[emit_data->arg_count++] = rsrc;
>
> image_append_args(ctx, emit_data, target, true);
> }
> }
> }
> @@ -4122,45 +4118,47 @@ static void set_tex_fetch_args(struct si_shader_context *ctx,
> emit_data->args[num_args++] = lp_build_const_int32(gallivm, 0); /* tfe */
> emit_data->args[num_args++] = lp_build_const_int32(gallivm, 0); /* lwe */
>
> emit_data->arg_count = num_args;
> }
>
> static const struct lp_build_tgsi_action tex_action;
>
> enum desc_type {
> DESC_IMAGE,
> + DESC_BUFFER,
> DESC_FMASK,
> - DESC_SAMPLER
> + DESC_SAMPLER,
> };
>
> -static LLVMTypeRef const_array(LLVMTypeRef elem_type, int num_elements)
> -{
> - return LLVMPointerType(LLVMArrayType(elem_type, num_elements),
> - CONST_ADDR_SPACE);
> -}
> -
> /**
> * Load an image view, fmask view. or sampler state descriptor.
> */
> static LLVMValueRef load_sampler_desc_custom(struct si_shader_context *ctx,
> LLVMValueRef list, LLVMValueRef index,
> enum desc_type type)
> {
> struct gallivm_state *gallivm = &ctx->gallivm;
> LLVMBuilderRef builder = gallivm->builder;
>
> switch (type) {
> case DESC_IMAGE:
> /* The image is at [0:7]. */
> index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->i32, 2, 0), "");
> break;
> + case DESC_BUFFER:
> + /* The buffer is in [4:7]. */
> + index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->i32, 4, 0), "");
> + index = LLVMBuildAdd(builder, index, LLVMConstInt(ctx->i32, 1, 0), "");
> + list = LLVMBuildPointerCast(builder, list,
> + const_array(ctx->v4i32, 0), "");
> + break;
> case DESC_FMASK:
> /* The FMASK is at [8:15]. */
> index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->i32, 2, 0), "");
> index = LLVMBuildAdd(builder, index, LLVMConstInt(ctx->i32, 1, 0), "");
> break;
> case DESC_SAMPLER:
> /* The sampler state is at [12:15]. */
> index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->i32, 4, 0), "");
> index = LLVMBuildAdd(builder, index, LLVMConstInt(ctx->i32, 3, 0), "");
> list = LLVMBuildPointerCast(builder, list,
> @@ -4228,56 +4226,57 @@ static void tex_fetch_ptrs(
> const struct tgsi_full_src_register *reg = &emit_data->inst->Src[sampler_src];
>
> index = get_bounded_indirect_index(ctx,
> &reg->Indirect,
> reg->Register.Index,
> SI_NUM_SAMPLERS);
> } else {
> index = LLVMConstInt(ctx->i32, sampler_index, 0);
> }
>
> - *res_ptr = load_sampler_desc(ctx, index, DESC_IMAGE);
> + if (target == TGSI_TEXTURE_BUFFER)
> + *res_ptr = load_sampler_desc(ctx, index, DESC_BUFFER);
> + else
> + *res_ptr = load_sampler_desc(ctx, index, DESC_IMAGE);
> +
> + if (samp_ptr)
> + *samp_ptr = NULL;
> + if (fmask_ptr)
> + *fmask_ptr = NULL;
>
> if (target == TGSI_TEXTURE_2D_MSAA ||
> target == TGSI_TEXTURE_2D_ARRAY_MSAA) {
> - if (samp_ptr)
> - *samp_ptr = NULL;
> if (fmask_ptr)
> *fmask_ptr = load_sampler_desc(ctx, index, DESC_FMASK);
> - } else {
> + } else if (target != TGSI_TEXTURE_BUFFER) {
> if (samp_ptr) {
> *samp_ptr = load_sampler_desc(ctx, index, DESC_SAMPLER);
> *samp_ptr = sici_fix_sampler_aniso(ctx, *res_ptr, *samp_ptr);
> }
> - if (fmask_ptr)
> - *fmask_ptr = NULL;
> }
> }
>
> static void txq_fetch_args(
> struct lp_build_tgsi_context *bld_base,
> struct lp_build_emit_data *emit_data)
> {
> struct si_shader_context *ctx = si_shader_context(bld_base);
> - struct gallivm_state *gallivm = bld_base->base.gallivm;
> - LLVMBuilderRef builder = gallivm->builder;
> const struct tgsi_full_instruction *inst = emit_data->inst;
> unsigned target = inst->Texture.Texture;
> LLVMValueRef res_ptr;
> LLVMValueRef address;
>
> tex_fetch_ptrs(bld_base, emit_data, &res_ptr, NULL, NULL);
>
> if (target == TGSI_TEXTURE_BUFFER) {
> /* Read the size from the buffer descriptor directly. */
> - LLVMValueRef res = LLVMBuildBitCast(builder, res_ptr, ctx->v8i32, "");
> - emit_data->args[0] = get_buffer_size(bld_base, res);
> + emit_data->args[0] = get_buffer_size(bld_base, res_ptr);
> return;
> }
>
> /* Textures - set the mip level. */
> address = lp_build_emit_fetch(bld_base, inst, 0, TGSI_CHAN_X);
>
> set_tex_fetch_args(ctx, emit_data, TGSI_OPCODE_TXQ, target, res_ptr,
> NULL, &address, 1, 0xf);
> }
>
> @@ -4331,30 +4330,23 @@ static void tex_fetch_args(
> unsigned count = 0;
> unsigned chan;
> unsigned num_deriv_channels = 0;
> bool has_offset = inst->Texture.NumOffsets > 0;
> LLVMValueRef res_ptr, samp_ptr, fmask_ptr = NULL;
> unsigned dmask = 0xf;
>
> tex_fetch_ptrs(bld_base, emit_data, &res_ptr, &samp_ptr, &fmask_ptr);
>
> if (target == TGSI_TEXTURE_BUFFER) {
> - LLVMTypeRef v2i128 = LLVMVectorType(ctx->i128, 2);
> -
> - /* Bitcast and truncate v8i32 to v16i8. */
> - LLVMValueRef res = res_ptr;
> - res = LLVMBuildBitCast(gallivm->builder, res, v2i128, "");
> - res = LLVMBuildExtractElement(gallivm->builder, res, bld_base->uint_bld.one, "");
> - res = LLVMBuildBitCast(gallivm->builder, res, ctx->v16i8, "");
> -
> emit_data->dst_type = ctx->v4f32;
> - emit_data->args[0] = res;
> + emit_data->args[0] = LLVMBuildBitCast(gallivm->builder, res_ptr,
> + ctx->v16i8, "");
> emit_data->args[1] = bld_base->uint_bld.zero;
> emit_data->args[2] = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_X);
> emit_data->arg_count = 3;
> return;
> }
>
> /* Fetch and project texture coordinates */
> coords[3] = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_W);
> for (chan = 0; chan < 3; chan++ ) {
> coords[chan] = lp_build_emit_fetch(bld_base,
>
More information about the mesa-dev
mailing list