[Mesa-dev] [PATCH 10/10] radeonsi: don't fetch 8 dwords for samplerBuffer and imageBuffer
Marek Olšák
maraeo at gmail.com
Mon Nov 28 11:17:03 UTC 2016
From: Marek Olšák <marek.olsak at amd.com>
The compiler doesn't shrink s_load_dwordx8, so we always wasted 4 SGPRs.
Also, the extraction of the descriptor created some really ugly asm code
with lots of VALU bitwise ops and v_readfirstlane.
Totals from *affected* shaders:
SGPRS: 13880 -> 13253 (-4.52 %)
VGPRS: 15200 -> 15088 (-0.74 %)
Code Size: 499864 -> 459816 (-8.01 %) bytes
Max Waves: 1554 -> 1564 (0.64 %)
---
src/gallium/drivers/radeonsi/si_shader.c | 94 +++++++++++++++-----------------
1 file changed, 43 insertions(+), 51 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index f4c6e9c..bb57e78 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -3107,30 +3107,30 @@ static void si_llvm_return_fs_outputs(struct lp_build_tgsi_context *bld_base)
*/
static LLVMValueRef get_buffer_size(
struct lp_build_tgsi_context *bld_base,
LLVMValueRef descriptor)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
struct gallivm_state *gallivm = bld_base->base.gallivm;
LLVMBuilderRef builder = gallivm->builder;
LLVMValueRef size =
LLVMBuildExtractElement(builder, descriptor,
- lp_build_const_int32(gallivm, 6), "");
+ lp_build_const_int32(gallivm, 2), "");
if (ctx->screen->b.chip_class >= VI) {
/* On VI, the descriptor contains the size in bytes,
* but TXQ must return the size in elements.
* The stride is always non-zero for resources using TXQ.
*/
LLVMValueRef stride =
LLVMBuildExtractElement(builder, descriptor,
- lp_build_const_int32(gallivm, 5), "");
+ lp_build_const_int32(gallivm, 1), "");
stride = LLVMBuildLShr(builder, stride,
lp_build_const_int32(gallivm, 16), "");
stride = LLVMBuildAnd(builder, stride,
lp_build_const_int32(gallivm, 0x3FFF), "");
size = LLVMBuildUDiv(builder, size, stride, "");
}
return size;
}
@@ -3271,20 +3271,26 @@ static LLVMValueRef force_dcc_off(struct si_shader_context *ctx,
LLVMValueRef i32_6 = LLVMConstInt(ctx->i32, 6, 0);
LLVMValueRef i32_C = LLVMConstInt(ctx->i32, C_008F28_COMPRESSION_EN, 0);
LLVMValueRef tmp;
tmp = LLVMBuildExtractElement(builder, rsrc, i32_6, "");
tmp = LLVMBuildAnd(builder, tmp, i32_C, "");
return LLVMBuildInsertElement(builder, rsrc, tmp, i32_6, "");
}
}
+static LLVMTypeRef const_array(LLVMTypeRef elem_type, int num_elements)
+{
+ return LLVMPointerType(LLVMArrayType(elem_type, num_elements),
+ CONST_ADDR_SPACE);
+}
+
/**
* Load the resource descriptor for \p image.
*/
static void
image_fetch_rsrc(
struct lp_build_tgsi_context *bld_base,
const struct tgsi_full_src_register *image,
bool is_store, unsigned target,
LLVMValueRef *rsrc)
{
@@ -3312,20 +3318,33 @@ image_fetch_rsrc(
* and if the index used to select an individual element is
* negative or greater than or equal to the size of the
* array, the results of the operation are undefined but may
* not lead to termination.
*/
index = get_bounded_indirect_index(ctx, &image->Indirect,
image->Register.Index,
SI_NUM_IMAGES);
}
+ if (target == TGSI_TEXTURE_BUFFER) {
+ LLVMBuilderRef builder = ctx->gallivm.builder;
+
+ rsrc_ptr = LLVMBuildPointerCast(builder, rsrc_ptr,
+ const_array(ctx->v4i32, 0), "");
+ index = LLVMBuildMul(builder, index,
+ LLVMConstInt(ctx->i32, 2, 0), "");
+ index = LLVMBuildAdd(builder, index,
+ LLVMConstInt(ctx->i32, 1, 0), "");
+ *rsrc = build_indexed_load_const(ctx, rsrc_ptr, index);
+ return;
+ }
+
tmp = build_indexed_load_const(ctx, rsrc_ptr, index);
if (dcc_off)
tmp = force_dcc_off(ctx, tmp);
*rsrc = tmp;
}
static LLVMValueRef image_fetch_coords(
struct lp_build_tgsi_context *bld_base,
const struct tgsi_full_instruction *inst,
unsigned src)
@@ -3387,39 +3406,20 @@ static void image_append_args(
}
/* HAVE_LLVM >= 0x0400 */
emit_data->args[emit_data->arg_count++] = glc;
emit_data->args[emit_data->arg_count++] = slc;
emit_data->args[emit_data->arg_count++] = lwe;
emit_data->args[emit_data->arg_count++] = da;
}
/**
- * Given a 256 bit resource, extract the top half (which stores the buffer
- * resource in the case of textures and images).
- */
-static LLVMValueRef extract_rsrc_top_half(
- struct si_shader_context *ctx,
- LLVMValueRef rsrc)
-{
- struct gallivm_state *gallivm = &ctx->gallivm;
- struct lp_build_tgsi_context *bld_base = &ctx->soa.bld_base;
- LLVMTypeRef v2i128 = LLVMVectorType(ctx->i128, 2);
-
- rsrc = LLVMBuildBitCast(gallivm->builder, rsrc, v2i128, "");
- rsrc = LLVMBuildExtractElement(gallivm->builder, rsrc, bld_base->uint_bld.one, "");
- rsrc = LLVMBuildBitCast(gallivm->builder, rsrc, ctx->v4i32, "");
-
- return rsrc;
-}
-
-/**
* Append the resource and indexing arguments for buffer intrinsics.
*
* \param rsrc the v4i32 buffer resource
* \param index index into the buffer (stride-based)
* \param offset byte offset into the buffer
*/
static void buffer_append_args(
struct si_shader_context *ctx,
struct lp_build_emit_data *emit_data,
LLVMValueRef rsrc,
@@ -3466,21 +3466,20 @@ static void load_fetch_args(
buffer_append_args(ctx, emit_data, rsrc, bld_base->uint_bld.zero,
offset, false);
} else if (inst->Src[0].Register.File == TGSI_FILE_IMAGE) {
LLVMValueRef coords;
image_fetch_rsrc(bld_base, &inst->Src[0], false, target, &rsrc);
coords = image_fetch_coords(bld_base, inst, 1);
if (target == TGSI_TEXTURE_BUFFER) {
- rsrc = extract_rsrc_top_half(ctx, rsrc);
buffer_append_args(ctx, emit_data, rsrc, coords,
bld_base->uint_bld.zero, false);
} else {
emit_data->args[0] = coords;
emit_data->args[1] = rsrc;
emit_data->args[2] = lp_build_const_int32(gallivm, 15); /* dmask */
emit_data->arg_count = 3;
image_append_args(ctx, emit_data, target, false);
}
@@ -3674,22 +3673,20 @@ static void store_fetch_args(
buffer_append_args(ctx, emit_data, rsrc, bld_base->uint_bld.zero,
offset, false);
} else if (inst->Dst[0].Register.File == TGSI_FILE_IMAGE) {
unsigned target = inst->Memory.Texture;
LLVMValueRef coords;
coords = image_fetch_coords(bld_base, inst, 0);
if (target == TGSI_TEXTURE_BUFFER) {
image_fetch_rsrc(bld_base, &memory, true, target, &rsrc);
-
- rsrc = extract_rsrc_top_half(ctx, rsrc);
buffer_append_args(ctx, emit_data, rsrc, coords,
bld_base->uint_bld.zero, false);
} else {
emit_data->args[1] = coords;
image_fetch_rsrc(bld_base, &memory, true, target,
&emit_data->args[2]);
emit_data->args[3] = lp_build_const_int32(gallivm, 15); /* dmask */
emit_data->arg_count = 4;
image_append_args(ctx, emit_data, target, false);
@@ -3878,21 +3875,20 @@ static void atomic_fetch_args(
buffer_append_args(ctx, emit_data, rsrc, bld_base->uint_bld.zero,
offset, true);
} else if (inst->Src[0].Register.File == TGSI_FILE_IMAGE) {
unsigned target = inst->Memory.Texture;
LLVMValueRef coords;
image_fetch_rsrc(bld_base, &inst->Src[0], true, target, &rsrc);
coords = image_fetch_coords(bld_base, inst, 1);
if (target == TGSI_TEXTURE_BUFFER) {
- rsrc = extract_rsrc_top_half(ctx, rsrc);
buffer_append_args(ctx, emit_data, rsrc, coords,
bld_base->uint_bld.zero, true);
} else {
emit_data->args[emit_data->arg_count++] = coords;
emit_data->args[emit_data->arg_count++] = rsrc;
image_append_args(ctx, emit_data, target, true);
}
}
}
@@ -4122,45 +4118,47 @@ static void set_tex_fetch_args(struct si_shader_context *ctx,
emit_data->args[num_args++] = lp_build_const_int32(gallivm, 0); /* tfe */
emit_data->args[num_args++] = lp_build_const_int32(gallivm, 0); /* lwe */
emit_data->arg_count = num_args;
}
static const struct lp_build_tgsi_action tex_action;
enum desc_type {
DESC_IMAGE,
+ DESC_BUFFER,
DESC_FMASK,
- DESC_SAMPLER
+ DESC_SAMPLER,
};
-static LLVMTypeRef const_array(LLVMTypeRef elem_type, int num_elements)
-{
- return LLVMPointerType(LLVMArrayType(elem_type, num_elements),
- CONST_ADDR_SPACE);
-}
-
/**
* Load an image view, fmask view. or sampler state descriptor.
*/
static LLVMValueRef load_sampler_desc_custom(struct si_shader_context *ctx,
LLVMValueRef list, LLVMValueRef index,
enum desc_type type)
{
struct gallivm_state *gallivm = &ctx->gallivm;
LLVMBuilderRef builder = gallivm->builder;
switch (type) {
case DESC_IMAGE:
/* The image is at [0:7]. */
index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->i32, 2, 0), "");
break;
+ case DESC_BUFFER:
+ /* The buffer is in [4:7]. */
+ index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->i32, 4, 0), "");
+ index = LLVMBuildAdd(builder, index, LLVMConstInt(ctx->i32, 1, 0), "");
+ list = LLVMBuildPointerCast(builder, list,
+ const_array(ctx->v4i32, 0), "");
+ break;
case DESC_FMASK:
/* The FMASK is at [8:15]. */
index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->i32, 2, 0), "");
index = LLVMBuildAdd(builder, index, LLVMConstInt(ctx->i32, 1, 0), "");
break;
case DESC_SAMPLER:
/* The sampler state is at [12:15]. */
index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->i32, 4, 0), "");
index = LLVMBuildAdd(builder, index, LLVMConstInt(ctx->i32, 3, 0), "");
list = LLVMBuildPointerCast(builder, list,
@@ -4228,56 +4226,57 @@ static void tex_fetch_ptrs(
const struct tgsi_full_src_register *reg = &emit_data->inst->Src[sampler_src];
index = get_bounded_indirect_index(ctx,
&reg->Indirect,
reg->Register.Index,
SI_NUM_SAMPLERS);
} else {
index = LLVMConstInt(ctx->i32, sampler_index, 0);
}
- *res_ptr = load_sampler_desc(ctx, index, DESC_IMAGE);
+ if (target == TGSI_TEXTURE_BUFFER)
+ *res_ptr = load_sampler_desc(ctx, index, DESC_BUFFER);
+ else
+ *res_ptr = load_sampler_desc(ctx, index, DESC_IMAGE);
+
+ if (samp_ptr)
+ *samp_ptr = NULL;
+ if (fmask_ptr)
+ *fmask_ptr = NULL;
if (target == TGSI_TEXTURE_2D_MSAA ||
target == TGSI_TEXTURE_2D_ARRAY_MSAA) {
- if (samp_ptr)
- *samp_ptr = NULL;
if (fmask_ptr)
*fmask_ptr = load_sampler_desc(ctx, index, DESC_FMASK);
- } else {
+ } else if (target != TGSI_TEXTURE_BUFFER) {
if (samp_ptr) {
*samp_ptr = load_sampler_desc(ctx, index, DESC_SAMPLER);
*samp_ptr = sici_fix_sampler_aniso(ctx, *res_ptr, *samp_ptr);
}
- if (fmask_ptr)
- *fmask_ptr = NULL;
}
}
static void txq_fetch_args(
struct lp_build_tgsi_context *bld_base,
struct lp_build_emit_data *emit_data)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
- struct gallivm_state *gallivm = bld_base->base.gallivm;
- LLVMBuilderRef builder = gallivm->builder;
const struct tgsi_full_instruction *inst = emit_data->inst;
unsigned target = inst->Texture.Texture;
LLVMValueRef res_ptr;
LLVMValueRef address;
tex_fetch_ptrs(bld_base, emit_data, &res_ptr, NULL, NULL);
if (target == TGSI_TEXTURE_BUFFER) {
/* Read the size from the buffer descriptor directly. */
- LLVMValueRef res = LLVMBuildBitCast(builder, res_ptr, ctx->v8i32, "");
- emit_data->args[0] = get_buffer_size(bld_base, res);
+ emit_data->args[0] = get_buffer_size(bld_base, res_ptr);
return;
}
/* Textures - set the mip level. */
address = lp_build_emit_fetch(bld_base, inst, 0, TGSI_CHAN_X);
set_tex_fetch_args(ctx, emit_data, TGSI_OPCODE_TXQ, target, res_ptr,
NULL, &address, 1, 0xf);
}
@@ -4331,30 +4330,23 @@ static void tex_fetch_args(
unsigned count = 0;
unsigned chan;
unsigned num_deriv_channels = 0;
bool has_offset = inst->Texture.NumOffsets > 0;
LLVMValueRef res_ptr, samp_ptr, fmask_ptr = NULL;
unsigned dmask = 0xf;
tex_fetch_ptrs(bld_base, emit_data, &res_ptr, &samp_ptr, &fmask_ptr);
if (target == TGSI_TEXTURE_BUFFER) {
- LLVMTypeRef v2i128 = LLVMVectorType(ctx->i128, 2);
-
- /* Bitcast and truncate v8i32 to v16i8. */
- LLVMValueRef res = res_ptr;
- res = LLVMBuildBitCast(gallivm->builder, res, v2i128, "");
- res = LLVMBuildExtractElement(gallivm->builder, res, bld_base->uint_bld.one, "");
- res = LLVMBuildBitCast(gallivm->builder, res, ctx->v16i8, "");
-
emit_data->dst_type = ctx->v4f32;
- emit_data->args[0] = res;
+ emit_data->args[0] = LLVMBuildBitCast(gallivm->builder, res_ptr,
+ ctx->v16i8, "");
emit_data->args[1] = bld_base->uint_bld.zero;
emit_data->args[2] = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_X);
emit_data->arg_count = 3;
return;
}
/* Fetch and project texture coordinates */
coords[3] = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_W);
for (chan = 0; chan < 3; chan++ ) {
coords[chan] = lp_build_emit_fetch(bld_base,
--
2.7.4
More information about the mesa-dev
mailing list