[Mesa-dev] [PATCH] radeonsi: get rid of img/buf/sampler descriptor preloading (v2)
Marek Olšák
maraeo at gmail.com
Tue Sep 13 20:20:08 UTC 2016
From: Marek Olšák <marek.olsak at amd.com>
26011 shaders in 14651 tests
Totals:
SGPRS: 1251920 -> 1152636 (-7.93 %)
VGPRS: 728421 -> 728198 (-0.03 %)
Spilled SGPRs: 16644 -> 3776 (-77.31 %)
Spilled VGPRs: 369 -> 369 (0.00 %)
Scratch VGPRs: 1344 -> 1344 (0.00 %) dwords per thread
Code Size: 36001064 -> 35835152 (-0.46 %) bytes
LDS: 767 -> 767 (0.00 %) blocks
Max Waves: 222221 -> 222372 (0.07 %)
Wait states: 0 -> 0 (0.00 %)
v2: merge codepaths where possible
---
src/gallium/drivers/radeonsi/si_shader.c | 173 ++++++++-----------------------
1 file changed, 41 insertions(+), 132 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 84cbfd7..6f9c45f 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -100,25 +100,20 @@ struct si_shader_context
LLVMTargetMachineRef tm;
unsigned invariant_load_md_kind;
unsigned range_md_kind;
unsigned uniform_md_kind;
LLVMValueRef empty_md;
/* Preloaded descriptors. */
LLVMValueRef const_buffers[SI_NUM_CONST_BUFFERS];
- LLVMValueRef shader_buffers[SI_NUM_SHADER_BUFFERS];
- LLVMValueRef sampler_views[SI_NUM_SAMPLERS];
- LLVMValueRef sampler_states[SI_NUM_SAMPLERS];
- LLVMValueRef fmasks[SI_NUM_SAMPLERS];
- LLVMValueRef images[SI_NUM_IMAGES];
LLVMValueRef esgs_ring;
LLVMValueRef gsvs_ring[4];
LLVMValueRef lds;
LLVMValueRef gs_next_vertex[4];
LLVMValueRef return_value;
LLVMTypeRef voidt;
LLVMTypeRef i1;
LLVMTypeRef i8;
@@ -3399,32 +3394,32 @@ static void membar_emit(
{
struct si_shader_context *ctx = si_shader_context(bld_base);
emit_waitcnt(ctx);
}
static LLVMValueRef
shader_buffer_fetch_rsrc(struct si_shader_context *ctx,
const struct tgsi_full_src_register *reg)
{
- LLVMValueRef ind_index;
- LLVMValueRef rsrc_ptr;
+ LLVMValueRef index;
+ LLVMValueRef rsrc_ptr = LLVMGetParam(ctx->radeon_bld.main_fn,
+ SI_PARAM_SHADER_BUFFERS);
if (!reg->Register.Indirect)
- return ctx->shader_buffers[reg->Register.Index];
-
- ind_index = get_bounded_indirect_index(ctx, &reg->Indirect,
- reg->Register.Index,
- SI_NUM_SHADER_BUFFERS);
+ index = LLVMConstInt(ctx->i32, reg->Register.Index, 0);
+ else
+ index = get_bounded_indirect_index(ctx, &reg->Indirect,
+ reg->Register.Index,
+ SI_NUM_SHADER_BUFFERS);
- rsrc_ptr = LLVMGetParam(ctx->radeon_bld.main_fn, SI_PARAM_SHADER_BUFFERS);
- return build_indexed_load_const(ctx, rsrc_ptr, ind_index);
+ return build_indexed_load_const(ctx, rsrc_ptr, index);
}
static bool tgsi_is_array_sampler(unsigned target)
{
return target == TGSI_TEXTURE_1D_ARRAY ||
target == TGSI_TEXTURE_SHADOW1D_ARRAY ||
target == TGSI_TEXTURE_2D_ARRAY ||
target == TGSI_TEXTURE_SHADOW2D_ARRAY ||
target == TGSI_TEXTURE_CUBE_ARRAY ||
target == TGSI_TEXTURE_SHADOWCUBE_ARRAY ||
@@ -3473,51 +3468,47 @@ static LLVMValueRef force_dcc_off(struct si_shader_context *ctx,
* Load the resource descriptor for \p image.
*/
static void
image_fetch_rsrc(
struct lp_build_tgsi_context *bld_base,
const struct tgsi_full_src_register *image,
bool dcc_off,
LLVMValueRef *rsrc)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
+ LLVMValueRef rsrc_ptr = LLVMGetParam(ctx->radeon_bld.main_fn,
+ SI_PARAM_IMAGES);
+ LLVMValueRef index, tmp;
assert(image->Register.File == TGSI_FILE_IMAGE);
if (!image->Register.Indirect) {
- /* Fast path: use preloaded resources */
- *rsrc = ctx->images[image->Register.Index];
+ index = LLVMConstInt(ctx->i32, image->Register.Index, 0);
} else {
- /* Indexing and manual load */
- LLVMValueRef ind_index;
- LLVMValueRef rsrc_ptr;
- LLVMValueRef tmp;
-
/* From the GL_ARB_shader_image_load_store extension spec:
*
* If a shader performs an image load, store, or atomic
* operation using an image variable declared as an array,
* and if the index used to select an individual element is
* negative or greater than or equal to the size of the
* array, the results of the operation are undefined but may
* not lead to termination.
*/
- ind_index = get_bounded_indirect_index(ctx, &image->Indirect,
- image->Register.Index,
- SI_NUM_IMAGES);
-
- rsrc_ptr = LLVMGetParam(ctx->radeon_bld.main_fn, SI_PARAM_IMAGES);
- tmp = build_indexed_load_const(ctx, rsrc_ptr, ind_index);
- if (dcc_off)
- tmp = force_dcc_off(ctx, tmp);
- *rsrc = tmp;
+ index = get_bounded_indirect_index(ctx, &image->Indirect,
+ image->Register.Index,
+ SI_NUM_IMAGES);
}
+
+ tmp = build_indexed_load_const(ctx, rsrc_ptr, index);
+ if (dcc_off)
+ tmp = force_dcc_off(ctx, tmp);
+ *rsrc = tmp;
}
static LLVMValueRef image_fetch_coords(
struct lp_build_tgsi_context *bld_base,
const struct tgsi_full_instruction *inst,
unsigned src)
{
struct gallivm_state *gallivm = bld_base->base.gallivm;
LLVMBuilderRef builder = gallivm->builder;
unsigned target = inst->Memory.Texture;
@@ -4355,55 +4346,51 @@ static LLVMValueRef sici_fix_sampler_aniso(struct si_shader_context *ctx,
static void tex_fetch_ptrs(
struct lp_build_tgsi_context *bld_base,
struct lp_build_emit_data *emit_data,
LLVMValueRef *res_ptr, LLVMValueRef *samp_ptr, LLVMValueRef *fmask_ptr)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
const struct tgsi_full_instruction *inst = emit_data->inst;
unsigned target = inst->Texture.Texture;
unsigned sampler_src;
unsigned sampler_index;
+ LLVMValueRef index;
sampler_src = emit_data->inst->Instruction.NumSrcRegs - 1;
sampler_index = emit_data->inst->Src[sampler_src].Register.Index;
if (emit_data->inst->Src[sampler_src].Register.Indirect) {
const struct tgsi_full_src_register *reg = &emit_data->inst->Src[sampler_src];
- LLVMValueRef ind_index;
- ind_index = get_bounded_indirect_index(ctx,
- &reg->Indirect,
- reg->Register.Index,
- SI_NUM_SAMPLERS);
+ index = get_bounded_indirect_index(ctx,
+ &reg->Indirect,
+ reg->Register.Index,
+ SI_NUM_SAMPLERS);
+ } else {
+ index = LLVMConstInt(ctx->i32, sampler_index, 0);
+ }
- *res_ptr = load_sampler_desc(ctx, ind_index, DESC_IMAGE);
+ *res_ptr = load_sampler_desc(ctx, index, DESC_IMAGE);
- if (target == TGSI_TEXTURE_2D_MSAA ||
- target == TGSI_TEXTURE_2D_ARRAY_MSAA) {
- if (samp_ptr)
- *samp_ptr = NULL;
- if (fmask_ptr)
- *fmask_ptr = load_sampler_desc(ctx, ind_index, DESC_FMASK);
- } else {
- if (samp_ptr) {
- *samp_ptr = load_sampler_desc(ctx, ind_index, DESC_SAMPLER);
- *samp_ptr = sici_fix_sampler_aniso(ctx, *res_ptr, *samp_ptr);
- }
- if (fmask_ptr)
- *fmask_ptr = NULL;
- }
- } else {
- *res_ptr = ctx->sampler_views[sampler_index];
+ if (target == TGSI_TEXTURE_2D_MSAA ||
+ target == TGSI_TEXTURE_2D_ARRAY_MSAA) {
if (samp_ptr)
- *samp_ptr = ctx->sampler_states[sampler_index];
+ *samp_ptr = NULL;
if (fmask_ptr)
- *fmask_ptr = ctx->fmasks[sampler_index];
+ *fmask_ptr = load_sampler_desc(ctx, index, DESC_FMASK);
+ } else {
+ if (samp_ptr) {
+ *samp_ptr = load_sampler_desc(ctx, index, DESC_SAMPLER);
+ *samp_ptr = sici_fix_sampler_aniso(ctx, *res_ptr, *samp_ptr);
+ }
+ if (fmask_ptr)
+ *fmask_ptr = NULL;
}
}
static void txq_fetch_args(
struct lp_build_tgsi_context *bld_base,
struct lp_build_emit_data *emit_data)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
struct gallivm_state *gallivm = bld_base->base.gallivm;
LLVMBuilderRef builder = gallivm->builder;
@@ -5856,95 +5843,20 @@ static void preload_constant_buffers(struct si_shader_context *ctx)
for (buf = 0; buf < SI_NUM_CONST_BUFFERS; buf++) {
if (info->const_file_max[buf] == -1)
continue;
/* Load the resource descriptor */
ctx->const_buffers[buf] =
build_indexed_load_const(ctx, ptr, lp_build_const_int32(gallivm, buf));
}
}
-static void preload_shader_buffers(struct si_shader_context *ctx)
-{
- struct gallivm_state *gallivm = &ctx->radeon_bld.gallivm;
- LLVMValueRef ptr = LLVMGetParam(ctx->radeon_bld.main_fn, SI_PARAM_SHADER_BUFFERS);
- int buf, maxbuf;
-
- maxbuf = MIN2(ctx->shader->selector->info.file_max[TGSI_FILE_BUFFER],
- SI_NUM_SHADER_BUFFERS - 1);
- for (buf = 0; buf <= maxbuf; ++buf) {
- ctx->shader_buffers[buf] =
- build_indexed_load_const(
- ctx, ptr, lp_build_const_int32(gallivm, buf));
- }
-}
-
-static void preload_samplers(struct si_shader_context *ctx)
-{
- struct lp_build_tgsi_context *bld_base = &ctx->radeon_bld.soa.bld_base;
- struct gallivm_state *gallivm = bld_base->base.gallivm;
- const struct tgsi_shader_info *info = bld_base->info;
- unsigned i, num_samplers = info->file_max[TGSI_FILE_SAMPLER] + 1;
- LLVMValueRef offset;
-
- if (num_samplers == 0)
- return;
-
- /* Load the resources and samplers, we rely on the code sinking to do the rest */
- for (i = 0; i < num_samplers; ++i) {
- /* Resource */
- offset = lp_build_const_int32(gallivm, i);
- ctx->sampler_views[i] =
- load_sampler_desc(ctx, offset, DESC_IMAGE);
-
- /* FMASK resource */
- if (info->is_msaa_sampler[i])
- ctx->fmasks[i] =
- load_sampler_desc(ctx, offset, DESC_FMASK);
- else {
- ctx->sampler_states[i] =
- load_sampler_desc(ctx, offset, DESC_SAMPLER);
- ctx->sampler_states[i] =
- sici_fix_sampler_aniso(ctx, ctx->sampler_views[i],
- ctx->sampler_states[i]);
- }
- }
-}
-
-static void preload_images(struct si_shader_context *ctx)
-{
- struct lp_build_tgsi_context *bld_base = &ctx->radeon_bld.soa.bld_base;
- struct tgsi_shader_info *info = &ctx->shader->selector->info;
- struct gallivm_state *gallivm = bld_base->base.gallivm;
- unsigned num_images = bld_base->info->file_max[TGSI_FILE_IMAGE] + 1;
- LLVMValueRef res_ptr;
- unsigned i;
-
- if (num_images == 0)
- return;
-
- res_ptr = LLVMGetParam(ctx->radeon_bld.main_fn, SI_PARAM_IMAGES);
-
- for (i = 0; i < num_images; ++i) {
- /* Rely on LLVM to shrink the load for buffer resources. */
- LLVMValueRef rsrc =
- build_indexed_load_const(ctx, res_ptr,
- lp_build_const_int32(gallivm, i));
-
- if (info->images_writemask & (1 << i) &&
- !(info->images_buffers & (1 << i)))
- rsrc = force_dcc_off(ctx, rsrc);
-
- ctx->images[i] = rsrc;
- }
-}
-
/**
* Load ESGS and GSVS ring buffer resource descriptors and save the variables
* for later use.
*/
static void preload_ring_buffers(struct si_shader_context *ctx)
{
struct gallivm_state *gallivm =
ctx->radeon_bld.soa.bld_base.base.gallivm;
LLVMValueRef buf_ptr = LLVMGetParam(ctx->radeon_bld.main_fn,
@@ -6773,23 +6685,20 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,
ctx.radeon_bld.declare_memory_region = declare_compute_memory;
break;
default:
assert(!"Unsupported shader type");
return -1;
}
create_meta_data(&ctx);
create_function(&ctx);
preload_constant_buffers(&ctx);
- preload_shader_buffers(&ctx);
- preload_samplers(&ctx);
- preload_images(&ctx);
preload_ring_buffers(&ctx);
if (ctx.is_monolithic && sel->type == PIPE_SHADER_FRAGMENT &&
shader->key.ps.prolog.poly_stipple) {
LLVMValueRef list = LLVMGetParam(ctx.radeon_bld.main_fn,
SI_PARAM_RW_BUFFERS);
si_llvm_emit_polygon_stipple(&ctx, list,
SI_PARAM_POS_FIXED_PT);
}
--
2.7.4
More information about the mesa-dev
mailing list