[Mesa-dev] [PATCH 3/3] radeonsi: set GLC=1 for all write-only shader resources
Marek Olšák
maraeo at gmail.com
Fri Jul 27 03:36:42 UTC 2018
From: Marek Olšák <marek.olsak at amd.com>
---
.../drivers/radeonsi/si_shader_tgsi_mem.c | 21 +++++++++++++++++--
1 file changed, 19 insertions(+), 2 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
index 427fead09d0..f5729acb8df 100644
--- a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
+++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
@@ -647,20 +647,27 @@ static void store_emit_buffer(
struct si_shader_context *ctx,
struct lp_build_emit_data *emit_data,
bool writeonly_memory)
{
const struct tgsi_full_instruction *inst = emit_data->inst;
LLVMBuilderRef builder = ctx->ac.builder;
LLVMValueRef base_data = emit_data->args[0];
LLVMValueRef base_offset = emit_data->args[3];
unsigned writemask = inst->Dst[0].Register.WriteMask;
+ /* If this is write-only, don't keep data in L1 to prevent
+ * evicting L1 cache lines that may be needed by other
+ * instructions.
+ */
+ if (writeonly_memory)
+ emit_data->args[4] = LLVMConstInt(ctx->i1, 1, 0); /* GLC = 1 */
+
while (writemask) {
int start, count;
const char *intrinsic_name;
LLVMValueRef data;
LLVMValueRef offset;
LLVMValueRef tmp;
u_bit_scan_consecutive_range(&writemask, &start, &count);
/* Due to an LLVM limitation, split 3-element writes
@@ -762,40 +769,50 @@ static void store_emit(
info->shader_buffers_atomic,
info->images_load |
info->images_atomic);
if (inst->Dst[0].Register.File == TGSI_FILE_BUFFER) {
store_emit_buffer(ctx, emit_data, writeonly_memory);
return;
}
if (target == TGSI_TEXTURE_BUFFER) {
+ /* If this is write-only, don't keep data in L1 to prevent
+ * evicting L1 cache lines that may be needed by other
+ * instructions.
+ */
+ if (writeonly_memory)
+ emit_data->args[4] = LLVMConstInt(ctx->i1, 1, 0); /* GLC = 1 */
+
emit_data->output[emit_data->chan] = ac_build_intrinsic(
&ctx->ac, "llvm.amdgcn.buffer.store.format.v4f32",
emit_data->dst_type, emit_data->args,
emit_data->arg_count,
ac_get_store_intr_attribs(writeonly_memory));
} else {
struct ac_image_args args = {};
args.opcode = ac_image_store;
args.data[0] = emit_data->args[0];
args.resource = emit_data->args[1];
memcpy(args.coords, &emit_data->args[2], sizeof(args.coords));
args.dim = ac_image_dim_from_tgsi_target(ctx->screen, inst->Memory.Texture);
args.attributes = ac_get_store_intr_attribs(writeonly_memory);
args.dmask = 0xf;
/* Workaround for 8bit/16bit TC L1 write corruption bug on SI.
* All store opcodes not aligned to a dword are affected.
*/
- bool force_glc = ctx->screen->info.chip_class == SI;
- if (force_glc ||
+ if (ctx->screen->info.chip_class == SI ||
+ /* If this is write-only, don't keep data in L1 to prevent
+ * evicting L1 cache lines that may be needed by other
+ * instructions. */
+ writeonly_memory ||
inst->Memory.Qualifier & (TGSI_MEMORY_COHERENT | TGSI_MEMORY_VOLATILE))
args.cache_policy = ac_glc;
emit_data->output[emit_data->chan] =
ac_build_image_opcode(&ctx->ac, &args);
}
}
static void atomic_fetch_args(
struct lp_build_tgsi_context * bld_base,
--
2.17.1
More information about the mesa-dev mailing list