Mesa (master): aco: implement sparse texture fetches
GitLab Mirror
gitlab-mirror at kemper.freedesktop.org
Fri Jan 8 14:43:26 UTC 2021
Module: Mesa
Branch: master
Commit: 382f50ad2ca23f98f517cd2604df286816f9ce6e
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=382f50ad2ca23f98f517cd2604df286816f9ce6e
Author: Rhys Perry <pendingchaos02 at gmail.com>
Date: Fri Nov 20 15:11:16 2020 +0000
aco: implement sparse texture fetches
Signed-off-by: Rhys Perry <pendingchaos02 at gmail.com>
Reviewed-by: Daniel Schürmann <daniel at schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7775>
---
src/amd/compiler/aco_builder_h.py | 2 +-
src/amd/compiler/aco_instruction_selection.cpp | 75 +++++++++++++++++---------
2 files changed, 52 insertions(+), 25 deletions(-)
diff --git a/src/amd/compiler/aco_builder_h.py b/src/amd/compiler/aco_builder_h.py
index f33e78e33ae..a08eafd1c26 100644
--- a/src/amd/compiler/aco_builder_h.py
+++ b/src/amd/compiler/aco_builder_h.py
@@ -514,7 +514,7 @@ public:
}
<%
import itertools
-formats = [("pseudo", [Format.PSEUDO], 'Pseudo_instruction', list(itertools.product(range(5), range(5))) + [(8, 1), (1, 8)]),
+formats = [("pseudo", [Format.PSEUDO], 'Pseudo_instruction', list(itertools.product(range(5), range(6))) + [(8, 1), (1, 8)]),
("sop1", [Format.SOP1], 'SOP1_instruction', [(0, 1), (1, 0), (1, 1), (2, 1), (3, 2)]),
("sop2", [Format.SOP2], 'SOP2_instruction', itertools.product([1, 2], [2, 3])),
("sopk", [Format.SOPK], 'SOPK_instruction', itertools.product([0, 1, 2], [0, 1])),
diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index b2c1efc6758..119a35e8e8e 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -5876,6 +5876,20 @@ memory_sync_info get_memory_sync_info(nir_intrinsic_instr *instr, storage_class
return memory_sync_info(storage, semantics);
}
+Operand emit_tfe_init(Builder& bld, Temp dst)
+{
+ Temp tmp = bld.tmp(dst.regClass());
+
+ aco_ptr<Pseudo_instruction> vec{create_instruction<Pseudo_instruction>(
+ aco_opcode::p_create_vector, Format::PSEUDO, dst.size(), 1)};
+ for (unsigned i = 0; i < dst.size(); i++)
+ vec->operands[i] = Operand(0u);
+ vec->definitions[0] = Definition(tmp);
+ bld.insert(std::move(vec));
+
+ return Operand(tmp);
+}
+
void visit_image_load(isel_context *ctx, nir_intrinsic_instr *instr)
{
Builder bld(ctx->program, ctx->block);
@@ -8967,22 +8981,26 @@ void visit_tex(isel_context *ctx, nir_tex_instr *instr)
}
/* Build tex instruction */
- unsigned dmask = nir_ssa_def_components_read(&instr->dest.ssa);
+ unsigned dmask = nir_ssa_def_components_read(&instr->dest.ssa) & 0xf;
+ if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF)
+ dmask = u_bit_consecutive(0, util_last_bit(dmask));
+ if (instr->is_sparse)
+ dmask = MAX2(dmask, 1) | 0x10;
unsigned dim = ctx->options->chip_class >= GFX10 && instr->sampler_dim != GLSL_SAMPLER_DIM_BUF
? ac_get_sampler_dim(ctx->options->chip_class, instr->sampler_dim, instr->is_array)
: 0;
Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
Temp tmp_dst = dst;
- /* gather4 selects the component by dmask and always returns vec4 */
+ /* gather4 selects the component by dmask and always returns vec4 (vec5 if sparse) */
if (instr->op == nir_texop_tg4) {
- assert(instr->dest.ssa.num_components == 4);
+ assert(instr->dest.ssa.num_components == (4 + instr->is_sparse));
if (instr->is_shadow)
dmask = 1;
else
dmask = 1 << instr->component;
if (tg4_integer_cube_workaround || dst.type() == RegType::sgpr)
- tmp_dst = bld.tmp(v4);
+ tmp_dst = bld.tmp(instr->is_sparse ? v5 : v4);
} else if (instr->op == nir_texop_samples_identical) {
tmp_dst = bld.tmp(v1);
} else if (util_bitcount(dmask) != instr->dest.ssa.num_components || dst.type() == RegType::sgpr) {
@@ -9124,9 +9142,8 @@ void visit_tex(isel_context *ctx, nir_tex_instr *instr)
//FIXME: if (ctx->abi->gfx9_stride_size_workaround) return ac_build_buffer_load_format_gfx9_safe()
assert(coords.size() == 1);
- unsigned last_bit = util_last_bit(nir_ssa_def_components_read(&instr->dest.ssa));
aco_opcode op;
- switch (last_bit) {
+ switch (util_last_bit(dmask & 0xf)) {
case 1:
op = aco_opcode::buffer_load_format_x; break;
case 2:
@@ -9139,21 +9156,19 @@ void visit_tex(isel_context *ctx, nir_tex_instr *instr)
unreachable("Tex instruction loads more than 4 components.");
}
- /* if the instruction return value matches exactly the nir dest ssa, we can use it directly */
- if (last_bit == instr->dest.ssa.num_components && dst.type() == RegType::vgpr)
- tmp_dst = dst;
- else
- tmp_dst = bld.tmp(RegType::vgpr, last_bit);
-
- aco_ptr<MUBUF_instruction> mubuf{create_instruction<MUBUF_instruction>(op, Format::MUBUF, 3, 1)};
+ aco_ptr<MUBUF_instruction> mubuf{create_instruction<MUBUF_instruction>(
+ op, Format::MUBUF, 3 + instr->is_sparse, 1)};
mubuf->operands[0] = Operand(resource);
mubuf->operands[1] = Operand(coords[0]);
mubuf->operands[2] = Operand((uint32_t) 0);
mubuf->definitions[0] = Definition(tmp_dst);
mubuf->idxen = true;
+ mubuf->tfe = instr->is_sparse;
+ if (mubuf->tfe)
+ mubuf->operands[3] = emit_tfe_init(bld, tmp_dst);
ctx->block->instructions.emplace_back(std::move(mubuf));
- expand_vector(ctx, tmp_dst, dst, instr->dest.ssa.num_components, (1 << last_bit) - 1);
+ expand_vector(ctx, tmp_dst, dst, instr->dest.ssa.num_components, dmask);
return;
}
@@ -9190,15 +9205,18 @@ void visit_tex(isel_context *ctx, nir_tex_instr *instr)
instr->op == nir_texop_fragment_fetch ||
instr->op == nir_texop_fragment_mask_fetch) {
aco_opcode op = level_zero || instr->sampler_dim == GLSL_SAMPLER_DIM_MS || instr->sampler_dim == GLSL_SAMPLER_DIM_SUBPASS_MS ? aco_opcode::image_load : aco_opcode::image_load_mip;
- tex.reset(create_instruction<MIMG_instruction>(op, Format::MIMG, 3, 1));
+ tex.reset(create_instruction<MIMG_instruction>(op, Format::MIMG, 3 + instr->is_sparse, 1));
tex->operands[0] = Operand(resource);
tex->operands[1] = Operand(s4); /* no sampler */
tex->operands[2] = Operand(arg);
tex->dim = dim;
- tex->dmask = dmask;
+ tex->dmask = dmask & 0xf;
tex->unrm = true;
tex->da = da;
+ tex->tfe = instr->is_sparse;
tex->definitions[0] = Definition(tmp_dst);
+ if (tex->tfe)
+ tex->operands[3] = emit_tfe_init(bld, tmp_dst);
ctx->block->instructions.emplace_back(std::move(tex));
if (instr->op == nir_texop_samples_identical) {
@@ -9331,23 +9349,26 @@ void visit_tex(isel_context *ctx, nir_tex_instr *instr)
instr->sampler_dim != GLSL_SAMPLER_DIM_SUBPASS_MS)
arg = emit_wqm(ctx, arg, bld.tmp(arg.regClass()), true);
- tex.reset(create_instruction<MIMG_instruction>(opcode, Format::MIMG, 3, 1));
+ tex.reset(create_instruction<MIMG_instruction>(opcode, Format::MIMG, 3 + instr->is_sparse, 1));
tex->operands[0] = Operand(resource);
tex->operands[1] = Operand(sampler);
tex->operands[2] = Operand(arg);
tex->dim = dim;
- tex->dmask = dmask;
+ tex->dmask = dmask & 0xf;
tex->da = da;
+ tex->tfe = instr->is_sparse;
tex->definitions[0] = Definition(tmp_dst);
+ if (tex->tfe)
+ tex->operands[3] = emit_tfe_init(bld, tmp_dst);
ctx->block->instructions.emplace_back(std::move(tex));
if (tg4_integer_cube_workaround) {
assert(tmp_dst.id() != dst.id());
- assert(tmp_dst.size() == dst.size() && dst.size() == 4);
+ assert(tmp_dst.size() == dst.size());
emit_split_vector(ctx, tmp_dst, tmp_dst.size());
Temp val[4];
- for (unsigned i = 0; i < dst.size(); i++) {
+ for (unsigned i = 0; i < 4; i++) {
val[i] = emit_extract_vector(ctx, tmp_dst, i, v1);
Temp cvt_val;
if (stype == GLSL_TYPE_UINT)
@@ -9356,11 +9377,17 @@ void visit_tex(isel_context *ctx, nir_tex_instr *instr)
cvt_val = bld.vop1(aco_opcode::v_cvt_i32_f32, bld.def(v1), val[i]);
val[i] = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), val[i], cvt_val, tg4_compare_cube_wa64);
}
- Temp tmp = dst.regClass() == v4 ? dst : bld.tmp(v4);
- tmp_dst = bld.pseudo(aco_opcode::p_create_vector, Definition(tmp),
- val[0], val[1], val[2], val[3]);
+
+ Temp tmp = dst.regClass() == tmp_dst.regClass() ? dst : bld.tmp(tmp_dst.regClass());
+ if (instr->is_sparse)
+ tmp_dst = bld.pseudo(aco_opcode::p_create_vector, Definition(tmp),
+ val[0], val[1], val[2], val[3],
+ emit_extract_vector(ctx, tmp_dst, 4, v1));
+ else
+ tmp_dst = bld.pseudo(aco_opcode::p_create_vector, Definition(tmp),
+ val[0], val[1], val[2], val[3]);
}
- unsigned mask = instr->op == nir_texop_tg4 ? 0xF : dmask;
+ unsigned mask = instr->op == nir_texop_tg4 ? (instr->is_sparse ? 0x1F : 0xF) : dmask;
expand_vector(ctx, tmp_dst, dst, instr->dest.ssa.num_components, mask);
}
More information about the mesa-commit
mailing list