Mesa (main): aco: emit p_jump_to_epilog if the main fragment shader has an epilog

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Mon Jul 18 19:05:12 UTC 2022


Module: Mesa
Branch: main
Commit: a6dff6caa1f106bfe846544d5173c1909262b1a4
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=a6dff6caa1f106bfe846544d5173c1909262b1a4

Author: Samuel Pitoiset <samuel.pitoiset at gmail.com>
Date:   Fri Jun 24 16:03:52 2022 +0200

aco: emit p_jump_to_epilog if the main fragment shader has an epilog

MRTZ is still exported from the main shader.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
Reviewed-by: Timur Kristóf <timur.kristof at gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17485>

---

 src/amd/compiler/aco_instruction_selection.cpp | 79 ++++++++++++++++++++++++--
 src/amd/compiler/aco_instruction_selection.h   | 10 ++++
 2 files changed, 83 insertions(+), 6 deletions(-)

diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index 22536dc267f..5c99b6e22f0 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -5160,6 +5160,18 @@ store_output_to_temps(isel_context* ctx, nir_intrinsic_instr* instr)
       idx++;
    }
 
+   if (ctx->stage == fragment_fs && ctx->program->info.ps.has_epilog) {
+      unsigned index = nir_intrinsic_base(instr) - FRAG_RESULT_DATA0;
+
+      if (nir_intrinsic_src_type(instr) == nir_type_float16) {
+         ctx->output_color_types |= ACO_TYPE_FLOAT16 << (index * 2);
+      } else if (nir_intrinsic_src_type(instr) == nir_type_int16) {
+         ctx->output_color_types |= ACO_TYPE_INT16 << (index * 2);
+      } else if (nir_intrinsic_src_type(instr) == nir_type_uint16) {
+         ctx->output_color_types |= ACO_TYPE_UINT16 << (index * 2);
+      }
+   }
+
    return true;
 }
 
@@ -11115,9 +11127,60 @@ create_fs_null_export(isel_context* ctx)
            /* enabled_mask */ 0, dest, /* compr */ false, /* done */ true, /* vm */ true);
 }
 
+static void
+create_fs_jump_to_epilog(isel_context* ctx)
+{ /* Put the written color outputs in fixed registers and emit p_jump_to_epilog. */
+   Builder bld(ctx->program, ctx->block); /* emits the 16->32-bit conversions below */
+   std::vector<Operand> color_exports; /* becomes operands 1..N of the jump */
+   PhysReg exports_start(256); /* VGPR 0 */
+
+   for (unsigned slot = FRAG_RESULT_DATA0; slot < FRAG_RESULT_DATA7 + 1; ++slot) {
+      unsigned color_index = slot - FRAG_RESULT_DATA0;
+      unsigned color_type = (ctx->output_color_types >> (color_index * 2)) & 0x3; /* 2 bits each */
+      unsigned write_mask = ctx->outputs.mask[slot];
+
+      if (!write_mask) /* nothing stored to this slot: it contributes no operands */
+         continue;
+
+      PhysReg color_start(exports_start.reg() + color_index * 4); /* 4 registers per color index */
+
+      for (unsigned i = 0; i < 4; i++) {
+         if (!(write_mask & BITFIELD_BIT(i))) {
+            color_exports.emplace_back(Operand(v1)); /* placeholder: 4 operands per written slot */
+            continue;
+         }
+
+         PhysReg chan_reg = color_start.advance(i * 4u); /* presumably a byte offset -- confirm */
+         Operand chan(ctx->outputs.temps[slot * 4u + i]);
+
+         if (color_type == ACO_TYPE_FLOAT16) { /* 16-bit outputs are widened to 32 bits */
+            chan = bld.vop1(aco_opcode::v_cvt_f32_f16, bld.def(v1), chan);
+         } else if (color_type == ACO_TYPE_INT16 || color_type == ACO_TYPE_UINT16) {
+            bool sign_ext = color_type == ACO_TYPE_INT16; /* only the signed type sign-extends */
+            Temp tmp = convert_int(ctx, bld, chan.getTemp(), 16, 32, sign_ext);
+            chan = Operand(tmp);
+         }
+
+         chan.setFixed(chan_reg); /* pin the (possibly converted) value to its register */
+         color_exports.emplace_back(chan);
+      }
+   }
+
+   Temp continue_pc = convert_pointer_to_64_bit(ctx, get_arg(ctx, ctx->args->ps_epilog_pc));
+
+   aco_ptr<Pseudo_instruction> jump{create_instruction<Pseudo_instruction>(
+      aco_opcode::p_jump_to_epilog, Format::PSEUDO, 1 + color_exports.size(), 0)};
+   jump->operands[0] = Operand(continue_pc); /* operand 0: 64-bit pointer built from ps_epilog_pc */
+   for (unsigned i = 0; i < color_exports.size(); i++) {
+      jump->operands[i + 1] = color_exports[i];
+   }
+   ctx->block->instructions.emplace_back(std::move(jump)); /* append to the current block */
+}
+
 static void
 create_fs_exports(isel_context* ctx)
 {
+   Builder bld(ctx->program, ctx->block);
    bool exported = false;
 
    /* Export depth, stencil and sample mask. */
@@ -11125,13 +11188,17 @@ create_fs_exports(isel_context* ctx)
        ctx->outputs.mask[FRAG_RESULT_SAMPLE_MASK])
       exported |= export_fs_mrt_z(ctx);
 
-   /* Export all color render targets. */
-   for (unsigned i = FRAG_RESULT_DATA0; i < FRAG_RESULT_DATA7 + 1; ++i)
-      if (ctx->outputs.mask[i])
-         exported |= export_fs_mrt_color(ctx, i);
+   if (ctx->program->info.ps.has_epilog) {
+      create_fs_jump_to_epilog(ctx);
+   } else {
+      /* Export all color render targets. */
+      for (unsigned i = FRAG_RESULT_DATA0; i < FRAG_RESULT_DATA7 + 1; ++i)
+         if (ctx->outputs.mask[i])
+            exported |= export_fs_mrt_color(ctx, i);
 
-   if (!exported)
-      create_fs_null_export(ctx);
+      if (!exported)
+         create_fs_null_export(ctx);
+   }
 
    ctx->block->kind |= block_kind_export_end;
 }
diff --git a/src/amd/compiler/aco_instruction_selection.h b/src/amd/compiler/aco_instruction_selection.h
index 7650e72b5e2..d700f61ae69 100644
--- a/src/amd/compiler/aco_instruction_selection.h
+++ b/src/amd/compiler/aco_instruction_selection.h
@@ -35,6 +35,13 @@
 
 namespace aco {
 
+enum aco_color_output_type { /* stored 2 bits per color output in isel_context::output_color_types */
+   ACO_TYPE_ANY32, /* value 0: no 16-bit type was recorded for the output */
+   ACO_TYPE_FLOAT16, /* converted with v_cvt_f32_f16 before the epilog jump */
+   ACO_TYPE_INT16, /* widened from 16 to 32 bits with sign extension */
+   ACO_TYPE_UINT16, /* widened from 16 to 32 bits without sign extension */
+};
+
 struct shader_io_state {
    uint8_t mask[VARYING_SLOT_MAX];
    Temp temps[VARYING_SLOT_MAX * 4u];
@@ -99,6 +106,9 @@ struct isel_context {
    uint32_t tcs_num_patches;
    bool tcs_in_out_eq = false;
 
+   /* Fragment color output information */
+   uint16_t output_color_types;
+
    /* I/O information */
    shader_io_state inputs;
    shader_io_state outputs;



More information about the mesa-commit mailing list