Mesa (master): r600/sfn: Fix vertex stage export to accommodate IO lowering

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Fri Nov 27 10:42:38 UTC 2020


Module: Mesa
Branch: master
Commit: b8fdcffc4c755241d645c62385729ee14dd8620b
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=b8fdcffc4c755241d645c62385729ee14dd8620b

Author: Gert Wollny <gert.wollny at collabora.com>
Date:   Mon Nov 23 11:44:04 2020 +0100

r600/sfn: Fix vertex stage export to accommodate IO lowering

Signed-off-by: Gert Wollny <gert.wollny at collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7715>

---

 .../drivers/r600/sfn/sfn_vertexstageexport.cpp     | 216 +++++++++++++++------
 .../drivers/r600/sfn/sfn_vertexstageexport.h       |  58 ++++--
 2 files changed, 199 insertions(+), 75 deletions(-)

diff --git a/src/gallium/drivers/r600/sfn/sfn_vertexstageexport.cpp b/src/gallium/drivers/r600/sfn/sfn_vertexstageexport.cpp
index 756da878169..6a9feb50292 100644
--- a/src/gallium/drivers/r600/sfn/sfn_vertexstageexport.cpp
+++ b/src/gallium/drivers/r600/sfn/sfn_vertexstageexport.cpp
@@ -8,8 +8,7 @@ using std::priority_queue;
 
 VertexStageExportBase::VertexStageExportBase(VertexStage& proc):
    m_proc(proc),
-   m_cur_clip_pos(1),
-   m_cur_param(0)
+   m_cur_clip_pos(1)
 {
 
 }
@@ -19,10 +18,52 @@ VertexStageExportBase::~VertexStageExportBase()
 
 }
 
+bool VertexStageExportBase::do_process_outputs(nir_variable *output)
+{
+   return true;
+}
+
+void VertexStageExportBase::emit_shader_start()
+{
+
+}
+
+void VertexStageExportBase::scan_store_output(nir_intrinsic_instr* instr)
+{
+
+}
+
+bool VertexStageExportBase::store_deref(const nir_variable *out_var, nir_intrinsic_instr* instr)
+{
+   const store_loc store_info  = {
+      out_var->data.location_frac,
+      (unsigned int)out_var->data.location,
+      out_var->data.driver_location,
+      1
+   };
+
+   return do_store_output(store_info, instr);
+}
+
+bool VertexStageExportBase::store_output(nir_intrinsic_instr* instr)
+{
+   auto index = nir_src_as_const_value(instr->src[1]);
+   assert(index && "Indirect outputs not supported");
+
+   const store_loc store_info  = {
+      nir_intrinsic_component(instr),
+      nir_intrinsic_io_semantics(instr).location,
+      (unsigned)nir_intrinsic_base(instr) + index->u32,
+      0
+   };
+
+   return do_store_output(store_info, instr);
+}
+
 VertexStageExportForFS::VertexStageExportForFS(VertexStage& proc,
                                                const pipe_stream_output_info *so_info,
                                                r600_pipe_shader *pipe_shader, const r600_shader_key &key):
-   VertexStageExportBase(proc),
+   VertexStageWithOutputInfo(proc),
    m_last_param_export(nullptr),
    m_last_pos_export(nullptr),
    m_num_clip_dist(0),
@@ -33,7 +74,7 @@ VertexStageExportForFS::VertexStageExportForFS(VertexStage& proc,
 {
 }
 
-bool VertexStageExportBase::do_process_outputs(nir_variable *output)
+bool VertexStageWithOutputInfo::do_process_outputs(nir_variable *output)
 {
    if (output->data.location == VARYING_SLOT_COL0 ||
        output->data.location == VARYING_SLOT_COL1 ||
@@ -73,60 +114,58 @@ bool VertexStageExportBase::do_process_outputs(nir_variable *output)
           output->data.location != VARYING_SLOT_EDGE &&
           output->data.location != VARYING_SLOT_PSIZ &&
           output->data.location != VARYING_SLOT_CLIP_VERTEX)
-         m_param_map[output->data.location] = m_cur_param++;
+         m_param_driver_locations.push(output->data.driver_location);
 
       return true;
    }
    return false;
 }
 
-
-bool VertexStageExportForFS::store_deref(const nir_variable *out_var, nir_intrinsic_instr* instr)
+bool VertexStageExportForFS::do_store_output(const store_loc& store_info, nir_intrinsic_instr* instr)
 {
-
-   switch (out_var->data.location) {
+   switch (store_info.location) {
    case VARYING_SLOT_PSIZ:
       m_proc.sh_info().vs_out_point_size = 1;
       m_proc.sh_info().vs_out_misc_write = 1;
       /* fallthrough */
    case VARYING_SLOT_POS:
-      return emit_varying_pos(out_var, instr);
+      return emit_varying_pos(store_info, instr);
    case VARYING_SLOT_EDGE: {
       std::array<uint32_t, 4> swizzle_override = {7 ,0, 7, 7};
-      return emit_varying_pos(out_var, instr, &swizzle_override);
+      return emit_varying_pos(store_info, instr, &swizzle_override);
    }
    case VARYING_SLOT_VIEWPORT: {
       std::array<uint32_t, 4> swizzle_override = {7, 7, 7, 0};
-      return emit_varying_pos(out_var, instr, &swizzle_override) &&
-            emit_varying_param(out_var, instr);
+      return emit_varying_pos(store_info, instr, &swizzle_override) &&
+            emit_varying_param(store_info, instr);
    }
    case VARYING_SLOT_CLIP_VERTEX:
-      return emit_clip_vertices(out_var, instr);
+      return emit_clip_vertices(store_info, instr);
    case VARYING_SLOT_CLIP_DIST0:
    case VARYING_SLOT_CLIP_DIST1:
       m_num_clip_dist += 4;
-      return emit_varying_param(out_var, instr) && emit_varying_pos(out_var, instr);
+      return emit_varying_param(store_info, instr) && emit_varying_pos(store_info, instr);
    case VARYING_SLOT_LAYER: {
       m_proc.sh_info().vs_out_misc_write = 1;
       m_proc.sh_info().vs_out_layer = 1;
       std::array<uint32_t, 4> swz = {7,7,0,7};
-      return emit_varying_pos(out_var, instr, &swz) &&
-            emit_varying_param(out_var, instr);
+      return emit_varying_pos(store_info, instr, &swz) &&
+            emit_varying_param(store_info, instr);
    }
    case VARYING_SLOT_VIEW_INDEX:
-      return emit_varying_pos(out_var, instr) &&
-            emit_varying_param(out_var, instr);
+      return emit_varying_pos(store_info, instr) &&
+            emit_varying_param(store_info, instr);
 
    default:
-         return emit_varying_param(out_var, instr);
+         return emit_varying_param(store_info, instr);
    }
 
    fprintf(stderr, "r600-NIR: Unimplemented store_deref for %d\n",
-           out_var->data.location);
+           store_info.location);
    return false;
 }
 
-bool VertexStageExportForFS::emit_varying_pos(const nir_variable *out_var, nir_intrinsic_instr* instr,
+bool VertexStageExportForFS::emit_varying_pos(const store_loc &store_info, nir_intrinsic_instr* instr,
                                               std::array<uint32_t, 4> *swizzle_override)
 {
    std::array<uint32_t,4> swizzle;
@@ -139,25 +178,25 @@ bool VertexStageExportForFS::emit_varying_pos(const nir_variable *out_var, nir_i
             write_mask |= 1 << i;
       }
    } else {
-      write_mask = nir_intrinsic_write_mask(instr) << out_var->data.location_frac;
+      write_mask = nir_intrinsic_write_mask(instr) << store_info.frac;
       for (int i = 0; i < 4; ++i)
-         swizzle[i] = ((1 << i) & write_mask) ? i - out_var->data.location_frac : 7;
+         swizzle[i] = ((1 << i) & write_mask) ? i - store_info.frac : 7;
    }
 
-   m_proc.sh_info().output[out_var->data.driver_location].write_mask = write_mask;
+   m_proc.sh_info().output[store_info.driver_location].write_mask = write_mask;
 
-   GPRVector value = m_proc.vec_from_nir_with_fetch_constant(instr->src[1], write_mask, swizzle);
-   m_proc.set_output(out_var->data.driver_location, value.sel());
+   GPRVector value = m_proc.vec_from_nir_with_fetch_constant(instr->src[store_info.data_loc], write_mask, swizzle);
+   m_proc.set_output(store_info.driver_location, value.sel());
 
    int export_slot = 0;
 
-   switch (out_var->data.location) {
+   switch (store_info.location) {
    case VARYING_SLOT_EDGE: {
       m_proc.sh_info().vs_out_misc_write = 1;
       m_proc.sh_info().vs_out_edgeflag = 1;
       m_proc.emit_instruction(op1_mov, value.reg_i(1), {value.reg_i(1)}, {alu_write, alu_dst_clamp, alu_last_instr});
       m_proc.emit_instruction(op1_flt_to_int, value.reg_i(1), {value.reg_i(1)}, {alu_write, alu_last_instr});
-      m_proc.sh_info().output[out_var->data.driver_location].write_mask = 0xf;
+      m_proc.sh_info().output[store_info.driver_location].write_mask = 0xf;
    }
       /* fallthrough */
    case VARYING_SLOT_PSIZ:
@@ -177,53 +216,51 @@ bool VertexStageExportForFS::emit_varying_pos(const nir_variable *out_var, nir_i
       break;
    default:
       sfn_log << SfnLog::err << __func__ << "Unsupported location "
-              << out_var->data.location << "\n";
+              << store_info.location << "\n";
       return false;
    }
 
    m_last_pos_export = new ExportInstruction(export_slot, value, ExportInstruction::et_pos);
    m_proc.emit_export_instruction(m_last_pos_export);
-   m_proc.add_param_output_reg(out_var->data.driver_location, m_last_pos_export->gpr_ptr());
+   m_proc.add_param_output_reg(store_info.driver_location, m_last_pos_export->gpr_ptr());
    return true;
 }
 
-bool VertexStageExportForFS::emit_varying_param(const nir_variable *out_var, nir_intrinsic_instr* instr)
+bool VertexStageExportForFS::emit_varying_param(const store_loc &store_info, nir_intrinsic_instr* instr)
 {
-   assert(out_var->data.driver_location < m_proc.sh_info().noutput);
-   sfn_log << SfnLog::io << __func__ << ": emit DDL: " << out_var->data.driver_location << "\n";
+   assert(store_info.driver_location < m_proc.sh_info().noutput);
+   sfn_log << SfnLog::io << __func__ << ": emit DDL: " << store_info.driver_location << "\n";
 
-   int write_mask = nir_intrinsic_write_mask(instr) << out_var->data.location_frac;
+   int write_mask = nir_intrinsic_write_mask(instr) << store_info.frac;
    std::array<uint32_t,4> swizzle;
    for (int i = 0; i < 4; ++i)
-      swizzle[i] = ((1 << i) & write_mask) ? i - out_var->data.location_frac : 7;
+      swizzle[i] = ((1 << i) & write_mask) ? i - store_info.frac : 7;
 
-   m_proc.sh_info().output[out_var->data.driver_location].write_mask = write_mask;
+   //m_proc.sh_info().output[store_info.driver_location].write_mask = write_mask;
 
-   GPRVector value = m_proc.vec_from_nir_with_fetch_constant(instr->src[1], write_mask, swizzle, true);
-   m_proc.sh_info().output[out_var->data.driver_location].gpr = value.sel();
+   GPRVector value = m_proc.vec_from_nir_with_fetch_constant(instr->src[store_info.data_loc], write_mask, swizzle, true);
+   m_proc.sh_info().output[store_info.driver_location].gpr = value.sel();
 
    /* This should use the registers!! */
-   m_proc.set_output(out_var->data.driver_location, value.sel());
+   m_proc.set_output(store_info.driver_location, value.sel());
 
-   auto param_loc = m_param_map.find(out_var->data.location);
-   assert(param_loc != m_param_map.end());
-
-   m_last_param_export = new ExportInstruction(param_loc->second, value, ExportInstruction::et_param);
+   m_last_param_export = new ExportInstruction(param_id(store_info.driver_location),
+                                               value, ExportInstruction::et_param);
    m_proc.emit_export_instruction(m_last_param_export);
-   m_proc.add_param_output_reg(out_var->data.driver_location, m_last_param_export->gpr_ptr());
+   m_proc.add_param_output_reg(store_info.driver_location, m_last_param_export->gpr_ptr());
    return true;
 }
 
-bool VertexStageExportForFS::emit_clip_vertices(const nir_variable *out_var, nir_intrinsic_instr* instr)
+bool VertexStageExportForFS::emit_clip_vertices(const store_loc &store_info, nir_intrinsic_instr* instr)
 {
    m_proc.sh_info().cc_dist_mask = 0xff;
    m_proc.sh_info().clip_dist_write = 0xff;
 
-   m_clip_vertex = m_proc.vec_from_nir_with_fetch_constant(instr->src[1], 0xf, {0,1,2,3});
-   m_proc.add_param_output_reg(out_var->data.driver_location, &m_clip_vertex);
+   m_clip_vertex = m_proc.vec_from_nir_with_fetch_constant(instr->src[store_info.data_loc], 0xf, {0,1,2,3});
+   m_proc.add_param_output_reg(store_info.driver_location, &m_clip_vertex);
 
    for (int i = 0; i < 4; ++i)
-      m_proc.sh_info().output[out_var->data.driver_location].write_mask |= 1 << i;
+      m_proc.sh_info().output[store_info.driver_location].write_mask |= 1 << i;
 
    GPRVector clip_dist[2] = { m_proc.get_temp_vec4(), m_proc.get_temp_vec4()};
 
@@ -249,12 +286,68 @@ bool VertexStageExportForFS::emit_clip_vertices(const nir_variable *out_var, nir
    return true;
 }
 
+VertexStageWithOutputInfo::VertexStageWithOutputInfo(VertexStage& proc):
+   VertexStageExportBase(proc),
+   m_current_param(0)
+{
+
+}
+
+void VertexStageWithOutputInfo::scan_store_output(nir_intrinsic_instr* instr)
+{
+   auto location = nir_intrinsic_io_semantics(instr).location;
+   auto driver_location = nir_intrinsic_base(instr);
+   auto index = nir_src_as_const_value(instr->src[1]);
+   assert(index);
+
+   r600_shader_io& io = m_proc.sh_info().output[driver_location + index->u32];
+   auto semantic = r600_get_varying_semantic(location + index->u32);
+   io.name = semantic.first;
+   io.sid = semantic.second;
+   m_proc.evaluate_spi_sid(io);
+   io.write_mask = nir_intrinsic_write_mask(instr);
+   ++m_proc.sh_info().noutput;
+
+   if (location == VARYING_SLOT_PSIZ ||
+       location == VARYING_SLOT_EDGE ||
+       location == VARYING_SLOT_LAYER) // VIEWPORT?
+      m_cur_clip_pos = 2;
+
+   if (location != VARYING_SLOT_POS &&
+       location != VARYING_SLOT_EDGE &&
+       location != VARYING_SLOT_PSIZ &&
+       location != VARYING_SLOT_CLIP_VERTEX) {
+      m_param_driver_locations.push(driver_location + index->u32);
+   }
+}
+
+unsigned VertexStageWithOutputInfo::param_id(unsigned driver_location)
+{
+   auto param_loc = m_param_map.find(driver_location);
+   assert(param_loc != m_param_map.end());
+   return param_loc->second;
+}
+
+void VertexStageWithOutputInfo::emit_shader_start()
+{
+   while (!m_param_driver_locations.empty()) {
+      auto loc = m_param_driver_locations.top();
+      m_param_driver_locations.pop();
+      m_param_map[loc] = m_current_param++;
+   }
+}
+
+unsigned VertexStageWithOutputInfo::current_param() const
+{
+   return m_current_param;
+}
+
 void VertexStageExportForFS::finalize_exports()
 {
    if (m_key.vs.as_gs_a) {
       PValue o(new GPRValue(0,PIPE_SWIZZLE_0));
       GPRVector primid({m_proc.primitive_id(), o,o,o});
-      m_last_param_export = new ExportInstruction(m_cur_param, primid, ExportInstruction::et_param);
+      m_last_param_export = new ExportInstruction(current_param(), primid, ExportInstruction::et_param);
       m_proc.emit_export_instruction(m_last_param_export);
       int i;
       i = m_proc.sh_info().noutput++;
@@ -375,20 +468,19 @@ bool VertexStageExportForFS::emit_stream(int stream)
 
 VertexStageExportForGS::VertexStageExportForGS(VertexStage &proc,
                                                const r600_shader *gs_shader):
-   VertexStageExportBase(proc),
+   VertexStageWithOutputInfo(proc),
    m_num_clip_dist(0),
    m_gs_shader(gs_shader)
 {
 
 }
 
-bool VertexStageExportForGS::store_deref(const nir_variable *out_var, nir_intrinsic_instr* instr)
+bool VertexStageExportForGS::do_store_output(const store_loc& store_info, nir_intrinsic_instr* instr)
 {
-
    int ring_offset = -1;
-   const r600_shader_io& out_io = m_proc.sh_info().output[out_var->data.driver_location];
+   const r600_shader_io& out_io = m_proc.sh_info().output[store_info.driver_location];
 
-   sfn_log << SfnLog::io << "check output " << out_var->data.driver_location
+   sfn_log << SfnLog::io << "check output " << store_info.driver_location
            << " name=" << out_io.name<< " sid=" << out_io.sid << "\n";
    for (unsigned k = 0; k < m_gs_shader->ninput; ++k) {
       auto& in_io = m_gs_shader->input[k];
@@ -401,7 +493,7 @@ bool VertexStageExportForGS::store_deref(const nir_variable *out_var, nir_intrin
       }
    }
 
-   if (out_var->data.location == VARYING_SLOT_VIEWPORT) {
+   if (store_info.location == VARYING_SLOT_VIEWPORT) {
       m_proc.sh_info().vs_out_viewport = 1;
       m_proc.sh_info().vs_out_misc_write = 1;
       return true;
@@ -409,23 +501,23 @@ bool VertexStageExportForGS::store_deref(const nir_variable *out_var, nir_intrin
 
    if (ring_offset == -1) {
       sfn_log << SfnLog::err << "VS defines output at "
-              << out_var->data.driver_location << "name=" << out_io.name
+              << store_info.driver_location << "name=" << out_io.name
               << " sid=" << out_io.sid << " that is not consumed as GS input\n";
       return true;
    }
 
    uint32_t write_mask =  (1 << instr->num_components) - 1;
 
-   GPRVector value = m_proc.vec_from_nir_with_fetch_constant(instr->src[1], write_mask,
+   GPRVector value = m_proc.vec_from_nir_with_fetch_constant(instr->src[store_info.data_loc], write_mask,
          swizzle_from_comps(instr->num_components), true);
 
    auto ir = new MemRingOutIntruction(cf_mem_ring, mem_write, value,
                                       ring_offset >> 2, 4, PValue());
    m_proc.emit_export_instruction(ir);
 
-   m_proc.sh_info().output[out_var->data.driver_location].write_mask |= write_mask;
-   if (out_var->data.location == VARYING_SLOT_CLIP_DIST0 ||
-       out_var->data.location == VARYING_SLOT_CLIP_DIST1)
+   m_proc.sh_info().output[store_info.driver_location].write_mask |= write_mask;
+   if (store_info.location == VARYING_SLOT_CLIP_DIST0 ||
+       store_info.location == VARYING_SLOT_CLIP_DIST1)
       m_num_clip_dist += 4;
 
    return true;
@@ -441,7 +533,7 @@ VertexStageExportForES::VertexStageExportForES(VertexStage& proc):
 {
 }
 
-bool VertexStageExportForES::store_deref(const nir_variable *out_var, nir_intrinsic_instr* instr)
+bool VertexStageExportForES::do_store_output(const store_loc& store_info, nir_intrinsic_instr* instr)
 {
    return true;
 }
diff --git a/src/gallium/drivers/r600/sfn/sfn_vertexstageexport.h b/src/gallium/drivers/r600/sfn/sfn_vertexstageexport.h
index 3740a325f14..b75d7ccfdd0 100644
--- a/src/gallium/drivers/r600/sfn/sfn_vertexstageexport.h
+++ b/src/gallium/drivers/r600/sfn/sfn_vertexstageexport.h
@@ -2,6 +2,7 @@
 #define VERTEXSTAGEEXPORT_H
 
 #include "sfn_shader_base.h"
+#include <queue>
 
 namespace r600 {
 
@@ -17,19 +18,48 @@ class VertexStageExportBase
 public:
    VertexStageExportBase(VertexStage& proc);
    virtual ~VertexStageExportBase();
-   virtual bool store_deref(const nir_variable *out_var, nir_intrinsic_instr* instr) = 0;
+   virtual bool store_deref(const nir_variable *out_var, nir_intrinsic_instr* instr);
    virtual void finalize_exports() = 0;
    virtual bool do_process_outputs(nir_variable *output);
-   int cur_param() const {return m_cur_param;}
+
+   virtual void emit_shader_start();
+
+   virtual void scan_store_output(nir_intrinsic_instr* instr);
+   bool store_output(nir_intrinsic_instr* instr);
 protected:
+
+   struct store_loc {
+      unsigned frac;
+      unsigned location;
+      unsigned driver_location;
+      int data_loc;
+   };
+   virtual bool do_store_output(const store_loc& store_info, nir_intrinsic_instr* instr) = 0;
+
    VertexStage& m_proc;
-   std::map<unsigned, unsigned> m_param_map;
    int m_cur_clip_pos;
-   int m_cur_param;
    GPRVector m_clip_vertex;
 };
 
-class VertexStageExportForFS : public VertexStageExportBase
+
+class VertexStageWithOutputInfo: public VertexStageExportBase
+{
+protected:
+   VertexStageWithOutputInfo(VertexStage& proc);
+   void scan_store_output(nir_intrinsic_instr* instr) override;
+   void emit_shader_start() override;
+   bool do_process_outputs(nir_variable *output) override;
+protected:
+   unsigned param_id(unsigned driver_location);
+   unsigned current_param() const;
+private:
+   std::priority_queue<unsigned, std::vector<int>, std::greater<int> > m_param_driver_locations;
+   std::map<unsigned, unsigned> m_param_map;
+   unsigned m_current_param;
+};
+
+
+class VertexStageExportForFS : public VertexStageWithOutputInfo
 {
 public:
    VertexStageExportForFS(VertexStage& proc,
@@ -37,14 +67,14 @@ public:
                           r600_pipe_shader *pipe_shader,
                           const r600_shader_key& key);
 
-   bool store_deref(const nir_variable *out_var, nir_intrinsic_instr* instr) override;
    void finalize_exports() override;
-
 private:
-   bool emit_varying_param(const nir_variable *out_var, nir_intrinsic_instr* instr);
-   bool emit_varying_pos(const nir_variable *out_var, nir_intrinsic_instr* instr,
+   bool do_store_output(const store_loc& store_info, nir_intrinsic_instr* instr) override;
+
+   bool emit_varying_param(const store_loc& store_info, nir_intrinsic_instr* instr);
+   bool emit_varying_pos(const store_loc& store_info, nir_intrinsic_instr* instr,
                          std::array<uint32_t, 4> *swizzle_override = nullptr);
-   bool emit_clip_vertices(const nir_variable *out_var, nir_intrinsic_instr* instr);
+   bool emit_clip_vertices(const store_loc &store_info, nir_intrinsic_instr* instr);
    bool emit_stream(int stream);
 
    ExportInstruction *m_last_param_export;
@@ -56,17 +86,18 @@ private:
    r600_pipe_shader *m_pipe_shader;
    const r600_shader_key& m_key;
 
+
 };
 
-class VertexStageExportForGS : public VertexStageExportBase
+class VertexStageExportForGS : public VertexStageWithOutputInfo
 {
 public:
    VertexStageExportForGS(VertexStage& proc,
                           const r600_shader *gs_shader);
-   bool store_deref(const nir_variable *out_var, nir_intrinsic_instr* instr) override;
    void finalize_exports() override;
 
 private:
+   bool do_store_output(const store_loc& store_info, nir_intrinsic_instr* instr) override;
    unsigned m_num_clip_dist;
    const r600_shader *m_gs_shader;
 };
@@ -75,8 +106,9 @@ class VertexStageExportForES : public VertexStageExportBase
 {
 public:
    VertexStageExportForES(VertexStage& proc);
-   bool store_deref(const nir_variable *out_var, nir_intrinsic_instr* instr) override;
    void finalize_exports() override;
+private:
+   bool do_store_output(const store_loc& store_info, nir_intrinsic_instr* instr) override;
 };
 
 



More information about the mesa-commit mailing list