[Mesa-dev] [PATCH v2 06/25] st/glsl_to_tgsi: plumb the GS output stream qualifier through to TGSI

Nicolai Hähnle nhaehnle at gmail.com
Tue Dec 6 10:48:17 UTC 2016


From: Nicolai Hähnle <nicolai.haehnle at amd.com>

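Record the stream of each GS output component in ureg and emit it in the
StreamX/Y/Z/W fields of the TGSI semantic declaration. This allows drivers
to emit GS outputs in a smarter way.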
---
 src/gallium/auxiliary/tgsi/tgsi_ureg.c     | 21 ++++++++++++++++++++-
 src/gallium/auxiliary/tgsi/tgsi_ureg.h     |  1 +
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 10 ++++++++++
 3 files changed, 31 insertions(+), 1 deletion(-)

diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.c b/src/gallium/auxiliary/tgsi/tgsi_ureg.c
index 7bcd242..196a893 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_ureg.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.c
@@ -120,20 +120,21 @@ struct ureg_program
 
    struct {
       unsigned semantic_name;
       unsigned semantic_index;
    } system_value[UREG_MAX_SYSTEM_VALUE];
    unsigned nr_system_values;
 
    struct {
       unsigned semantic_name;
       unsigned semantic_index;
+      unsigned streams;
       unsigned usage_mask; /* = TGSI_WRITEMASK_* */
       unsigned first;
       unsigned last;
       unsigned array_id;
    } output[UREG_MAX_OUTPUT];
    unsigned nr_outputs, nr_output_regs;
 
    struct {
       union {
          float f[4];
@@ -402,28 +403,33 @@ ureg_DECL_system_value(struct ureg_program *ureg,
 
 out:
    return ureg_src_register(TGSI_FILE_SYSTEM_VALUE, i);
 }
 
 
 struct ureg_dst
 ureg_DECL_output_layout(struct ureg_program *ureg,
                         unsigned semantic_name,
                         unsigned semantic_index,
+                        unsigned streams,
                         unsigned index,
                         unsigned usage_mask,
                         unsigned array_id,
                         unsigned array_size)
 {
    unsigned i;
 
    assert(usage_mask != 0);
+   assert(!(streams & 0x03) || (usage_mask & 1));
+   assert(!(streams & 0x0c) || (usage_mask & 2));
+   assert(!(streams & 0x30) || (usage_mask & 4));
+   assert(!(streams & 0xc0) || (usage_mask & 8));
 
    for (i = 0; i < ureg->nr_outputs; i++) {
       if (ureg->output[i].semantic_name == semantic_name &&
           ureg->output[i].semantic_index == semantic_index) {
          if (ureg->output[i].array_id == array_id) {
             ureg->output[i].usage_mask |= usage_mask;
             goto out;
          }
          assert((ureg->output[i].usage_mask & usage_mask) == 0);
       }
@@ -434,37 +440,40 @@ ureg_DECL_output_layout(struct ureg_program *ureg,
       ureg->output[i].semantic_index = semantic_index;
       ureg->output[i].usage_mask = usage_mask;
       ureg->output[i].first = index;
       ureg->output[i].last = index + array_size - 1;
       ureg->output[i].array_id = array_id;
       ureg->nr_output_regs = MAX2(ureg->nr_output_regs, index + array_size);
       ureg->nr_outputs++;
    }
    else {
       set_bad( ureg );
+      i = 0;
    }
 
 out:
+   ureg->output[i].streams |= streams;
+
    return ureg_dst_array_register(TGSI_FILE_OUTPUT, ureg->output[i].first,
                                   array_id);
 }
 
 
 struct ureg_dst
 ureg_DECL_output_masked(struct ureg_program *ureg,
                         unsigned name,
                         unsigned index,
                         unsigned usage_mask,
                         unsigned array_id,
                         unsigned array_size)
 {
-   return ureg_DECL_output_layout(ureg, name, index,
+   return ureg_DECL_output_layout(ureg, name, index, 0,
                                   ureg->nr_output_regs, usage_mask, array_id, array_size);
 }
 
 
 struct ureg_dst 
 ureg_DECL_output(struct ureg_program *ureg,
                  unsigned name,
                  unsigned index)
 {
    return ureg_DECL_output_masked(ureg, name, index, TGSI_WRITEMASK_XYZW,
@@ -1547,40 +1556,45 @@ ureg_memory_insn(struct ureg_program *ureg,
 }
 
 
 static void
 emit_decl_semantic(struct ureg_program *ureg,
                    unsigned file,
                    unsigned first,
                    unsigned last,
                    unsigned semantic_name,
                    unsigned semantic_index,
+                   unsigned streams,
                    unsigned usage_mask,
                    unsigned array_id)
 {
    union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, array_id ? 4 : 3);
 
    out[0].value = 0;
    out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION;
    out[0].decl.NrTokens = 3;
    out[0].decl.File = file;
    out[0].decl.UsageMask = usage_mask;
    out[0].decl.Semantic = 1;
    out[0].decl.Array = array_id != 0;
 
    out[1].value = 0;
    out[1].decl_range.First = first;
    out[1].decl_range.Last = last;
 
    out[2].value = 0;
    out[2].decl_semantic.Name = semantic_name;
    out[2].decl_semantic.Index = semantic_index;
+   out[2].decl_semantic.StreamX = streams & 3;
+   out[2].decl_semantic.StreamY = (streams >> 2) & 3;
+   out[2].decl_semantic.StreamZ = (streams >> 4) & 3;
+   out[2].decl_semantic.StreamW = (streams >> 6) & 3;
 
    if (array_id) {
       out[3].value = 0;
       out[3].array.ArrayID = array_id;
    }
 }
 
 
 static void
 emit_decl_fs(struct ureg_program *ureg,
@@ -1871,70 +1885,75 @@ static void emit_decls( struct ureg_program *ureg )
       }
    } else {
       if (ureg->supports_any_inout_decl_range) {
          for (i = 0; i < ureg->nr_inputs; i++) {
             emit_decl_semantic(ureg,
                                TGSI_FILE_INPUT,
                                ureg->input[i].first,
                                ureg->input[i].last,
                                ureg->input[i].semantic_name,
                                ureg->input[i].semantic_index,
+                               0,
                                TGSI_WRITEMASK_XYZW,
                                ureg->input[i].array_id);
          }
       }
       else {
          for (i = 0; i < ureg->nr_inputs; i++) {
             for (j = ureg->input[i].first; j <= ureg->input[i].last; j++) {
                emit_decl_semantic(ureg,
                                   TGSI_FILE_INPUT,
                                   j, j,
                                   ureg->input[i].semantic_name,
                                   ureg->input[i].semantic_index +
                                   (j - ureg->input[i].first),
+                                  0,
                                   TGSI_WRITEMASK_XYZW, 0);
             }
          }
       }
    }
 
    for (i = 0; i < ureg->nr_system_values; i++) {
       emit_decl_semantic(ureg,
                          TGSI_FILE_SYSTEM_VALUE,
                          i,
                          i,
                          ureg->system_value[i].semantic_name,
                          ureg->system_value[i].semantic_index,
+                         0,
                          TGSI_WRITEMASK_XYZW, 0);
    }
 
    if (ureg->supports_any_inout_decl_range) {
       for (i = 0; i < ureg->nr_outputs; i++) {
          emit_decl_semantic(ureg,
                             TGSI_FILE_OUTPUT,
                             ureg->output[i].first,
                             ureg->output[i].last,
                             ureg->output[i].semantic_name,
                             ureg->output[i].semantic_index,
+                            ureg->output[i].streams,
                             ureg->output[i].usage_mask,
                             ureg->output[i].array_id);
       }
    }
    else {
       for (i = 0; i < ureg->nr_outputs; i++) {
          for (j = ureg->output[i].first; j <= ureg->output[i].last; j++) {
             emit_decl_semantic(ureg,
                                TGSI_FILE_OUTPUT,
                                j, j,
                                ureg->output[i].semantic_name,
                                ureg->output[i].semantic_index +
                                (j - ureg->output[i].first),
+                               ureg->output[i].streams,
                                ureg->output[i].usage_mask, 0);
          }
       }
    }
 
    for (i = 0; i < ureg->nr_samplers; i++) {
       emit_decl_range( ureg, 
                        TGSI_FILE_SAMPLER,
                        ureg->sampler[i].Index, 1 );
    }
diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.h b/src/gallium/auxiliary/tgsi/tgsi_ureg.h
index d3c28b3..51f6985 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_ureg.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.h
@@ -244,20 +244,21 @@ ureg_DECL_input(struct ureg_program *,
 
 struct ureg_src
 ureg_DECL_system_value(struct ureg_program *,
                        unsigned semantic_name,
                        unsigned semantic_index);
 
 struct ureg_dst
 ureg_DECL_output_layout(struct ureg_program *,
                         unsigned semantic_name,
                         unsigned semantic_index,
+                        unsigned streams,
                         unsigned index,
                         unsigned usage_mask,
                         unsigned array_id,
                         unsigned array_size);
 
 struct ureg_dst
 ureg_DECL_output_masked(struct ureg_program *,
                         unsigned semantic_name,
                         unsigned semantic_index,
                         unsigned usage_mask,
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 7720edf..8707d28 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -332,20 +332,21 @@ public:
 };
 
 static st_src_reg undef_src = st_src_reg(PROGRAM_UNDEFINED, 0, GLSL_TYPE_ERROR);
 static st_dst_reg undef_dst = st_dst_reg(PROGRAM_UNDEFINED, SWIZZLE_NOOP, GLSL_TYPE_ERROR);
 
 struct inout_decl {
    unsigned mesa_index;
    unsigned array_id; /* TGSI ArrayID; 1-based: 0 means not an array */
    unsigned size;
    unsigned interp_loc;
+   unsigned gs_out_streams;
    enum glsl_interp_mode interp;
    enum glsl_base_type base_type;
    ubyte usage_mask; /* GLSL-style usage-mask,  i.e. single bit per double */
 };
 
 static struct inout_decl *
 find_inout_array(struct inout_decl *decls, unsigned count, unsigned array_id)
 {
    assert(array_id != 0);
 
@@ -2471,20 +2472,28 @@ glsl_to_tgsi_visitor::visit(ir_dereference_variable *ir)
          if (type_without_array->is_64bit())
             component = component / 2;
          if (type_without_array->vector_elements)
             num_components = type_without_array->vector_elements;
          else
             num_components = 4;
 
          decl->mesa_index = var->data.location + FRAG_RESULT_MAX * var->data.index;
          decl->base_type = type_without_array->base_type;
          decl->usage_mask = u_bit_consecutive(component, num_components);
+         if (var->data.stream & (1u << 31)) {
+            decl->gs_out_streams = var->data.stream & ~(1u << 31);
+         } else {
+            assert(var->data.stream < 4);
+            decl->gs_out_streams = 0;
+            for (unsigned i = 0; i < num_components; ++i)
+               decl->gs_out_streams |= var->data.stream << (2 * (component + i));
+         }
 
          if (is_inout_array(shader->Stage, var, &remove_array)) {
             decl->array_id = num_output_arrays + 1;
             num_output_arrays++;
          } else {
             decl->array_id = 0;
          }
 
          if (remove_array)
             decl->size = type_size(var->type->fields.array);
@@ -6084,20 +6093,21 @@ st_translate_program(
             if (tgsi_usage_mask == 1)
                tgsi_usage_mask = TGSI_WRITEMASK_XY;
             else if (tgsi_usage_mask == 2)
                tgsi_usage_mask = TGSI_WRITEMASK_ZW;
             else
                tgsi_usage_mask = TGSI_WRITEMASK_XYZW;
          }
 
          dst = ureg_DECL_output_layout(ureg,
                      outputSemanticName[slot], outputSemanticIndex[slot],
+                     decl->gs_out_streams,
                      slot, tgsi_usage_mask, decl->array_id, decl->size);
 
          for (unsigned j = 0; j < decl->size; ++j) {
             if (t->outputs[slot + j].File != TGSI_FILE_OUTPUT) {
                /* The ArrayID is set up in dst_register */
                t->outputs[slot + j] = dst;
                t->outputs[slot + j].ArrayID = 0;
                t->outputs[slot + j].Index += j;
             }
          }
-- 
2.7.4



More information about the mesa-dev mailing list