Mesa (master): ilo: rework gen7_3DSTATE_SO_DECL_LIST()

Chia-I Wu olv at kemper.freedesktop.org
Tue Nov 11 08:04:46 UTC 2014


Module: Mesa
Branch: master
Commit: eab595d573ff7719a1960bdf819eda75a1aa4c26
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=eab595d573ff7719a1960bdf819eda75a1aa4c26

Author: Chia-I Wu <olvaffe at gmail.com>
Date:   Tue Nov 11 11:53:57 2014 +0800

ilo: rework gen7_3DSTATE_SO_DECL_LIST()

This started as a change to make pipe_stream_output_info mandatory, but ended
up also adding support for stream IDs and making the "all 128 decls" errata
workaround Gen7-specific.

Signed-off-by: Chia-I Wu <olvaffe at gmail.com>

---

 src/gallium/drivers/ilo/ilo_builder_3d_top.h |  126 ++++++++++++++------------
 1 file changed, 66 insertions(+), 60 deletions(-)

diff --git a/src/gallium/drivers/ilo/ilo_builder_3d_top.h b/src/gallium/drivers/ilo/ilo_builder_3d_top.h
index 5f758a1..ce2debe 100644
--- a/src/gallium/drivers/ilo/ilo_builder_3d_top.h
+++ b/src/gallium/drivers/ilo/ilo_builder_3d_top.h
@@ -867,90 +867,96 @@ static inline void
 gen7_3DSTATE_SO_DECL_LIST(struct ilo_builder *builder,
                           const struct pipe_stream_output_info *so_info)
 {
+   /*
+    * Note that "DWord Length" has 9 bits for this command and the type of
+    * cmd_len cannot be uint8_t.
+    */
    uint16_t cmd_len;
-   int buffer_selects, num_entries, i;
-   uint16_t so_decls[128];
+   struct {
+      int buf_selects;
+      int decl_count;
+      uint16_t decls[128];
+   } streams[4];
+   unsigned buf_offsets[PIPE_MAX_SO_BUFFERS];
+   int hw_decl_count, i;
    uint32_t *dw;
 
    ILO_DEV_ASSERT(builder->dev, 7, 7.5);
 
-   buffer_selects = 0;
-   num_entries = 0;
+   memset(streams, 0, sizeof(streams));
+   memset(buf_offsets, 0, sizeof(buf_offsets));
 
-   if (so_info) {
-      int buffer_offsets[PIPE_MAX_SO_BUFFERS];
+   for (i = 0; i < so_info->num_outputs; i++) {
+      unsigned decl, st, buf, reg, mask;
 
-      memset(buffer_offsets, 0, sizeof(buffer_offsets));
+      st = so_info->output[i].stream;
+      buf = so_info->output[i].output_buffer;
 
-      for (i = 0; i < so_info->num_outputs; i++) {
-         unsigned decl, buf, reg, mask;
+      /* pad with holes */
+      while (buf_offsets[buf] < so_info->output[i].dst_offset) {
+         int num_dwords;
 
-         buf = so_info->output[i].output_buffer;
+         num_dwords = so_info->output[i].dst_offset - buf_offsets[buf];
+         if (num_dwords > 4)
+            num_dwords = 4;
 
-         /* pad with holes */
-         assert(buffer_offsets[buf] <= so_info->output[i].dst_offset);
-         while (buffer_offsets[buf] < so_info->output[i].dst_offset) {
-            int num_dwords;
+         decl = buf << GEN7_SO_DECL_OUTPUT_SLOT__SHIFT |
+                GEN7_SO_DECL_HOLE_FLAG |
+                ((1 << num_dwords) - 1) << GEN7_SO_DECL_COMPONENT_MASK__SHIFT;
 
-            num_dwords = so_info->output[i].dst_offset - buffer_offsets[buf];
-            if (num_dwords > 4)
-               num_dwords = 4;
+         assert(streams[st].decl_count < Elements(streams[st].decls));
+         streams[st].decls[streams[st].decl_count++] = decl;
+         buf_offsets[buf] += num_dwords;
+      }
+      assert(buf_offsets[buf] == so_info->output[i].dst_offset);
 
-            decl = buf << GEN7_SO_DECL_OUTPUT_SLOT__SHIFT |
-                   GEN7_SO_DECL_HOLE_FLAG |
-                   ((1 << num_dwords) - 1) << GEN7_SO_DECL_COMPONENT_MASK__SHIFT;
+      reg = so_info->output[i].register_index;
+      mask = ((1 << so_info->output[i].num_components) - 1) <<
+         so_info->output[i].start_component;
 
-            so_decls[num_entries++] = decl;
-            buffer_offsets[buf] += num_dwords;
-         }
+      decl = buf << GEN7_SO_DECL_OUTPUT_SLOT__SHIFT |
+             reg << GEN7_SO_DECL_REG_INDEX__SHIFT |
+             mask << GEN7_SO_DECL_COMPONENT_MASK__SHIFT;
 
-         reg = so_info->output[i].register_index;
-         mask = ((1 << so_info->output[i].num_components) - 1) <<
-            so_info->output[i].start_component;
+      assert(streams[st].decl_count < Elements(streams[st].decls));
 
-         decl = buf << GEN7_SO_DECL_OUTPUT_SLOT__SHIFT |
-                reg << GEN7_SO_DECL_REG_INDEX__SHIFT |
-                mask << GEN7_SO_DECL_COMPONENT_MASK__SHIFT;
+      streams[st].buf_selects |= 1 << buf;
+      streams[st].decls[streams[st].decl_count++] = decl;
+      buf_offsets[buf] += so_info->output[i].num_components;
+   }
 
-         so_decls[num_entries++] = decl;
-         buffer_selects |= 1 << buf;
-         buffer_offsets[buf] += so_info->output[i].num_components;
-      }
+   if (ilo_dev_gen(builder->dev) >= ILO_GEN(7.5)) {
+      hw_decl_count = MAX4(streams[0].decl_count, streams[1].decl_count,
+                           streams[2].decl_count, streams[3].decl_count);
+   } else {
+      /*
+       * From the Ivy Bridge PRM, volume 2 part 1, page 201:
+       *
+       *     "Errata: All 128 decls for all four streams must be included
+       *      whenever this command is issued. The "Num Entries [n]" fields
+       *      still contain the actual numbers of valid decls."
+       */
+      hw_decl_count = 128;
    }
 
-   /*
-    * From the Ivy Bridge PRM, volume 2 part 1, page 201:
-    *
-    *     "Errata: All 128 decls for all four streams must be included
-    *      whenever this command is issued. The "Num Entries [n]" fields still
-    *      contain the actual numbers of valid decls."
-    *
-    * Also note that "DWord Length" has 9 bits for this command, and the type
-    * of cmd_len is thus uint16_t.
-    */
-   cmd_len = 2 * 128 + 3;
+   cmd_len = 3 + 2 * hw_decl_count;
 
    ilo_builder_batch_pointer(builder, cmd_len, &dw);
 
    dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_SO_DECL_LIST) | (cmd_len - 2);
-   dw[1] = 0 << GEN7_SO_DECL_DW1_STREAM3_BUFFER_SELECTS__SHIFT |
-           0 << GEN7_SO_DECL_DW1_STREAM2_BUFFER_SELECTS__SHIFT |
-           0 << GEN7_SO_DECL_DW1_STREAM1_BUFFER_SELECTS__SHIFT |
-           buffer_selects << GEN7_SO_DECL_DW1_STREAM0_BUFFER_SELECTS__SHIFT;
-   dw[2] = 0 << GEN7_SO_DECL_DW2_STREAM3_ENTRY_COUNT__SHIFT |
-           0 << GEN7_SO_DECL_DW2_STREAM2_ENTRY_COUNT__SHIFT |
-           0 << GEN7_SO_DECL_DW2_STREAM1_ENTRY_COUNT__SHIFT |
-           num_entries << GEN7_SO_DECL_DW2_STREAM0_ENTRY_COUNT__SHIFT;
+   dw[1] = streams[3].buf_selects << GEN7_SO_DECL_DW1_STREAM3_BUFFER_SELECTS__SHIFT |
+           streams[2].buf_selects << GEN7_SO_DECL_DW1_STREAM2_BUFFER_SELECTS__SHIFT |
+           streams[1].buf_selects << GEN7_SO_DECL_DW1_STREAM1_BUFFER_SELECTS__SHIFT |
+           streams[0].buf_selects << GEN7_SO_DECL_DW1_STREAM0_BUFFER_SELECTS__SHIFT;
+   dw[2] = streams[3].decl_count << GEN7_SO_DECL_DW2_STREAM3_ENTRY_COUNT__SHIFT |
+           streams[2].decl_count << GEN7_SO_DECL_DW2_STREAM2_ENTRY_COUNT__SHIFT |
+           streams[1].decl_count << GEN7_SO_DECL_DW2_STREAM1_ENTRY_COUNT__SHIFT |
+           streams[0].decl_count << GEN7_SO_DECL_DW2_STREAM0_ENTRY_COUNT__SHIFT;
    dw += 3;
 
-   for (i = 0; i < num_entries; i++) {
-      dw[0] = so_decls[i];
-      dw[1] = 0;
-      dw += 2;
-   }
-   for (; i < 128; i++) {
-      dw[0] = 0;
-      dw[1] = 0;
+   for (i = 0; i < hw_decl_count; i++) {
+      dw[0] = streams[1].decls[i] << 16 | streams[0].decls[i];
+      dw[1] = streams[3].decls[i] << 16 | streams[2].decls[i];
       dw += 2;
    }
 }




More information about the mesa-commit mailing list