[Mesa-dev] [RFC PATCH] i965/gs: add snb support

Ilia Mirkin imirkin at alum.mit.edu
Mon Feb 24 00:36:25 PST 2014


Before you read any further, this is nowhere close to working. However it's in
a state where I think most of the structure is there, albeit with a lot of XXX
comments. And I haven't actually implemented the new opcodes I've added.

I was hoping one or two Intel people could take a look at this and let me know
of any pitfalls I'm likely to run into. I've already gotten a lot of help and
advice from Ken, but wanted to put something out publicly.

Any and all feedback much appreciated!

Not-Signed-off-by: Ilia Mirkin <imirkin at alum.mit.edu>

---
 src/mesa/drivers/dri/i965/Makefile.sources        |   1 +
 src/mesa/drivers/dri/i965/brw_defines.h           |  13 +
 src/mesa/drivers/dri/i965/brw_vec4.h              |   1 +
 src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp |  36 ++-
 src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h   |   5 +-
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp    |  14 ++
 src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp     | 285 ++++++++++++++++++++++
 src/mesa/drivers/dri/i965/gen6_gs_visitor.h       |  63 +++++
 8 files changed, 405 insertions(+), 13 deletions(-)
 create mode 100644 src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp
 create mode 100644 src/mesa/drivers/dri/i965/gen6_gs_visitor.h

diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources
index dc30eb3..96b637f 100644
--- a/src/mesa/drivers/dri/i965/Makefile.sources
+++ b/src/mesa/drivers/dri/i965/Makefile.sources
@@ -119,6 +119,7 @@ i965_FILES = \
 	gen6_clip_state.c \
 	gen6_depthstencil.c \
 	gen6_gs_state.c \
+	gen6_gs_visitor.cpp \
         gen6_multisample_state.c \
 	gen6_queryobj.c \
 	gen6_sampler_state.c \
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index 01d3cb6..a24919c 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -909,6 +909,19 @@ enum opcode {
     * - dst is the GRF for gl_InvocationID.
     */
    GS_OPCODE_GET_INSTANCE_ID,
+
+   /**
+    * Sets DWORD 2 of dst to the value in src DWORD 0. Used by geometry
+    * shaders to initialize DWORD 2 of the message header, which contains
+    * primitive start/end flags.
+    */
+   GS_OPCODE_SET_DWORD_2,
+
+   /**
+    * Emits a FF_SYNC, which on Gen6 returns a VUE handle, which is needed to
+    * emit state in GS.
+    */
+   GS_OPCODE_FF_SYNC,
 };
 
 enum brw_urb_write_flags {
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index 6bd8b80..14d67b7 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -130,6 +130,7 @@ public:
    bool is_one() const;
 
    src_reg(class vec4_visitor *v, const struct glsl_type *type);
+   src_reg(class vec4_visitor *v, const struct glsl_type *type, int size);
 
    explicit src_reg(dst_reg reg);
 
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
index 0a2d8ff..ede9002 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
@@ -28,6 +28,7 @@
  */
 
 #include "brw_vec4_gs_visitor.h"
+#include "gen6_gs_visitor.h"
 
 const unsigned MAX_GS_INPUT_VERTICES = 6;
 
@@ -587,6 +588,8 @@ brw_gs_emit(struct brw_context *brw,
 {
    struct brw_shader *shader =
       (brw_shader *) prog->_LinkedShaders[MESA_SHADER_GEOMETRY];
+   vec4_gs_visitor *gs;
+   const unsigned *ret = NULL;
 
    if (unlikely(INTEL_DEBUG & DEBUG_GS)) {
       printf("GLSL IR for native geometry shader %d:\n", prog->Name);
@@ -602,12 +605,17 @@ brw_gs_emit(struct brw_context *brw,
        likely(!(INTEL_DEBUG & DEBUG_NO_DUAL_OBJECT_GS))) {
       c->prog_data.dual_instanced_dispatch = false;
 
-      vec4_gs_visitor v(brw, c, prog, shader, mem_ctx, true /* no_spills */);
-      if (v.run()) {
-         return generate_assembly(brw, prog, &c->gp->program.Base,
-                                  &c->prog_data.base, mem_ctx, &v.instructions,
-                                  final_assembly_size);
+      if (brw->gen >= 7)
+         gs = new vec4_gs_visitor(brw, c, prog, shader, mem_ctx, true /* no_spills */);
+      else
+         gs = new gen6_gs_visitor(brw, c, prog, shader, mem_ctx, true /* no_spills */);
+      if (gs->run()) {
+         ret = generate_assembly(brw, prog, &c->gp->program.Base,
+                                 &c->prog_data.base, mem_ctx, &gs->instructions,
+                                 final_assembly_size);
+         goto done;
       }
+      delete gs;
    }
 
    /* Either we failed to compile in DUAL_OBJECT mode (probably because it
@@ -622,15 +630,21 @@ brw_gs_emit(struct brw_context *brw,
     */
    c->prog_data.dual_instanced_dispatch = true;
 
-   vec4_gs_visitor v(brw, c, prog, shader, mem_ctx, false /* no_spills */);
-   if (!v.run()) {
+   if (brw->gen >= 7)
+      gs = new vec4_gs_visitor(brw, c, prog, shader, mem_ctx, false /* no_spills */);
+   else
+      gs = new gen6_gs_visitor(brw, c, prog, shader, mem_ctx, false /* no_spills */);
+   if (!gs->run()) {
       prog->LinkStatus = false;
-      ralloc_strcat(&prog->InfoLog, v.fail_msg);
-      return NULL;
+      ralloc_strcat(&prog->InfoLog, gs->fail_msg);
+      goto done;
    }
 
-   return generate_assembly(brw, prog, &c->gp->program.Base, &c->prog_data.base,
-                            mem_ctx, &v.instructions, final_assembly_size);
+   ret = generate_assembly(brw, prog, &c->gp->program.Base, &c->prog_data.base,
+                           mem_ctx, &gs->instructions, final_assembly_size);
+done:
+   delete gs;
+   return ret;
 }
 
 
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h
index 68756f7..7a4a262 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h
@@ -96,14 +96,15 @@ protected:
    virtual void visit(ir_emit_vertex *);
    virtual void visit(ir_end_primitive *);
 
+   src_reg vertex_count;
+   const struct brw_gs_compile * const c;
+
 private:
    int setup_varying_inputs(int payload_reg, int *attribute_map,
                             int attributes_per_reg);
    void emit_control_data_bits();
 
-   src_reg vertex_count;
    src_reg control_data_bits;
-   const struct brw_gs_compile * const c;
 };
 
 } /* namespace brw */
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 601b364..0ebf118 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -616,6 +616,20 @@ src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type)
    this->type = brw_type_for_base_type(type);
 }
 
+src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type, int size)
+{
+   assert(size > 0);
+
+   init();
+
+   this->file = GRF;
+   this->reg = v->virtual_grf_alloc(type_size(type) * size);
+
+   this->swizzle = BRW_SWIZZLE_NOOP;
+
+   this->type = brw_type_for_base_type(type);
+}
+
 dst_reg::dst_reg(class vec4_visitor *v, const struct glsl_type *type)
 {
    init();
diff --git a/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp b/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp
new file mode 100644
index 0000000..7be04d0
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp
@@ -0,0 +1,285 @@
+/*
+ * Copyright © 2014 Ilia Mirkin
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file gen6_gs_visitor.cpp
+ *
+ * Gen6 geometry-shader-specific code, derived from the Gen7+ vec4_gs_visitor.
+ */
+
+#include "gen6_gs_visitor.h"
+
+namespace brw {
+
+void
+gen6_gs_visitor::emit_prolog()
+{
+   vec4_gs_visitor::emit_prolog();
+
+   /* vertex_output layout:
+    *
+    * This is an array that contains all the data emitted during the runtime
+    * of this GP. For each emitted vertex, there are vue_map.num_slots data
+    * items, and an extra register used to store flags. This register comes
+    * after the first num_slots. To match the URB_WRITE message header, bit 0
+    * is PrimEnd, and bit 1 is PrimStart. The next vertex then continues
+    * afterwards.
+    */
+   this->vertex_output = src_reg(this, glsl_type::uint_type,
+                                 (prog_data->vue_map.num_slots + 1) *
+                                 c->gp->program.VerticesOut);
+   this->vertex_output_offset = src_reg(this, glsl_type::uint_type);
+   emit(MOV(dst_reg(this->vertex_output_offset), src_reg(0u)));
+
+   this->first_vertex = src_reg(this, glsl_type::uint_type);
+   emit(MOV(dst_reg(this->first_vertex), src_reg(2u)));
+}
+
+void
+gen6_gs_visitor::visit(ir_emit_vertex *)
+{
+   /* copy all of the outputs into the temporary area */
+
+   unsigned num_output_vertices = c->gp->program.VerticesOut;
+   emit(CMP(dst_null_d(), this->vertex_count,
+            src_reg(num_output_vertices), BRW_CONDITIONAL_L));
+   emit(IF(BRW_PREDICATE_NORMAL));
+   {
+      for (int slot = 0; slot < prog_data->vue_map.num_slots; ++slot) {
+         dst_reg dst(this->vertex_output);
+         dst.reladdr = ralloc(mem_ctx, src_reg);
+         memcpy(dst.reladdr, &this->vertex_output_offset, sizeof(src_reg));
+         /* xxx generalize emit_urb_slot and use that, otherwise gl_Position &
+          * co don't work */
+         emit_generic_urb_slot(dst, prog_data->vue_map.slot_to_varying[slot]);
+         emit(ADD(dst_reg(this->vertex_output_offset),
+                  this->vertex_output_offset, src_reg(1u)));
+      }
+
+      /* XXX write the flags in a format that will make implementation of the
+       * "write flags" opcode easy.
+       */
+      dst_reg dst(this->vertex_output);
+      dst.reladdr = ralloc(mem_ctx, src_reg);
+      memcpy(dst.reladdr, &this->vertex_output_offset, sizeof(src_reg));
+      if (c->gp->program.OutputType == GL_POINTS) {
+         /* Each point starts and ends the vertex */
+         emit(MOV(dst, src_reg(3u)));
+      } else {
+         /* Set the flags to first_vertex, which will be set to 2 when it's
+          * the first, or 0 if it's not the first. Zero out first_vertex so
+          * that future runs will work correctly as well.
+          */
+         emit(MOV(dst, this->first_vertex));
+         emit(MOV(dst_reg(this->first_vertex), src_reg(0u)));
+      }
+
+      emit(ADD(dst_reg(this->vertex_output_offset),
+               this->vertex_output_offset, src_reg(1u)));
+
+      emit(ADD(dst_reg(this->vertex_count),
+               this->vertex_count, src_reg(1u)));
+   }
+   emit(BRW_OPCODE_ENDIF);
+}
+
+void
+gen6_gs_visitor::visit(ir_end_primitive *)
+{
+   /* This has no effect for GL_POINTS */
+   if (c->gp->program.OutputType == GL_POINTS)
+      return;
+
+   /* mark the current vertex as ending the primitive */
+   unsigned num_output_vertices = c->gp->program.VerticesOut;
+   emit(CMP(dst_null_d(), this->vertex_count,
+            src_reg(num_output_vertices), BRW_CONDITIONAL_L));
+   emit(IF(BRW_PREDICATE_NORMAL));
+   {
+      /* vertex_output_offset is already pointing at the first entry of the
+       * next vertex. So subtract 1 to modify the flags for the previous
+       * vertex.
+       */
+      src_reg offset(this, glsl_type::uint_type);
+      emit(MOV(dst_reg(offset), this->vertex_output_offset));
+      emit(ADD(dst_reg(offset), offset, src_reg(~0u)));
+
+      src_reg dst(this->vertex_output);
+      dst.reladdr = ralloc(mem_ctx, src_reg);
+      memcpy(dst.reladdr, &offset, sizeof(src_reg));
+
+      /* Set the 0 bit to indicate that the primitive is ending */
+      emit(OR(dst_reg(dst), dst, src_reg(1u)));
+
+      /* Set the first vertex flag to indicate that the next vertex will start
+       * a primitive
+       */
+      emit(MOV(dst_reg(this->first_vertex), src_reg(2u)));
+   }
+   emit(BRW_OPCODE_ENDIF);
+}
+
+void
+gen6_gs_visitor::emit_thread_end()
+{
+   /* Now we take all of the vertex data previously written into
+    * vertex_output, and emit it for real. This involves looping through all
+    * the vertices, loading the relevant data back into the MRFs, and emitting
+    * write opcodes.
+    *
+    * Note that this has to be done before move_grf_array_access_to_scratch
+    * runs and does its magic, otherwise we have to manually deal with scratch
+    * space.
+    */
+
+   src_reg i(this, glsl_type::uint_type);
+   emit(MOV(dst_reg(i), src_reg(0u)));
+
+   src_reg offset(this, glsl_type::uint_type);
+   emit(MOV(dst_reg(offset), src_reg(0u)));
+
+   src_reg vue_handle(this, glsl_type::uint_type);
+   src_reg prim_flags(this, glsl_type::uint_type);
+
+   /* XXX check if FF_SYNC really needs the # of primitives generated, if it
+    * does, figure out exactly what it wants a counter of, and count it.
+    */
+   /* XXX look into SO and whether it interacts with this GS logic in any way.
+    */
+   emit(GS_OPCODE_FF_SYNC, dst_reg(vue_handle));
+
+   /* XXX double-check loop logic -- can this be done using a regular
+    * condition instead of a nested if inside an infinite do/while?
+    */
+   emit(BRW_OPCODE_DO);
+   {
+      emit(CMP(dst_null_d(), this->vertex_count, i, BRW_CONDITIONAL_L));
+      emit(IF(BRW_PREDICATE_NORMAL));
+      {
+         emit(BRW_OPCODE_BREAK);
+      }
+      emit(BRW_OPCODE_ENDIF);
+
+      /* Writes out a single vertex's worth data. This is basically the same
+       * as vec4_visitor::emit_vertex, except that it's using vertex_output as
+       * its source rather than the varying slots
+       */
+
+      int base_mrf = 1;
+      int mrf = base_mrf;
+      /* MRF 14/15 are used for spill handling */
+      int max_usable_mrf = 13;
+
+      dst_reg base_mrf_reg = dst_reg(MRF, base_mrf);
+      base_mrf_reg.type = BRW_REGISTER_TYPE_UD;
+
+      emit_urb_write_header(mrf++);
+
+      /* XXX double-check that these really are the right primitive ids */
+      unsigned prim_type;
+      switch (c->gp->program.OutputType) {
+      case GL_POINTS:
+         prim_type = _3DPRIM_POINTLIST << 2;
+         break;
+      case GL_LINE_STRIP:
+         prim_type = _3DPRIM_LINESTRIP << 2;
+         break;
+      case GL_TRIANGLE_STRIP:
+         prim_type = _3DPRIM_TRISTRIP << 2;
+         break;
+      default:
+         assert(!"Unexpected output type");
+         prim_type = 0;
+         break;
+      }
+
+      /* Location of the primitive flags for the current vertex */
+      src_reg flag_offset(this, glsl_type::uint_type);
+      emit(ADD(dst_reg(flag_offset),
+               offset, src_reg(prog_data->vue_map.num_slots)));
+
+      /* Pointer to the primitive flag data for the current vertex */
+      src_reg prim_data(this->vertex_output);
+      prim_data.reladdr = ralloc(mem_ctx, src_reg);
+      memcpy(&prim_data.reladdr, &flag_offset, sizeof(src_reg));
+
+      /* Combine the current vertex prim start/end flags with the primitive
+       * type
+       */
+      emit(OR(dst_reg(prim_flags), prim_data, src_reg(prim_type)));
+
+      emit(GS_OPCODE_SET_DWORD_2, base_mrf_reg, prim_flags);
+
+      src_reg data(this->vertex_output);
+      data.reladdr = ralloc(mem_ctx, src_reg);
+      memcpy(data.reladdr, &offset, sizeof(src_reg));
+
+      int slot = 0;
+      bool complete = false;
+      do {
+         /* URB offset is in URB row increments, and each of our MRFs is half of
+          * one of those, since we're doing interleaved writes.
+          */
+         int urb_offset = slot / 2;
+
+         mrf = base_mrf + 1;
+         for (; slot < prog_data->vue_map.num_slots; ++slot) {
+            dst_reg reg = dst_reg(MRF, mrf++);
+            reg.type = BRW_REGISTER_TYPE_F;
+
+            emit(MOV(reg, data));
+
+            /* XXX double-check that adding to offset will also alter the
+             * reladdr above, or if we need to create a fresh offset for each
+             * slot. */
+            emit(ADD(dst_reg(offset), offset, src_reg(1u)));
+
+            /* If this was max_usable_mrf, we can't fit anything more into this
+             * URB WRITE.
+             */
+            if (mrf > max_usable_mrf) {
+               slot++;
+               break;
+            }
+         }
+
+         complete = slot >= prog_data->vue_map.num_slots;
+         current_annotation = "URB write";
+         vec4_instruction *inst = emit_urb_write_opcode(complete);
+         inst->base_mrf = base_mrf;
+         inst->mlen = mrf - base_mrf;
+         if ((inst->mlen % 2) != 1)
+            inst->mlen++;
+         inst->offset += urb_offset;
+      } while (!complete);
+
+      emit(ADD(dst_reg(i), i, src_reg(1u)));
+   }
+   emit(BRW_OPCODE_WHILE);
+
+   /* XXX more thread ending logic here, perhaps similar to what
+    * vec4_gs_visitor::emit_thread_end does. Need to check the docs.
+    */
+}
+
+} /* namespace brw */
diff --git a/src/mesa/drivers/dri/i965/gen6_gs_visitor.h b/src/mesa/drivers/dri/i965/gen6_gs_visitor.h
new file mode 100644
index 0000000..b519438
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/gen6_gs_visitor.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright © 2014 Ilia Mirkin
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file gen6_gs_visitor.h
+ *
+ * Gen6 geometry-shader-specific code, derived from the Gen7+ vec4_gs_visitor.
+ */
+
+#ifndef GEN6_GS_VISITOR_H
+#define GEN6_GS_VISITOR_H
+
+#include "brw_vec4.h"
+#include "brw_vec4_gs_visitor.h"
+
+#ifdef __cplusplus
+namespace brw {
+
+class gen6_gs_visitor : public vec4_gs_visitor {
+  public:
+   gen6_gs_visitor(struct brw_context *brw,
+                   struct brw_gs_compile *c,
+                   struct gl_shader_program *prog,
+                   struct brw_shader *shader,
+                   void *mem_ctx,
+                   bool no_spills) :
+         vec4_gs_visitor(brw, c, prog, shader, mem_ctx, no_spills) {}
+  protected:
+   virtual void emit_prolog();
+   virtual void emit_thread_end();
+   virtual void visit(ir_emit_vertex *);
+   virtual void visit(ir_end_primitive *);
+
+  private:
+   src_reg vertex_output;
+   src_reg vertex_output_offset;
+   src_reg first_vertex;
+};
+
+} /* namespace brw */
+#endif /* __cplusplus */
+
+#endif /* GEN6_VS_VISITOR_H */
-- 
1.8.3.2



More information about the mesa-dev mailing list