[Mesa-dev] [PATCH 04/10] i965: Port tessellation evaluation shaders to vec4 mode.

Kenneth Graunke kenneth at whitecape.org
Thu Dec 24 17:34:22 PST 2015


This can be used on Broadwell by setting INTEL_SCALAR_TES=0.
More importantly, it will be used for Ivybridge and Haswell.

Signed-off-by: Kenneth Graunke <kenneth at whitecape.org>
---
 src/mesa/drivers/dri/i965/Makefile.sources         |   1 +
 src/mesa/drivers/dri/i965/brw_defines.h            |   4 +
 src/mesa/drivers/dri/i965/brw_shader.cpp           |  25 ++-
 src/mesa/drivers/dri/i965/brw_vec4.cpp             |   1 +
 .../dri/i965/brw_vec4_dead_code_eliminate.cpp      |   2 +
 src/mesa/drivers/dri/i965/brw_vec4_generator.cpp   |  61 ++++++
 src/mesa/drivers/dri/i965/brw_vec4_tes.cpp         | 204 +++++++++++++++++++++
 src/mesa/drivers/dri/i965/brw_vec4_tes.h           |  69 +++++++
 8 files changed, 365 insertions(+), 2 deletions(-)
 create mode 100644 src/mesa/drivers/dri/i965/brw_vec4_tes.cpp
 create mode 100644 src/mesa/drivers/dri/i965/brw_vec4_tes.h

diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources
index 0b706de..05c49ee 100644
--- a/src/mesa/drivers/dri/i965/Makefile.sources
+++ b/src/mesa/drivers/dri/i965/Makefile.sources
@@ -76,6 +76,7 @@ i965_compiler_FILES = \
 	brw_vec4_surface_builder.cpp \
 	brw_vec4_surface_builder.h \
 	brw_vec4_tcs.cpp \
+	brw_vec4_tes.cpp \
 	brw_vec4_visitor.cpp \
 	brw_vec4_vs_visitor.cpp \
 	brw_vue_map.c \
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index cc19c06..61bcebd 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -1313,6 +1313,10 @@ enum opcode {
    TCS_OPCODE_SET_OUTPUT_URB_OFFSETS,
    TCS_OPCODE_GET_PRIMITIVE_ID,
    TCS_OPCODE_CREATE_BARRIER_HEADER,
+
+   TES_OPCODE_GET_PRIMITIVE_ID,
+   TES_OPCODE_CREATE_INPUT_READ_HEADER,
+   TES_OPCODE_ADD_INDIRECT_URB_OFFSET,
 };
 
 enum brw_urb_write_flags {
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
index 5140cfb..3a36678 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -26,6 +26,7 @@
 #include "brw_eu.h"
 #include "brw_fs.h"
 #include "brw_nir.h"
+#include "brw_vec4_tes.h"
 #include "glsl/glsl_parser_extras.h"
 #include "main/shaderobj.h"
 #include "main/uniforms.h"
@@ -86,7 +87,8 @@ brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo)
    compiler->scalar_stage[MESA_SHADER_VERTEX] =
       devinfo->gen >= 8 && !(INTEL_DEBUG & DEBUG_VEC4VS);
    compiler->scalar_stage[MESA_SHADER_TESS_CTRL] = false;
-   compiler->scalar_stage[MESA_SHADER_TESS_EVAL] = true;
+   compiler->scalar_stage[MESA_SHADER_TESS_EVAL] =
+      devinfo->gen >= 8 && env_var_as_boolean("INTEL_SCALAR_TES", true);
    compiler->scalar_stage[MESA_SHADER_GEOMETRY] =
       devinfo->gen >= 8 && env_var_as_boolean("INTEL_SCALAR_GS", false);
    compiler->scalar_stage[MESA_SHADER_FRAGMENT] = true;
@@ -566,6 +568,12 @@ brw_instruction_name(enum opcode op)
       return "tcs_get_primitive_id";
    case TCS_OPCODE_CREATE_BARRIER_HEADER:
       return "tcs_create_barrier_header";
+   case TES_OPCODE_CREATE_INPUT_READ_HEADER:
+      return "tes_create_input_read_header";
+   case TES_OPCODE_ADD_INDIRECT_URB_OFFSET:
+      return "tes_add_indirect_urb_offset";
+   case TES_OPCODE_GET_PRIMITIVE_ID:
+      return "tes_get_primitive_id";
    }
 
    unreachable("not reached");
@@ -1400,6 +1408,19 @@ brw_compile_tes(const struct brw_compiler *compiler,
 
       return g.get_assembly(final_assembly_size);
    } else {
-      unreachable("XXX: vec4 tessellation evalation shaders not merged yet.");
+      brw::vec4_tes_visitor v(compiler, log_data, key, prog_data,
+			      nir, mem_ctx, shader_time_index);
+      if (!v.run()) {
+	 if (error_str)
+	    *error_str = ralloc_strdup(mem_ctx, v.fail_msg);
+	 return NULL;
+      }
+
+      if (unlikely(INTEL_DEBUG & DEBUG_TES))
+	 v.dump_instructions();
+
+      return brw_vec4_generate_assembly(compiler, log_data, mem_ctx, nir,
+					&prog_data->base, v.cfg,
+					final_assembly_size);
    }
 }
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index 0cded0c..116dd35 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -189,6 +189,7 @@ vec4_instruction::has_source_and_destination_hazard() const
    switch (opcode) {
    case TCS_OPCODE_SET_INPUT_URB_OFFSETS:
    case TCS_OPCODE_SET_OUTPUT_URB_OFFSETS:
+   case TES_OPCODE_ADD_INDIRECT_URB_OFFSET:
       return true;
    default:
       return false;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp b/src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp
index c31e72d..166bc17 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp
@@ -47,6 +47,8 @@ can_do_writemask(const struct brw_device_info *devinfo,
    case VS_OPCODE_SET_SIMD4X2_HEADER_GEN9:
    case TCS_OPCODE_SET_INPUT_URB_OFFSETS:
    case TCS_OPCODE_SET_OUTPUT_URB_OFFSETS:
+   case TES_OPCODE_CREATE_INPUT_READ_HEADER:
+   case TES_OPCODE_ADD_INDIRECT_URB_OFFSET:
    case VEC4_OPCODE_URB_READ:
       return false;
    default:
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
index 6325569..2541c25 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
@@ -865,6 +865,46 @@ generate_tcs_output_urb_offsets(struct brw_codegen *p,
 }
 
 static void
+generate_tes_create_input_read_header(struct brw_codegen *p,
+                                      struct brw_reg dst)
+{
+   brw_push_insn_state(p);
+   brw_set_default_access_mode(p, BRW_ALIGN_1);
+   brw_set_default_mask_control(p, BRW_MASK_DISABLE);
+
+   /* Initialize the register to 0 */
+   brw_MOV(p, dst, brw_imm_ud(0));
+
+   /* Enable all the channels in m0.5 bits 15:8 */
+   brw_MOV(p, get_element_ud(dst, 5), brw_imm_ud(0xff00));
+
+   /* Copy g1.3 (the patch URB handle) to m0.0 and m0.1.  For safety,
+    * mask out irrelevant "Reserved" bits, as they're not marked MBZ.
+    */
+   brw_AND(p, vec2(get_element_ud(dst, 0)),
+           retype(brw_vec1_grf(1, 3), BRW_REGISTER_TYPE_UD),
+           brw_imm_ud(0x1fff));
+   brw_pop_insn_state(p);
+}
+
+static void
+generate_tes_add_indirect_urb_offset(struct brw_codegen *p,
+                                     struct brw_reg dst,
+                                     struct brw_reg header,
+                                     struct brw_reg offset)
+{
+   brw_push_insn_state(p);
+   brw_set_default_access_mode(p, BRW_ALIGN_1);
+   brw_set_default_mask_control(p, BRW_MASK_DISABLE);
+
+   brw_MOV(p, dst, header);
+   /* m0.3-0.4: 128-bit-granular offsets into the URB from the handles */
+   brw_MOV(p, vec2(get_element_ud(dst, 3)), stride(offset, 4, 1, 0));
+
+   brw_pop_insn_state(p);
+}
+
+static void
 generate_vec4_urb_read(struct brw_codegen *p,
                        vec4_instruction *inst,
                        struct brw_reg dst,
@@ -890,6 +930,15 @@ generate_vec4_urb_read(struct brw_codegen *p,
 }
 
 static void
+generate_tes_get_primitive_id(struct brw_codegen *p, struct brw_reg dst)
+{
+   brw_push_insn_state(p);
+   brw_set_default_access_mode(p, BRW_ALIGN_1);
+   brw_MOV(p, dst, retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_D));
+   brw_pop_insn_state(p);
+}
+
+static void
 generate_tcs_get_primitive_id(struct brw_codegen *p, struct brw_reg dst)
 {
    brw_push_insn_state(p);
@@ -1780,6 +1829,18 @@ generate_code(struct brw_codegen *p,
          generate_tcs_create_barrier_header(p, prog_data, dst);
          break;
 
+      case TES_OPCODE_CREATE_INPUT_READ_HEADER:
+         generate_tes_create_input_read_header(p, dst);
+         break;
+
+      case TES_OPCODE_ADD_INDIRECT_URB_OFFSET:
+         generate_tes_add_indirect_urb_offset(p, dst, src[0], src[1]);
+         break;
+
+      case TES_OPCODE_GET_PRIMITIVE_ID:
+         generate_tes_get_primitive_id(p, dst);
+         break;
+
       case SHADER_OPCODE_BARRIER:
          brw_barrier(p, src[0]);
          brw_WAIT(p);
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_tes.cpp b/src/mesa/drivers/dri/i965/brw_vec4_tes.cpp
new file mode 100644
index 0000000..ce5fefc
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_vec4_tes.cpp
@@ -0,0 +1,204 @@
+/*
+ * Copyright © 2013 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file brw_vec4_tes.cpp
+ *
+ * Tessellation evaluation shader specific code derived from the vec4_visitor class.
+ */
+
+#include "brw_vec4_tes.h"
+
+namespace brw {
+
+vec4_tes_visitor::vec4_tes_visitor(const struct brw_compiler *compiler,
+                                  void *log_data,
+                                  const struct brw_tes_prog_key *key,
+                                  struct brw_tes_prog_data *prog_data,
+                                  const nir_shader *shader,
+                                  void *mem_ctx,
+                                  int shader_time_index)
+   : vec4_visitor(compiler, log_data, &key->tex, &prog_data->base,
+                  shader, mem_ctx, false, shader_time_index)
+{
+}
+
+
+dst_reg *
+vec4_tes_visitor::make_reg_for_system_value(int location, const glsl_type *type)
+{
+   return NULL;
+}
+
+void
+vec4_tes_visitor::nir_setup_system_value_intrinsic(nir_intrinsic_instr *instr)
+{
+   const struct brw_tes_prog_data *tes_prog_data =
+      (const struct brw_tes_prog_data *) prog_data;
+
+   switch (instr->intrinsic) {
+   case nir_intrinsic_load_tess_level_outer: {
+      dst_reg dst(this, glsl_type::vec4_type);
+      nir_system_values[SYSTEM_VALUE_TESS_LEVEL_OUTER] = dst;
+
+      dst_reg temp(this, glsl_type::vec4_type);
+      vec4_instruction *read =
+         emit(VEC4_OPCODE_URB_READ, temp, input_read_header);
+      read->offset = 1;
+      read->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET;
+      emit(MOV(dst, swizzle(src_reg(temp), BRW_SWIZZLE_WZYX)));
+      break;
+   }
+   case nir_intrinsic_load_tess_level_inner: {
+      dst_reg dst(this, glsl_type::vec2_type);
+      nir_system_values[SYSTEM_VALUE_TESS_LEVEL_INNER] = dst;
+
+      /* Set up the message header to reference the proper parts of the URB */
+      dst_reg temp(this, glsl_type::vec4_type);
+      vec4_instruction *read =
+         emit(VEC4_OPCODE_URB_READ, temp, input_read_header);
+      read->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET;
+      if (tes_prog_data->domain == BRW_TESS_DOMAIN_QUAD) {
+         emit(MOV(dst, swizzle(src_reg(temp), BRW_SWIZZLE_WZYX)));
+      } else {
+         read->offset = 1;
+         emit(MOV(dst, src_reg(temp)));
+      }
+      break;
+   }
+   default:
+      vec4_visitor::nir_setup_system_value_intrinsic(instr);
+   }
+}
+
+
+void
+vec4_tes_visitor::setup_payload()
+{
+   int reg = 0;
+
+   /* The payload always contains important data in r0 and r1, which contain
+    * the URB handles that are passed on to the URB write at the end
+    * of the thread.
+    */
+   reg += 2;
+
+   reg = setup_uniforms(reg);
+
+   this->first_non_payload_grf = reg;
+}
+
+
+void
+vec4_tes_visitor::emit_prolog()
+{
+   input_read_header = src_reg(this, glsl_type::uvec4_type);
+   emit(TES_OPCODE_CREATE_INPUT_READ_HEADER, dst_reg(input_read_header));
+
+   this->current_annotation = NULL;
+}
+
+
+void
+vec4_tes_visitor::emit_urb_write_header(int mrf)
+{
+   /* No need to do anything for DS; an implied write to this MRF will be
+    * performed by VS_OPCODE_URB_WRITE.
+    */
+   (void) mrf;
+}
+
+
+vec4_instruction *
+vec4_tes_visitor::emit_urb_write_opcode(bool complete)
+{
+   /* For DS, the URB writes end the thread. */
+   if (complete) {
+      if (INTEL_DEBUG & DEBUG_SHADER_TIME)
+         emit_shader_time_end();
+   }
+
+   vec4_instruction *inst = emit(VS_OPCODE_URB_WRITE);
+   inst->urb_write_flags = complete ?
+      BRW_URB_WRITE_EOT_COMPLETE : BRW_URB_WRITE_NO_FLAGS;
+
+   return inst;
+}
+
+void
+vec4_tes_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
+{
+   switch (instr->intrinsic) {
+   case nir_intrinsic_load_tess_coord:
+      /* gl_TessCoord is part of the payload in g1 channels 0-2 and 4-6. */
+      emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F),
+               src_reg(brw_vec8_grf(1, 0))));
+      break;
+   case nir_intrinsic_load_primitive_id:
+      emit(TES_OPCODE_GET_PRIMITIVE_ID,
+           get_nir_dest(instr->dest, BRW_REGISTER_TYPE_UD));
+      break;
+
+   case nir_intrinsic_load_input:
+   case nir_intrinsic_load_per_vertex_input: {
+      src_reg indirect_offset = get_indirect_offset(instr);
+      unsigned imm_offset = instr->const_index[0];
+      src_reg header = input_read_header;
+
+      if (indirect_offset.file != BAD_FILE) {
+         header = src_reg(this, glsl_type::uvec4_type);
+         emit(TES_OPCODE_ADD_INDIRECT_URB_OFFSET, dst_reg(header),
+              input_read_header, indirect_offset);
+      }
+
+      dst_reg temp(this, glsl_type::ivec4_type);
+      vec4_instruction *read =
+         emit(VEC4_OPCODE_URB_READ, temp, src_reg(header));
+      read->offset = imm_offset;
+      read->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET;
+
+      /* Copy to target.  We might end up with some funky writemasks landing
+       * in here, but we really don't want them in the above pseudo-ops.
+       */
+      dst_reg dst = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D);
+      dst.writemask = brw_writemask_for_size(instr->num_components);
+      emit(MOV(dst, src_reg(temp)));
+      break;
+   }
+   default:
+      vec4_visitor::nir_emit_intrinsic(instr);
+   }
+}
+
+
+void
+vec4_tes_visitor::emit_thread_end()
+{
+   /* For DS, we always end the thread by emitting a single vertex.
+    * emit_urb_write_opcode() will take care of setting the eot flag on the
+    * SEND instruction.
+    */
+   emit_vertex();
+}
+
+} /* namespace brw */
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_tes.h b/src/mesa/drivers/dri/i965/brw_vec4_tes.h
new file mode 100644
index 0000000..4b697aa
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_vec4_tes.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright © 2013 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file brw_vec4_tes.h
+ *
+ * The vec4 mode tessellation evaluation shader compiler backend.
+ */
+
+#ifndef BRW_VEC4_TES_H
+#define BRW_VEC4_TES_H
+
+#include "brw_vec4.h"
+
+#ifdef __cplusplus
+namespace brw {
+
+class vec4_tes_visitor : public vec4_visitor
+{
+public:
+   vec4_tes_visitor(const struct brw_compiler *compiler,
+                   void *log_data,
+                   const struct brw_tes_prog_key *key,
+                   struct brw_tes_prog_data *prog_data,
+                   const nir_shader *nir,
+                   void *mem_ctx,
+                   int shader_time_index);
+
+protected:
+   virtual dst_reg *make_reg_for_system_value(int location,
+                                              const glsl_type *type);
+   virtual void nir_setup_system_value_intrinsic(nir_intrinsic_instr *instr);
+   virtual void nir_emit_intrinsic(nir_intrinsic_instr *instr);
+
+   virtual void setup_payload();
+   virtual void emit_prolog();
+   virtual void emit_thread_end();
+
+   virtual void emit_urb_write_header(int mrf);
+   virtual vec4_instruction *emit_urb_write_opcode(bool complete);
+
+private:
+   src_reg input_read_header;
+};
+
+} /* namespace brw */
+#endif /* __cplusplus */
+
+#endif /* BRW_VEC4_TES_H */
-- 
2.6.4



More information about the mesa-dev mailing list