[Mesa-dev] [PATCH 9/9] i965/vec4: Use NIR to do GS input remapping
Jason Ekstrand
jason at jlekstrand.net
Fri May 5 02:11:47 UTC 2017
We're already doing this in the FS back-end. This just does the same
thing in the vec4 back-end.
---
src/intel/compiler/brw_nir.c | 3 --
src/intel/compiler/brw_vec4.cpp | 44 -------------------
src/intel/compiler/brw_vec4.h | 2 -
src/intel/compiler/brw_vec4_gs_nir.cpp | 10 ++---
src/intel/compiler/brw_vec4_gs_visitor.cpp | 70 +++++++++++++++++++++---------
src/intel/compiler/brw_vec4_gs_visitor.h | 4 +-
src/intel/compiler/gen6_gs_visitor.cpp | 4 +-
7 files changed, 56 insertions(+), 81 deletions(-)
diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c
index d92e8fa..8a9f89f 100644
--- a/src/intel/compiler/brw_nir.c
+++ b/src/intel/compiler/brw_nir.c
@@ -342,9 +342,6 @@ brw_nir_lower_vue_inputs(nir_shader *nir, bool is_scalar,
/* Inputs are stored in vec4 slots, so use type_size_vec4(). */
nir_lower_io(nir, nir_var_shader_in, type_size_vec4, 0);
- if (nir->stage == MESA_SHADER_GEOMETRY && !is_scalar)
- return;
-
/* This pass needs actual constants */
nir_opt_constant_folding(nir);
diff --git a/src/intel/compiler/brw_vec4.cpp b/src/intel/compiler/brw_vec4.cpp
index b387321..ad92951 100644
--- a/src/intel/compiler/brw_vec4.cpp
+++ b/src/intel/compiler/brw_vec4.cpp
@@ -1693,50 +1693,6 @@ attribute_to_hw_reg(int attr, brw_reg_type type, bool interleaved)
return reg;
}
-
-/**
- * Replace each register of type ATTR in this->instructions with a reference
- * to a fixed HW register.
- *
- * If interleaved is true, then each attribute takes up half a register, with
- * register N containing attribute 2*N in its first half and attribute 2*N+1
- * in its second half (this corresponds to the payload setup used by geometry
- * shaders in "single" or "dual instanced" dispatch mode). If interleaved is
- * false, then each attribute takes up a whole register, with register N
- * containing attribute N (this corresponds to the payload setup used by
- * vertex shaders, and by geometry shaders in "dual object" dispatch mode).
- */
-void
-vec4_visitor::lower_attributes_to_hw_regs(const int *attribute_map,
- bool interleaved)
-{
- foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
- for (int i = 0; i < 3; i++) {
- if (inst->src[i].file != ATTR)
- continue;
-
- int grf = attribute_map[inst->src[i].nr +
- inst->src[i].offset / REG_SIZE];
- assert(inst->src[i].offset % REG_SIZE == 0);
-
- /* All attributes used in the shader need to have been assigned a
- * hardware register by the caller
- */
- assert(grf != 0);
-
- struct brw_reg reg =
- attribute_to_hw_reg(grf, inst->src[i].type, interleaved);
- reg.swizzle = inst->src[i].swizzle;
- if (inst->src[i].abs)
- reg = brw_abs(reg);
- if (inst->src[i].negate)
- reg = negate(reg);
-
- inst->src[i] = reg;
- }
- }
-}
-
int
vec4_vs_visitor::setup_attributes(int payload_reg)
{
diff --git a/src/intel/compiler/brw_vec4.h b/src/intel/compiler/brw_vec4.h
index 89adfaa..0f92f34 100644
--- a/src/intel/compiler/brw_vec4.h
+++ b/src/intel/compiler/brw_vec4.h
@@ -367,8 +367,6 @@ public:
protected:
void emit_vertex();
- void lower_attributes_to_hw_regs(const int *attribute_map,
- bool interleaved);
void setup_payload_interference(struct ra_graph *g, int first_payload_node,
int reg_node_count);
virtual void setup_payload() = 0;
diff --git a/src/intel/compiler/brw_vec4_gs_nir.cpp b/src/intel/compiler/brw_vec4_gs_nir.cpp
index ed8c03b..577f587 100644
--- a/src/intel/compiler/brw_vec4_gs_nir.cpp
+++ b/src/intel/compiler/brw_vec4_gs_nir.cpp
@@ -66,8 +66,10 @@ vec4_gs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
nir_const_value *vertex = nir_src_as_const_value(instr->src[0]);
nir_const_value *offset_reg = nir_src_as_const_value(instr->src[1]);
+ const unsigned input_array_stride = prog_data->urb_read_length * 2;
+
if (nir_dest_bit_size(instr->dest) == 64) {
- src = src_reg(ATTR, BRW_VARYING_SLOT_COUNT * vertex->u32[0] +
+ src = src_reg(ATTR, input_array_stride * vertex->u32[0] +
instr->const_index[0] + offset_reg->u32[0],
glsl_type::dvec4_type);
@@ -85,15 +87,11 @@ vec4_gs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
/* Make up a type...we have no way of knowing... */
const glsl_type *const type = glsl_type::ivec(instr->num_components);
- src = src_reg(ATTR, BRW_VARYING_SLOT_COUNT * vertex->u32[0] +
+ src = src_reg(ATTR, input_array_stride * vertex->u32[0] +
instr->const_index[0] + offset_reg->u32[0],
type);
src.swizzle = BRW_SWZ_COMP_INPUT(nir_intrinsic_component(instr));
- /* gl_PointSize is passed in the .w component of the VUE header */
- if (instr->const_index[0] == VARYING_SLOT_PSIZ)
- src.swizzle = BRW_SWIZZLE_WWWW;
-
dest = get_nir_dest(instr->dest, src.type);
dest.writemask = brw_writemask_for_size(instr->num_components);
emit(MOV(dest, src));
diff --git a/src/intel/compiler/brw_vec4_gs_visitor.cpp b/src/intel/compiler/brw_vec4_gs_visitor.cpp
index 4a8b5be..516ab0b 100644
--- a/src/intel/compiler/brw_vec4_gs_visitor.cpp
+++ b/src/intel/compiler/brw_vec4_gs_visitor.cpp
@@ -29,6 +29,7 @@
#include "brw_vec4_gs_visitor.h"
#include "gen6_gs_visitor.h"
+#include "brw_cfg.h"
#include "brw_fs.h"
#include "brw_nir.h"
#include "common/gen_debug.h"
@@ -72,9 +73,36 @@ vec4_gs_visitor::make_reg_for_system_value(int location)
return reg;
}
+static inline struct brw_reg
+attribute_to_hw_reg(int attr, brw_reg_type type, bool interleaved)
+{
+ struct brw_reg reg;
+ unsigned width = REG_SIZE / 2 / MAX2(4, type_sz(type));
+ if (interleaved) {
+ reg = stride(brw_vecn_grf(width, attr / 2, (attr % 2) * 4), 0, width, 1);
+ } else {
+ reg = brw_vecn_grf(width, attr, 0);
+ }
+
+ reg.type = type;
+ return reg;
+}
+
+/**
+ * Replace each register of type ATTR in this->instructions with a reference
+ * to a fixed HW register.
+ *
+ * If interleaved is true, then each attribute takes up half a register, with
+ * register N containing attribute 2*N in its first half and attribute 2*N+1
+ * in its second half (this corresponds to the payload setup used by geometry
+ * shaders in "single" or "dual instanced" dispatch mode). If interleaved is
+ * false, then each attribute takes up a whole register, with register N
+ * containing attribute N (this corresponds to the payload setup used by
+ * vertex shaders, and by geometry shaders in "dual object" dispatch mode).
+ */
int
-vec4_gs_visitor::setup_varying_inputs(int payload_reg, int *attribute_map,
+vec4_gs_visitor::setup_varying_inputs(int payload_reg,
int attributes_per_reg)
{
/* For geometry shaders there are N copies of the input attributes, where N
@@ -89,12 +117,24 @@ vec4_gs_visitor::setup_varying_inputs(int payload_reg, int *attribute_map,
assert(num_input_vertices <= MAX_GS_INPUT_VERTICES);
unsigned input_array_stride = prog_data->urb_read_length * 2;
- for (int slot = 0; slot < c->input_vue_map.num_slots; slot++) {
- int varying = c->input_vue_map.slot_to_varying[slot];
- for (unsigned vertex = 0; vertex < num_input_vertices; vertex++) {
- attribute_map[BRW_VARYING_SLOT_COUNT * vertex + varying] =
- attributes_per_reg * payload_reg + input_array_stride * vertex +
- slot;
+ foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
+ for (int i = 0; i < 3; i++) {
+ if (inst->src[i].file != ATTR)
+ continue;
+
+ assert(inst->src[i].offset % REG_SIZE == 0);
+ int grf = payload_reg * attributes_per_reg +
+ inst->src[i].nr + inst->src[i].offset / REG_SIZE;
+
+ struct brw_reg reg =
+ attribute_to_hw_reg(grf, inst->src[i].type, attributes_per_reg > 1);
+ reg.swizzle = inst->src[i].swizzle;
+ if (inst->src[i].abs)
+ reg = brw_abs(reg);
+ if (inst->src[i].negate)
+ reg = negate(reg);
+
+ inst->src[i] = reg;
}
}
@@ -103,25 +143,15 @@ vec4_gs_visitor::setup_varying_inputs(int payload_reg, int *attribute_map,
return payload_reg + regs_used;
}
-
void
vec4_gs_visitor::setup_payload()
{
- int attribute_map[BRW_VARYING_SLOT_COUNT * MAX_GS_INPUT_VERTICES];
-
/* If we are in dual instanced or single mode, then attributes are going
* to be interleaved, so one register contains two attribute slots.
*/
int attributes_per_reg =
prog_data->dispatch_mode == DISPATCH_MODE_4X2_DUAL_OBJECT ? 1 : 2;
- /* If a geometry shader tries to read from an input that wasn't written by
- * the vertex shader, that produces undefined results, but it shouldn't
- * crash anything. So initialize attribute_map to zeros--that ensures that
- * these undefined results are read from r0.
- */
- memset(attribute_map, 0, sizeof(attribute_map));
-
int reg = 0;
/* The payload always contains important data in r0, which contains
@@ -132,13 +162,11 @@ vec4_gs_visitor::setup_payload()
/* If the shader uses gl_PrimitiveIDIn, that goes in r1. */
if (gs_prog_data->include_primitive_id)
- attribute_map[VARYING_SLOT_PRIMITIVE_ID] = attributes_per_reg * reg++;
+ reg++;
reg = setup_uniforms(reg);
- reg = setup_varying_inputs(reg, attribute_map, attributes_per_reg);
-
- lower_attributes_to_hw_regs(attribute_map, attributes_per_reg > 1);
+ reg = setup_varying_inputs(reg, attributes_per_reg);
this->first_non_payload_grf = reg;
}
diff --git a/src/intel/compiler/brw_vec4_gs_visitor.h b/src/intel/compiler/brw_vec4_gs_visitor.h
index 09221f9..6b21a33 100644
--- a/src/intel/compiler/brw_vec4_gs_visitor.h
+++ b/src/intel/compiler/brw_vec4_gs_visitor.h
@@ -33,6 +33,7 @@
#include "brw_vec4.h"
#define MAX_GS_INPUT_VERTICES 6
+#define MAX_GS_INPUT_VARYINGS 32
#ifdef __cplusplus
namespace brw {
@@ -64,8 +65,7 @@ protected:
virtual void nir_emit_intrinsic(nir_intrinsic_instr *instr);
protected:
- int setup_varying_inputs(int payload_reg, int *attribute_map,
- int attributes_per_reg);
+ int setup_varying_inputs(int payload_reg, int attributes_per_reg);
void emit_control_data_bits();
void set_stream_control_data_bits(unsigned stream_id);
diff --git a/src/intel/compiler/gen6_gs_visitor.cpp b/src/intel/compiler/gen6_gs_visitor.cpp
index 075bc4a..afc6056 100644
--- a/src/intel/compiler/gen6_gs_visitor.cpp
+++ b/src/intel/compiler/gen6_gs_visitor.cpp
@@ -516,9 +516,7 @@ gen6_gs_visitor::setup_payload()
reg = setup_uniforms(reg);
- reg = setup_varying_inputs(reg, attribute_map, attributes_per_reg);
-
- lower_attributes_to_hw_regs(attribute_map, true);
+ reg = setup_varying_inputs(reg, attributes_per_reg);
this->first_non_payload_grf = reg;
}
--
2.5.0.400.gff86faf
More information about the mesa-dev
mailing list