[Mesa-dev] [PATCH 4/4] i965/nir: Implement SIMD16 support in the NIR FS backend.

Kenneth Graunke kenneth at whitecape.org
Fri Jan 16 03:29:48 PST 2015


Signed-off-by: Kenneth Graunke <kenneth at whitecape.org>
---
 src/mesa/drivers/dri/i965/brw_fs.cpp     |  5 +--
 src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 73 +++++++++++++++++++++-----------
 2 files changed, 50 insertions(+), 28 deletions(-)

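That was easy: scale every NIR register offset by reg_width
(dispatch_width / 8), make vgrf() allocate
num_components * (dispatch_width / 8) registers, and drop the
no16() bail-out in run_fs().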

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index b7378de..9572f3a 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -1035,8 +1035,8 @@ fs_visitor::virtual_grf_alloc(int size)
 fs_reg
 fs_visitor::vgrf(int num_components)
 {
-   return fs_reg(GRF, virtual_grf_alloc(num_components), BRW_REGISTER_TYPE_F,
-                 dispatch_width);
+   return fs_reg(GRF, virtual_grf_alloc(num_components * (dispatch_width / 8)),
+                 BRW_REGISTER_TYPE_F, dispatch_width);
 }
 
 /** Fixed HW reg constructor. */
@@ -3759,7 +3759,6 @@ fs_visitor::run_fs()
        */
       if (shader) {
          if (getenv("INTEL_USE_NIR") != NULL && !brw->use_rep_send) {
-            no16("Cannot do 16-wide in NIR yet");
             emit_nir_code();
          } else {
             foreach_in_list(ir_instruction, ir, shader->base.ir) {
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index fca03e5..5208985 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -132,12 +132,14 @@ fs_visitor::emit_nir_code()
 void
 fs_visitor::nir_setup_inputs(nir_shader *shader)
 {
+   int reg_width = dispatch_width / 8;
+
    fs_reg varying = nir_inputs;
 
    struct hash_entry *entry;
    hash_table_foreach(shader->inputs, entry) {
       nir_variable *var = (nir_variable *) entry->data;
-      varying.reg_offset = var->data.driver_location;
+      varying.reg_offset = var->data.driver_location * reg_width;
 
       fs_reg reg;
       if (!strcmp(var->name, "gl_FragCoord")) {
@@ -159,13 +161,15 @@ fs_visitor::nir_setup_inputs(nir_shader *shader)
 void
 fs_visitor::nir_setup_outputs(nir_shader *shader)
 {
+   int reg_width = dispatch_width / 8;
+
    brw_wm_prog_key *key = (brw_wm_prog_key*) this->key;
    fs_reg reg = nir_outputs;
 
    struct hash_entry *entry;
    hash_table_foreach(shader->outputs, entry) {
       nir_variable *var = (nir_variable *) entry->data;
-      reg.reg_offset = var->data.driver_location;
+      reg.reg_offset = var->data.driver_location * reg_width;
 
       if (var->data.index > 0) {
          assert(var->data.location == FRAG_RESULT_DATA0);
@@ -195,7 +199,8 @@ fs_visitor::nir_setup_outputs(nir_shader *shader)
          for (unsigned int i = 0; i < MAX2(1, var->type->length); i++) {
             int output = var->data.location - FRAG_RESULT_DATA0 + i;
             this->outputs[output] = reg;
-            this->outputs[output].reg_offset += vector_elements * i;
+            this->outputs[output].reg_offset +=
+               (vector_elements * i) * reg_width;
             this->output_components[output] = vector_elements;
          }
       }
@@ -1035,6 +1040,8 @@ fs_visitor::nir_emit_alu(nir_alu_instr *instr)
 fs_reg
 fs_visitor::get_nir_src(nir_src src)
 {
+   int reg_width = dispatch_width / 8;
+
    if (src.is_ssa) {
       assert(src.ssa->parent_instr->type == nir_instr_type_load_const);
       nir_load_const_instr *load = nir_instr_as_load_const(src.ssa->parent_instr);
@@ -1057,7 +1064,7 @@ fs_visitor::get_nir_src(nir_src src)
        * this to F if they need to
        */
       reg.type = BRW_REGISTER_TYPE_D;
-      reg.reg_offset = src.reg.base_offset;
+      reg.reg_offset = src.reg.base_offset * reg_width;
       if (src.reg.indirect) {
          reg.reladdr = new(mem_ctx) fs_reg();
          *reg.reladdr = retype(get_nir_src(*src.reg.indirect),
@@ -1111,13 +1118,15 @@ fs_visitor::get_nir_alu_src(nir_alu_instr *instr, unsigned src)
 fs_reg
 fs_visitor::get_nir_dest(nir_dest dest)
 {
+   int reg_width = dispatch_width / 8;
+
    fs_reg reg;
    if (dest.reg.reg->is_global)
       reg = nir_globals[dest.reg.reg->index];
    else
       reg = nir_locals[dest.reg.reg->index];
 
-   reg.reg_offset = dest.reg.base_offset;
+   reg.reg_offset = dest.reg.base_offset * reg_width;
    if (dest.reg.indirect) {
       reg.reladdr = new(mem_ctx) fs_reg();
       *reg.reladdr = retype(get_nir_src(*dest.reg.indirect),
@@ -1130,15 +1139,17 @@ fs_visitor::get_nir_dest(nir_dest dest)
 void
 fs_visitor::emit_percomp(fs_inst *inst, unsigned wr_mask)
 {
+   int reg_width = dispatch_width / 8;
+
    for (unsigned i = 0; i < 4; i++) {
       if (!((wr_mask >> i) & 1))
          continue;
 
       fs_inst *new_inst = new(mem_ctx) fs_inst(*inst);
-      new_inst->dst.reg_offset += i;
+      new_inst->dst.reg_offset += i * reg_width;
       for (unsigned j = 0; j < new_inst->sources; j++)
          if (inst->src[j].file == GRF)
-            new_inst->src[j].reg_offset += i;
+            new_inst->src[j].reg_offset += i * reg_width;
 
       emit(new_inst);
    }
@@ -1150,15 +1161,17 @@ fs_visitor::emit_percomp(enum opcode op, fs_reg dest, fs_reg src0,
                          enum brw_predicate predicate,
                          enum brw_conditional_mod mod)
 {
+   int reg_width = dispatch_width / 8;
+
    for (unsigned i = 0; i < 4; i++) {
       if (!((wr_mask >> i) & 1))
          continue;
 
       fs_inst *new_inst = new(mem_ctx) fs_inst(op, dest, src0);
-      new_inst->dst.reg_offset += i;
+      new_inst->dst.reg_offset += i * reg_width;
       for (unsigned j = 0; j < new_inst->sources; j++)
          if (new_inst->src[j].file == GRF)
-            new_inst->src[j].reg_offset += i;
+            new_inst->src[j].reg_offset += i * reg_width;
 
       new_inst->predicate = predicate;
       new_inst->conditional_mod = mod;
@@ -1173,15 +1186,17 @@ fs_visitor::emit_percomp(enum opcode op, fs_reg dest, fs_reg src0, fs_reg src1,
                          enum brw_predicate predicate,
                          enum brw_conditional_mod mod)
 {
+   int reg_width = dispatch_width / 8;
+
    for (unsigned i = 0; i < 4; i++) {
       if (!((wr_mask >> i) & 1))
          continue;
 
       fs_inst *new_inst = new(mem_ctx) fs_inst(op, dest, src0, src1);
-      new_inst->dst.reg_offset += i;
+      new_inst->dst.reg_offset += i * reg_width;
       for (unsigned j = 0; j < new_inst->sources; j++)
          if (new_inst->src[j].file == GRF)
-            new_inst->src[j].reg_offset += i;
+            new_inst->src[j].reg_offset += i * reg_width;
 
       new_inst->predicate = predicate;
       new_inst->conditional_mod = mod;
@@ -1194,15 +1209,17 @@ void
 fs_visitor::emit_math_percomp(enum opcode op, fs_reg dest, fs_reg src0,
                               unsigned wr_mask, bool saturate)
 {
+   int reg_width = dispatch_width / 8;
+
    for (unsigned i = 0; i < 4; i++) {
       if (!((wr_mask >> i) & 1))
          continue;
 
       fs_reg new_dest = dest;
-      new_dest.reg_offset += i;
+      new_dest.reg_offset += i * reg_width;
       fs_reg new_src0 = src0;
       if (src0.file == GRF)
-         new_src0.reg_offset += i;
+         new_src0.reg_offset += i * reg_width;
 
       fs_inst *new_inst = emit_math(op, new_dest, new_src0);
       new_inst->saturate = saturate;
@@ -1214,18 +1231,20 @@ fs_visitor::emit_math_percomp(enum opcode op, fs_reg dest, fs_reg src0,
                               fs_reg src1, unsigned wr_mask,
                               bool saturate)
 {
+   int reg_width = dispatch_width / 8;
+
    for (unsigned i = 0; i < 4; i++) {
       if (!((wr_mask >> i) & 1))
          continue;
 
       fs_reg new_dest = dest;
-      new_dest.reg_offset += i;
+      new_dest.reg_offset += i * reg_width;
       fs_reg new_src0 = src0;
       if (src0.file == GRF)
-         new_src0.reg_offset += i;
+         new_src0.reg_offset += i * reg_width;
       fs_reg new_src1 = src1;
       if (src1.file == GRF)
-         new_src1.reg_offset += i;
+         new_src1.reg_offset += i * reg_width;
 
       fs_inst *new_inst = emit_math(op, new_dest, new_src0, new_src1);
       new_inst->saturate = saturate;
@@ -1236,9 +1255,11 @@ void
 fs_visitor::emit_reduction(enum opcode op, fs_reg dest, fs_reg src,
                            unsigned num_components)
 {
+   int reg_width = dispatch_width / 8;
+
    fs_reg src0 = src;
    fs_reg src1 = src;
-   src1.reg_offset++;
+   src1.reg_offset += 1 * reg_width;
 
    if (num_components == 2) {
       emit(op, dest, src0, src1);
@@ -1250,7 +1271,7 @@ fs_visitor::emit_reduction(enum opcode op, fs_reg dest, fs_reg src,
    emit(op, temp1, src0, src1);
 
    fs_reg src2 = src;
-   src2.reg_offset += 2;
+   src2.reg_offset += 2 * reg_width;
 
    if (num_components == 3) {
       emit(op, dest, temp1, src2);
@@ -1260,7 +1281,7 @@ fs_visitor::emit_reduction(enum opcode op, fs_reg dest, fs_reg src,
    assert(num_components == 4);
 
    fs_reg src3 = src;
-   src3.reg_offset += 3;
+   src3.reg_offset += 3 * reg_width;
    fs_reg temp2 = vgrf(1);
    temp2.type = src.type;
 
@@ -1271,6 +1292,8 @@ fs_visitor::emit_reduction(enum opcode op, fs_reg dest, fs_reg src,
 void
 fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
 {
+   int reg_width = dispatch_width / 8;
+
    fs_reg dest;
    if (nir_intrinsic_infos[instr->intrinsic].has_dest)
       dest = get_nir_dest(instr->dest);
@@ -1374,7 +1397,7 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
             index++;
 
             emit(MOV(dest, src));
-            dest.reg_offset++;
+            dest.reg_offset += 1 * reg_width;
          }
       }
       break;
@@ -1435,7 +1458,7 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
             assert(packed_consts.subreg_offset < 32);
 
             emit(MOV(dest, packed_consts));
-            dest.reg_offset++;
+            dest.reg_offset += 1 * reg_width;
          }
       }
       break;
@@ -1448,14 +1471,14 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
       for (int i = 0; i < instr->const_index[1]; i++) {
          for (unsigned j = 0; j < instr->num_components; j++) {
             fs_reg src = nir_inputs;
-            src.reg_offset = instr->const_index[0] + index;
+            src.reg_offset = (instr->const_index[0] + index) * reg_width;
             if (has_indirect)
                src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[0]));
             src.type = dest.type;
             index++;
 
             emit(MOV(dest, src));
-            dest.reg_offset++;
+            dest.reg_offset += 1 * reg_width;
          }
       }
       break;
@@ -1587,13 +1610,13 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
       for (int i = 0; i < instr->const_index[1]; i++) {
          for (unsigned j = 0; j < instr->num_components; j++) {
             fs_reg new_dest = nir_outputs;
-            new_dest.reg_offset = instr->const_index[0] + index;
+            new_dest.reg_offset = (instr->const_index[0] + index) * reg_width;
             if (has_indirect)
                src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[1]));
             new_dest.type = src.type;
             index++;
             emit(MOV(new_dest, src));
-            src.reg_offset++;
+            src.reg_offset += 1 * reg_width;
          }
       }
       break;
-- 
2.2.2



More information about the mesa-dev mailing list