[Mesa-dev] [PATCH v4] i965/fs: Handle non-const sample number in interpolateAtSample

Neil Roberts neil at linux.intel.com
Wed Oct 7 06:22:31 PDT 2015


If a non-const sample number is given to interpolateAtSample it will
now generate an indirect send message with the sample ID similar to
how non-const sampler array indexing works. Previously non-const
values were ignored and instead it ended up using a constant 0 value.

The generator will try to determine if the sample ID is dynamically
uniform via nir_src_is_dynamically_uniform. If not, it will query the
pixel interpolator in a loop, once for each distinct live sample
number. The next live sample number is found using emit_uniformize. If
multiple live channels have the same sample number, they will be
handled in a single iteration of the loop. The loop is necessary
because the indirect send message doesn't seem to have a way to
specify a different value for each fragment.

This fixes the following two Piglit tests:

arb_gpu_shader5-interpolateAtSample-nonconst
arb_gpu_shader5-interpolateAtSample-dynamically-nonuniform

v2: Handle dynamically non-uniform sample ids.
v3: Remove the BREAK instruction and predicate the WHILE directly.
    Make the tokens arrays const.
v4: Iterate over the live channels instead of each possible sample
    number.
---

This version of the patch iterates over each live channel instead of
each possible sample number. It doesn't need to access
STATE_NUM_SAMPLES so it avoids the problem that Francisco mentioned.
Note that if it turns out that nearly all of the fragments use the
same sample number, then those fragments will be handled at once and
the loop will bail out early, so it should be more efficient. It also
makes the patch much simpler.

 src/mesa/drivers/dri/i965/brw_eu.h             |  2 +-
 src/mesa/drivers/dri/i965/brw_eu_emit.c        | 34 +++++++---
 src/mesa/drivers/dri/i965/brw_fs_generator.cpp |  5 +-
 src/mesa/drivers/dri/i965/brw_fs_nir.cpp       | 93 +++++++++++++++++++++-----
 4 files changed, 102 insertions(+), 32 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h
index 761aa0e..0ac1ad9 100644
--- a/src/mesa/drivers/dri/i965/brw_eu.h
+++ b/src/mesa/drivers/dri/i965/brw_eu.h
@@ -461,7 +461,7 @@ brw_pixel_interpolator_query(struct brw_codegen *p,
                              struct brw_reg mrf,
                              bool noperspective,
                              unsigned mode,
-                             unsigned data,
+                             struct brw_reg data,
                              unsigned msg_length,
                              unsigned response_length);
 
diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index dc699bb..9c38e99 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -3212,26 +3212,38 @@ brw_pixel_interpolator_query(struct brw_codegen *p,
                              struct brw_reg mrf,
                              bool noperspective,
                              unsigned mode,
-                             unsigned data,
+                             struct brw_reg data,
                              unsigned msg_length,
                              unsigned response_length)
 {
    const struct brw_device_info *devinfo = p->devinfo;
-   struct brw_inst *insn = next_insn(p, BRW_OPCODE_SEND);
+   struct brw_inst *insn;
+   uint16_t exec_size;
 
-   brw_set_dest(p, insn, dest);
-   brw_set_src0(p, insn, mrf);
-   brw_set_message_descriptor(p, insn, GEN7_SFID_PIXEL_INTERPOLATOR,
-                              msg_length, response_length,
-                              false /* header is never present for PI */,
-                              false);
+   if (data.file == BRW_IMMEDIATE_VALUE) {
+      insn = next_insn(p, BRW_OPCODE_SEND);
+      brw_set_dest(p, insn, dest);
+      brw_set_src0(p, insn, mrf);
+      brw_set_message_descriptor(p, insn, GEN7_SFID_PIXEL_INTERPOLATOR,
+                                 msg_length, response_length,
+                                 false /* header is never present for PI */,
+                                 false);
+      brw_inst_set_pi_message_data(devinfo, insn, data.dw1.ud);
+   } else {
+      insn = brw_send_indirect_message(p,
+                                       GEN7_SFID_PIXEL_INTERPOLATOR,
+                                       dest,
+                                       mrf,
+                                       vec1(data));
+      brw_inst_set_mlen(devinfo, insn, msg_length);
+      brw_inst_set_rlen(devinfo, insn, response_length);
+   }
 
-   brw_inst_set_pi_simd_mode(
-         devinfo, insn, brw_inst_exec_size(devinfo, insn) == BRW_EXECUTE_16);
+   exec_size = brw_inst_exec_size(devinfo, p->current);
+   brw_inst_set_pi_simd_mode(devinfo, insn, exec_size == BRW_EXECUTE_16);
    brw_inst_set_pi_slot_group(devinfo, insn, 0); /* zero unless 32/64px dispatch */
    brw_inst_set_pi_nopersp(devinfo, insn, noperspective);
    brw_inst_set_pi_message_type(devinfo, insn, mode);
-   brw_inst_set_pi_message_data(devinfo, insn, data);
 }
 
 void
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index 6f8b75e..17e19cf 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -1377,15 +1377,14 @@ fs_generator::generate_pixel_interpolator_query(fs_inst *inst,
                                                 struct brw_reg msg_data,
                                                 unsigned msg_type)
 {
-   assert(msg_data.file == BRW_IMMEDIATE_VALUE &&
-          msg_data.type == BRW_REGISTER_TYPE_UD);
+   assert(msg_data.type == BRW_REGISTER_TYPE_UD);
 
    brw_pixel_interpolator_query(p,
          retype(dst, BRW_REGISTER_TYPE_UW),
          src,
          inst->pi_noperspective,
          msg_type,
-         msg_data.dw1.ud,
+         msg_data,
          inst->mlen,
          inst->regs_written);
 }
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index 03fe680..8a73bda 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -1180,6 +1180,23 @@ get_image_atomic_op(nir_intrinsic_op op, const glsl_type *type)
    }
 }
 
+/* For most messages, we need one reg of ignored data; the hardware requires
+ * mlen==1 even when there is no payload. In the per-slot offset case, we'll
+ * replace this with the proper source data.
+ */
+static void
+setup_pixel_interpolater_instruction(fs_visitor *v,
+                                     nir_intrinsic_instr *instr,
+                                     fs_inst *inst,
+                                     int mlen = 1)
+{
+   inst->mlen = mlen;
+   /* 2 floats per slot returned */
+   inst->regs_written = 2 * v->dispatch_width / 8;
+   inst->pi_noperspective = instr->variables[0]->var->data.interpolation ==
+      INTERP_QUALIFIER_NOPERSPECTIVE;
+}
+
 void
 fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr)
 {
@@ -1584,27 +1601,71 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
 
       fs_reg dst_xy = bld.vgrf(BRW_REGISTER_TYPE_F, 2);
 
-      /* For most messages, we need one reg of ignored data; the hardware
-       * requires mlen==1 even when there is no payload. in the per-slot
-       * offset case, we'll replace this with the proper source data.
-       */
       fs_reg src = vgrf(glsl_type::float_type);
-      int mlen = 1;     /* one reg unless overriden */
       fs_inst *inst;
 
       switch (instr->intrinsic) {
       case nir_intrinsic_interp_var_at_centroid:
          inst = bld.emit(FS_OPCODE_INTERPOLATE_AT_CENTROID,
                          dst_xy, src, fs_reg(0u));
+         setup_pixel_interpolater_instruction(this, instr, inst);
          break;
 
       case nir_intrinsic_interp_var_at_sample: {
-         /* XXX: We should probably handle non-constant sample id's */
          nir_const_value *const_sample = nir_src_as_const_value(instr->src[0]);
-         assert(const_sample);
-         unsigned msg_data = const_sample ? const_sample->i[0] << 4 : 0;
-         inst = bld.emit(FS_OPCODE_INTERPOLATE_AT_SAMPLE, dst_xy, src,
-                         fs_reg(msg_data));
+
+         if (const_sample) {
+            unsigned msg_data = const_sample->i[0] << 4;
+
+            inst = bld.emit(FS_OPCODE_INTERPOLATE_AT_SAMPLE, dst_xy, src,
+                            fs_reg(msg_data));
+
+            setup_pixel_interpolater_instruction(this, instr, inst);
+         } else {
+            fs_reg sample_src = retype(get_nir_src(instr->src[0]),
+                                       BRW_REGISTER_TYPE_UD);
+            fs_reg sample_id_reg;
+
+            if (nir_src_is_dynamically_uniform(instr->src[0])) {
+               sample_id_reg = vgrf(glsl_type::uint_type);
+               bld.SHL(sample_id_reg, sample_src, fs_reg(4u));
+               sample_id_reg = bld.emit_uniformize(sample_id_reg);
+               inst = bld.emit(FS_OPCODE_INTERPOLATE_AT_SAMPLE, dst_xy, src,
+                               sample_id_reg);
+               setup_pixel_interpolater_instruction(this, instr, inst);
+            } else {
+               /* Make a loop that sends a message to the pixel interpolator
+                * for the sample number in each live channel. If there are
+                * multiple channels with the same sample number then these
+                * will be handled simultaneously with a single iteration of
+                * the loop.
+                */
+               bld.emit(BRW_OPCODE_DO);
+
+               /* Get the next live sample number into sample_id_reg */
+               sample_id_reg = bld.emit_uniformize(sample_src);
+
+               /* Set the flag register so that we can perform the send
+                * message on all channels that have the same sample number
+                */
+               bld.CMP(bld.null_reg_ud(),
+                       sample_src, sample_id_reg,
+                       BRW_CONDITIONAL_EQ);
+               fs_reg msg_data = component(vgrf(glsl_type::uint_type), 0);
+               bld.SHL(msg_data, sample_id_reg, fs_reg(4u))
+                  ->force_writemask_all = true;
+               inst = bld.emit(FS_OPCODE_INTERPOLATE_AT_SAMPLE, dst_xy, src,
+                               msg_data);
+               setup_pixel_interpolater_instruction(this, instr, inst);
+               set_predicate(BRW_PREDICATE_NORMAL, inst);
+
+               /* Continue the loop if there are any live channels left */
+               set_predicate_inv(BRW_PREDICATE_NORMAL,
+                                 true, /* inverse */
+                                 bld.emit(BRW_OPCODE_WHILE));
+            }
+         }
+
          break;
       }
 
@@ -1617,6 +1678,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
 
             inst = bld.emit(FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET, dst_xy, src,
                             fs_reg(off_x | (off_y << 4)));
+            setup_pixel_interpolater_instruction(this, instr, inst);
          } else {
             src = vgrf(glsl_type::ivec2_type);
             fs_reg offset_src = retype(get_nir_src(instr->src[0]),
@@ -1646,9 +1708,12 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
                            bld.SEL(offset(src, bld, i), itemp, fs_reg(7)));
             }
 
-            mlen = 2 * dispatch_width / 8;
             inst = bld.emit(FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET, dst_xy, src,
                             fs_reg(0u));
+            setup_pixel_interpolater_instruction(this,
+                                                 instr,
+                                                 inst,
+                                                 2 * dispatch_width / 8);
          }
          break;
       }
@@ -1657,12 +1722,6 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
          unreachable("Invalid intrinsic");
       }
 
-      inst->mlen = mlen;
-      /* 2 floats per slot returned */
-      inst->regs_written = 2 * dispatch_width / 8;
-      inst->pi_noperspective = instr->variables[0]->var->data.interpolation ==
-                               INTERP_QUALIFIER_NOPERSPECTIVE;
-
       for (unsigned j = 0; j < instr->num_components; j++) {
          fs_reg src = interp_reg(instr->variables[0]->var->data.location, j);
          src.type = dest.type;
-- 
1.9.3



More information about the mesa-dev mailing list