[Mesa-dev] [PATCH v5] i965/fs: Handle non-const sample number in interpolateAtSample
Neil Roberts
neil at linux.intel.com
Fri Oct 9 04:52:03 PDT 2015
If a non-const sample number is given to interpolateAtSample it will
now generate an indirect send message with the sample ID similar to
how non-const sampler array indexing works. Previously non-const
values were ignored and instead it ended up using a constant 0 value.
The generator will try to determine if the sample ID is dynamically
uniform via nir_src_is_dynamically_uniform. If not it will query the
pixel interpolator in a loop, once for each different live sample
number. The next live sample number is found using emit_uniformize. If
multiple live channels have the same sample number then they will be
handled in a single iteration of the loop. The loop is necessary
because the indirect send message doesn't seem to have a way to
specify a different value for each fragment.
This fixes the following two Piglit tests:
arb_gpu_shader5-interpolateAtSample-nonconst
arb_gpu_shader5-interpolateAtSample-dynamically-nonuniform
v2: Handle dynamically non-uniform sample ids.
v3: Remove the BREAK instruction and predicate the WHILE directly.
Make the tokens arrays const. (Matt Turner)
v4: Iterate over the live channels instead of each possible sample
number.
v5: Don't special case immediate values in
brw_pixel_interpolator_query. Make a better wrapper for the
function to set up the PI send instruction. Ensure that the SHL
instructions are scalar. (Francisco Jerez).
---
Many thanks to Francisco and Matt for the all the helpful feedback.
src/mesa/drivers/dri/i965/brw_eu.h | 2 +-
src/mesa/drivers/dri/i965/brw_eu_emit.c | 27 ++---
src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 5 +-
src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 139 ++++++++++++++++++++-----
4 files changed, 130 insertions(+), 43 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h
index 761aa0e..0ac1ad9 100644
--- a/src/mesa/drivers/dri/i965/brw_eu.h
+++ b/src/mesa/drivers/dri/i965/brw_eu.h
@@ -461,7 +461,7 @@ brw_pixel_interpolator_query(struct brw_codegen *p,
struct brw_reg mrf,
bool noperspective,
unsigned mode,
- unsigned data,
+ struct brw_reg data,
unsigned msg_length,
unsigned response_length);
diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index dc699bb..bf2fee9 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -3212,26 +3212,29 @@ brw_pixel_interpolator_query(struct brw_codegen *p,
struct brw_reg mrf,
bool noperspective,
unsigned mode,
- unsigned data,
+ struct brw_reg data,
unsigned msg_length,
unsigned response_length)
{
const struct brw_device_info *devinfo = p->devinfo;
- struct brw_inst *insn = next_insn(p, BRW_OPCODE_SEND);
-
- brw_set_dest(p, insn, dest);
- brw_set_src0(p, insn, mrf);
- brw_set_message_descriptor(p, insn, GEN7_SFID_PIXEL_INTERPOLATOR,
- msg_length, response_length,
- false /* header is never present for PI */,
- false);
+ struct brw_inst *insn;
+ const uint16_t exec_size = brw_inst_exec_size(devinfo, p->current);
- brw_inst_set_pi_simd_mode(
- devinfo, insn, brw_inst_exec_size(devinfo, insn) == BRW_EXECUTE_16);
+ /* brw_send_indirect_message will automatically use a direct send message
+ * if data is actually immediate.
+ */
+ insn = brw_send_indirect_message(p,
+ GEN7_SFID_PIXEL_INTERPOLATOR,
+ dest,
+ mrf,
+ vec1(data));
+ brw_inst_set_mlen(devinfo, insn, msg_length);
+ brw_inst_set_rlen(devinfo, insn, response_length);
+
+ brw_inst_set_pi_simd_mode(devinfo, insn, exec_size == BRW_EXECUTE_16);
brw_inst_set_pi_slot_group(devinfo, insn, 0); /* zero unless 32/64px dispatch */
brw_inst_set_pi_nopersp(devinfo, insn, noperspective);
brw_inst_set_pi_message_type(devinfo, insn, mode);
- brw_inst_set_pi_message_data(devinfo, insn, data);
}
void
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index 6f8b75e..17e19cf 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -1377,15 +1377,14 @@ fs_generator::generate_pixel_interpolator_query(fs_inst *inst,
struct brw_reg msg_data,
unsigned msg_type)
{
- assert(msg_data.file == BRW_IMMEDIATE_VALUE &&
- msg_data.type == BRW_REGISTER_TYPE_UD);
+ assert(msg_data.type == BRW_REGISTER_TYPE_UD);
brw_pixel_interpolator_query(p,
retype(dst, BRW_REGISTER_TYPE_UW),
src,
inst->pi_noperspective,
msg_type,
- msg_data.dw1.ud,
+ msg_data,
inst->mlen,
inst->regs_written);
}
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index 03fe680..bc0df68 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -1180,6 +1180,36 @@ get_image_atomic_op(nir_intrinsic_op op, const glsl_type *type)
}
}
+static fs_inst *
+emit_pixel_interpolater_send(const fs_builder &bld,
+ enum opcode opcode,
+ const fs_reg &dst,
+ const fs_reg &src,
+ const fs_reg &desc,
+ glsl_interp_qualifier interpolation)
+{
+ fs_inst *inst;
+ fs_reg payload;
+ int mlen;
+
+ if (src.file == BAD_FILE) {
+ /* Dummy payload */
+ payload = bld.vgrf(BRW_REGISTER_TYPE_F, 1);
+ mlen = 1;
+ } else {
+ payload = src;
+ mlen = 2 * bld.dispatch_width() / 8;
+ }
+
+ inst = bld.emit(opcode, dst, payload, desc);
+ inst->mlen = mlen;
+ /* 2 floats per slot returned */
+ inst->regs_written = 2 * bld.dispatch_width() / 8;
+ inst->pi_noperspective = interpolation == INTERP_QUALIFIER_NOPERSPECTIVE;
+
+ return inst;
+}
+
void
fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr)
{
@@ -1583,28 +1613,81 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
((struct brw_wm_prog_data *) prog_data)->pulls_bary = true;
fs_reg dst_xy = bld.vgrf(BRW_REGISTER_TYPE_F, 2);
-
- /* For most messages, we need one reg of ignored data; the hardware
- * requires mlen==1 even when there is no payload. in the per-slot
- * offset case, we'll replace this with the proper source data.
- */
- fs_reg src = vgrf(glsl_type::float_type);
- int mlen = 1; /* one reg unless overriden */
- fs_inst *inst;
+ const glsl_interp_qualifier interpolation =
+ (glsl_interp_qualifier) instr->variables[0]->var->data.interpolation;
switch (instr->intrinsic) {
case nir_intrinsic_interp_var_at_centroid:
- inst = bld.emit(FS_OPCODE_INTERPOLATE_AT_CENTROID,
- dst_xy, src, fs_reg(0u));
+ emit_pixel_interpolater_send(bld,
+ FS_OPCODE_INTERPOLATE_AT_CENTROID,
+ dst_xy,
+ fs_reg(), /* src */
+ fs_reg(0u),
+ interpolation);
break;
case nir_intrinsic_interp_var_at_sample: {
- /* XXX: We should probably handle non-constant sample id's */
nir_const_value *const_sample = nir_src_as_const_value(instr->src[0]);
- assert(const_sample);
- unsigned msg_data = const_sample ? const_sample->i[0] << 4 : 0;
- inst = bld.emit(FS_OPCODE_INTERPOLATE_AT_SAMPLE, dst_xy, src,
- fs_reg(msg_data));
+
+ if (const_sample) {
+ unsigned msg_data = const_sample->i[0] << 4;
+
+ emit_pixel_interpolater_send(bld,
+ FS_OPCODE_INTERPOLATE_AT_SAMPLE,
+ dst_xy,
+ fs_reg(), /* src */
+ fs_reg(msg_data),
+ interpolation);
+ } else {
+ const fs_reg sample_src = retype(get_nir_src(instr->src[0]),
+ BRW_REGISTER_TYPE_UD);
+
+ if (nir_src_is_dynamically_uniform(instr->src[0])) {
+ const fs_reg sample_id = bld.emit_uniformize(sample_src);
+ const fs_reg msg_data = vgrf(glsl_type::uint_type);
+ bld.exec_all().group(1, 0).SHL(msg_data, sample_id, fs_reg(4u));
+ emit_pixel_interpolater_send(bld,
+ FS_OPCODE_INTERPOLATE_AT_SAMPLE,
+ dst_xy,
+ fs_reg(), /* src */
+ msg_data,
+ interpolation);
+ } else {
+ /* Make a loop that sends a message to the pixel interpolater
+ * for the sample number in each live channel. If there are
+ * multiple channels with the same sample number then these
+ * will be handled simultaneously with a single interation of
+ * the loop.
+ */
+ bld.emit(BRW_OPCODE_DO);
+
+ /* Get the next live sample number into sample_id_reg */
+ const fs_reg sample_id = bld.emit_uniformize(sample_src);
+
+ /* Set the flag register so that we can perform the send
+ * message on all channels that have the same sample number
+ */
+ bld.CMP(bld.null_reg_ud(),
+ sample_src, sample_id,
+ BRW_CONDITIONAL_EQ);
+ const fs_reg msg_data = vgrf(glsl_type::uint_type);
+ bld.exec_all().group(1, 0).SHL(msg_data, sample_id, fs_reg(4u));
+ fs_inst *inst =
+ emit_pixel_interpolater_send(bld,
+ FS_OPCODE_INTERPOLATE_AT_SAMPLE,
+ dst_xy,
+ fs_reg(), /* src */
+ msg_data,
+ interpolation);
+ set_predicate(BRW_PREDICATE_NORMAL, inst);
+
+ /* Continue the loop if there are any live channels left */
+ set_predicate_inv(BRW_PREDICATE_NORMAL,
+ true, /* inverse */
+ bld.emit(BRW_OPCODE_WHILE));
+ }
+ }
+
break;
}
@@ -1615,10 +1698,14 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
unsigned off_x = MIN2((int)(const_offset->f[0] * 16), 7) & 0xf;
unsigned off_y = MIN2((int)(const_offset->f[1] * 16), 7) & 0xf;
- inst = bld.emit(FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET, dst_xy, src,
- fs_reg(off_x | (off_y << 4)));
+ emit_pixel_interpolater_send(bld,
+ FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET,
+ dst_xy,
+ fs_reg(), /* src */
+ fs_reg(off_x | (off_y << 4)),
+ interpolation);
} else {
- src = vgrf(glsl_type::ivec2_type);
+ fs_reg src = vgrf(glsl_type::ivec2_type);
fs_reg offset_src = retype(get_nir_src(instr->src[0]),
BRW_REGISTER_TYPE_F);
for (int i = 0; i < 2; i++) {
@@ -1646,9 +1733,13 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
bld.SEL(offset(src, bld, i), itemp, fs_reg(7)));
}
- mlen = 2 * dispatch_width / 8;
- inst = bld.emit(FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET, dst_xy, src,
- fs_reg(0u));
+ const enum opcode opcode = FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET;
+ emit_pixel_interpolater_send(bld,
+ opcode,
+ dst_xy,
+ src,
+ fs_reg(0u),
+ interpolation);
}
break;
}
@@ -1657,12 +1748,6 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
unreachable("Invalid intrinsic");
}
- inst->mlen = mlen;
- /* 2 floats per slot returned */
- inst->regs_written = 2 * dispatch_width / 8;
- inst->pi_noperspective = instr->variables[0]->var->data.interpolation ==
- INTERP_QUALIFIER_NOPERSPECTIVE;
-
for (unsigned j = 0; j < instr->num_components; j++) {
fs_reg src = interp_reg(instr->variables[0]->var->data.location, j);
src.type = dest.type;
--
1.9.3
More information about the mesa-dev
mailing list