[Mesa-dev] [PATCH] i965/fs: Take the sample mask into account in FIND_LIVE_CHANNEL
Jason Ekstrand
jason at jlekstrand.net
Wed Sep 14 06:09:11 UTC 2016
Just looking at the channel enables is not sufficient, at least not on Sky
Lake. Channels that are disabled by the sample_mask may show up in the
channel enable register as being enabled even if they are not executing.
This can cause FIND_LIVE_CHANNEL to return a channel that isn't actually
executing. In our handling of interpolateAtSample we do a clever trick
with emit_uniformize to call the interpolator once for each unique sample
id. Thanks to FIND_LIVE_CHANNEL returning a dead channel, we can get an
infinite loop which hangs the GPU.
Signed-off-by: Jason Ekstrand <jason at jlekstrand.net>
---
src/mesa/drivers/dri/i965/brw_eu.h | 3 ++-
src/mesa/drivers/dri/i965/brw_eu_emit.c | 22 +++++++++++++++-------
src/mesa/drivers/dri/i965/brw_fs_builder.h | 3 ++-
src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 2 +-
src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 2 +-
5 files changed, 21 insertions(+), 11 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h
index 3e52764..9aaab78 100644
--- a/src/mesa/drivers/dri/i965/brw_eu.h
+++ b/src/mesa/drivers/dri/i965/brw_eu.h
@@ -488,7 +488,8 @@ brw_pixel_interpolator_query(struct brw_codegen *p,
void
brw_find_live_channel(struct brw_codegen *p,
- struct brw_reg dst);
+ struct brw_reg dst,
+ struct brw_reg sample_mask);
void
brw_broadcast(struct brw_codegen *p,
diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index 3b12030..f593a8d 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -3361,7 +3361,8 @@ brw_pixel_interpolator_query(struct brw_codegen *p,
}
void
-brw_find_live_channel(struct brw_codegen *p, struct brw_reg dst)
+brw_find_live_channel(struct brw_codegen *p, struct brw_reg dst,
+ struct brw_reg sample_mask)
{
const struct gen_device_info *devinfo = p->devinfo;
const unsigned exec_size = 1 << brw_inst_exec_size(devinfo, p->current);
@@ -3377,13 +3378,20 @@ brw_find_live_channel(struct brw_codegen *p, struct brw_reg dst)
if (devinfo->gen >= 8) {
/* Getting the first active channel index is easy on Gen8: Just find
- * the first bit set in the mask register. The same register exists
- * on HSW already but it reads back as all ones when the current
- * instruction has execution masking disabled, so it's kind of
- * useless.
+ * the first bit set in the mask register AND the sample mask. The
+ * same register exists on HSW already but it reads back as all ones
+ * when the current instruction has execution masking disabled, so
+ * it's kind of useless.
*/
- inst = brw_FBL(p, vec1(dst),
- retype(brw_mask_reg(0), BRW_REGISTER_TYPE_UD));
+ struct brw_reg mask_reg = retype(brw_mask_reg(0),
+ BRW_REGISTER_TYPE_UD);
+ if (sample_mask.file != BRW_IMMEDIATE_VALUE ||
+ sample_mask.ud != 0xffffffff) {
+ brw_AND(p, vec1(dst), mask_reg, sample_mask);
+ mask_reg = vec1(dst);
+ }
+
+ inst = brw_FBL(p, vec1(dst), mask_reg);
/* Quarter control has the effect of magically shifting the value of
* this register so you'll get the first active channel relative to
diff --git a/src/mesa/drivers/dri/i965/brw_fs_builder.h b/src/mesa/drivers/dri/i965/brw_fs_builder.h
index 483672f..45b5f88 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_builder.h
+++ b/src/mesa/drivers/dri/i965/brw_fs_builder.h
@@ -407,7 +407,8 @@ namespace brw {
const dst_reg chan_index = vgrf(BRW_REGISTER_TYPE_UD);
const dst_reg dst = vgrf(src.type);
- ubld.emit(SHADER_OPCODE_FIND_LIVE_CHANNEL, chan_index);
+ ubld.emit(SHADER_OPCODE_FIND_LIVE_CHANNEL, chan_index,
+ sample_mask_reg());
ubld.emit(SHADER_OPCODE_BROADCAST, dst, src, component(chan_index, 0));
return src_reg(component(dst, 0));
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index 2f4ba7b..d923b0b 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -2041,7 +2041,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
break;
case SHADER_OPCODE_FIND_LIVE_CHANNEL:
- brw_find_live_channel(p, dst);
+ brw_find_live_channel(p, dst, src[0]);
break;
case SHADER_OPCODE_BROADCAST:
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
index 256abae..63fca6f 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
@@ -1863,7 +1863,7 @@ generate_code(struct brw_codegen *p,
break;
case SHADER_OPCODE_FIND_LIVE_CHANNEL:
- brw_find_live_channel(p, dst);
+ brw_find_live_channel(p, dst, brw_null_reg());
break;
case SHADER_OPCODE_BROADCAST:
--
2.5.0.400.gff86faf
More information about the mesa-dev
mailing list