[Mesa-dev] [PATCH v2 41/41] SQUASH: i965/fs: Force a high register for the final FB write
Jason Ekstrand
jason at jlekstrand.net
Fri Sep 26 12:24:59 PDT 2014
v2: Renamed the array for the range mappings and added a comment.
---
src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp | 34 ++++++++++++++++++++++-
src/mesa/drivers/dri/i965/intel_screen.h | 10 +++++++
2 files changed, 43 insertions(+), 1 deletion(-)
diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
index 246d27c..477efe1 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
@@ -113,6 +113,10 @@ brw_alloc_reg_set(struct intel_screen *screen, int reg_width)
class_sizes[class_count++] = 8;
}
+ memset(screen->wm_reg_sets[index].class_to_ra_reg_range, 0,
+ sizeof(screen->wm_reg_sets[index].class_to_ra_reg_range));
+ int *class_to_ra_reg_range = screen->wm_reg_sets[index].class_to_ra_reg_range;
+
/* Compute the total number of registers across all classes. */
int ra_reg_count = 0;
for (int i = 0; i < class_count; i++) {
@@ -131,6 +135,14 @@ brw_alloc_reg_set(struct intel_screen *screen, int reg_width)
} else {
ra_reg_count += base_reg_count - (class_sizes[i] - 1);
}
+ /* Mark the last register. We'll fill in the beginnings later. */
+ class_to_ra_reg_range[class_sizes[i]] = ra_reg_count;
+ }
+
+ /* Fill out the rest of the range markers */
+ for (int i = 1; i < 17; ++i) {
+ if (class_to_ra_reg_range[i] == 0)
+ class_to_ra_reg_range[i] = class_to_ra_reg_range[i-1];
}
uint8_t *ra_reg_to_grf = ralloc_array(screen, uint8_t, ra_reg_count);
@@ -504,9 +516,29 @@ fs_visitor::assign_regs(bool allow_spilling)
}
setup_payload_interference(g, payload_node_count, first_payload_node);
- if (brw->gen >= 7)
+ if (brw->gen >= 7) {
setup_mrf_hack_interference(g, first_mrf_hack_node);
+ foreach_in_list(fs_inst, inst, &instructions) {
+ /* When we do send-from-GRF for FB writes, we need to ensure that
+ * the last write instruction sends from a high register. This is
+ * because the vertex fetcher wants to start filling the low
+ * payload registers while the pixel data port is still working on
+ * writing out the memory. If we don't do this, we get rendering
+ * artifacts.
+ *
+ * We could just do "something high". Instead, we just pick the
+ * highest register that works.
+ */
+ if (inst->opcode == FS_OPCODE_FB_WRITE && inst->eot) {
+ int size = virtual_grf_sizes[inst->src[0].reg];
+ int reg = screen->wm_reg_sets[rsi].class_to_ra_reg_range[size] - 1;
+ ra_set_node_reg(g, inst->src[0].reg, reg);
+ break;
+ }
+ }
+ }
+
if (dispatch_width > 8) {
/* In 16-wide dispatch we have an issue where a compressed
* instruction is actually two instructions executed simultaneously.
diff --git a/src/mesa/drivers/dri/i965/intel_screen.h b/src/mesa/drivers/dri/i965/intel_screen.h
index 945f6f5..88a84a2 100644
--- a/src/mesa/drivers/dri/i965/intel_screen.h
+++ b/src/mesa/drivers/dri/i965/intel_screen.h
@@ -90,6 +90,16 @@ struct intel_screen
int classes[16];
/**
+ * Mapping from classes to ra_reg ranges. Each of the per-size
+ * classes corresponds to a range of ra_reg nodes. This array stores
+ * the first ra_reg of each class, and its last element holds the
+ * total number of ra_reg elements. This way the range of the i'th
+ * class is given by:
+ * [ class_to_ra_reg_range[i], class_to_ra_reg_range[i+1] )
+ */
+ int class_to_ra_reg_range[17];
+
+ /**
* Mapping for register-allocated objects in *regs to the first
* GRF for that object.
*/
--
2.1.0
More information about the mesa-dev
mailing list