[Mesa-stable] [PATCH v3 2/3] i965/fs: detect different bit size accesses to uniforms to push them in proper locations
Samuel Iglesias Gonsálvez
siglesias at igalia.com
Fri Feb 24 13:35:57 UTC 2017
Previously, if we had accesses with different sizes to the same uniform, we might not
push it aligned with the bigger one. This is a problem in BSW/BXT when we access
an array of DF uniform with both direct and indirect addressing because for the latter
we use 32-bit MOV INDIRECT instructions. However this problem can happen with other
generations and bitsizes.
Signed-off-by: Samuel Iglesias Gonsálvez <siglesias at igalia.com>
Cc: "17.0" <mesa-stable at lists.freedesktop.org>
---
src/mesa/drivers/dri/i965/brw_fs.cpp | 50 ++++++++++++++++++++++++------------
1 file changed, 34 insertions(+), 16 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index c713caa9b6..68e73cc5cd 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -1853,7 +1853,10 @@ fs_visitor::compact_virtual_grfs()
}
static void
-set_push_pull_constant_loc(unsigned uniform, int *chunk_start, bool contiguous,
+set_push_pull_constant_loc(unsigned uniform, int *chunk_start,
+ unsigned *max_chunk_bitsize,
+ bool contiguous, unsigned bitsize,
+ const unsigned target_bitsize,
int *push_constant_loc, int *pull_constant_loc,
unsigned *num_push_constants,
unsigned *num_pull_constants,
@@ -1865,11 +1868,23 @@ set_push_pull_constant_loc(unsigned uniform, int *chunk_start, bool contiguous,
if (*chunk_start < 0)
*chunk_start = uniform;
+ /* Keep track of the maximum bit size access in contiguous uniforms */
+ *max_chunk_bitsize = MAX2(*max_chunk_bitsize, bitsize);
+
/* If this element does not need to be contiguous with the next, we
* split at this point and everything between chunk_start and u forms a
* single chunk.
*/
if (!contiguous) {
+ /* If bitsize doesn't match the target one, skip it */
+ if (*max_chunk_bitsize != target_bitsize) {
+ /* FIXME: right now we only support 32 and 64-bit accesses */
+ assert(*max_chunk_bitsize == 4 || *max_chunk_bitsize == 8);
+ *max_chunk_bitsize = 0;
+ *chunk_start = -1;
+ return;
+ }
+
unsigned chunk_size = uniform - *chunk_start + 1;
/* Decide whether we should push or pull this parameter. In the
@@ -1887,6 +1902,7 @@ set_push_pull_constant_loc(unsigned uniform, int *chunk_start, bool contiguous,
pull_constant_loc[j] = (*num_pull_constants)++;
}
+ *max_chunk_bitsize = 0;
*chunk_start = -1;
}
}
@@ -1909,8 +1925,8 @@ fs_visitor::assign_constant_locations()
bool is_live[uniforms];
memset(is_live, 0, sizeof(is_live));
- bool is_live_64bit[uniforms];
- memset(is_live_64bit, 0, sizeof(is_live_64bit));
+ unsigned bitsize_access[uniforms];
+ memset(bitsize_access, 0, sizeof(bitsize_access));
/* For each uniform slot, a value of true indicates that the given slot and
* the next slot must remain contiguous. This is used to keep us from
@@ -1947,23 +1963,18 @@ fs_visitor::assign_constant_locations()
for (unsigned j = constant_nr; j < last; j++) {
is_live[j] = true;
contiguous[j] = true;
- if (type_sz(inst->src[i].type) == 8) {
- is_live_64bit[j] = true;
- }
+ bitsize_access[j] = MAX2(bitsize_access[j], type_sz(inst->src[i].type));
}
is_live[last] = true;
- if (type_sz(inst->src[i].type) == 8) {
- is_live_64bit[last] = true;
- }
+ bitsize_access[last] = MAX2(bitsize_access[last], type_sz(inst->src[i].type));
} else {
if (constant_nr >= 0 && constant_nr < (int) uniforms) {
int regs_read = inst->components_read(i) *
type_sz(inst->src[i].type) / 4;
for (int j = 0; j < regs_read; j++) {
is_live[constant_nr + j] = true;
- if (type_sz(inst->src[i].type) == 8) {
- is_live_64bit[constant_nr + j] = true;
- }
+ bitsize_access[constant_nr + j] =
+ MAX2(bitsize_access[constant_nr + j], type_sz(inst->src[i].type));
}
}
}
@@ -2002,13 +2013,17 @@ fs_visitor::assign_constant_locations()
memset(pull_constant_loc, -1, uniforms * sizeof(*pull_constant_loc));
int chunk_start = -1;
+ unsigned max_chunk_bitsize = 0;
/* First push 64-bit uniforms to ensure they are properly aligned */
+ const unsigned uniform_64_bit_size = type_sz(BRW_REGISTER_TYPE_DF);
for (unsigned u = 0; u < uniforms; u++) {
- if (!is_live[u] || !is_live_64bit[u])
+ if (!is_live[u])
continue;
- set_push_pull_constant_loc(u, &chunk_start, contiguous[u],
+ set_push_pull_constant_loc(u, &chunk_start, &max_chunk_bitsize,
+ contiguous[u], bitsize_access[u],
+ uniform_64_bit_size,
push_constant_loc, pull_constant_loc,
&num_push_constants, &num_pull_constants,
max_push_components, max_chunk_size,
@@ -2017,15 +2032,18 @@ fs_visitor::assign_constant_locations()
}
/* Then push the rest of uniforms */
+ const unsigned uniform_32_bit_size = type_sz(BRW_REGISTER_TYPE_F);
for (unsigned u = 0; u < uniforms; u++) {
- if (!is_live[u] || is_live_64bit[u])
+ if (!is_live[u])
continue;
/* Skip thread_local_id_index to put it in the last push register. */
if (thread_local_id_index == (int)u)
continue;
- set_push_pull_constant_loc(u, &chunk_start, contiguous[u],
+ set_push_pull_constant_loc(u, &chunk_start, &max_chunk_bitsize,
+ contiguous[u], bitsize_access[u],
+ uniform_32_bit_size,
push_constant_loc, pull_constant_loc,
&num_push_constants, &num_pull_constants,
max_push_components, max_chunk_size,
--
2.11.0
More information about the mesa-stable
mailing list