[Mesa-stable] [PATCH v3 2/3] i965/fs: detect different bit size accesses to uniforms to push them in proper locations
Samuel Iglesias Gonsálvez
siglesias at igalia.com
Wed Mar 1 07:43:47 UTC 2017
On 27/02/17 22:02, Francisco Jerez wrote:
> Samuel Iglesias Gonsálvez <siglesias at igalia.com> writes:
>
>> Previously, if we had accesses with different sizes to the same uniform, we might not
>> push it aligned with the bigger one. This is a problem in BSW/BXT when we access
>> an array of DF uniform with both direct and indirect addressing because for the latter
>> we use 32-bit MOV INDIRECT instructions. However this problem can happen with other
>> generations and bitsizes.
>>
>> Signed-off-by: Samuel Iglesias Gonsálvez <siglesias at igalia.com>
>> Cc: "17.0" <mesa-stable at lists.freedesktop.org>
>> ---
>> src/mesa/drivers/dri/i965/brw_fs.cpp | 50 ++++++++++++++++++++++++------------
>> 1 file changed, 34 insertions(+), 16 deletions(-)
>>
>> diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
>> index c713caa9b6..68e73cc5cd 100644
>> --- a/src/mesa/drivers/dri/i965/brw_fs.cpp
>> +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
>> @@ -1853,7 +1853,10 @@ fs_visitor::compact_virtual_grfs()
>> }
>>
>> static void
>> -set_push_pull_constant_loc(unsigned uniform, int *chunk_start, bool contiguous,
>> +set_push_pull_constant_loc(unsigned uniform, int *chunk_start,
>> + unsigned *max_chunk_bitsize,
>> + bool contiguous, unsigned bitsize,
>> + const unsigned target_bitsize,
>> int *push_constant_loc, int *pull_constant_loc,
>> unsigned *num_push_constants,
>> unsigned *num_pull_constants,
>
> I still feel like this function is growing over the top trying to
> achieve multiple unrelated things in one place, but if you just want to
> shuffle things around as little as possible for mesa-stable, fine:
>
Yes, that was my intention.
> Reviewed-by: Francisco Jerez <currojerez at riseup.net>
>
Thanks!
Sam
>> @@ -1865,11 +1868,23 @@ set_push_pull_constant_loc(unsigned uniform, int *chunk_start, bool contiguous,
>> if (*chunk_start < 0)
>> *chunk_start = uniform;
>>
>> + /* Keep track of the maximum bit size access in contiguous uniforms */
>> + *max_chunk_bitsize = MAX2(*max_chunk_bitsize, bitsize);
>> +
>> /* If this element does not need to be contiguous with the next, we
>> * split at this point and everything between chunk_start and u forms a
>> * single chunk.
>> */
>> if (!contiguous) {
>> + /* If bitsize doesn't match the target one, skip it */
>> + if (*max_chunk_bitsize != target_bitsize) {
>> + /* FIXME: right now we only support 32 and 64-bit accesses */
>> + assert(*max_chunk_bitsize == 4 || *max_chunk_bitsize == 8);
>> + *max_chunk_bitsize = 0;
>> + *chunk_start = -1;
>> + return;
>> + }
>> +
>> unsigned chunk_size = uniform - *chunk_start + 1;
>>
>> /* Decide whether we should push or pull this parameter. In the
>> @@ -1887,6 +1902,7 @@ set_push_pull_constant_loc(unsigned uniform, int *chunk_start, bool contiguous,
>> pull_constant_loc[j] = (*num_pull_constants)++;
>> }
>>
>> + *max_chunk_bitsize = 0;
>> *chunk_start = -1;
>> }
>> }
>> @@ -1909,8 +1925,8 @@ fs_visitor::assign_constant_locations()
>>
>> bool is_live[uniforms];
>> memset(is_live, 0, sizeof(is_live));
>> - bool is_live_64bit[uniforms];
>> - memset(is_live_64bit, 0, sizeof(is_live_64bit));
>> + unsigned bitsize_access[uniforms];
>> + memset(bitsize_access, 0, sizeof(bitsize_access));
>>
>> /* For each uniform slot, a value of true indicates that the given slot and
>> * the next slot must remain contiguous. This is used to keep us from
>> @@ -1947,23 +1963,18 @@ fs_visitor::assign_constant_locations()
>> for (unsigned j = constant_nr; j < last; j++) {
>> is_live[j] = true;
>> contiguous[j] = true;
>> - if (type_sz(inst->src[i].type) == 8) {
>> - is_live_64bit[j] = true;
>> - }
>> + bitsize_access[j] = MAX2(bitsize_access[j], type_sz(inst->src[i].type));
>> }
>> is_live[last] = true;
>> - if (type_sz(inst->src[i].type) == 8) {
>> - is_live_64bit[last] = true;
>> - }
>> + bitsize_access[last] = MAX2(bitsize_access[last], type_sz(inst->src[i].type));
>> } else {
>> if (constant_nr >= 0 && constant_nr < (int) uniforms) {
>> int regs_read = inst->components_read(i) *
>> type_sz(inst->src[i].type) / 4;
>> for (int j = 0; j < regs_read; j++) {
>> is_live[constant_nr + j] = true;
>> - if (type_sz(inst->src[i].type) == 8) {
>> - is_live_64bit[constant_nr + j] = true;
>> - }
>> + bitsize_access[constant_nr + j] =
>> + MAX2(bitsize_access[constant_nr + j], type_sz(inst->src[i].type));
>> }
>> }
>> }
>> @@ -2002,13 +2013,17 @@ fs_visitor::assign_constant_locations()
>> memset(pull_constant_loc, -1, uniforms * sizeof(*pull_constant_loc));
>>
>> int chunk_start = -1;
>> + unsigned max_chunk_bitsize = 0;
>>
>> /* First push 64-bit uniforms to ensure they are properly aligned */
>> + const unsigned uniform_64_bit_size = type_sz(BRW_REGISTER_TYPE_DF);
>> for (unsigned u = 0; u < uniforms; u++) {
>> - if (!is_live[u] || !is_live_64bit[u])
>> + if (!is_live[u])
>> continue;
>>
>> - set_push_pull_constant_loc(u, &chunk_start, contiguous[u],
>> + set_push_pull_constant_loc(u, &chunk_start, &max_chunk_bitsize,
>> + contiguous[u], bitsize_access[u],
>> + uniform_64_bit_size,
>> push_constant_loc, pull_constant_loc,
>> &num_push_constants, &num_pull_constants,
>> max_push_components, max_chunk_size,
>> @@ -2017,15 +2032,18 @@ fs_visitor::assign_constant_locations()
>> }
>>
>> /* Then push the rest of uniforms */
>> + const unsigned uniform_32_bit_size = type_sz(BRW_REGISTER_TYPE_F);
>> for (unsigned u = 0; u < uniforms; u++) {
>> - if (!is_live[u] || is_live_64bit[u])
>> + if (!is_live[u])
>> continue;
>>
>> /* Skip thread_local_id_index to put it in the last push register. */
>> if (thread_local_id_index == (int)u)
>> continue;
>>
>> - set_push_pull_constant_loc(u, &chunk_start, contiguous[u],
>> + set_push_pull_constant_loc(u, &chunk_start, &max_chunk_bitsize,
>> + contiguous[u], bitsize_access[u],
>> + uniform_32_bit_size,
>> push_constant_loc, pull_constant_loc,
>> &num_push_constants, &num_pull_constants,
>> max_push_components, max_chunk_size,
>> --
>> 2.11.0
>>
>> _______________________________________________
>> mesa-stable mailing list
>> mesa-stable at lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/mesa-stable
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 862 bytes
Desc: OpenPGP digital signature
URL: <https://lists.freedesktop.org/archives/mesa-stable/attachments/20170301/e9aca9b8/attachment.sig>
More information about the mesa-stable
mailing list