[Mesa-dev] [PATCH 27/57] i965/vec4: Drop backend_reg::in_range() in favor of regions_overlap().

Francisco Jerez currojerez at riseup.net
Fri Sep 9 20:12:53 UTC 2016


Iago Toral <itoral at igalia.com> writes:

> On Wed, 2016-09-07 at 18:48 -0700, Francisco Jerez wrote:
>> This makes sure that overlap checks are done correctly throughout the
>> back-end when the '*this' register starts before the register/size
>> pair provided as argument, and is actually less annoying to use than
>> in_range() at this point since regions_overlap() takes its size
>> arguments in bytes.
>> ---
>>  src/mesa/drivers/dri/i965/brw_shader.cpp                |  9 -----
>> ----
>>  src/mesa/drivers/dri/i965/brw_shader.h                  |  1 -
>>  src/mesa/drivers/dri/i965/brw_vec4.cpp                  | 14
>> ++++++++------
>>  src/mesa/drivers/dri/i965/brw_vec4_cmod_propagation.cpp |  4 ++--
>>  src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp |  4 ++--
>>  5 files changed, 12 insertions(+), 20 deletions(-)
>> 
>> diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp
>> b/src/mesa/drivers/dri/i965/brw_shader.cpp
>> index e599235..951e6b2 100644
>> --- a/src/mesa/drivers/dri/i965/brw_shader.cpp
>> +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
>> @@ -745,15 +745,6 @@ backend_reg::is_accumulator() const
>>  }
>>  
>>  bool
>> -backend_reg::in_range(const backend_reg &r, unsigned n) const
>> -{
>> -   return (file == r.file &&
>> -           nr == r.nr &&
>> -           offset >= r.offset &&
>> -           offset < r.offset + n * REG_SIZE);
>> -}
>> -
>> -bool
>>  backend_instruction::is_commutative() const
>>  {
>>     switch (opcode) {
>> diff --git a/src/mesa/drivers/dri/i965/brw_shader.h
>> b/src/mesa/drivers/dri/i965/brw_shader.h
>> index 0de0808..ba2404a 100644
>> --- a/src/mesa/drivers/dri/i965/brw_shader.h
>> +++ b/src/mesa/drivers/dri/i965/brw_shader.h
>> @@ -62,7 +62,6 @@ struct backend_reg : private brw_reg
>>     bool is_negative_one() const;
>>     bool is_null() const;
>>     bool is_accumulator() const;
>> -   bool in_range(const backend_reg &r, unsigned n) const;
>>  
>>     /** Offset from the start of the (virtual) register in bytes. */
>>     uint16_t offset;
>> diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp
>> b/src/mesa/drivers/dri/i965/brw_vec4.cpp
>> index 561170c..eb2888f 100644
>> --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
>> +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
>> @@ -1143,7 +1143,8 @@ vec4_visitor::opt_register_coalesce()
>>                                                    inst) {
>>           _scan_inst = scan_inst;
>>  
>> -         if (inst->src[0].in_range(scan_inst->dst,
>> DIV_ROUND_UP(scan_inst->size_written, REG_SIZE))) {
>> +         if (regions_overlap(inst->src[0], inst->size_read(0),
>> +                             scan_inst->dst, scan_inst-
>> >size_written)) {
>>              /* Found something writing to the reg we want to
>> coalesce away. */
>>              if (to_mrf) {
>>                 /* SEND instructions can't have MRF as a destination.
>> */
>> @@ -1197,8 +1198,8 @@ vec4_visitor::opt_register_coalesce()
>>            */
>>  	 bool interfered = false;
>>  	 for (int i = 0; i < 3; i++) {
>> -            if (inst->src[0].in_range(scan_inst->src[i],
>> -                                      DIV_ROUND_UP(scan_inst-
>> >size_read(i), REG_SIZE)))
>> +            if (regions_overlap(inst->src[0], inst->size_read(0),
>> +                                scan_inst->src[i], scan_inst-
>> >size_read(i)))
>>  	       interfered = true;
>>  	 }
>>  	 if (interfered)
>> @@ -1207,7 +1208,8 @@ vec4_visitor::opt_register_coalesce()
>>           /* If somebody else writes the same channels of our
>> destination here,
>>            * we can't coalesce before that.
>>            */
>> -         if (inst->dst.in_range(scan_inst->dst,
>> DIV_ROUND_UP(scan_inst->size_written, REG_SIZE)) &&
>> +         if (regions_overlap(inst->dst, inst->size_written,
>> +                             scan_inst->dst, scan_inst-
>> >size_written) &&
>>               (inst->dst.writemask & scan_inst->dst.writemask) != 0)
>> {
>>              break;
>>           }
>> @@ -1223,8 +1225,8 @@ vec4_visitor::opt_register_coalesce()
>>              }
>>           } else {
>>              for (int i = 0; i < 3; i++) {
>> -               if (inst->dst.in_range(scan_inst->src[i],
>> -                                      DIV_ROUND_UP(scan_inst-
>> >size_read(i), REG_SIZE)))
>> +               if (regions_overlap(inst->dst, inst->size_written,
>> +                                   scan_inst->src[i], scan_inst-
>> >size_read(i)))
>>                    interfered = true;
>>              }
>>              if (interfered)
>> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_cmod_propagation.cpp
>> b/src/mesa/drivers/dri/i965/brw_vec4_cmod_propagation.cpp
>> index e74bc15..9977317 100644
>> --- a/src/mesa/drivers/dri/i965/brw_vec4_cmod_propagation.cpp
>> +++ b/src/mesa/drivers/dri/i965/brw_vec4_cmod_propagation.cpp
>> @@ -68,8 +68,8 @@ opt_cmod_propagation_local(bblock_t *block)
>>  
>>        bool read_flag = false;
>>        foreach_inst_in_block_reverse_starting_from(vec4_instruction,
>> scan_inst, inst) {
>> -         if (inst->src[0].in_range(scan_inst->dst,
>> -                                   DIV_ROUND_UP(scan_inst-
>> >size_written, REG_SIZE))) {
>> +         if (regions_overlap(inst->src[0], inst->size_read(0),
>> +                             scan_inst->dst, scan_inst-
>> >size_written)) {
>>              if ((scan_inst->predicate && scan_inst->opcode !=
>> BRW_OPCODE_SEL) ||
>>                  scan_inst->dst.offset / REG_SIZE != inst-
>> >src[0].offset / REG_SIZE ||
>>                  (scan_inst->dst.writemask != WRITEMASK_X &&
>> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
>> b/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
>> index 777d252..fe76dea 100644
>> --- a/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
>> +++ b/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
>> @@ -72,8 +72,8 @@ is_channel_updated(vec4_instruction *inst, src_reg
>> *values[4], int ch)
>>     if (!src || src->file != VGRF)
>>        return false;
>>  
>> -   return (src->in_range(inst->dst, DIV_ROUND_UP(inst->size_written, 
>> REG_SIZE)) &&
>> -           inst->dst.writemask & (1 << BRW_GET_SWZ(src->swizzle,
>> ch)));
>> +   return regions_overlap(*src, REG_SIZE, inst->dst, inst-
>> >size_written) &&
>> +          inst->dst.writemask & (1 << BRW_GET_SWZ(src->swizzle,
>> ch));
>
> I suppose this (assuming that src always reads REG_SIZE) can be bogus,
> at least for fp64 programs, right? In any case, this would not be a
> problem introduced by this patch since the previous implementation
> would fail in the same case.
>
> If I am right, then I suppose that in order to fix this we would need
> to store the real size read for each value in copy_entry.
>
Yeah, that's very definitely bogus for both non-32b types or non-32B
copies.  Storing the size or number of components copied in copy_entry
may be useful but you'll likely also have to check the type sizes in
order to fix it properly.

>>  }
>>  
>>  static bool
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 212 bytes
Desc: not available
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20160909/7223e3a5/attachment-0001.sig>


More information about the mesa-dev mailing list