[Mesa-dev] [PATCH 27/57] i965/vec4: Drop backend_reg::in_range() in favor of regions_overlap().
Francisco Jerez
currojerez at riseup.net
Fri Sep 9 20:12:53 UTC 2016
Iago Toral <itoral at igalia.com> writes:
> On Wed, 2016-09-07 at 18:48 -0700, Francisco Jerez wrote:
>> This makes sure that overlap checks are done correctly throughout the
>> back-end when the '*this' register starts before the register/size
>> pair provided as argument, and is actually less annoying to use than
>> in_range() at this point since regions_overlap() takes its size
>> arguments in bytes.
>> ---
>> src/mesa/drivers/dri/i965/brw_shader.cpp | 9 -----
>> ----
>> src/mesa/drivers/dri/i965/brw_shader.h | 1 -
>> src/mesa/drivers/dri/i965/brw_vec4.cpp | 14
>> ++++++++------
>> src/mesa/drivers/dri/i965/brw_vec4_cmod_propagation.cpp | 4 ++--
>> src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp | 4 ++--
>> 5 files changed, 12 insertions(+), 20 deletions(-)
>>
>> diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp
>> b/src/mesa/drivers/dri/i965/brw_shader.cpp
>> index e599235..951e6b2 100644
>> --- a/src/mesa/drivers/dri/i965/brw_shader.cpp
>> +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
>> @@ -745,15 +745,6 @@ backend_reg::is_accumulator() const
>> }
>>
>> bool
>> -backend_reg::in_range(const backend_reg &r, unsigned n) const
>> -{
>> - return (file == r.file &&
>> - nr == r.nr &&
>> - offset >= r.offset &&
>> - offset < r.offset + n * REG_SIZE);
>> -}
>> -
>> -bool
>> backend_instruction::is_commutative() const
>> {
>> switch (opcode) {
>> diff --git a/src/mesa/drivers/dri/i965/brw_shader.h
>> b/src/mesa/drivers/dri/i965/brw_shader.h
>> index 0de0808..ba2404a 100644
>> --- a/src/mesa/drivers/dri/i965/brw_shader.h
>> +++ b/src/mesa/drivers/dri/i965/brw_shader.h
>> @@ -62,7 +62,6 @@ struct backend_reg : private brw_reg
>> bool is_negative_one() const;
>> bool is_null() const;
>> bool is_accumulator() const;
>> - bool in_range(const backend_reg &r, unsigned n) const;
>>
>> /** Offset from the start of the (virtual) register in bytes. */
>> uint16_t offset;
>> diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp
>> b/src/mesa/drivers/dri/i965/brw_vec4.cpp
>> index 561170c..eb2888f 100644
>> --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
>> +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
>> @@ -1143,7 +1143,8 @@ vec4_visitor::opt_register_coalesce()
>> inst) {
>> _scan_inst = scan_inst;
>>
>> - if (inst->src[0].in_range(scan_inst->dst,
>> DIV_ROUND_UP(scan_inst->size_written, REG_SIZE))) {
>> + if (regions_overlap(inst->src[0], inst->size_read(0),
>> + scan_inst->dst, scan_inst-
>> >size_written)) {
>> /* Found something writing to the reg we want to
>> coalesce away. */
>> if (to_mrf) {
>> /* SEND instructions can't have MRF as a destination.
>> */
>> @@ -1197,8 +1198,8 @@ vec4_visitor::opt_register_coalesce()
>> */
>> bool interfered = false;
>> for (int i = 0; i < 3; i++) {
>> - if (inst->src[0].in_range(scan_inst->src[i],
>> - DIV_ROUND_UP(scan_inst-
>> >size_read(i), REG_SIZE)))
>> + if (regions_overlap(inst->src[0], inst->size_read(0),
>> + scan_inst->src[i], scan_inst-
>> >size_read(i)))
>> interfered = true;
>> }
>> if (interfered)
>> @@ -1207,7 +1208,8 @@ vec4_visitor::opt_register_coalesce()
>> /* If somebody else writes the same channels of our
>> destination here,
>> * we can't coalesce before that.
>> */
>> - if (inst->dst.in_range(scan_inst->dst,
>> DIV_ROUND_UP(scan_inst->size_written, REG_SIZE)) &&
>> + if (regions_overlap(inst->dst, inst->size_written,
>> + scan_inst->dst, scan_inst-
>> >size_written) &&
>> (inst->dst.writemask & scan_inst->dst.writemask) != 0)
>> {
>> break;
>> }
>> @@ -1223,8 +1225,8 @@ vec4_visitor::opt_register_coalesce()
>> }
>> } else {
>> for (int i = 0; i < 3; i++) {
>> - if (inst->dst.in_range(scan_inst->src[i],
>> - DIV_ROUND_UP(scan_inst-
>> >size_read(i), REG_SIZE)))
>> + if (regions_overlap(inst->dst, inst->size_written,
>> + scan_inst->src[i], scan_inst-
>> >size_read(i)))
>> interfered = true;
>> }
>> if (interfered)
>> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_cmod_propagation.cpp
>> b/src/mesa/drivers/dri/i965/brw_vec4_cmod_propagation.cpp
>> index e74bc15..9977317 100644
>> --- a/src/mesa/drivers/dri/i965/brw_vec4_cmod_propagation.cpp
>> +++ b/src/mesa/drivers/dri/i965/brw_vec4_cmod_propagation.cpp
>> @@ -68,8 +68,8 @@ opt_cmod_propagation_local(bblock_t *block)
>>
>> bool read_flag = false;
>> foreach_inst_in_block_reverse_starting_from(vec4_instruction,
>> scan_inst, inst) {
>> - if (inst->src[0].in_range(scan_inst->dst,
>> - DIV_ROUND_UP(scan_inst-
>> >size_written, REG_SIZE))) {
>> + if (regions_overlap(inst->src[0], inst->size_read(0),
>> + scan_inst->dst, scan_inst-
>> >size_written)) {
>> if ((scan_inst->predicate && scan_inst->opcode !=
>> BRW_OPCODE_SEL) ||
>> scan_inst->dst.offset / REG_SIZE != inst-
>> >src[0].offset / REG_SIZE ||
>> (scan_inst->dst.writemask != WRITEMASK_X &&
>> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
>> b/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
>> index 777d252..fe76dea 100644
>> --- a/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
>> +++ b/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
>> @@ -72,8 +72,8 @@ is_channel_updated(vec4_instruction *inst, src_reg
>> *values[4], int ch)
>> if (!src || src->file != VGRF)
>> return false;
>>
>> - return (src->in_range(inst->dst, DIV_ROUND_UP(inst->size_written,
>> REG_SIZE)) &&
>> - inst->dst.writemask & (1 << BRW_GET_SWZ(src->swizzle,
>> ch)));
>> + return regions_overlap(*src, REG_SIZE, inst->dst, inst-
>> >size_written) &&
>> + inst->dst.writemask & (1 << BRW_GET_SWZ(src->swizzle,
>> ch));
>
> I suppose this (assuming that src always reads REG_SIZE) can be bogus,
> at least for fp64 programs, right? In any case, this would not be a
> problem introduced by this patch since the previous implementation
> would fail in the same case.
>
> If I am right, then I suppose that in order to fix this we would need
> to store the real size read for each value in copy_entry.
>
Yeah, that's very definitely bogus for both non-32b types or non-32B
copies. Storing the size or number of components copied in copy_entry
may be useful but you'll likely also have to check the type sizes in
order to fix it properly.
>> }
>>
>> static bool
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 212 bytes
Desc: not available
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20160909/7223e3a5/attachment-0001.sig>
More information about the mesa-dev
mailing list