[Mesa-dev] [PATCH 15/19] i965/fs: Support register coalescing on LOAD_PAYLOAD operands.
Kenneth Graunke
kenneth at whitecape.org
Sun Jun 1 00:01:50 PDT 2014
On Friday, April 18, 2014 11:56:51 AM Matt Turner wrote:
> ---
> .../drivers/dri/i965/brw_fs_register_coalesce.cpp | 59
++++++++++++++++++----
> 1 file changed, 49 insertions(+), 10 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp
b/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp
> index cb4ab57..a363c4c 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp
> @@ -46,7 +46,14 @@
> static bool
> is_nop_mov(const fs_inst *inst)
> {
> - if (inst->opcode == BRW_OPCODE_MOV) {
> + if (inst->opcode == SHADER_OPCODE_LOAD_PAYLOAD) {
> + for (int i = 0; i < inst->sources; i++) {
> + if (!inst->dst.equals(inst->src[i])) {
I don't think this is what you want. You're not altering dst.reg_offset,
here, so dst.reg_offset will presumably always be 0.
For dst.equals(inst->src[i]) to be true for every source, then
dst.reg == src[i].reg for all sources.
dst.reg_offset == src[i].reg_offset for all sources.
which means src[i].reg_offset == 0 for all sources.
In other words, I think this detects LOAD_PAYLOAD operations that splats the
first component of a size > 1 VGRF dst to all components.
That's definitely not a nop mov.
I think if you add dst.reg_offset++ on each iteration of the loop, you'll be
okay here.
> + return false;
> + }
> + }
> + return true;
> + } else if (inst->opcode == BRW_OPCODE_MOV) {
> return inst->dst.equals(inst->src[0]);
> }
>
> @@ -54,9 +61,26 @@ is_nop_mov(const fs_inst *inst)
> }
>
> static bool
> +is_copy_payload(const fs_inst *inst)
> +{
> + const int reg = inst->src[0].reg;
> + if (inst->src[0].reg_offset != 0)
> + return false;
This looks okay, for both src[0] == reg_undef and where it's an actual value.
> +
> + for (int i = 1; i < inst->sources; i++) {
> + if (inst->src[i].reg != reg ||
> + inst->src[i].reg_offset != i) {
> + return false;
> + }
> + }
> + return true;
> +}
> +
> +static bool
> is_coalesce_candidate(const fs_inst *inst, const int *virtual_grf_sizes)
> {
> - if (inst->opcode != BRW_OPCODE_MOV ||
> + if ((inst->opcode != BRW_OPCODE_MOV &&
> + inst->opcode != SHADER_OPCODE_LOAD_PAYLOAD) ||
> inst->is_partial_write() ||
> inst->saturate ||
> inst->src[0].file != GRF ||
> @@ -72,6 +96,12 @@ is_coalesce_candidate(const fs_inst *inst, const int
*virtual_grf_sizes)
> virtual_grf_sizes[inst->dst.reg])
> return false;
>
> + if (inst->opcode == SHADER_OPCODE_LOAD_PAYLOAD) {
> + if (!is_copy_payload(inst)) {
> + return false;
> + }
> + }
> +
> return true;
> }
>
> @@ -144,10 +174,18 @@ fs_visitor::register_coalesce()
> if (reg_to != inst->dst.reg)
> continue;
>
> - const int offset = inst->src[0].reg_offset;
> - reg_to_offset[offset] = inst->dst.reg_offset;
> - mov[offset] = inst;
> - channels_remaining--;
> + if (inst->opcode == SHADER_OPCODE_LOAD_PAYLOAD) {
> + for (int i = 0; i < src_size; i++) {
Isn't src_size always 1 for LOAD_PAYLOAD? Earlier:
src_size = virtual_grf_sizes[inst->src[0].reg];
Presumably you want inst->sources here...
> + reg_to_offset[i] = i;
> + }
> + mov[0] = inst;
> + channels_remaining -= inst->sources;
> + } else {
> + const int offset = inst->src[0].reg_offset;
> + reg_to_offset[offset] = inst->dst.reg_offset;
> + mov[offset] = inst;
> + channels_remaining--;
> + }
>
> if (channels_remaining)
> continue;
> @@ -169,15 +207,16 @@ fs_visitor::register_coalesce()
> continue;
>
> progress = true;
> + bool was_load_payload = inst->opcode == SHADER_OPCODE_LOAD_PAYLOAD;
>
> for (int i = 0; i < src_size; i++) {
> if (mov[i]) {
> mov[i]->opcode = BRW_OPCODE_NOP;
> mov[i]->conditional_mod = BRW_CONDITIONAL_NONE;
> mov[i]->dst = reg_undef;
> - mov[i]->src[0] = reg_undef;
> - mov[i]->src[1] = reg_undef;
> - mov[i]->src[2] = reg_undef;
> + for (int j = 0; j < mov[i]->sources; j++) {
> + mov[i]->src[j] = reg_undef;
> + }
> }
> }
>
> @@ -185,7 +224,7 @@ fs_visitor::register_coalesce()
> fs_inst *scan_inst = (fs_inst *)node;
>
> for (int i = 0; i < src_size; i++) {
> - if (mov[i]) {
> + if (mov[i] || was_load_payload) {
> if (scan_inst->dst.file == GRF &&
> scan_inst->dst.reg == reg_from &&
> scan_inst->dst.reg_offset == i) {
>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 836 bytes
Desc: This is a digitally signed message part.
URL: <http://lists.freedesktop.org/archives/mesa-dev/attachments/20140601/8a3fd73b/attachment-0001.sig>
More information about the mesa-dev
mailing list