[Mesa-dev] [PATCH 08/15] i965/vec4: Use MOV_INDIRECT instead of reladdr for indirect push constants

Jason Ekstrand jason at jlekstrand.net
Thu Dec 10 11:27:50 PST 2015


On Thu, Dec 10, 2015 at 1:25 AM, Michael Schellenberger Costa
<mschellenbergercosta at googlemail.com> wrote:
> Hi Jason,

Hi!  Please remember to reply-all so it goes to the list. :-)

> Am 10.12.2015 um 05:23 schrieb Jason Ekstrand:
>> This commit moves us to an instruction based model rather than a
>> register-based model for indirects.  This is more accurate anyway as we
>> have to emit instructions to resolve the reladdr.  It's also a lot simpler
>> because it gets rid of the recursive reladdr problem by design.
>>
>> One side-effect of this is that we need a whole new algorithm in
>> move_uniform_array_access_to_pull_constants.  This new algorithm is much
>> more straightforward than the old one and is fairly similar to what we're
>> already doing in the FS backend.
>> ---
>>  src/mesa/drivers/dri/i965/brw_vec4.cpp         |  2 +-
>>  src/mesa/drivers/dri/i965/brw_vec4.h           |  3 +-
>>  src/mesa/drivers/dri/i965/brw_vec4_nir.cpp     | 10 +--
>>  src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 86 ++++++++++++--------------
>>  4 files changed, 50 insertions(+), 51 deletions(-)
>>
>> diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
>> index a697bdf..e4a405b 100644
>> --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
>> +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
>> @@ -775,7 +775,7 @@ vec4_visitor::move_push_constants_to_pull_constants()
>>        dst_reg temp = dst_reg(this, glsl_type::vec4_type);
>>
>>        emit_pull_constant_load(block, inst, temp, inst->src[i],
>> -                              pull_constant_loc[uniform]);
>> +                              pull_constant_loc[uniform], src_reg());
>>
>>        inst->src[i].file = temp.file;
>>           inst->src[i].nr = temp.nr;
>> diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
>> index f2e5ce1..e6d6c82 100644
>> --- a/src/mesa/drivers/dri/i965/brw_vec4.h
>> +++ b/src/mesa/drivers/dri/i965/brw_vec4.h
>> @@ -293,7 +293,8 @@ public:
>>     void emit_pull_constant_load(bblock_t *block, vec4_instruction *inst,
>>                               dst_reg dst,
>>                               src_reg orig_src,
>> -                             int base_offset);
>> +                             int base_offset,
>> +                                src_reg indirect);
>>     void emit_pull_constant_load_reg(dst_reg dst,
>>                                      src_reg surf_index,
>>                                      src_reg offset,
>> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
>> index f965b39..58b6612 100644
>> --- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
>> +++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
>> @@ -673,12 +673,14 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
>>           /* Offsets are in bytes but they should always be multiples of 16 */
>>           assert(const_offset->u[0] % 16 == 0);
>>           src.reg_offset = const_offset->u[0] / 16;
>> +
>> +         emit(MOV(dest, src));
>>        } else {
>> -         src_reg tmp = get_nir_src(instr->src[0], BRW_REGISTER_TYPE_D, 1);
>> -         src.reladdr = new(mem_ctx) src_reg(tmp);
>> -      }
>> +         src_reg indirect = get_nir_src(instr->src[0], BRW_REGISTER_TYPE_UD, 1);
>>
>> -      emit(MOV(dest, src));
>> +         emit(SHADER_OPCODE_MOV_INDIRECT, dest, src,
>> +              indirect, brw_imm_ud(instr->const_index[1]));
>> +      }
>>        break;
>>     }
>>
>> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
>> index 7712d34..e7ab536 100644
>> --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
>> +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
>> @@ -1641,16 +1641,16 @@ vec4_visitor::move_grf_array_access_to_scratch()
>>  void
>>  vec4_visitor::emit_pull_constant_load(bblock_t *block, vec4_instruction *inst,
>>                                     dst_reg temp, src_reg orig_src,
>> -                                   int base_offset)
>> +                                   int base_offset, src_reg indirect)
>>  {
>>     int reg_offset = base_offset + orig_src.reg_offset;
>>     const unsigned index = prog_data->base.binding_table.pull_constants_start;
>>
>>     src_reg offset;
>> -   if (orig_src.reladdr) {
>> +   if (indirect.file != BAD_FILE) {
>>        offset = src_reg(this, glsl_type::int_type);
>>
>> -      emit_before(block, inst, ADD(dst_reg(offset), *orig_src.reladdr,
>> +      emit_before(block, inst, ADD(dst_reg(offset), indirect,
>>                                     brw_imm_d(reg_offset * 16)));
>>     } else if (devinfo->gen >= 8) {
>>        /* Store the offset in a GRF so we can send-from-GRF. */
>> @@ -1685,59 +1685,55 @@ vec4_visitor::move_uniform_array_access_to_pull_constants()
>>  {
>>     int pull_constant_loc[this->uniforms];
>>     memset(pull_constant_loc, -1, sizeof(pull_constant_loc));
>> -   bool nested_reladdr;
>>
>> -   /* Walk through and find array access of uniforms.  Put a copy of that
>> -    * uniform in the pull constant buffer.
>> -    *
>> -    * Note that we don't move constant-indexed accesses to arrays.  No
>> -    * testing has been done of the performance impact of this choice.
>> +   /* First, walk through the instructions and determine which things need to
>> +    * be pulled.  We mark something as needing to bye pulled by setting
>
> to be pulled

Thanks.  Fixed!

>> +    * pull_constant_loc to 0.
>>      */
>> -   do {
>> -      nested_reladdr = false;
>> -
>> -      foreach_block_and_inst_safe(block, vec4_instruction, inst, cfg) {
>> -         for (int i = 0 ; i < 3; i++) {
>> -            if (inst->src[i].file != UNIFORM || !inst->src[i].reladdr)
>> -               continue;
>> +   foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
>> +      /* We only care about MOV_INDIRECT of a uniform */
>> +      if (inst->opcode != SHADER_OPCODE_MOV_INDIRECT ||
>> +          inst->src[0].file != UNIFORM)
>> +         continue;
>>
>> -            int uniform = inst->src[i].nr;
>> +      int uniform_nr = inst->src[0].nr + inst->src[0].reg_offset;
>
> In 03/15 you have
> unsigned location = inst->src[i].nr + inst->src[i].reg_offset;
>
> Maybe use the same type/name?

Yeah, we could.  We aren't terribly consistent about it.  In general
(I've edited most of the places recently), we use "uniform",
"location", and "constant_nr".  I can switch to one of the others if
you'd like, but we're not really consistent.

>>
>> -            if (inst->src[i].reladdr->reladdr)
>> -               nested_reladdr = true;  /* will need another pass */
>> +      for (unsigned j = 0; j < DIV_ROUND_UP(inst->src[2].ud, 16); j++)
>> +         pull_constant_loc[uniform_nr + j] = 0;
>> +   }
>>
>> -            /* If this array isn't already present in the pull constant buffer,
>> -             * add it.
>> -             */
>> -            if (pull_constant_loc[uniform] == -1) {
>> -               const gl_constant_value **values =
>> -                  &stage_prog_data->param[uniform * 4];
>> +   /* Next, we walk the list of uniforms and assign real pull constant
>> +    * locations and set their corresponding entries in pull_param.
>> +    */
>> +   for (int j = 0; j < this->uniforms; j++) {
>> +      if (pull_constant_loc[j] < 0)
>> +         continue;
>>
>> -               pull_constant_loc[uniform] = stage_prog_data->nr_pull_params / 4;
>> +      pull_constant_loc[j] = stage_prog_data->nr_pull_params / 4;
>>
>> -               assert(uniform < uniform_array_size);
>> -               for (int j = 0; j < uniform_size[uniform] * 4; j++) {
>> -                  stage_prog_data->pull_param[stage_prog_data->nr_pull_params++]
>> -                     = values[j];
>> -               }
>> -            }
>> +      for (int i = 0; i < 4; i++) {
> Is that 4 correct?

Yes, each uniform is a vec4 so it takes up 4 slots.
--Jason

> Michael
>> +         stage_prog_data->pull_param[stage_prog_data->nr_pull_params++]
>> +            = stage_prog_data->param[j * 4 + i];
>> +      }
>> +   }
>>
>> -            /* Set up the annotation tracking for new generated instructions. */
>> -            base_ir = inst->ir;
>> -            current_annotation = inst->annotation;
>> +   /* Finally, we can walk through the instructions and lower MOV_INDIRECT
>> +    * instructions to actual uniform pulls.
>> +    */
>> +   foreach_block_and_inst_safe(block, vec4_instruction, inst, cfg) {
>> +      /* We only care about MOV_INDIRECT of a uniform */
>> +      if (inst->opcode != SHADER_OPCODE_MOV_INDIRECT ||
>> +          inst->src[0].file != UNIFORM)
>> +         continue;
>>
>> -            dst_reg temp = dst_reg(this, glsl_type::vec4_type);
>> +      int uniform_nr = inst->src[0].nr + inst->src[0].reg_offset;
>>
>> -            emit_pull_constant_load(block, inst, temp, inst->src[i],
>> -                                    pull_constant_loc[uniform]);
>> +      assert(inst->src[0].swizzle == BRW_SWIZZLE_NOOP);
>>
>> -            inst->src[i].file = temp.file;
>> -            inst->src[i].nr = temp.nr;
>> -            inst->src[i].reg_offset = temp.reg_offset;
>> -            inst->src[i].reladdr = NULL;
>> -         }
>> -      }
>> -   } while (nested_reladdr);
>> +      emit_pull_constant_load(block, inst, inst->dst, inst->src[0],
>> +                              pull_constant_loc[uniform_nr], inst->src[1]);
>> +      inst->remove(block);
>> +   }
>>
>>     /* Now there are no accesses of the UNIFORM file with a reladdr, so
>>      * no need to track them as larger-than-vec4 objects.  This will be
>>


More information about the mesa-dev mailing list