[Mesa-dev] [PATCH 03/15] i965/fs: Use MOV_INDIRECT for all indirect uniform loads

Jason Ekstrand jason at jlekstrand.net
Thu Dec 10 11:23:09 PST 2015


On Wed, Dec 9, 2015 at 8:33 PM, Matt Turner <mattst88 at gmail.com> wrote:
> On Wed, Dec 9, 2015 at 8:23 PM, Jason Ekstrand <jason at jlekstrand.net> wrote:
>> Instead of using reladdr, this commit changes the FS backend to emit a
>> MOV_INDIRECT whenever we need an indirect uniform load.  We also have to
>> rework some of the other bits of the backend to handle this new form of
>> uniform load.  The obvious change is that demote_pull_constants now acts
>> more like a lowering pass when it hits a MOV_INDIRECT.
>> ---
>>  src/mesa/drivers/dri/i965/brw_fs.cpp     | 72 +++++++++++++++++++-------------
>>  src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 53 ++++++++++++++++++-----
>>  2 files changed, 86 insertions(+), 39 deletions(-)
>>
>> diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
>> index bf446d2..7cc03c5 100644
>> --- a/src/mesa/drivers/dri/i965/brw_fs.cpp
>> +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
>> @@ -1945,8 +1945,8 @@ fs_visitor::assign_constant_locations()
>>           if (inst->src[i].file != UNIFORM)
>>              continue;
>>
>> -         if (inst->src[i].reladdr) {
>> -            int uniform = inst->src[i].nr;
>> +         if (inst->opcode == SHADER_OPCODE_MOV_INDIRECT && i == 0) {
>> +            int uniform = inst->src[0].nr;
>>
>>              /* If this array isn't already present in the pull constant buffer,
>>               * add it.
>> @@ -2028,49 +2028,63 @@ fs_visitor::assign_constant_locations()
>>  void
>>  fs_visitor::demote_pull_constants()
>>  {
>> -   foreach_block_and_inst (block, fs_inst, inst, cfg) {
>> +   const unsigned index = stage_prog_data->binding_table.pull_constants_start;
>> +
>> +   foreach_block_and_inst_safe (block, fs_inst, inst, cfg) {
>> +      /* Set up the annotation tracking for new generated instructions. */
>> +      const fs_builder ibld(this, block, inst);
>> +
>>        for (int i = 0; i < inst->sources; i++) {
>>          if (inst->src[i].file != UNIFORM)
>>             continue;
>>
>> -         int pull_index;
>> +         /* We'll handle this case later */
>> +         if (inst->opcode == SHADER_OPCODE_MOV_INDIRECT && i == 0)
>> +            continue;
>> +
>>           unsigned location = inst->src[i].nr + inst->src[i].reg_offset;
>> -         if (location >= uniforms) /* Out of bounds access */
>> -            pull_index = -1;
>> -         else
>> -            pull_index = pull_constant_loc[location];
>> +         if (location >= uniforms)
>> +            continue; /* Out of bounds access */
>> +
>> +         int pull_index = pull_constant_loc[location];
>>
>>           if (pull_index == -1)
>>             continue;
>>
>> -         /* Set up the annotation tracking for new generated instructions. */
>> -         const fs_builder ibld(this, block, inst);
>> -         const unsigned index = stage_prog_data->binding_table.pull_constants_start;
>> -         fs_reg dst = vgrf(glsl_type::float_type);
>> -
>>           assert(inst->src[i].stride == 0);
>>
>> -         /* Generate a pull load into dst. */
>> -         if (inst->src[i].reladdr) {
>> -            VARYING_PULL_CONSTANT_LOAD(ibld, dst,
>> -                                       brw_imm_ud(index),
>> -                                       *inst->src[i].reladdr,
>> -                                       pull_index * 4);
>> -            inst->src[i].reladdr = NULL;
>> -            inst->src[i].stride = 1;
>> -         } else {
>> -            const fs_builder ubld = ibld.exec_all().group(8, 0);
>> -            struct brw_reg offset = brw_imm_ud((unsigned)(pull_index * 4) & ~15);
>> -            ubld.emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,
>> -                      dst, brw_imm_ud(index), offset);
>> -            inst->src[i].set_smear(pull_index & 3);
>> -         }
>> -         brw_mark_surface_used(prog_data, index);
>> +         fs_reg dst = vgrf(glsl_type::float_type);
>> +         const fs_builder ubld = ibld.exec_all().group(8, 0);
>> +         struct brw_reg offset = brw_imm_ud((unsigned)(pull_index * 4) & ~15);
>> +         ubld.emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,
>> +                   dst, brw_imm_ud(index), offset);
>>
>>           /* Rewrite the instruction to use the temporary VGRF. */
>>           inst->src[i].file = VGRF;
>>           inst->src[i].nr = dst.nr;
>>           inst->src[i].reg_offset = 0;
>> +         inst->src[i].set_smear(pull_index & 3);
>> +
>> +         brw_mark_surface_used(prog_data, index);
>> +      }
>> +
>> +      if (inst->opcode == SHADER_OPCODE_MOV_INDIRECT &&
>> +          inst->src[0].file == UNIFORM) {
>> +
>> +         unsigned location = inst->src[0].nr + inst->src[0].reg_offset;
>> +         if (location >= uniforms)
>> +            continue; /* Out of bounds access */
>> +
>> +         int pull_index = pull_constant_loc[location];
>> +         assert(pull_index >= 0); /* This had better be pull */
>> +
>> +         VARYING_PULL_CONSTANT_LOAD(ibld, inst->dst,
>> +                                    brw_imm_ud(index),
>> +                                    inst->src[1],
>> +                                    pull_index * 4);
>> +         inst->remove(block);
>> +
>> +         brw_mark_surface_used(prog_data, index);
>>        }
>>     }
>>     invalidate_live_intervals();
>> diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
>> index 15d9b1c..bf239c3 100644
>> --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
>> +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
>> @@ -1136,6 +1136,8 @@ fs_visitor::get_nir_image_deref(const nir_deref_var *deref)
>>  {
>>     fs_reg image(UNIFORM, deref->var->data.driver_location / 4,
>>                  BRW_REGISTER_TYPE_UD);
>> +   fs_reg indirect;
>> +   unsigned indirect_max = 0;
>>
>>     for (const nir_deref *tail = &deref->deref; tail->child;
>>          tail = tail->child) {
>> @@ -1147,7 +1149,7 @@ fs_visitor::get_nir_image_deref(const nir_deref_var *deref)
>>        image = offset(image, bld, base * element_size);
>>
>>        if (deref_array->deref_array_type == nir_deref_array_type_indirect) {
>> -         fs_reg tmp = vgrf(glsl_type::int_type);
>> +         fs_reg tmp = vgrf(glsl_type::uint_type);
>>
>>           if (devinfo->gen == 7 && !devinfo->is_haswell) {
>>              /* IVB hangs when trying to access an invalid surface index with
>> @@ -1166,14 +1168,29 @@ fs_visitor::get_nir_image_deref(const nir_deref_var *deref)
>>           }
>>
>>           bld.MUL(tmp, tmp, brw_imm_ud(element_size * 4));
>> -         if (image.reladdr)
>> -            bld.ADD(*image.reladdr, *image.reladdr, tmp);
>> -         else
>> -            image.reladdr = new(mem_ctx) fs_reg(tmp);
>> +         if (indirect.file == BAD_FILE) {
>> +            indirect = tmp;
>> +            indirect_max = type_size_scalar(tail->type) - BRW_IMAGE_PARAM_SIZE;
>> +         } else {
>> +            bld.ADD(indirect, indirect, tmp);
>> +         }
>>        }
>>     }
>>
>> -   return image;
>> +   if (indirect.file == BAD_FILE) {
>> +      return image;
>> +   } else {
>> +      /* Emit a pile of MOVs to load the uniform into a temporary.  The
>> +       * dead-code elimination pass will get rid of what we don't use.
>> +       */
>> +      fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_UD, BRW_IMAGE_PARAM_SIZE);
>> +      for (unsigned j = 0; j < BRW_IMAGE_PARAM_SIZE; j++) {
>> +         bld.emit(SHADER_OPCODE_MOV_INDIRECT,
>> +                  offset(tmp, bld, j), offset(image, bld, j),
>> +                  indirect, brw_imm_ud((indirect_max + 1) * 4));
>> +      }
>> +      return tmp;
>> +   }
>>  }
>>
>>  void
>> @@ -2302,12 +2319,28 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
>>           /* Offsets are in bytes but they should always be multiples of 4 */
>>           assert(const_offset->u[0] % 4 == 0);
>>           src.reg_offset = const_offset->u[0] / 4;
>> +
>> +         for (unsigned j = 0; j < instr->num_components; j++) {
>> +            bld.MOV(offset(dest, bld, j), offset(src, bld, j));
>> +         }
>>        } else {
>> -         src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[0]));
>> -      }
>> +         fs_reg indirect = retype(get_nir_src(instr->src[0]),
>> +                                  BRW_REGISTER_TYPE_UD);
>>
>> -      for (unsigned j = 0; j < instr->num_components; j++) {
>> -         bld.MOV(offset(dest, bld, j), offset(src, bld, j));
>> +         /* Se need to pass a size to the MOV_INDIRECT but we don't want it to
>
> s/Se/We/

Fixed. Thanks!


More information about the mesa-dev mailing list