[Mesa-dev] [PATCH 5/7] i965/fs: Don't renumber UNIFORM registers.

Kenneth Graunke kenneth at whitecape.org
Wed Mar 12 23:56:05 PDT 2014


On 03/12/2014 03:14 AM, Pohjolainen, Topi wrote:
> On Tue, Mar 11, 2014 at 11:48:54PM -0700, Kenneth Graunke wrote:
>> Previously, remove_dead_constants() would renumber the UNIFORM registers
>> to be sequential starting from zero, and the resulting register number
>> would be used directly as an index into the params[] array.
>>
>> This renumbering made it difficult to collect and save information about
>> pull constant locations, since setup_pull_constants() and
>> move_uniform_array_access_to_pull_constants() used different names.
>>
>> This patch generalizes setup_pull_constants() to decide whether each
>> uniform register should be a pull constant, push constant, or neither
>> (because it's unused).  Then, it stores mappings from UNIFORM register
>> numbers to params[] or pull_params[] indices in the push_constant_loc
>> and pull_constant_loc arrays.  (We already did this for pull constants.)
>>
>> Then, assign_curb_setup() just needs to consult the push_constant_loc
>> array to get the real index into the params[] array.
>>
>> This effectively folds all the remove_dead_constants() functionality
>> into assign_constant_locations(), while being less irritable to work
>> with.
>>
>> Signed-off-by: Kenneth Graunke <kenneth at whitecape.org>
>> ---
>>  src/mesa/drivers/dri/i965/brw_fs.cpp         | 187 +++++++++++----------------
>>  src/mesa/drivers/dri/i965/brw_fs.h           |  13 +-
>>  src/mesa/drivers/dri/i965/brw_fs_visitor.cpp |   3 +-
>>  3 files changed, 85 insertions(+), 118 deletions(-)
>>
>> diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
>> index 3c8237a..5e01e78 100644
>> --- a/src/mesa/drivers/dri/i965/brw_fs.cpp
>> +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
>> @@ -885,8 +885,8 @@ fs_visitor::import_uniforms(fs_visitor *v)
>>     hash_table_call_foreach(v->variable_ht,
>>  			   import_uniforms_callback,
>>  			   variable_ht);
>> -   this->params_remap = v->params_remap;
>> -   this->nr_params_remap = v->nr_params_remap;
>> +   this->push_constant_loc = v->push_constant_loc;
>> +   this->uniforms = v->uniforms;
>>  }
>>  
>>  /* Our support for uniforms is piggy-backed on the struct
>> @@ -1397,11 +1397,8 @@ fs_visitor::assign_curb_setup()
>>  {
>>     if (dispatch_width == 8) {
>>        c->prog_data.first_curbe_grf = c->nr_payload_regs;
>> -      stage_prog_data->nr_params = uniforms;
>>     } else {
>>        c->prog_data.first_curbe_grf_16 = c->nr_payload_regs;
>> -      /* Make sure we didn't try to sneak in an extra uniform */
>> -      assert(uniforms == 0);
>>     }
>>  
>>     c->prog_data.curb_read_length = ALIGN(stage_prog_data->nr_params, 8) / 8;
>> @@ -1412,7 +1409,19 @@ fs_visitor::assign_curb_setup()
>>  
>>        for (unsigned int i = 0; i < 3; i++) {
>>  	 if (inst->src[i].file == UNIFORM) {
>> -	    int constant_nr = inst->src[i].reg + inst->src[i].reg_offset;
>> +	    int uniform_nr = inst->src[i].reg + inst->src[i].reg_offset;
>> +            int constant_nr;
>> +            if (uniform_nr >= 0 && uniform_nr < (int) uniforms) {
>> +               constant_nr = push_constant_loc[uniform_nr];
>> +            } else {
>> +               /* Section 5.11 of the OpenGL 4.1 spec says:
>> +                * "Out-of-bounds reads return undefined values, which include
>> +                *  values from other variables of the active program or zero."
>> +                * Just return the first push constant.
>> +                */
>> +               constant_nr = 0;
>> +            }
>> +
>>  	    struct brw_reg brw_reg = brw_vec1_grf(c->nr_payload_regs +
>>  						  constant_nr / 8,
>>  						  constant_nr % 8);
>> @@ -1724,94 +1733,6 @@ fs_visitor::compact_virtual_grfs()
>>     }
>>  }
>>  
>> -bool
>> -fs_visitor::remove_dead_constants()
>> -{
>> -   if (dispatch_width == 8) {
>> -      this->params_remap = ralloc_array(mem_ctx, int, uniforms);
>> -      this->nr_params_remap = uniforms;
>> -
>> -      for (unsigned int i = 0; i < uniforms; i++)
>> -	 this->params_remap[i] = -1;
>> -
>> -      /* Find which params are still in use. */
>> -      foreach_list(node, &this->instructions) {
>> -	 fs_inst *inst = (fs_inst *)node;
>> -
>> -	 for (int i = 0; i < 3; i++) {
>> -	    int constant_nr = inst->src[i].reg + inst->src[i].reg_offset;
>> -
>> -	    if (inst->src[i].file != UNIFORM)
>> -	       continue;
>> -
>> -	    /* Section 5.11 of the OpenGL 4.3 spec says:
>> -	     *
>> -	     *     "Out-of-bounds reads return undefined values, which include
>> -	     *     values from other variables of the active program or zero."
>> -	     */
>> -	    if (constant_nr < 0 || constant_nr >= (int)uniforms) {
>> -	       constant_nr = 0;
>> -	    }
>> -
>> -	    /* For now, set this to non-negative.  We'll give it the
>> -	     * actual new number in a moment, in order to keep the
>> -	     * register numbers nicely ordered.
>> -	     */
>> -	    this->params_remap[constant_nr] = 0;
>> -	 }
>> -      }
>> -
>> -      /* Figure out what the new numbers for the params will be.  At some
>> -       * point when we're doing uniform array access, we're going to want
>> -       * to keep the distinction between .reg and .reg_offset, but for
>> -       * now we don't care.
>> -       */
>> -      unsigned int new_nr_params = 0;
>> -      for (unsigned int i = 0; i < uniforms; i++) {
>> -	 if (this->params_remap[i] != -1) {
>> -	    this->params_remap[i] = new_nr_params++;
>> -	 }
>> -      }
>> -
>> -      /* Update the list of params to be uploaded to match our new numbering. */
>> -      for (unsigned int i = 0; i < uniforms; i++) {
>> -	 int remapped = this->params_remap[i];
>> -
>> -	 if (remapped == -1)
>> -	    continue;
>> -
>> -	 stage_prog_data->param[remapped] = stage_prog_data->param[i];
>> -      }
>> -
>> -      uniforms = new_nr_params;
>> -   } else {
>> -      /* This should have been generated in the SIMD8 pass already. */
>> -      assert(this->params_remap);
>> -   }
>> -
>> -   /* Now do the renumbering of the shader to remove unused params. */
>> -   foreach_list(node, &this->instructions) {
>> -      fs_inst *inst = (fs_inst *)node;
>> -
>> -      for (int i = 0; i < 3; i++) {
>> -	 int constant_nr = inst->src[i].reg + inst->src[i].reg_offset;
>> -
>> -	 if (inst->src[i].file != UNIFORM)
>> -	    continue;
>> -
>> -	 /* as above alias to 0 */
>> -	 if (constant_nr < 0 || constant_nr >= (int)this->nr_params_remap) {
>> -	    constant_nr = 0;
>> -	 }
>> -	 assert(this->params_remap[constant_nr] != -1);
>> -	 inst->src[i].reg = this->params_remap[constant_nr];
>> -	 inst->src[i].reg_offset = 0;
>> -      }
>> -   }
>> -
>> -   return true;
>> -}
>> -
>>  /*
>>   * Implements array access of uniforms by inserting a
>>   * PULL_CONSTANT_LOAD instruction.
>> @@ -1872,8 +1793,7 @@ fs_visitor::move_uniform_array_access_to_pull_constants()
>>  }
>>  
>>  /**
>> - * Choose accesses from the UNIFORM file to demote to using the pull
>> - * constant buffer.
>> + * Assign UNIFORM file registers to either push constants or pull constants.
>>   *
>>   * We allow a fragment shader to have more than the specified minimum
>>   * maximum number of fragment shader uniform components (64).  If
>> @@ -1882,24 +1802,62 @@ fs_visitor::move_uniform_array_access_to_pull_constants()
>>   * update the program to load them.
>>   */
>>  void
>> -fs_visitor::setup_pull_constants()
>> +fs_visitor::assign_constant_locations()
>>  {
>> -   /* Only allow 16 registers (128 uniform components) as push constants. */
>> -   unsigned int max_uniform_components = 16 * 8;
>> -   if (uniforms <= max_uniform_components)
>> +   /* Only the first compile (SIMD8 mode) gets to decide on locations. */
>> +   if (dispatch_width != 8)
>>        return;
>>  
>> -   /* Just demote the end of the list.  We could probably do better
>> +   /* Find which UNIFORM registers are still in use. */
>> +   bool is_live[uniforms];
>> +   for (unsigned int i = 0; i < uniforms; i++) {
>> +      is_live[i] = false;
>> +   }
>> +
>> +   foreach_list(node, &this->instructions) {
>> +      fs_inst *inst = (fs_inst *) node;
>> +
>> +      for (int i = 0; i < 3; i++) {
>> +         if (inst->src[i].file != UNIFORM)
>> +            continue;
>> +
>> +         int constant_nr = inst->src[i].reg + inst->src[i].reg_offset;
>> +         if (constant_nr >= 0 && constant_nr < (int) uniforms)
>> +            is_live[constant_nr] = true;
>> +      }
>> +   }
>> +
>> +   /* Only allow 16 registers (128 uniform components) as push constants.
>> +    *
>> +    * Just demote the end of the list.  We could probably do better
>>      * here, demoting things that are rarely used in the program first.
>>      */
>> -   unsigned int pull_uniform_base = max_uniform_components;
>> +   unsigned int max_push_components = 16 * 8;
>> +   unsigned int num_push_constants = 0;
>>  
>> +   push_constant_loc = ralloc_array(mem_ctx, int, uniforms);
>>     pull_constant_loc = ralloc_array(mem_ctx, int, uniforms);
>> +   for (unsigned int i = 0; i < uniforms; i++)
>> +      pull_constant_loc[i] = -1;
>> +
>>     for (unsigned int i = 0; i < uniforms; i++) {
>> -      if (i < pull_uniform_base) {
>> -         pull_constant_loc[i] = -1;
>> +      if (!is_live[i] || pull_constant_loc[i] != -1) {
> 
> I started wondering when the second condition fires and by inspecting code not
> visible in this patch it seems that it cannot. The loop addresses elements in
> the "pull_constant_loc[]" in order and never beyond current index "i".

You are correct.  At this point, it should never trigger.

It will be triggered after the next patch ("i965/fs: Use a single
instance of the pull_constant_loc array.").  I should have introduced it
in that patch, rather than here.

Would you like me to make that change?

Also, thanks for the careful review!

>> +         /* This UNIFORM register is either dead, or has already been demoted
>> +          * to a pull const.  Mark it as no longer living in the param[] array.
>> +          */
>> +         push_constant_loc[i] = -1;
>> +         continue;
>> +      }
>> +
>> +      if (num_push_constants < max_push_components) {
>> +         /* Retain as a push constant.  Record the location in the params[]
>> +          * array.
>> +          */
>> +         push_constant_loc[i] = num_push_constants++;
>>        } else {
>> -         pull_constant_loc[i] = -1;
>> +         /* Demote to a pull constant. */
>> +         push_constant_loc[i] = -1;
>> +
>>           /* If our constant is already being uploaded for reladdr purposes,
>>            * reuse it.
>>            */
> 
> This is the code not visible. I pasted here for clarity.
> 
>             for (unsigned int j = 0; j < stage_prog_data->nr_pull_params; j++) {
>                if (stage_prog_data->pull_param[j] == stage_prog_data->param[i]) {
>                   pull_constant_loc[i] = j;
>                   break;
>                }
>             }
>             if (pull_constant_loc[i] == -1) {
>                int pull_index = stage_prog_data->nr_pull_params++;
>                stage_prog_data->pull_param[pull_index] = stage_prog_data->param[i];
>                pull_constant_loc[i] = pull_index;
>             }
>          }
>       }


-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 836 bytes
Desc: OpenPGP digital signature
URL: <http://lists.freedesktop.org/archives/mesa-dev/attachments/20140312/8f3c0d48/attachment-0001.pgp>


More information about the mesa-dev mailing list