[Mesa-dev] [PATCH 18/27] i965/fs: Append ir_binop_ubo_load entries to the gather table

Pohjolainen, Topi topi.pohjolainen at intel.com
Thu May 7 08:36:00 PDT 2015


On Tue, Apr 28, 2015 at 11:08:15PM +0300, Abdiel Janulgue wrote:
> When the const block and offset are immediate values. Otherwise just
> fall-back to the previous method of uploading the UBO constant data to
> GRF using pull constants.
> 
> Signed-off-by: Abdiel Janulgue <abdiel.janulgue at linux.intel.com>
> ---
>  src/mesa/drivers/dri/i965/brw_fs.cpp         | 11 ++++
>  src/mesa/drivers/dri/i965/brw_fs.h           |  4 ++
>  src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 86 +++++++++++++++++++++++++++-
>  3 files changed, 100 insertions(+), 1 deletion(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
> index 071ac59..031d807 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
> @@ -2273,6 +2273,7 @@ fs_visitor::assign_constant_locations()
>     }
>  
>     stage_prog_data->nr_params = 0;
> +   stage_prog_data->nr_ubo_params = ubo_uniforms;
>  
>     unsigned const_reg_access[uniforms];
>     memset(const_reg_access, 0, sizeof(const_reg_access));
> @@ -2302,6 +2303,16 @@ fs_visitor::assign_constant_locations()
>        stage_prog_data->gather_table[p].channel_mask =
>           const_reg_access[i];
>     }
> +
> +   for (unsigned i = 0; i < this->nr_ubo_gather_table; i++) {
> +      int p = stage_prog_data->nr_gather_table++;
> +      stage_prog_data->gather_table[p].reg = this->ubo_gather_table[i].reg;
> +      stage_prog_data->gather_table[p].channel_mask = this->ubo_gather_table[i].channel_mask;
> +      stage_prog_data->gather_table[p].const_block = this->ubo_gather_table[i].const_block;
> +      stage_prog_data->gather_table[p].const_offset = this->ubo_gather_table[i].const_offset;
> +      stage_prog_data->max_ubo_const_block = MAX2(stage_prog_data->max_ubo_const_block,
> +                                                  this->ubo_gather_table[i].const_block);

These are all overflowing 80 columns.

> +   }
>  }
>  
>  /**
> diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
> index 32063f0..a48b2bb 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs.h
> +++ b/src/mesa/drivers/dri/i965/brw_fs.h
> @@ -417,6 +417,7 @@ public:
>     void setup_uniform_values(ir_variable *ir);
>     void setup_builtin_uniform_values(ir_variable *ir);
>     int implied_mrf_writes(fs_inst *inst);
> +   bool generate_ubo_gather_table(ir_expression* ir);
>  
>     virtual void dump_instructions();
>     virtual void dump_instructions(const char *name);
> @@ -445,6 +446,9 @@ public:
>     /** Total number of direct uniforms we can get from NIR */
>     unsigned num_direct_uniforms;
>  
> +   /** Number of ubo uniform variable components visited. */
> +   unsigned ubo_uniforms;
> +
>     /** Byte-offset for the next available spot in the scratch space buffer. */
>     unsigned last_scratch;
>  
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
> index 4e99366..11e608b 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
> @@ -1179,11 +1179,18 @@ fs_visitor::visit(ir_expression *ir)
>        emit(FS_OPCODE_PACK_HALF_2x16_SPLIT, this->result, op[0], op[1]);
>        break;
>     case ir_binop_ubo_load: {
> +      /* Use gather push constants if at all possible, otherwise just
> +       * fall back to pull constants for UBOs
> +       */
> +      if (generate_ubo_gather_table(ir))
> +         break;
> +
>        /* This IR node takes a constant uniform block and a constant or
>         * variable byte offset within the block and loads a vector from that.
>         */
>        ir_constant *const_uniform_block = ir->operands[0]->as_constant();
>        ir_constant *const_offset = ir->operands[1]->as_constant();
> +

This added blank line is an unrelated whitespace change and should not be part of this patch.

>        fs_reg surf_index;
>  
>        if (const_uniform_block) {
> @@ -4144,6 +4151,79 @@ fs_visitor::resolve_bool_comparison(ir_rvalue *rvalue, fs_reg *reg)
>     *reg = neg_result;
>  }
>  
> +bool
> +fs_visitor::generate_ubo_gather_table(ir_expression *ir)
> +{
> +   ir_constant *const_uniform_block = ir->operands[0]->as_constant();
> +   ir_constant *const_offset = ir->operands[1]->as_constant();

These are only used for reading; let's use pointers to const.

> +
> +   if (ir->operation != ir_binop_ubo_load ||
> +       !brw->has_resource_streamer        ||
> +       !brw->fs_ubo_gather                ||
> +       !const_uniform_block               ||

This is not really the style used elsewhere; please don't align the "||" operators.

> +       !const_offset)
> +      return false;
> +
> +  /* Only allow 16 registers (128 uniform components) as push constants.
> +   */

Move the comment closing to the previous line.

> +   unsigned int max_push_components = 16 * 8;
> +   unsigned param_index = uniforms + ubo_uniforms;

These could both be declared as const.

> +   if ((param_index + ir->type->vector_elements) >= max_push_components)
> +      return false;
> +
> +   fs_reg reg;
> +   if (dispatch_width == 16) {
> +      for (int i = 0; i < (int) this->nr_ubo_gather_table; i++) {
> +         if ((this->ubo_gather_table[i].const_block ==
> +              const_uniform_block->value.u[0]) &&
> +             (this->ubo_gather_table[i].const_offset ==
> +              const_offset->value.u[0])) {
> +            reg = fs_reg(UNIFORM, this->ubo_gather_table[i].reg);
> +            reg.type = brw_type_for_base_type(ir->type);
> +            break;
> +         }
> +      }
> +      assert(reg.file == UNIFORM);
> +   }
> +
> +   if (reg.file != UNIFORM) {
> +      reg = fs_reg(UNIFORM, param_index);
> +      int gather = this->nr_ubo_gather_table++;
> +
> +      assert(ir->type->vector_elements <= 4);
> +      ubo_uniforms += ir->type->vector_elements;
> +      this->ubo_gather_table[gather].reg = reg.reg;
> +      this->ubo_gather_table[gather].const_block =
> +         const_uniform_block->value.u[0];
> +      this->ubo_gather_table[gather].const_offset =
> +         const_offset->value.u[0];
> +      reg.type = brw_type_for_base_type(ir->type);
> +   }
> +
> +   if (ir->type->base_type == GLSL_TYPE_BOOL) {
> +

Extra new line.

> +      for (int i = 0; i < ir->type->vector_elements; i++) {
> +

Here also.

> +         /* The std140 packing rules don't allow vectors to cross 16-byte
> +          * boundaries, and a reg is 32 bytes.
> +          */
> +         assert(reg.subreg_offset < 32);
> +
> +         /* UBO bools are any nonzero value.  We consider bools to be
> +          * values with the low bit set to 1.  Convert them using CMP.
> +          */
> +         emit(CMP(result, reg, fs_reg(0u), BRW_CONDITIONAL_NZ));
> +
> +         result = offset(result, 1);
> +      }
> +      result.reg_offset = 0;
> +   } else {
> +      result = reg;
> +   }
> +
> +   return true;
> +}
> +
>  fs_visitor::fs_visitor(struct brw_context *brw,
>                         void *mem_ctx,
>                         const struct brw_wm_prog_key *key,
> @@ -4224,6 +4304,7 @@ fs_visitor::init()
>     this->regs_live_at_ip = NULL;
>  
>     this->uniforms = 0;
> +   this->ubo_uniforms = 0;
>     this->last_scratch = 0;
>     this->pull_constant_loc = NULL;
>     this->push_constant_loc = NULL;
> @@ -4231,8 +4312,11 @@ fs_visitor::init()
>     this->spilled_any_registers = false;
>     this->do_dual_src = false;
>  
> -   if (dispatch_width == 8)
> +   if (dispatch_width == 8) {
>        this->param_size = rzalloc_array(mem_ctx, int, stage_prog_data->nr_params);
> +      this->ubo_gather_table = rzalloc_array(mem_ctx, backend_visitor::gather_table,
> +                                             stage_prog_data->nr_params);
> +   }
>  }
>  
>  fs_visitor::~fs_visitor()
> -- 
> 1.9.1
> 
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev


More information about the mesa-dev mailing list