[Mesa-dev] [PATCH 3/3] i965: Fix stride field for the result of emit_uniformize().

Samuel Iglesias Gonsálvez siglesias at igalia.com
Thu Jul 16 23:14:45 PDT 2015


On 16/07/15 17:33, Francisco Jerez wrote:
> This is essentially the same problem fixed in an earlier patch for
> immediates.  Setting the stride to zero will be particularly useful
> for my future SIMD lowering pass, because we will be able to just
> check whether the stride of a source register is zero and skip
> emitting the copies required to unzip it in that case.
> 
> Instead of setting stride to zero in every caller of emit_uniformize()
> I've changed the function to return the result as its return value
> (previously it was being written into a caller-provided destination
> register), because this way we can enforce that the result is used with
> the correct regioning from the function itself.
> 
> The changes to the prototype of its VEC4 counterpart are mainly for
> the sake of symmetry, VEC4 registers don't have stride.
> ---
>  src/mesa/drivers/dri/i965/brw_fs_builder.h     | 16 +++++++++-------
>  src/mesa/drivers/dri/i965/brw_fs_nir.cpp       |  4 ++--
>  src/mesa/drivers/dri/i965/brw_vec4.h           |  7 +++++--
>  src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 18 ++++++++++--------
>  4 files changed, 26 insertions(+), 19 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_builder.h b/src/mesa/drivers/dri/i965/brw_fs_builder.h
> index 34646d7..ab4680f 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_builder.h
> +++ b/src/mesa/drivers/dri/i965/brw_fs_builder.h
> @@ -350,17 +350,19 @@ namespace brw {
>        }
>  
>        /**
> -       * Copy any live channel from \p src to the first channel of \p dst.
> +       * Copy any live channel from \p src to the first channel of the result.
>         */
> -      void
> -      emit_uniformize(const dst_reg &dst, const src_reg &src) const
> +      src_reg
> +      emit_uniformize(const src_reg &src) const
>        {
>           const fs_builder ubld = exec_all();
> -         const dst_reg chan_index = vgrf(BRW_REGISTER_TYPE_UD);
> +         const dst_reg chan_index = component(vgrf(BRW_REGISTER_TYPE_UD), 0);
> +         const dst_reg dst = component(vgrf(src.type), 0);
> +
> +         ubld.emit(SHADER_OPCODE_FIND_LIVE_CHANNEL, chan_index);
> +         ubld.emit(SHADER_OPCODE_BROADCAST, dst, src, chan_index);
>  
> -         ubld.emit(SHADER_OPCODE_FIND_LIVE_CHANNEL, component(chan_index, 0));
> -         ubld.emit(SHADER_OPCODE_BROADCAST, component(dst, 0),
> -                   src, component(chan_index, 0));
> +         return dst;

Shouldn't it be src_reg(dst) ?

With that fixed,

   Reviewed-by: Samuel Iglesias Gonsálvez <siglesias at igalia.com>

Sam

>        }
>  
>        /**
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> index 3099dc4..4e45118 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> @@ -1386,7 +1386,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
>           surf_index = vgrf(glsl_type::uint_type);
>           bld.ADD(surf_index, get_nir_src(instr->src[0]),
>                   fs_reg(stage_prog_data->binding_table.ubo_start));
> -         bld.emit_uniformize(surf_index, surf_index);
> +         surf_index = bld.emit_uniformize(surf_index);
>  
>           /* Assume this may touch any UBO. It would be nice to provide
>            * a tighter bound, but the array information is already lowered away.
> @@ -1681,7 +1681,7 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr)
>           /* Emit code to evaluate the actual indexing expression */
>           sampler_reg = vgrf(glsl_type::uint_type);
>           bld.ADD(sampler_reg, src, fs_reg(sampler));
> -         bld.emit_uniformize(sampler_reg, sampler_reg);
> +         sampler_reg = bld.emit_uniformize(sampler_reg);
>           break;
>        }
>  
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
> index 3643651..7bf027a 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4.h
> +++ b/src/mesa/drivers/dri/i965/brw_vec4.h
> @@ -293,8 +293,11 @@ public:
>     void emit_lrp(const dst_reg &dst,
>                   const src_reg &x, const src_reg &y, const src_reg &a);
>  
> -   /** Copy any live channel from \p src to the first channel of \p dst. */
> -   void emit_uniformize(const dst_reg &dst, const src_reg &src);
> +   /**
> +    * Copy any live channel from \p src to the first channel of the
> +    * result.
> +    */
> +   src_reg emit_uniformize(const src_reg &src);
>  
>     void emit_block_move(dst_reg *dst, src_reg *src,
>                          const struct glsl_type *type, brw_predicate predicate);
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
> index f351bf4..a6eee47 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
> @@ -1374,15 +1374,19 @@ vec4_visitor::emit_pull_constant_load_reg(dst_reg dst,
>        emit(pull);
>  }
>  
> -void
> -vec4_visitor::emit_uniformize(const dst_reg &dst, const src_reg &src)
> +src_reg
> +vec4_visitor::emit_uniformize(const src_reg &src)
>  {
>     const src_reg chan_index(this, glsl_type::uint_type);
> +   const dst_reg dst = retype(dst_reg(this, glsl_type::uint_type),
> +                              src.type);
>  
>     emit(SHADER_OPCODE_FIND_LIVE_CHANNEL, dst_reg(chan_index))
>        ->force_writemask_all = true;
>     emit(SHADER_OPCODE_BROADCAST, dst, src, chan_index)
>        ->force_writemask_all = true;
> +
> +   return src_reg(dst);
>  }
>  
>  void
> @@ -1826,7 +1830,7 @@ vec4_visitor::visit(ir_expression *ir)
>           surf_index = src_reg(this, glsl_type::uint_type);
>           emit(ADD(dst_reg(surf_index), op[0],
>                    src_reg(prog_data->base.binding_table.ubo_start)));
> -         emit_uniformize(dst_reg(surf_index), surf_index);
> +         surf_index = emit_uniformize(surf_index);
>  
>           /* Assume this may touch any UBO. It would be nice to provide
>            * a tighter bound, but the array information is already lowered away.
> @@ -2522,11 +2526,9 @@ vec4_visitor::visit(ir_texture *ir)
>  
>        /* Emit code to evaluate the actual indexing expression */
>        nonconst_sampler_index->accept(this);
> -      dst_reg temp(this, glsl_type::uint_type);
> -      emit(ADD(temp, this->result, src_reg(sampler)));
> -      emit_uniformize(temp, src_reg(temp));
> -
> -      sampler_reg = src_reg(temp);
> +      src_reg temp(this, glsl_type::uint_type);
> +      emit(ADD(dst_reg(temp), this->result, src_reg(sampler)));
> +      sampler_reg = emit_uniformize(temp);
>     } else {
>        /* Single sampler, or constant array index; the indexing expression
>         * is just an immediate.
> 


More information about the mesa-dev mailing list