[Mesa-dev] [PATCH 3/3] i965: Fix stride field for the result of emit_uniformize().
Samuel Iglesias Gonsálvez
siglesias at igalia.com
Thu Jul 16 23:14:45 PDT 2015
On 16/07/15 17:33, Francisco Jerez wrote:
> This is essentially the same problem fixed in an earlier patch for
> immediates. Setting the stride to zero will be particularly useful
> for my future SIMD lowering pass, because we will be able to just
> check whether the stride of a source register is zero and skip
> emitting the copies required to unzip it in that case.
>
> Instead of setting stride to zero in every caller of emit_uniformize()
> I've changed the function to return the result as its return value
> (previously it was being written into a caller-provided destination
> register), because this way we can enforce that the result is used with
> the correct regioning from the function itself.
>
> The changes to the prototype of its VEC4 counterpart are mainly for
> the sake of symmetry, VEC4 registers don't have stride.
> ---
> src/mesa/drivers/dri/i965/brw_fs_builder.h | 16 +++++++++-------
> src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 4 ++--
> src/mesa/drivers/dri/i965/brw_vec4.h | 7 +++++--
> src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 18 ++++++++++--------
> 4 files changed, 26 insertions(+), 19 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_builder.h b/src/mesa/drivers/dri/i965/brw_fs_builder.h
> index 34646d7..ab4680f 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_builder.h
> +++ b/src/mesa/drivers/dri/i965/brw_fs_builder.h
> @@ -350,17 +350,19 @@ namespace brw {
> }
>
> /**
> - * Copy any live channel from \p src to the first channel of \p dst.
> + * Copy any live channel from \p src to the first channel of the result.
> */
> - void
> - emit_uniformize(const dst_reg &dst, const src_reg &src) const
> + src_reg
> + emit_uniformize(const src_reg &src) const
> {
> const fs_builder ubld = exec_all();
> - const dst_reg chan_index = vgrf(BRW_REGISTER_TYPE_UD);
> + const dst_reg chan_index = component(vgrf(BRW_REGISTER_TYPE_UD), 0);
> + const dst_reg dst = component(vgrf(src.type), 0);
> +
> + ubld.emit(SHADER_OPCODE_FIND_LIVE_CHANNEL, chan_index);
> + ubld.emit(SHADER_OPCODE_BROADCAST, dst, src, chan_index);
>
> - ubld.emit(SHADER_OPCODE_FIND_LIVE_CHANNEL, component(chan_index, 0));
> - ubld.emit(SHADER_OPCODE_BROADCAST, component(dst, 0),
> - src, component(chan_index, 0));
> + return dst;
Shouldn't it be src_reg(dst) ?
With that fixed,
Reviewed-by: Samuel Iglesias Gonsálvez <siglesias at igalia.com>
Sam
> }
>
> /**
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> index 3099dc4..4e45118 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> @@ -1386,7 +1386,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
> surf_index = vgrf(glsl_type::uint_type);
> bld.ADD(surf_index, get_nir_src(instr->src[0]),
> fs_reg(stage_prog_data->binding_table.ubo_start));
> - bld.emit_uniformize(surf_index, surf_index);
> + surf_index = bld.emit_uniformize(surf_index);
>
> /* Assume this may touch any UBO. It would be nice to provide
> * a tighter bound, but the array information is already lowered away.
> @@ -1681,7 +1681,7 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr)
> /* Emit code to evaluate the actual indexing expression */
> sampler_reg = vgrf(glsl_type::uint_type);
> bld.ADD(sampler_reg, src, fs_reg(sampler));
> - bld.emit_uniformize(sampler_reg, sampler_reg);
> + sampler_reg = bld.emit_uniformize(sampler_reg);
> break;
> }
>
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
> index 3643651..7bf027a 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4.h
> +++ b/src/mesa/drivers/dri/i965/brw_vec4.h
> @@ -293,8 +293,11 @@ public:
> void emit_lrp(const dst_reg &dst,
> const src_reg &x, const src_reg &y, const src_reg &a);
>
> - /** Copy any live channel from \p src to the first channel of \p dst. */
> - void emit_uniformize(const dst_reg &dst, const src_reg &src);
> + /**
> + * Copy any live channel from \p src to the first channel of the
> + * result.
> + */
> + src_reg emit_uniformize(const src_reg &src);
>
> void emit_block_move(dst_reg *dst, src_reg *src,
> const struct glsl_type *type, brw_predicate predicate);
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
> index f351bf4..a6eee47 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
> @@ -1374,15 +1374,19 @@ vec4_visitor::emit_pull_constant_load_reg(dst_reg dst,
> emit(pull);
> }
>
> -void
> -vec4_visitor::emit_uniformize(const dst_reg &dst, const src_reg &src)
> +src_reg
> +vec4_visitor::emit_uniformize(const src_reg &src)
> {
> const src_reg chan_index(this, glsl_type::uint_type);
> + const dst_reg dst = retype(dst_reg(this, glsl_type::uint_type),
> + src.type);
>
> emit(SHADER_OPCODE_FIND_LIVE_CHANNEL, dst_reg(chan_index))
> ->force_writemask_all = true;
> emit(SHADER_OPCODE_BROADCAST, dst, src, chan_index)
> ->force_writemask_all = true;
> +
> + return src_reg(dst);
> }
>
> void
> @@ -1826,7 +1830,7 @@ vec4_visitor::visit(ir_expression *ir)
> surf_index = src_reg(this, glsl_type::uint_type);
> emit(ADD(dst_reg(surf_index), op[0],
> src_reg(prog_data->base.binding_table.ubo_start)));
> - emit_uniformize(dst_reg(surf_index), surf_index);
> + surf_index = emit_uniformize(surf_index);
>
> /* Assume this may touch any UBO. It would be nice to provide
> * a tighter bound, but the array information is already lowered away.
> @@ -2522,11 +2526,9 @@ vec4_visitor::visit(ir_texture *ir)
>
> /* Emit code to evaluate the actual indexing expression */
> nonconst_sampler_index->accept(this);
> - dst_reg temp(this, glsl_type::uint_type);
> - emit(ADD(temp, this->result, src_reg(sampler)));
> - emit_uniformize(temp, src_reg(temp));
> -
> - sampler_reg = src_reg(temp);
> + src_reg temp(this, glsl_type::uint_type);
> + emit(ADD(dst_reg(temp), this->result, src_reg(sampler)));
> + sampler_reg = emit_uniformize(temp);
> } else {
> /* Single sampler, or constant array index; the indexing expression
> * is just an immediate.
>
More information about the mesa-dev
mailing list