[Mesa-dev] [PATCH 3/3] i965/vec4: make offset() work in terms of a simd width and scalar components
Francisco Jerez
currojerez at riseup.net
Wed Oct 26 20:14:12 UTC 2016
Iago Toral Quiroga <itoral at igalia.com> writes:
> So that it has the same semantics as the scalar backend implementation. The
> helper will now take a simd width (which is always 8 in vec4 mode) and step
> as many scalar components as specified by that width, respecting the size of
> the scalar channels.
> ---
> src/mesa/drivers/dri/i965/brw_ir_vec4.h | 12 ++++++++----
> src/mesa/drivers/dri/i965/brw_vec4_nir.cpp | 2 +-
> src/mesa/drivers/dri/i965/brw_vec4_surface_builder.cpp | 14 +++++++-------
> 3 files changed, 16 insertions(+), 12 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_ir_vec4.h b/src/mesa/drivers/dri/i965/brw_ir_vec4.h
> index ef79e33..3fa64a8 100644
> --- a/src/mesa/drivers/dri/i965/brw_ir_vec4.h
> +++ b/src/mesa/drivers/dri/i965/brw_ir_vec4.h
> @@ -100,11 +100,13 @@ byte_offset(src_reg reg, unsigned bytes)
> }
>
> static inline src_reg
> -offset(src_reg reg, unsigned delta)
> +offset(src_reg reg, unsigned width, unsigned delta)
> {
> assert(delta == 0 ||
> (reg.file != ARF && reg.file != FIXED_GRF && reg.file != IMM));
You can drop this assertion now since byte_offset() is checking for the
same thing.
> - reg.offset += delta * (reg.file == UNIFORM ? 16 : REG_SIZE);
> + unsigned bytes =
> + (reg.file != UNIFORM ? width : 4) * delta * type_sz(reg.type);
> + add_byte_offset(®, bytes);
> return reg;
> }
Now that we have a byte_offset() helper we should probably take
advantage of it here. It also seemed rather surprising at first glance
that the width argument was ignored for uniforms, but the reason would
be clear if you wrote down the expression for the vertical stride of the
region explicitly, like:
| static inline src_reg
| offset(const src_reg ®, unsigned width, unsigned delta)
| {
| const unsigned stride = (reg.file == UNIFORM ? 0 : 4);
| const unsigned num_components = MAX2(width / 4 * stride, 4);
| return byte_offset(reg, num_components * type_sz(reg.type) * delta);
| }
Note that this also makes sure that we step by a whole multiple of a
vec4. With that changed [and the other offset function below] patch is:
Reviewed-by: Francisco Jerez <currojerez at riseup.net>
>
> @@ -176,11 +178,13 @@ byte_offset(dst_reg reg, unsigned bytes)
> }
>
> static inline dst_reg
> -offset(dst_reg reg, unsigned delta)
> +offset(dst_reg reg, unsigned width, unsigned delta)
> {
> assert(delta == 0 ||
> (reg.file != ARF && reg.file != FIXED_GRF && reg.file != IMM));
> - reg.offset += delta * (reg.file == UNIFORM ? 16 : REG_SIZE);
> + unsigned bytes =
> + (reg.file != UNIFORM ? width : 4) * delta * type_sz(reg.type);
> + add_byte_offset(®, bytes);
> return reg;
> }
>
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
> index ba3bbdf..07e9249 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
> @@ -256,7 +256,7 @@ dst_reg_for_nir_reg(vec4_visitor *v, nir_register *nir_reg,
> dst_reg reg;
>
> reg = v->nir_locals[nir_reg->index];
> - reg = offset(reg, base_offset);
> + reg = offset(reg, 8, base_offset);
> if (indirect) {
> reg.reladdr =
> new(v->mem_ctx) src_reg(v->get_nir_src(*indirect,
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_surface_builder.cpp b/src/mesa/drivers/dri/i965/brw_vec4_surface_builder.cpp
> index 19c685f..00c94fe 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4_surface_builder.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4_surface_builder.cpp
> @@ -42,9 +42,9 @@ namespace {
> DIV_ROUND_UP(size * dst_stride, 4));
>
> for (unsigned i = 0; i < size; ++i)
> - bld.MOV(writemask(offset(dst, i * dst_stride / 4),
> + bld.MOV(writemask(offset(dst, 8, i * dst_stride / 4),
> 1 << (i * dst_stride % 4)),
> - swizzle(offset(src, i * src_stride / 4),
> + swizzle(offset(src, 8, i * src_stride / 4),
> brw_swizzle_for_mask(1 << (i * src_stride % 4))));
>
> return src_reg(dst);
> @@ -124,16 +124,16 @@ namespace brw {
> unsigned n = 0;
>
> if (header_sz)
> - bld.exec_all().MOV(offset(payload, n++),
> + bld.exec_all().MOV(offset(payload, 8, n++),
> retype(header, BRW_REGISTER_TYPE_UD));
>
> for (unsigned i = 0; i < addr_sz; i++)
> - bld.MOV(offset(payload, n++),
> - offset(retype(addr, BRW_REGISTER_TYPE_UD), i));
> + bld.MOV(offset(payload, 8, n++),
> + offset(retype(addr, BRW_REGISTER_TYPE_UD), 8, i));
>
> for (unsigned i = 0; i < src_sz; i++)
> - bld.MOV(offset(payload, n++),
> - offset(retype(src, BRW_REGISTER_TYPE_UD), i));
> + bld.MOV(offset(payload, 8, n++),
> + offset(retype(src, BRW_REGISTER_TYPE_UD), 8, i));
>
> /* Reduce the dynamically uniform surface index to a single
> * scalar.
> --
> 2.7.4
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 212 bytes
Desc: not available
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20161026/5448f9a0/attachment.sig>
More information about the mesa-dev
mailing list