[Mesa-dev] [PATCH 2/2] i965/tes/scalar: fix 64-bit indirect input loads
Timothy Arceri
timothy.arceri at collabora.com
Fri Jul 15 10:59:26 UTC 2016
On Fri, 2016-07-15 at 11:04 +0200, Iago Toral Quiroga wrote:
> We totally ignored this before because there were no piglit tests for
> indirect loads in tessellation stages with doubles.
>
> Cc: "12.0" <mesa-stable at lists.freedesktop.org>
> ---
> src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 86
> ++++++++++++++++++++++++--------
> 1 file changed, 64 insertions(+), 22 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> index 55383ff..7d5af78 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> @@ -2926,31 +2926,73 @@ fs_visitor::nir_emit_tes_intrinsic(const
> fs_builder &bld,
> }
> } else {
> /* Indirect indexing - use per-slot offsets as well. */
> - const fs_reg srcs[] = {
> - retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UD),
> - indirect_offset
> - };
> - fs_reg payload = bld.vgrf(BRW_REGISTER_TYPE_UD, 2);
> - bld.LOAD_PAYLOAD(payload, srcs, ARRAY_SIZE(srcs), 0);
>
> - if (first_component != 0) {
> - unsigned read_components =
> - instr->num_components + first_component;
> - fs_reg tmp = bld.vgrf(dest.type, read_components);
> - inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT,
> tmp,
> - payload);
> - inst->regs_written = read_components;
> - for (unsigned i = 0; i < instr->num_components; i++) {
> - bld.MOV(offset(dest, bld, i),
> - offset(tmp, bld, i + first_component));
> + /* We can only read two double components with each URB
> read, so
> + * we send two read messages in that case, each one loading
> up to
> + * two double components.
> + */
> + unsigned num_iterations = 1;
> + unsigned num_components = instr->num_components;
> + fs_reg orig_dest = dest;
> + if (type_sz(dest.type) == 8) {
Hi,
Not your fault as its not currently enabled but can you add the
following here to allow component packing to work once it finally gets
enabled:
first_component = first_component / 2;
I believe that should be enought to keep it working once the other
patches land.
Also I don't think this patch is going to apply cleanly to stable as
the component packing cahnges only landed after branching.
Thanks,
Tim
> + if (instr->num_components > 2) {
> + num_iterations = 2;
> + num_components = 2;
> + }
> + fs_reg tmp = fs_reg(VGRF, alloc.allocate(4), dest.type);
> + dest = tmp;
> + }
> +
> + for (unsigned iter = 0; iter < num_iterations; iter++) {
> + const fs_reg srcs[] = {
> + retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UD),
> + indirect_offset
> + };
> + fs_reg payload = bld.vgrf(BRW_REGISTER_TYPE_UD, 2);
> + bld.LOAD_PAYLOAD(payload, srcs, ARRAY_SIZE(srcs), 0);
> +
> + if (first_component != 0) {
> + unsigned read_components =
> + num_components + first_component;
> + fs_reg tmp = bld.vgrf(dest.type, read_components);
> + inst =
> bld.emit(SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT, tmp,
> + payload);
> + for (unsigned i = 0; i < num_components; i++) {
> + bld.MOV(offset(dest, bld, i),
> + offset(tmp, bld, i + first_component));
> + }
> + } else {
> + inst =
> bld.emit(SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT, dest,
> + payload);
> + }
> + inst->mlen = 2;
> + inst->offset = imm_offset;
> + inst->regs_written =
> + ((num_components + first_component) *
> type_sz(dest.type) / 4);
> +
> + /* If we are reading 64-bit data using 32-bit read
> messages we need
> + * build proper 64-bit data elements by shuffling the
> low and high
> + * 32-bit components around like we do for other things
> like UBOs
> + * or SSBOs.
> + */
> + if (type_sz(dest.type) == 8) {
> + shuffle_32bit_load_result_to_64bit_data(
> + bld, dest, retype(dest, BRW_REGISTER_TYPE_F),
> num_components);
> +
> + for (unsigned c = 0; c < num_components; c++) {
> + bld.MOV(offset(orig_dest, bld, iter * 2 + c),
> + offset(dest, bld, c));
> + }
> + }
> +
> + /* If we are loading double data and we need a second
> read message
> + * adjust the offset
> + */
> + if (num_iterations > 1) {
> + num_components = instr->num_components - 2;
> + imm_offset++;
> }
> - } else {
> - inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT,
> dest,
> - payload);
> - inst->regs_written = instr->num_components;
> }
> - inst->mlen = 2;
> - inst->offset = imm_offset;
> }
> break;
> }
More information about the mesa-dev
mailing list