[Mesa-dev] [PATCH 2/2] i965/tes/scalar: fix 64-bit indirect input loads
Iago Toral
itoral at igalia.com
Fri Jul 15 11:16:41 UTC 2016
On Fri, 2016-07-15 at 20:59 +1000, Timothy Arceri wrote:
> On Fri, 2016-07-15 at 11:04 +0200, Iago Toral Quiroga wrote:
> >
> > We totally ignored this before because there were no piglit tests
> > for
> > indirect loads in tessellation stages with doubles.
> >
> > Cc: "12.0" <mesa-stable at lists.freedesktop.org>
> > ---
> > src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 86
> > ++++++++++++++++++++++++--------
> > 1 file changed, 64 insertions(+), 22 deletions(-)
> >
> > diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> > b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> > index 55383ff..7d5af78 100644
> > --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> > +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> > @@ -2926,31 +2926,73 @@ fs_visitor::nir_emit_tes_intrinsic(const
> > fs_builder &bld,
> > }
> > } else {
> > /* Indirect indexing - use per-slot offsets as well. */
> > - const fs_reg srcs[] = {
> > - retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UD),
> > - indirect_offset
> > - };
> > - fs_reg payload = bld.vgrf(BRW_REGISTER_TYPE_UD, 2);
> > - bld.LOAD_PAYLOAD(payload, srcs, ARRAY_SIZE(srcs), 0);
> >
> > - if (first_component != 0) {
> > - unsigned read_components =
> > - instr->num_components + first_component;
> > - fs_reg tmp = bld.vgrf(dest.type, read_components);
> > - inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT,
> > tmp,
> > - payload);
> > - inst->regs_written = read_components;
> > - for (unsigned i = 0; i < instr->num_components; i++) {
> > - bld.MOV(offset(dest, bld, i),
> > - offset(tmp, bld, i + first_component));
> > + /* We can only read two double components with each URB
> > read, so
> > + * we send two read messages in that case, each one
> > loading
> > up to
> > + * two double components.
> > + */
> > + unsigned num_iterations = 1;
> > + unsigned num_components = instr->num_components;
> > + fs_reg orig_dest = dest;
> > + if (type_sz(dest.type) == 8) {
> Hi,
>
> Not your fault as its not currently enabled but can you add the
> following here to allow component packing to work once it finally
> gets
> enabled:
>
> first_component = first_component / 2;
>
> I believe that should be enought to keep it working once the other
> patches land.
I think this is already being done in master, right after the
assignment to first_component.
> Also I don't think this patch is going to apply cleanly to stable as
> the component packing cahnges only landed after branching.
>
> Thanks,
> Tim
>
> >
> > + if (instr->num_components > 2) {
> > + num_iterations = 2;
> > + num_components = 2;
> > + }
> > + fs_reg tmp = fs_reg(VGRF, alloc.allocate(4),
> > dest.type);
> > + dest = tmp;
> > + }
> > +
> > + for (unsigned iter = 0; iter < num_iterations; iter++) {
> > + const fs_reg srcs[] = {
> > + retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UD),
> > + indirect_offset
> > + };
> > + fs_reg payload = bld.vgrf(BRW_REGISTER_TYPE_UD, 2);
> > + bld.LOAD_PAYLOAD(payload, srcs, ARRAY_SIZE(srcs), 0);
> > +
> > + if (first_component != 0) {
> > + unsigned read_components =
> > + num_components + first_component;
> > + fs_reg tmp = bld.vgrf(dest.type, read_components);
> > + inst =
> > bld.emit(SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT, tmp,
> > + payload);
> > + for (unsigned i = 0; i < num_components; i++) {
> > + bld.MOV(offset(dest, bld, i),
> > + offset(tmp, bld, i + first_component));
> > + }
> > + } else {
> > + inst =
> > bld.emit(SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT, dest,
> > + payload);
> > + }
> > + inst->mlen = 2;
> > + inst->offset = imm_offset;
> > + inst->regs_written =
> > + ((num_components + first_component) *
> > type_sz(dest.type) / 4);
> > +
> > + /* If we are reading 64-bit data using 32-bit read
> > messages we need
> > + * build proper 64-bit data elements by shuffling the
> > low and high
> > + * 32-bit components around like we do for other
> > things
> > like UBOs
> > + * or SSBOs.
> > + */
> > + if (type_sz(dest.type) == 8) {
> > + shuffle_32bit_load_result_to_64bit_data(
> > + bld, dest, retype(dest, BRW_REGISTER_TYPE_F),
> > num_components);
> > +
> > + for (unsigned c = 0; c < num_components; c++) {
> > + bld.MOV(offset(orig_dest, bld, iter * 2 + c),
> > + offset(dest, bld, c));
> > + }
> > + }
> > +
> > + /* If we are loading double data and we need a second
> > read message
> > + * adjust the offset
> > + */
> > + if (num_iterations > 1) {
> > + num_components = instr->num_components - 2;
> > + imm_offset++;
> > }
> > - } else {
> > - inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT,
> > dest,
> > - payload);
> > - inst->regs_written = instr->num_components;
> > }
> > - inst->mlen = 2;
> > - inst->offset = imm_offset;
> > }
> > break;
> > }
More information about the mesa-dev
mailing list