[Mesa-dev] [PATCH v2 25/30] i965/tcs/scalar: fix load input for doubles

Iago Toral itoral at igalia.com
Fri May 13 06:31:47 UTC 2016


On Thu, 2016-05-12 at 11:42 -0700, Kenneth Graunke wrote:
> On Thursday, May 12, 2016 1:36:02 PM PDT Samuel Iglesias Gonsálvez wrote:
> > From: Iago Toral Quiroga <itoral at igalia.com>
> > 
> > ---
> >  src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 96 ++++++++++++++++++++++
> +---------
> >  1 file changed, 71 insertions(+), 25 deletions(-)
> > 
> > diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/
> dri/i965/brw_fs_nir.cpp
> > index bf6375b..87ec7db 100644
> > --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> > +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> > @@ -2269,34 +2269,80 @@ fs_visitor::nir_emit_tcs_intrinsic(const fs_builder 
> &bld,
> >                    brw_imm_ud(4 * REG_SIZE));
> >        }
> >  
> > -      if (indirect_offset.file == BAD_FILE) {
> > -         /* Constant indexing - use global offset. */
> > -         inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8, dst, icp_handle);
> > -         inst->offset = imm_offset;
> > -         inst->mlen = 1;
> > -         inst->base_mrf = -1;
> > -         inst->regs_written = instr->num_components;
> > -      } else {
> > -         /* Indirect indexing - use per-slot offsets as well. */
> > -         const fs_reg srcs[] = { icp_handle, indirect_offset };
> > -         fs_reg payload = bld.vgrf(BRW_REGISTER_TYPE_UD, 2);
> > -         bld.LOAD_PAYLOAD(payload, srcs, ARRAY_SIZE(srcs), 0);
> > +      /* We can only read two double components with each URB read, so
> > +       * we send two read messages in that case, each one loading up to
> > +       * two double components.
> > +       */
> > +      unsigned num_iterations = 1;
> > +      unsigned num_components = instr->num_components;
> > +      fs_reg orig_dst = dst;
> > +      if (type_sz(dst.type) == 8) {
> > +         if (instr->num_components > 2) {
> > +            num_iterations = 2;
> > +            num_components = 2;
> > +         }
> >  
> > -         inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT, dst, 
> payload);
> > -         inst->offset = imm_offset;
> > -         inst->base_mrf = -1;
> > -         inst->mlen = 2;
> > -         inst->regs_written = instr->num_components;
> > +         fs_reg tmp = fs_reg(VGRF, alloc.allocate(4), dst.type);
> > +         dst = tmp;
> >        }
> >  
> > -      /* Copy the temporary to the destination to deal with writemasking.
> > -       *
> > -       * Also attempt to deal with gl_PointSize being in the .w component.
> > -       */
> > -      if (inst->offset == 0 && indirect_offset.file == BAD_FILE) {
> > -         inst->dst = bld.vgrf(dst.type, 4);
> > -         inst->regs_written = 4;
> > -         bld.MOV(dst, offset(inst->dst, bld, 3));
> > +      for (unsigned iter = 0; iter < num_iterations; iter++) {
> > +         if (indirect_offset.file == BAD_FILE) {
> > +            /* Constant indexing - use global offset. */
> > +            inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8, dst, icp_handle);
> > +            inst->offset = imm_offset;
> > +            inst->mlen = 1;
> > +            inst->base_mrf = -1;
> > +         } else {
> > +            /* Indirect indexing - use per-slot offsets as well. */
> > +            const fs_reg srcs[] = { icp_handle, indirect_offset };
> > +            fs_reg payload = bld.vgrf(BRW_REGISTER_TYPE_UD, 2);
> > +            bld.LOAD_PAYLOAD(payload, srcs, ARRAY_SIZE(srcs), 0);
> > +
> > +            inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT, dst, 
> payload);
> > +            inst->offset = imm_offset;
> > +            inst->base_mrf = -1;
> > +            inst->mlen = 2;
> > +         }
> > +         inst->regs_written = num_components * type_sz(dst.type) / 4;
> > +
> > +         /* If we are reading 64-bit data using 32-bit read messages we 
> need
> > +          * build proper 64-bit data elements by shuffling the low and high
> > +          * 32-bit components around like we do for other things like UBOs
> > +          * or SSBOs.
> > +          */
> > +         if (type_sz(dst.type) == 8) {
> > +            shuffle_32bit_load_result_to_64bit_data(
> > +               bld, dst, retype(dst, BRW_REGISTER_TYPE_F), num_components);
> > +
> > +            for (unsigned c = 0; c < num_components; c++) {
> > +               bld.MOV(offset(orig_dst, bld, iter * 2 + c),
> > +                       offset(dst, bld, c));
> > +            }
> > +         }
> > +
> > +         /* Copy the temporary to the destination to deal with 
> writemasking.
> > +          *
> > +          * Also attempt to deal with gl_PointSize being in the .w 
> component.
> > +          */
> > +         if (inst->offset == 0 && indirect_offset.file == BAD_FILE) {
> > +            assert(type_sz(dst.type) < 8);
> > +            inst->dst = bld.vgrf(dst.type, 4);
> > +            inst->regs_written = 4;
> > +            bld.MOV(dst, offset(inst->dst, bld, 3));
> > +         }
> > +
> > +         /* If we are loading double data and we need a second read message
> > +          * adjust the write offset
> > +          */
> > +         if (num_iterations > 1) {
> > +            num_components = instr->num_components - 2;
> > +            if (indirect_offset.file == BAD_FILE) {
> > +               imm_offset++;
> > +            } else {
> > +               bld.ADD(indirect_offset, indirect_offset, brw_imm_ud(1u));
> 
> I don't think this is safe - indirect_offset is some register containing
> the result of a NIR expression.  That expression might be used
> elsewhere.
> 
> We could fix this by doing:
> 
>    fs_reg new_indirect = bld.vgrf(BRW_REGISTER_TYPE_UD, 1);
>    bld.ADD(new_indirect, indirect_offset, brw_imm_ud(1u));
>    indirect_offset = new_indirect;

Ah, good catch, yes. I'll fix this.

> > +            }
> > +         }
> >        }
> >        break;
> >     }
> > 
> 
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev




More information about the mesa-dev mailing list