[Mesa-dev] [PATCH v2 25/30] i965/tcs/scalar: fix load input for doubles
Kenneth Graunke
kenneth at whitecape.org
Thu May 12 18:42:25 UTC 2016
On Thursday, May 12, 2016 1:36:02 PM PDT Samuel Iglesias Gonsálvez wrote:
> From: Iago Toral Quiroga <itoral at igalia.com>
>
> ---
> src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 96 ++++++++++++++++++++++
+---------
> 1 file changed, 71 insertions(+), 25 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/
dri/i965/brw_fs_nir.cpp
> index bf6375b..87ec7db 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> @@ -2269,34 +2269,80 @@ fs_visitor::nir_emit_tcs_intrinsic(const fs_builder
&bld,
> brw_imm_ud(4 * REG_SIZE));
> }
>
> - if (indirect_offset.file == BAD_FILE) {
> - /* Constant indexing - use global offset. */
> - inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8, dst, icp_handle);
> - inst->offset = imm_offset;
> - inst->mlen = 1;
> - inst->base_mrf = -1;
> - inst->regs_written = instr->num_components;
> - } else {
> - /* Indirect indexing - use per-slot offsets as well. */
> - const fs_reg srcs[] = { icp_handle, indirect_offset };
> - fs_reg payload = bld.vgrf(BRW_REGISTER_TYPE_UD, 2);
> - bld.LOAD_PAYLOAD(payload, srcs, ARRAY_SIZE(srcs), 0);
> + /* We can only read two double components with each URB read, so
> + * we send two read messages in that case, each one loading up to
> + * two double components.
> + */
> + unsigned num_iterations = 1;
> + unsigned num_components = instr->num_components;
> + fs_reg orig_dst = dst;
> + if (type_sz(dst.type) == 8) {
> + if (instr->num_components > 2) {
> + num_iterations = 2;
> + num_components = 2;
> + }
>
> - inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT, dst,
payload);
> - inst->offset = imm_offset;
> - inst->base_mrf = -1;
> - inst->mlen = 2;
> - inst->regs_written = instr->num_components;
> + fs_reg tmp = fs_reg(VGRF, alloc.allocate(4), dst.type);
> + dst = tmp;
> }
>
> - /* Copy the temporary to the destination to deal with writemasking.
> - *
> - * Also attempt to deal with gl_PointSize being in the .w component.
> - */
> - if (inst->offset == 0 && indirect_offset.file == BAD_FILE) {
> - inst->dst = bld.vgrf(dst.type, 4);
> - inst->regs_written = 4;
> - bld.MOV(dst, offset(inst->dst, bld, 3));
> + for (unsigned iter = 0; iter < num_iterations; iter++) {
> + if (indirect_offset.file == BAD_FILE) {
> + /* Constant indexing - use global offset. */
> + inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8, dst, icp_handle);
> + inst->offset = imm_offset;
> + inst->mlen = 1;
> + inst->base_mrf = -1;
> + } else {
> + /* Indirect indexing - use per-slot offsets as well. */
> + const fs_reg srcs[] = { icp_handle, indirect_offset };
> + fs_reg payload = bld.vgrf(BRW_REGISTER_TYPE_UD, 2);
> + bld.LOAD_PAYLOAD(payload, srcs, ARRAY_SIZE(srcs), 0);
> +
> + inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT, dst,
payload);
> + inst->offset = imm_offset;
> + inst->base_mrf = -1;
> + inst->mlen = 2;
> + }
> + inst->regs_written = num_components * type_sz(dst.type) / 4;
> +
> + /* If we are reading 64-bit data using 32-bit read messages we
need
> + * build proper 64-bit data elements by shuffling the low and high
> + * 32-bit components around like we do for other things like UBOs
> + * or SSBOs.
> + */
> + if (type_sz(dst.type) == 8) {
> + shuffle_32bit_load_result_to_64bit_data(
> + bld, dst, retype(dst, BRW_REGISTER_TYPE_F), num_components);
> +
> + for (unsigned c = 0; c < num_components; c++) {
> + bld.MOV(offset(orig_dst, bld, iter * 2 + c),
> + offset(dst, bld, c));
> + }
> + }
> +
> + /* Copy the temporary to the destination to deal with
writemasking.
> + *
> + * Also attempt to deal with gl_PointSize being in the .w
component.
> + */
> + if (inst->offset == 0 && indirect_offset.file == BAD_FILE) {
> + assert(type_sz(dst.type) < 8);
> + inst->dst = bld.vgrf(dst.type, 4);
> + inst->regs_written = 4;
> + bld.MOV(dst, offset(inst->dst, bld, 3));
> + }
> +
> + /* If we are loading double data and we need a second read message
> + * adjust the write offset
> + */
> + if (num_iterations > 1) {
> + num_components = instr->num_components - 2;
> + if (indirect_offset.file == BAD_FILE) {
> + imm_offset++;
> + } else {
> + bld.ADD(indirect_offset, indirect_offset, brw_imm_ud(1u));
I don't think this is safe - indirect_offset is some register containing
the result of a NIR expression. That expression might be used
elsewhere.
We could fix this by doing:
fs_reg new_indirect = bld.vgrf(BRW_REGISTER_TYPE_UD, 1);
bld.ADD(new_indirect, indirect_offset, brw_imm_ud(1u));
indirect_offset = new_indirect;
> + }
> + }
> }
> break;
> }
>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 819 bytes
Desc: This is a digitally signed message part.
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20160512/5e7cbf7b/attachment-0001.sig>
More information about the mesa-dev
mailing list