[Mesa-dev] [PATCH 86/95] i965/vec4/tes: fix input loading for 64bit data types

Iago Toral itoral at igalia.com
Tue Jul 26 09:24:18 UTC 2016


On Tue, 2016-07-19 at 12:41 +0200, Iago Toral Quiroga wrote:
> FIXME: We need to fix the case where not all the attributes fit
> in the push constant buffer

This FIXME note is obsolete and should be deleted.

> ---
>  src/mesa/drivers/dri/i965/brw_vec4_tes.cpp | 63 +++++++++++++++++++++++-------
>  1 file changed, 48 insertions(+), 15 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_tes.cpp b/src/mesa/drivers/dri/i965/brw_vec4_tes.cpp
> index 6639c86..8febc15 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4_tes.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4_tes.cpp
> @@ -180,6 +180,8 @@ vec4_tes_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
>        unsigned imm_offset = instr->const_index[0];
>        src_reg header = input_read_header;
>  
> +      bool is_64bit = nir_dest_bit_size(instr->dest) == 64;
> +
>        if (indirect_offset.file != BAD_FILE) {
>           header = src_reg(this, glsl_type::uvec4_type);
>           emit(TES_OPCODE_ADD_INDIRECT_URB_OFFSET, dst_reg(header),
> @@ -190,27 +192,58 @@ vec4_tes_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
>            */
>           const unsigned max_push_slots = 24;
>           if (imm_offset < max_push_slots) {
> -            emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D),
> -                     src_reg(ATTR, imm_offset, glsl_type::ivec4_type)));
> +            const brw_reg_type dst_reg_type =
> +               is_64bit ? BRW_REGISTER_TYPE_DF : BRW_REGISTER_TYPE_D;
> +            const glsl_type *src_glsl_type =
> +               is_64bit ? glsl_type::dvec4_type : glsl_type::ivec4_type;
> +            emit(MOV(get_nir_dest(instr->dest, dst_reg_type),
> +                     src_reg(ATTR, imm_offset, src_glsl_type)));
>              prog_data->urb_read_length =
>                 MAX2(prog_data->urb_read_length,
> -                    DIV_ROUND_UP(imm_offset + 1, 2));
> +                    DIV_ROUND_UP(imm_offset + (is_64bit ? 2 : 1), 2));
>              break;
>           }
>        }
>  
> -      dst_reg temp(this, glsl_type::ivec4_type);
> -      vec4_instruction *read =
> -         emit(VEC4_OPCODE_URB_READ, temp, src_reg(header));
> -      read->offset = imm_offset;
> -      read->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET;
> -
> -      /* Copy to target.  We might end up with some funky writemasks landing
> -       * in here, but we really don't want them in the above pseudo-ops.
> -       */
> -      dst_reg dst = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D);
> -      dst.writemask = brw_writemask_for_size(instr->num_components);
> -      emit(MOV(dst, src_reg(temp)));
> +      if (!is_64bit) {
> +         dst_reg temp(this, glsl_type::ivec4_type);
> +         vec4_instruction *read =
> +            emit(VEC4_OPCODE_URB_READ, temp, src_reg(header));
> +         read->offset = imm_offset;
> +         read->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET;
> +
> +         /* Copy to target.  We might end up with some funky writemasks landing
> +          * in here, but we really don't want them in the above pseudo-ops.
> +          */
> +         dst_reg dst = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D);
> +         dst.writemask = brw_writemask_for_size(instr->num_components);
> +         emit(MOV(dst, src_reg(temp)));
> +      } else {
> +         /* For 64-bit we need to load twice as many 32-bit components, and for
> +          * dvec3/4 we need to emit 2 URB Read messages
> +          */
> +         dst_reg temp(this, glsl_type::dvec4_type);
> +         dst_reg temp_d = retype(temp, BRW_REGISTER_TYPE_D);
> +
> +         vec4_instruction *read =
> +            emit(VEC4_OPCODE_URB_READ, temp_d, src_reg(header));
> +         read->offset = imm_offset;
> +         read->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET;
> +
> +         if (instr->num_components > 2) {
> +            read =
> +               emit(VEC4_OPCODE_URB_READ, offset(temp_d, 1), src_reg(header));
> +            read->offset = imm_offset + 1;
> +            read->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET;
> +         }
> +
> +         dst_reg shuffled(this, glsl_type::dvec4_type);
> +         shuffle_64bit_data(shuffled, src_reg(temp), false);
> +
> +         dst_reg dst = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_DF);
> +         dst.writemask = brw_writemask_for_size(instr->num_components);
> +         emit(MOV(dst, src_reg(shuffled)));
> +      }
>        break;
>     }
>     default:


More information about the mesa-dev mailing list