[Mesa-dev] [PATCH v3 19/24] i965/vec4: fix SIMD-with lowering for CMP/MOV instructions with conditional modifiers

Samuel Iglesias Gonsálvez siglesias at igalia.com
Fri Mar 17 11:09:42 UTC 2017


Please ignore this patch. I have found what was wrong with CMP/MOV
instructions with conditional modifiers and it is not HW related.

I plan to do more testing of the good solution and send a v4 on Monday
for review, which will include all the fixes suggested by Curro.

Thanks,

Sam 

On Tue, 2017-02-14 at 14:01 +0100, Samuel Iglesias Gonsálvez wrote:
> From: "Juan A. Suarez Romero" <jasuarez at igalia.com>
> 
> When splitting a CMP/MOV instruction with NULL dest, DF sources, and
> conditional modifier; we can't use directly the flag registers, as
> they will
> have the wrong results in IVB/BYT after the scalarization.
> 
> Rather, we need to store the result in a temporary register, and then
> use
> that register to set proper the flags values.
> 
> If a MOV has a null destination register and a conditional modifier,
> it
> can be replaced with a CMP against zero with the same conditional
> modifier. By doing this replacement, we can do the SIMD lowering
> without any problem.
> 
> v2:
> - Fix typo (Matt)
> 
> Signed-off-by: Samuel Iglesias Gonsálvez <siglesias at igalia.com>
> Signed-off-by: Juan A. Suarez Romero <jasuarez at igalia.com
> ---
>  src/mesa/drivers/dri/i965/brw_vec4.cpp | 80
> +++++++++++++++++++++++++++++++---
>  1 file changed, 74 insertions(+), 6 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp
> b/src/mesa/drivers/dri/i965/brw_vec4.cpp
> index adcde085305..819674e8cb9 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
> @@ -2177,6 +2177,46 @@ vec4_visitor::lower_simd_width()
>         * value of the instruction's dst.
>         */
>        bool needs_temp = dst_src_regions_overlap(inst);
> +
> +      /* When splitting instructions with conditional modifiers and
> NULL
> +       * dest we can't rely directly on the flags to store the
> result. Rather,
> +       * we need first to enqueue the result in a temporary
> register, and then
> +       * move those values into flags.
> +       */
> +      bool inst_df_dst_null =
> +         inst->dst.is_null() && get_exec_type_size(inst) == 8 &&
> +         inst->conditional_mod != BRW_CONDITIONAL_NONE;
> +
> +      if (inst_df_dst_null) {
> +         /* If there are other DF instructions with NULL
> destination,
> +          * we need to verify if we can use the temporary register
> or
> +          * if we need an extra lowering step.
> +          */
> +         assert(inst->opcode == BRW_OPCODE_MOV ||
> +                inst->opcode == BRW_OPCODE_CMP);
> +
> +         /* Replace MOV.XX with null destination with the equivalent
> CMP.XX
> +          * with null destination, so we can lower it as explained
> before.
> +          */
> +         if (inst->opcode == BRW_OPCODE_MOV) {
> +            vec4_instruction *cmp =
> +               new(mem_ctx) vec4_instruction(BRW_OPCODE_CMP,
> dst_null_df(),
> +                                             inst->src[0],
> +                                             setup_imm_df(0.0,
> block, inst));
> +            cmp->conditional_mod = inst->conditional_mod;
> +            cmp->exec_size = inst->exec_size;
> +            cmp->group = inst->group;
> +            cmp->size_written = inst->size_written;
> +            inst->insert_before(block, cmp);
> +            inst->remove(block);
> +            inst = cmp;
> +         }
> +      }
> +      dst_reg inst_dst;
> +      if (inst_df_dst_null)
> +         inst_dst =
> +            retype(dst_reg(VGRF, alloc.allocate(1)),
> BRW_REGISTER_TYPE_F);
> +
>        for (unsigned n = 0; n < inst->exec_size / lowered_width;
> n++)  {
>           unsigned channel_offset = lowered_width * n;
>  
> @@ -2199,7 +2239,7 @@ vec4_visitor::lower_simd_width()
>           bool d2f_pass = (inst->opcode == VEC4_OPCODE_FROM_DOUBLE &&
> n > 0);
>           /* Compute split dst region */
>           dst_reg dst;
> -         if (needs_temp || d2f_pass) {
> +         if (needs_temp || d2f_pass || inst_df_dst_null) {
>              unsigned num_regs = DIV_ROUND_UP(size_written,
> REG_SIZE);
>              dst = retype(dst_reg(VGRF, alloc.allocate(num_regs)),
>                           inst->dst.type);
> @@ -2229,24 +2269,52 @@ vec4_visitor::lower_simd_width()
>  
>           inst->insert_before(block, linst);
>  
> +         dst_reg d2f_dst;
> +         if (inst_df_dst_null) {
> +            unsigned num_regs = DIV_ROUND_UP(lowered_width,
> type_sz(BRW_REGISTER_TYPE_F));
> +            d2f_dst = retype(dst_reg(VGRF,
> alloc.allocate(num_regs)), BRW_REGISTER_TYPE_F);
> +            vec4_instruction *d2f = new(mem_ctx)
> vec4_instruction(VEC4_OPCODE_FROM_DOUBLE, d2f_dst, src_reg(dst));
> +            d2f->group = channel_offset;
> +            d2f->exec_size = lowered_width;
> +            d2f->size_written = lowered_width *
> type_sz(d2f_dst.type);
> +            d2f->predicate = inst->predicate;
> +            inst->insert_before(block, d2f);
> +         }
> +
>           /* If we used a temporary to store the result of the split
>            * instruction, copy the result to the original destination
>            */
> -         if (needs_temp || d2f_pass) {
> +         if (needs_temp || d2f_pass || inst_df_dst_null) {
>              vec4_instruction *mov;
> -            if (d2f_pass)
> +            if (d2f_pass) {
>                 mov = MOV(horiz_offset(inst->dst, n * type_sz(inst-
> >dst.type)), src_reg(dst));
> -            else
> +               mov->size_written = size_written;
> +            } else if (inst_df_dst_null) {
> +               mov = MOV(horiz_offset(inst_dst, n * 4),
> src_reg(d2f_dst));
> +               mov->size_written = lowered_width *
> type_sz(inst_dst.type);
> +            } else {
>                 mov = MOV(offset(inst->dst, lowered_width, n),
> src_reg(dst));
> +               mov->size_written = size_written;
> +            }
>              mov->exec_size = lowered_width;
>              mov->group = channel_offset;
> -            mov->size_written = size_written;
>              mov->predicate = inst->predicate;
>              inst->insert_before(block, mov);
>           }
>        }
>  
> -      inst->remove(block);
> +      /* For CMP.XX instruction, we need to set the flags correctly.
> We do
> +       * this by comparing the register that has the results against
> 0.0,
> +       * getting the values in the flags.
> +       */
> +      if (inst_df_dst_null) {
> +         inst->dst.type = BRW_REGISTER_TYPE_F;
> +         inst->src[0] = src_reg(inst_dst);
> +         inst->src[1] = brw_imm_f(0.0f);
> +         inst->conditional_mod = BRW_CONDITIONAL_NZ;
> +      } else {
> +         inst->remove(block);
> +      }
>        progress = true;
>     }
>  
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 833 bytes
Desc: This is a digitally signed message part
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20170317/a2fab67a/attachment.sig>


More information about the mesa-dev mailing list