[Mesa-dev] [PATCH v3 19/24] i965/vec4: fix SIMD-with lowering for CMP/MOV instructions with conditional modifiers
Samuel Iglesias Gonsálvez
siglesias at igalia.com
Fri Mar 17 11:09:42 UTC 2017
Please ignore this patch. I have found what was wrong with CMP/MOV
instructions with conditional modifiers and it is not HW related.
I plan to do more testing of the good solution and send a v4 on Monday
for review, which will include all the fixes suggested by Curro.
Thanks,
Sam
On Tue, 2017-02-14 at 14:01 +0100, Samuel Iglesias Gonsálvez wrote:
> From: "Juan A. Suarez Romero" <jasuarez at igalia.com>
>
> When splitting a CMP/MOV instruction with NULL dest, DF sources, and
> conditional modifier; we can't use directly the flag registers, as
> they will
> have the wrong results in IVB/BYT after the scalarization.
>
> Rather, we need to store the result in a temporary register, and then
> use
> that register to set proper the flags values.
>
> If a MOV has a null destination register and a conditional modifier,
> it
> can be replaced with a CMP against zero with the same conditional
> modifier. By doing this replacement, we can do the SIMD lowering
> without any problem.
>
> v2:
> - Fix typo (Matt)
>
> Signed-off-by: Samuel Iglesias Gonsálvez <siglesias at igalia.com>
> Signed-off-by: Juan A. Suarez Romero <jasuarez at igalia.com
> ---
> src/mesa/drivers/dri/i965/brw_vec4.cpp | 80
> +++++++++++++++++++++++++++++++---
> 1 file changed, 74 insertions(+), 6 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp
> b/src/mesa/drivers/dri/i965/brw_vec4.cpp
> index adcde085305..819674e8cb9 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
> @@ -2177,6 +2177,46 @@ vec4_visitor::lower_simd_width()
> * value of the instruction's dst.
> */
> bool needs_temp = dst_src_regions_overlap(inst);
> +
> + /* When splitting instructions with conditional modifiers and
> NULL
> + * dest we can't rely directly on the flags to store the
> result. Rather,
> + * we need first to enqueue the result in a temporary
> register, and then
> + * move those values into flags.
> + */
> + bool inst_df_dst_null =
> + inst->dst.is_null() && get_exec_type_size(inst) == 8 &&
> + inst->conditional_mod != BRW_CONDITIONAL_NONE;
> +
> + if (inst_df_dst_null) {
> + /* If there are other DF instructions with NULL
> destination,
> + * we need to verify if we can use the temporary register
> or
> + * if we need an extra lowering step.
> + */
> + assert(inst->opcode == BRW_OPCODE_MOV ||
> + inst->opcode == BRW_OPCODE_CMP);
> +
> + /* Replace MOV.XX with null destination with the equivalent
> CMP.XX
> + * with null destination, so we can lower it as explained
> before.
> + */
> + if (inst->opcode == BRW_OPCODE_MOV) {
> + vec4_instruction *cmp =
> + new(mem_ctx) vec4_instruction(BRW_OPCODE_CMP,
> dst_null_df(),
> + inst->src[0],
> + setup_imm_df(0.0,
> block, inst));
> + cmp->conditional_mod = inst->conditional_mod;
> + cmp->exec_size = inst->exec_size;
> + cmp->group = inst->group;
> + cmp->size_written = inst->size_written;
> + inst->insert_before(block, cmp);
> + inst->remove(block);
> + inst = cmp;
> + }
> + }
> + dst_reg inst_dst;
> + if (inst_df_dst_null)
> + inst_dst =
> + retype(dst_reg(VGRF, alloc.allocate(1)),
> BRW_REGISTER_TYPE_F);
> +
> for (unsigned n = 0; n < inst->exec_size / lowered_width;
> n++) {
> unsigned channel_offset = lowered_width * n;
>
> @@ -2199,7 +2239,7 @@ vec4_visitor::lower_simd_width()
> bool d2f_pass = (inst->opcode == VEC4_OPCODE_FROM_DOUBLE &&
> n > 0);
> /* Compute split dst region */
> dst_reg dst;
> - if (needs_temp || d2f_pass) {
> + if (needs_temp || d2f_pass || inst_df_dst_null) {
> unsigned num_regs = DIV_ROUND_UP(size_written,
> REG_SIZE);
> dst = retype(dst_reg(VGRF, alloc.allocate(num_regs)),
> inst->dst.type);
> @@ -2229,24 +2269,52 @@ vec4_visitor::lower_simd_width()
>
> inst->insert_before(block, linst);
>
> + dst_reg d2f_dst;
> + if (inst_df_dst_null) {
> + unsigned num_regs = DIV_ROUND_UP(lowered_width,
> type_sz(BRW_REGISTER_TYPE_F));
> + d2f_dst = retype(dst_reg(VGRF,
> alloc.allocate(num_regs)), BRW_REGISTER_TYPE_F);
> + vec4_instruction *d2f = new(mem_ctx)
> vec4_instruction(VEC4_OPCODE_FROM_DOUBLE, d2f_dst, src_reg(dst));
> + d2f->group = channel_offset;
> + d2f->exec_size = lowered_width;
> + d2f->size_written = lowered_width *
> type_sz(d2f_dst.type);
> + d2f->predicate = inst->predicate;
> + inst->insert_before(block, d2f);
> + }
> +
> /* If we used a temporary to store the result of the split
> * instruction, copy the result to the original destination
> */
> - if (needs_temp || d2f_pass) {
> + if (needs_temp || d2f_pass || inst_df_dst_null) {
> vec4_instruction *mov;
> - if (d2f_pass)
> + if (d2f_pass) {
> mov = MOV(horiz_offset(inst->dst, n * type_sz(inst-
> >dst.type)), src_reg(dst));
> - else
> + mov->size_written = size_written;
> + } else if (inst_df_dst_null) {
> + mov = MOV(horiz_offset(inst_dst, n * 4),
> src_reg(d2f_dst));
> + mov->size_written = lowered_width *
> type_sz(inst_dst.type);
> + } else {
> mov = MOV(offset(inst->dst, lowered_width, n),
> src_reg(dst));
> + mov->size_written = size_written;
> + }
> mov->exec_size = lowered_width;
> mov->group = channel_offset;
> - mov->size_written = size_written;
> mov->predicate = inst->predicate;
> inst->insert_before(block, mov);
> }
> }
>
> - inst->remove(block);
> + /* For CMP.XX instruction, we need to set the flags correctly.
> We do
> + * this by comparing the register that has the results against
> 0.0,
> + * getting the values in the flags.
> + */
> + if (inst_df_dst_null) {
> + inst->dst.type = BRW_REGISTER_TYPE_F;
> + inst->src[0] = src_reg(inst_dst);
> + inst->src[1] = brw_imm_f(0.0f);
> + inst->conditional_mod = BRW_CONDITIONAL_NZ;
> + } else {
> + inst->remove(block);
> + }
> progress = true;
> }
>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 833 bytes
Desc: This is a digitally signed message part
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20170317/a2fab67a/attachment.sig>
More information about the mesa-dev
mailing list