[Mesa-stable] [Mesa-dev] [PATCH 02/10] intel/fs: Implement quad swizzles on ICL+.
Iago Toral
itoral at igalia.com
Tue Jan 8 07:00:23 UTC 2019
On Mon, 2019-01-07 at 11:58 -0800, Francisco Jerez wrote:
> Iago Toral <itoral at igalia.com> writes:
>
> > On Sat, 2018-12-29 at 12:38 -0800, Francisco Jerez wrote:
> > > Align16 is no longer a thing, so a new implementation is provided
> > > using Align1 instead. Not all possible swizzles can be
> > > represented
> > > as
> > > a single Align1 region, but some fast paths are provided for
> > > frequently used swizzles that can be represented efficiently in
> > > Align1
> > > mode.
> > >
> > > Fixes ~90 subgroup quad swap Vulkan CTS tests.
> > >
> > > Cc: mesa-stable at lists.freedesktop.org
> > > ---
> > > src/intel/compiler/brw_fs.cpp | 25 +++++++-
> > > src/intel/compiler/brw_fs.h | 4 ++
> > > src/intel/compiler/brw_fs_generator.cpp | 82
> > > ++++++++++++++++++++---
> > > --
> > > 3 files changed, 93 insertions(+), 18 deletions(-)
> > >
> > > diff --git a/src/intel/compiler/brw_fs.cpp
> > > b/src/intel/compiler/brw_fs.cpp
> > > index 2f0f0151219..97544fdf465 100644
> > > --- a/src/intel/compiler/brw_fs.cpp
> > > +++ b/src/intel/compiler/brw_fs.cpp
> > > @@ -315,6 +315,20 @@ fs_inst::has_source_and_destination_hazard()
> > > const
> > > * may stomp all over it.
> > > */
> > > return true;
> > > + case SHADER_OPCODE_QUAD_SWIZZLE:
> > > + switch (src[1].ud) {
> >
> > Maybe it is worth adding a small comment here indicating that these
> > are
> > the cases where we implement the opcode as a single instruction and
> > refer to the generator for details?
> >
>
> Yeah, fixed up locally.
>
> > > + case BRW_SWIZZLE_XXXX:
> > > + case BRW_SWIZZLE_YYYY:
> > > + case BRW_SWIZZLE_ZZZZ:
> > > + case BRW_SWIZZLE_WWWW:
> > > + case BRW_SWIZZLE_XXZZ:
> > > + case BRW_SWIZZLE_YYWW:
> > > + case BRW_SWIZZLE_XYXY:
> > > + case BRW_SWIZZLE_ZWZW:
> > > + return false;
> > > + default:
> > > + return !is_uniform(src[0]);
> >
> > Shouldn't this be:
> >
> > return !is_uniform(src[0]) ||
> > (devinfo->gen < 11 && type_sz(src.type) == 4);
> >
> > Since in that case we also implement the opcode with a single
> > ALIGN16
> > instruction.
> >
>
> Not really. Maybe you mean "!is_uniform(src[0]) &&
> (devinfo->gen >= 11 || type_sz(src.type) != 4)" instead?
Ah yes, that's what I meant.
> That would be
> somewhat more accurate than the expression in my patch, but
> unfortunately the devinfo pointer is not available here. I wouldn't
> mind plumbing it through but patch is meant for mesa-stable, and it
> shouldn't affect correctness to be more strict than necessary
> regarding
> source/destination hazards.
Right, I didn't realize we didn't have devinfo available here. I guess
it is fine, the only drawback is that we might add a little bit more of
register pressure in (probably very few) cases in those gens, but that
is a very small issue I guess and it can be improved in a future patch
if we want.
> > > + }
> > > default:
> > > /* The SIMD16 compressed instruction
> > > *
> > > @@ -5579,9 +5593,14 @@ get_lowered_simd_width(const struct
> > > gen_device_info *devinfo,
> > > case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT:
> > > return MIN2(8, inst->exec_size);
> > >
> > > - case SHADER_OPCODE_QUAD_SWIZZLE:
> > > - return 8;
> > > -
> > > + case SHADER_OPCODE_QUAD_SWIZZLE: {
> > > + const unsigned swiz = inst->src[1].ud;
> > > + return (is_uniform(inst->src[0]) ?
> > > + get_fpu_lowered_simd_width(devinfo, inst) :
> > > + devinfo->gen < 11 && type_sz(inst->src[0].type) ==
> > > 4 ?
> > > 8 :
> > > + swiz == BRW_SWIZZLE_XYXY || swiz ==
> > > BRW_SWIZZLE_ZWZW ?
> > > 4 :
> > > + get_fpu_lowered_simd_width(devinfo, inst));
> > > + }
> > > case SHADER_OPCODE_MOV_INDIRECT: {
> > > /* From IVB and HSW PRMs:
> > > *
> > > diff --git a/src/intel/compiler/brw_fs.h
> > > b/src/intel/compiler/brw_fs.h
> > > index 53d9b6ce7bf..dc36ecc21ac 100644
> > > --- a/src/intel/compiler/brw_fs.h
> > > +++ b/src/intel/compiler/brw_fs.h
> > > @@ -480,6 +480,10 @@ private:
> > > struct brw_reg src,
> > > struct brw_reg idx);
> > >
> > > + void generate_quad_swizzle(const fs_inst *inst,
> > > + struct brw_reg dst, struct brw_reg
> > > src,
> > > + unsigned swiz);
> > > +
> > > bool patch_discard_jumps_to_fb_writes();
> > >
> > > const struct brw_compiler *compiler;
> > > diff --git a/src/intel/compiler/brw_fs_generator.cpp
> > > b/src/intel/compiler/brw_fs_generator.cpp
> > > index 08dd83dded7..84627e83132 100644
> > > --- a/src/intel/compiler/brw_fs_generator.cpp
> > > +++ b/src/intel/compiler/brw_fs_generator.cpp
> > > @@ -582,6 +582,72 @@ fs_generator::generate_shuffle(fs_inst
> > > *inst,
> > > }
> > > }
> > >
> > > +void
> > > +fs_generator::generate_quad_swizzle(const fs_inst *inst,
> > > + struct brw_reg dst, struct
> > > brw_reg src,
> > > + unsigned swiz)
> > > +{
> > > + /* Requires a quad. */
> > > + assert(inst->exec_size >= 4);
> > > +
> > > + if (src.file == BRW_IMMEDIATE_VALUE ||
> > > + has_scalar_region(src)) {
> > > + /* The value is uniform across all channels */
> > > + brw_MOV(p, dst, src);
> > > +
> > > + } else if (devinfo->gen < 11 && type_sz(src.type) == 4) {
> > > + /* This only works on 8-wide 32-bit values */
> > > + assert(inst->exec_size == 8);
> > > + assert(src.hstride == BRW_HORIZONTAL_STRIDE_1);
> > > + assert(src.vstride == src.width + 1);
> > > + brw_set_default_access_mode(p, BRW_ALIGN_16);
> > > + struct brw_reg swiz_src = stride(src, 4, 4, 1);
> > > + swiz_src.swizzle = swiz;
> > > + brw_MOV(p, dst, swiz_src);
> > > +
> >
> > Extra blank line.
> >
> > > + } else {
> > > + assert(src.hstride == BRW_HORIZONTAL_STRIDE_1);
> > > + assert(src.vstride == src.width + 1);
> > > + const struct brw_reg src_0 = suboffset(src,
> > > BRW_GET_SWZ(swiz,
> > > 0));
> > > +
> > > + switch (swiz) {
> > > + case BRW_SWIZZLE_XXXX:
> > > + case BRW_SWIZZLE_YYYY:
> > > + case BRW_SWIZZLE_ZZZZ:
> > > + case BRW_SWIZZLE_WWWW:
> > > + brw_MOV(p, dst, stride(src_0, 4, 4, 0));
> > > + break;
> > > +
> > > + case BRW_SWIZZLE_XXZZ:
> > > + case BRW_SWIZZLE_YYWW:
> > > + brw_MOV(p, dst, stride(src_0, 2, 2, 0));
> > > + break;
> > > +
> > > + case BRW_SWIZZLE_XYXY:
> > > + case BRW_SWIZZLE_ZWZW:
> > > + assert(inst->exec_size == 4);
> > > + brw_MOV(p, dst, stride(src_0, 0, 2, 1));
> > > + break;
> > > +
> > > + default:
> > > + assert(inst->force_writemask_all);
> > > + brw_set_default_exec_size(p, cvt(inst->exec_size / 4) -
> > > 1);
> > > +
> > > + for (unsigned c = 0; c < 4; c++) {
> > > + brw_inst *insn = brw_MOV(
> > > + p, stride(suboffset(dst, c),
> > > + 4 * inst->dst.stride, 1, 4 * inst-
> > > > dst.stride),
> > >
> > > + stride(suboffset(src, BRW_GET_SWZ(swiz, c)), 4,
> > > 1,
> > > 0));
> > > +
> > > + brw_inst_set_no_dd_clear(devinfo, insn, c < 3);
> > > + brw_inst_set_no_dd_check(devinfo, insn, c > 0);
> > > + }
> > > +
> > > + break;
> > > + }
> > > + }
> > > +}
> > > +
> > > void
> > > fs_generator::generate_urb_read(fs_inst *inst,
> > > struct brw_reg dst,
> > > @@ -2303,23 +2369,9 @@ fs_generator::generate_code(const cfg_t
> > > *cfg,
> > > int dispatch_width)
> > > break;
> > >
> > > case SHADER_OPCODE_QUAD_SWIZZLE:
> > > - /* This only works on 8-wide 32-bit values */
> > > - assert(inst->exec_size == 8);
> > > - assert(type_sz(src[0].type) == 4);
> > > - assert(inst->force_writemask_all);
> > > assert(src[1].file == BRW_IMMEDIATE_VALUE);
> > > assert(src[1].type == BRW_REGISTER_TYPE_UD);
> > > -
> > > - if (src[0].file == BRW_IMMEDIATE_VALUE ||
> > > - (src[0].vstride == 0 && src[0].hstride == 0)) {
> > > - /* The value is uniform across all channels */
> > > - brw_MOV(p, dst, src[0]);
> > > - } else {
> > > - brw_set_default_access_mode(p, BRW_ALIGN_16);
> > > - struct brw_reg swiz_src = stride(src[0], 4, 4, 1);
> > > - swiz_src.swizzle = inst->src[1].ud;
> > > - brw_MOV(p, dst, swiz_src);
> > > - }
> > > + generate_quad_swizzle(inst, dst, src[0], src[1].ud);
> > > break;
> > >
> > > case SHADER_OPCODE_CLUSTER_BROADCAST: {
> >
> > _______________________________________________
> > mesa-stable mailing list
> > mesa-stable at lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/mesa-stable
More information about the mesa-stable
mailing list