[Mesa-stable] [Mesa-dev] [PATCH 02/10] intel/fs: Implement quad swizzles on ICL+.
Francisco Jerez
currojerez at riseup.net
Mon Jan 7 19:58:07 UTC 2019
Iago Toral <itoral at igalia.com> writes:
> On Sat, 2018-12-29 at 12:38 -0800, Francisco Jerez wrote:
>> Align16 is no longer a thing, so a new implementation is provided
>> using Align1 instead. Not all possible swizzles can be represented
>> as
>> a single Align1 region, but some fast paths are provided for
>> frequently used swizzles that can be represented efficiently in
>> Align1
>> mode.
>>
>> Fixes ~90 subgroup quad swap Vulkan CTS tests.
>>
>> Cc: mesa-stable at lists.freedesktop.org
>> ---
>> src/intel/compiler/brw_fs.cpp | 25 +++++++-
>> src/intel/compiler/brw_fs.h | 4 ++
>> src/intel/compiler/brw_fs_generator.cpp | 82 ++++++++++++++++++++---
>> --
>> 3 files changed, 93 insertions(+), 18 deletions(-)
>>
>> diff --git a/src/intel/compiler/brw_fs.cpp
>> b/src/intel/compiler/brw_fs.cpp
>> index 2f0f0151219..97544fdf465 100644
>> --- a/src/intel/compiler/brw_fs.cpp
>> +++ b/src/intel/compiler/brw_fs.cpp
>> @@ -315,6 +315,20 @@ fs_inst::has_source_and_destination_hazard()
>> const
>> * may stomp all over it.
>> */
>> return true;
>> + case SHADER_OPCODE_QUAD_SWIZZLE:
>> + switch (src[1].ud) {
>
> Maybe it is worth adding a small comment here indicating that these are
> the cases where we implement the opcode as a single instruction and
> refer to the generator for details?
>
Yeah, fixed up locally.
>> + case BRW_SWIZZLE_XXXX:
>> + case BRW_SWIZZLE_YYYY:
>> + case BRW_SWIZZLE_ZZZZ:
>> + case BRW_SWIZZLE_WWWW:
>> + case BRW_SWIZZLE_XXZZ:
>> + case BRW_SWIZZLE_YYWW:
>> + case BRW_SWIZZLE_XYXY:
>> + case BRW_SWIZZLE_ZWZW:
>> + return false;
>> + default:
>> + return !is_uniform(src[0]);
>
> Shouldn't this be:
>
> return !is_uniform(src[0]) ||
> (devinfo->gen < 11 && type_sz(src.type) == 4);
>
> Since in that case we also implement the opcode with a single ALIGN16
> instruction.
>
Not really. Maybe you mean "!is_uniform(src[0]) &&
(devinfo->gen >= 11 || type_sz(src.type) != 4)" instead? That would be
somewhat more accurate than the expression in my patch, but
unfortunately the devinfo pointer is not available here. I wouldn't
mind plumbing it through but patch is meant for mesa-stable, and it
shouldn't affect correctness to be more strict than necessary regarding
source/destination hazards.
>> + }
>> default:
>> /* The SIMD16 compressed instruction
>> *
>> @@ -5579,9 +5593,14 @@ get_lowered_simd_width(const struct
>> gen_device_info *devinfo,
>> case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT:
>> return MIN2(8, inst->exec_size);
>>
>> - case SHADER_OPCODE_QUAD_SWIZZLE:
>> - return 8;
>> -
>> + case SHADER_OPCODE_QUAD_SWIZZLE: {
>> + const unsigned swiz = inst->src[1].ud;
>> + return (is_uniform(inst->src[0]) ?
>> + get_fpu_lowered_simd_width(devinfo, inst) :
>> + devinfo->gen < 11 && type_sz(inst->src[0].type) == 4 ?
>> 8 :
>> + swiz == BRW_SWIZZLE_XYXY || swiz == BRW_SWIZZLE_ZWZW ?
>> 4 :
>> + get_fpu_lowered_simd_width(devinfo, inst));
>> + }
>> case SHADER_OPCODE_MOV_INDIRECT: {
>> /* From IVB and HSW PRMs:
>> *
>> diff --git a/src/intel/compiler/brw_fs.h
>> b/src/intel/compiler/brw_fs.h
>> index 53d9b6ce7bf..dc36ecc21ac 100644
>> --- a/src/intel/compiler/brw_fs.h
>> +++ b/src/intel/compiler/brw_fs.h
>> @@ -480,6 +480,10 @@ private:
>> struct brw_reg src,
>> struct brw_reg idx);
>>
>> + void generate_quad_swizzle(const fs_inst *inst,
>> + struct brw_reg dst, struct brw_reg
>> src,
>> + unsigned swiz);
>> +
>> bool patch_discard_jumps_to_fb_writes();
>>
>> const struct brw_compiler *compiler;
>> diff --git a/src/intel/compiler/brw_fs_generator.cpp
>> b/src/intel/compiler/brw_fs_generator.cpp
>> index 08dd83dded7..84627e83132 100644
>> --- a/src/intel/compiler/brw_fs_generator.cpp
>> +++ b/src/intel/compiler/brw_fs_generator.cpp
>> @@ -582,6 +582,72 @@ fs_generator::generate_shuffle(fs_inst *inst,
>> }
>> }
>>
>> +void
>> +fs_generator::generate_quad_swizzle(const fs_inst *inst,
>> + struct brw_reg dst, struct
>> brw_reg src,
>> + unsigned swiz)
>> +{
>> + /* Requires a quad. */
>> + assert(inst->exec_size >= 4);
>> +
>> + if (src.file == BRW_IMMEDIATE_VALUE ||
>> + has_scalar_region(src)) {
>> + /* The value is uniform across all channels */
>> + brw_MOV(p, dst, src);
>> +
>> + } else if (devinfo->gen < 11 && type_sz(src.type) == 4) {
>> + /* This only works on 8-wide 32-bit values */
>> + assert(inst->exec_size == 8);
>> + assert(src.hstride == BRW_HORIZONTAL_STRIDE_1);
>> + assert(src.vstride == src.width + 1);
>> + brw_set_default_access_mode(p, BRW_ALIGN_16);
>> + struct brw_reg swiz_src = stride(src, 4, 4, 1);
>> + swiz_src.swizzle = swiz;
>> + brw_MOV(p, dst, swiz_src);
>> +
>
> Extra blank line.
>
>> + } else {
>> + assert(src.hstride == BRW_HORIZONTAL_STRIDE_1);
>> + assert(src.vstride == src.width + 1);
>> + const struct brw_reg src_0 = suboffset(src, BRW_GET_SWZ(swiz,
>> 0));
>> +
>> + switch (swiz) {
>> + case BRW_SWIZZLE_XXXX:
>> + case BRW_SWIZZLE_YYYY:
>> + case BRW_SWIZZLE_ZZZZ:
>> + case BRW_SWIZZLE_WWWW:
>> + brw_MOV(p, dst, stride(src_0, 4, 4, 0));
>> + break;
>> +
>> + case BRW_SWIZZLE_XXZZ:
>> + case BRW_SWIZZLE_YYWW:
>> + brw_MOV(p, dst, stride(src_0, 2, 2, 0));
>> + break;
>> +
>> + case BRW_SWIZZLE_XYXY:
>> + case BRW_SWIZZLE_ZWZW:
>> + assert(inst->exec_size == 4);
>> + brw_MOV(p, dst, stride(src_0, 0, 2, 1));
>> + break;
>> +
>> + default:
>> + assert(inst->force_writemask_all);
>> + brw_set_default_exec_size(p, cvt(inst->exec_size / 4) - 1);
>> +
>> + for (unsigned c = 0; c < 4; c++) {
>> + brw_inst *insn = brw_MOV(
>> + p, stride(suboffset(dst, c),
>> + 4 * inst->dst.stride, 1, 4 * inst-
>> >dst.stride),
>> + stride(suboffset(src, BRW_GET_SWZ(swiz, c)), 4, 1,
>> 0));
>> +
>> + brw_inst_set_no_dd_clear(devinfo, insn, c < 3);
>> + brw_inst_set_no_dd_check(devinfo, insn, c > 0);
>> + }
>> +
>> + break;
>> + }
>> + }
>> +}
>> +
>> void
>> fs_generator::generate_urb_read(fs_inst *inst,
>> struct brw_reg dst,
>> @@ -2303,23 +2369,9 @@ fs_generator::generate_code(const cfg_t *cfg,
>> int dispatch_width)
>> break;
>>
>> case SHADER_OPCODE_QUAD_SWIZZLE:
>> - /* This only works on 8-wide 32-bit values */
>> - assert(inst->exec_size == 8);
>> - assert(type_sz(src[0].type) == 4);
>> - assert(inst->force_writemask_all);
>> assert(src[1].file == BRW_IMMEDIATE_VALUE);
>> assert(src[1].type == BRW_REGISTER_TYPE_UD);
>> -
>> - if (src[0].file == BRW_IMMEDIATE_VALUE ||
>> - (src[0].vstride == 0 && src[0].hstride == 0)) {
>> - /* The value is uniform across all channels */
>> - brw_MOV(p, dst, src[0]);
>> - } else {
>> - brw_set_default_access_mode(p, BRW_ALIGN_16);
>> - struct brw_reg swiz_src = stride(src[0], 4, 4, 1);
>> - swiz_src.swizzle = inst->src[1].ud;
>> - brw_MOV(p, dst, swiz_src);
>> - }
>> + generate_quad_swizzle(inst, dst, src[0], src[1].ud);
>> break;
>>
>> case SHADER_OPCODE_CLUSTER_BROADCAST: {
>
> _______________________________________________
> mesa-stable mailing list
> mesa-stable at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-stable
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 227 bytes
Desc: not available
URL: <https://lists.freedesktop.org/archives/mesa-stable/attachments/20190107/408f5ef4/attachment.sig>
More information about the mesa-stable
mailing list