[Mesa-dev] [PATCH 21/21] i965/fs: Expose arbitrary channel execution groups to the IR.
Jason Ekstrand
jason at jlekstrand.net
Tue May 24 23:14:41 UTC 2016
On Tue, May 24, 2016 at 12:18 AM, Francisco Jerez <currojerez at riseup.net>
wrote:
> This generalizes the current fs_inst::force_sechalf flag to allow
> specifying channel enable groups other than 0 or 8. At some point it
> will likely make sense to fix the vec4 generator to support arbitrary
> execution groups and then move the definition of fs_inst::group into
> backend_instruction (e.g. so we can do FP64 in the VEC4 back-end).
> ---
> src/mesa/drivers/dri/i965/brw_fs.cpp | 20
> ++++++++------------
> src/mesa/drivers/dri/i965/brw_fs_builder.h | 14 +++++++++++---
> src/mesa/drivers/dri/i965/brw_fs_cse.cpp | 4 ++--
> src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 7 ++++---
> src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp | 2 +-
> src/mesa/drivers/dri/i965/brw_ir_fs.h | 20
> +++++++++-----------
> 6 files changed, 35 insertions(+), 32 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp
> b/src/mesa/drivers/dri/i965/brw_fs.cpp
> index a59cd3c..92caeaa 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
> @@ -3638,7 +3638,7 @@ fs_visitor::lower_integer_multiplication()
> mul->src[1].stride *= 2;
>
> } else if (devinfo->gen == 7 && !devinfo->is_haswell &&
> - inst->force_sechalf) {
> + inst->group > 0) {
> /* Among other things the quarter control bits influence which
> * accumulator register is used by the hardware for
> instructions
> * that access the accumulator implicitly (e.g. MACH). A
> @@ -3655,7 +3655,7 @@ fs_visitor::lower_integer_multiplication()
> * to get the result masked correctly according to the current
> * channel enables.
> */
> - mach->force_sechalf = false;
> + mach->group = 0;
> mach->force_writemask_all = true;
> mach->dst = ibld.vgrf(inst->dst.type);
> ibld.MOV(inst->dst, mach->dst);
> @@ -3791,8 +3791,8 @@ lower_fb_write_logical_send(const fs_builder &bld,
> fs_inst *inst,
> sample_mask.stride *= 2;
>
> bld.exec_all().annotate("FB write oMask")
> - .MOV(half(retype(sources[length], BRW_REGISTER_TYPE_UW),
> - inst->force_sechalf),
> + .MOV(horiz_offset(retype(sources[length], BRW_REGISTER_TYPE_UW),
> + inst->group),
> sample_mask);
> length++;
> }
> @@ -5017,10 +5017,10 @@ fs_visitor::lower_simd_width()
> * execution size of the builder to the highest of both for now
> so
> * we're sure that both cases can be handled.
> */
> + const unsigned max_width = MAX2(inst->exec_size, lower_width);
> const fs_builder ibld = bld.at(block, inst)
> .exec_all(inst->force_writemask_all)
> - .group(MAX2(inst->exec_size,
> lower_width),
> - inst->force_sechalf);
> + .group(max_width, inst->group /
> max_width);
>
> /* Split the copies in chunks of the execution width of either
> the
> * original or the lowered instruction, whichever is lower.
> @@ -5352,12 +5352,8 @@ fs_visitor::dump_instruction(backend_instruction
> *be_inst, FILE *file)
> if (inst->force_writemask_all)
> fprintf(file, "NoMask ");
>
> - if (dispatch_width == 16 && inst->exec_size == 8) {
> - if (inst->force_sechalf)
> - fprintf(file, "2ndhalf ");
> - else
> - fprintf(file, "1sthalf ");
> - }
> + if (inst->exec_size != dispatch_width)
> + fprintf(file, "group%d ", inst->group);
>
> fprintf(file, "\n");
> }
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_builder.h
> b/src/mesa/drivers/dri/i965/brw_fs_builder.h
> index b50dda4..c1d13a2 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_builder.h
> +++ b/src/mesa/drivers/dri/i965/brw_fs_builder.h
> @@ -72,7 +72,7 @@ namespace brw {
> fs_builder(backend_shader *shader, bblock_t *block, fs_inst *inst) :
> shader(shader), block(block), cursor(inst),
> _dispatch_width(inst->exec_size),
> - _group(inst->force_sechalf ? 8 : 0),
> + _group(inst->group),
> force_writemask_all(inst->force_writemask_all)
> {
> annotation.str = inst->annotation;
> @@ -168,6 +168,15 @@ namespace brw {
> }
>
> /**
> + * Get the channel group in use.
> + */
> + unsigned
> + group() const
> + {
> + return _group;
> + }
> +
> + /**
> * Allocate a virtual register of natural vector size (one for this
> IR)
> * and SIMD width. \p n gives the amount of space to allocate in
> * dispatch_width units (which is just enough space for one logical
> @@ -353,9 +362,8 @@ namespace brw {
> assert(inst->exec_size <= 32);
> assert(inst->exec_size == dispatch_width() ||
> force_writemask_all);
> - assert(_group == 0 || _group == 8);
>
> - inst->force_sechalf = (_group == 8);
> + inst->group = _group;
> inst->force_writemask_all = force_writemask_all;
> inst->annotation = annotation.str;
> inst->ir = annotation.ir;
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
> b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
> index 9c39106..159bf5d 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
> @@ -162,7 +162,7 @@ instructions_match(fs_inst *a, fs_inst *b, bool
> *negate)
> return a->opcode == b->opcode &&
> a->force_writemask_all == b->force_writemask_all &&
> a->exec_size == b->exec_size &&
> - a->force_sechalf == b->force_sechalf &&
> + a->group == b->group &&
> a->saturate == b->saturate &&
> a->predicate == b->predicate &&
> a->predicate_inverse == b->predicate_inverse &&
> @@ -215,7 +215,7 @@ create_copy_instr(const fs_builder &bld, fs_inst
> *inst, fs_reg src, bool negate)
> copy = bld.LOAD_PAYLOAD(inst->dst, payload, sources, header_size);
> } else {
> copy = bld.MOV(inst->dst, src);
> - copy->force_sechalf = inst->force_sechalf;
> + copy->group = inst->group;
> copy->force_writemask_all = inst->force_writemask_all;
> copy->src[0].negate = negate;
> }
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
> b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
> index 804c639..914ec9b 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
> @@ -212,7 +212,7 @@ fs_generator::fire_fb_write(fs_inst *inst,
> if (inst->opcode == FS_OPCODE_REP_FB_WRITE)
> msg_control =
> BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED;
> else if (prog_data->dual_src_blend) {
> - if (!inst->force_sechalf)
> + if (!inst->group)
> msg_control =
> BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01;
> else
> msg_control =
> BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN23;
>
This doesn't look right. I suppose it's correct for SIMD16-only and that
you have a follow-on patch for SIMD32 but it seems wrong. If it's an easy
one-liner, maybe just squash it in?
> @@ -1071,7 +1071,7 @@ fs_generator::generate_scratch_write(fs_inst *inst,
> struct brw_reg src)
> brw_set_default_compression(p, lower_size > 8);
>
> for (unsigned i = 0; i < inst->exec_size / lower_size; i++) {
> - brw_set_default_group(p, (inst->force_sechalf ? 8 : 0) + lower_size
> * i);
> + brw_set_default_group(p, inst->group + lower_size * i);
>
> brw_MOV(p, brw_uvec_mrf(lower_size, inst->base_mrf + 1, 0),
> retype(offset(src, block_size * i), BRW_REGISTER_TYPE_UD));
> @@ -1615,7 +1615,7 @@ fs_generator::generate_code(const cfg_t *cfg, int
> dispatch_width)
> const bool compressed =
> inst->dst.component_size(inst->exec_size) > REG_SIZE;
> brw_set_default_compression(p, compressed);
> - brw_set_default_group(p, inst->force_sechalf ? 8 : 0);
> + brw_set_default_group(p, inst->group);
>
> for (unsigned int i = 0; i < inst->sources; i++) {
> src[i] = brw_reg_from_fs_reg(inst, &inst->src[i], devinfo->gen,
> @@ -1643,6 +1643,7 @@ fs_generator::generate_code(const cfg_t *cfg, int
> dispatch_width)
> brw_set_default_exec_size(p, cvt(inst->exec_size) - 1);
>
> assert(inst->force_writemask_all || inst->exec_size >= 8);
> + assert(inst->force_writemask_all || inst->group % inst->exec_size
> == 0);
> assert(inst->base_mrf + inst->mlen <= BRW_MAX_MRF(devinfo->gen));
> assert(inst->mlen <= BRW_MAX_MSG_LENGTH);
>
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp
> b/src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp
> index 8613725..8cd897f 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp
> @@ -163,7 +163,7 @@ fs_visitor::opt_peephole_sel()
> /* Check that the MOVs are the right form. */
> if (!then_mov[i]->dst.equals(else_mov[i]->dst) ||
> then_mov[i]->exec_size != else_mov[i]->exec_size ||
> - then_mov[i]->force_sechalf != else_mov[i]->force_sechalf ||
> + then_mov[i]->group != else_mov[i]->group ||
> then_mov[i]->force_writemask_all !=
> else_mov[i]->force_writemask_all ||
> then_mov[i]->is_partial_write() ||
> else_mov[i]->is_partial_write() ||
> diff --git a/src/mesa/drivers/dri/i965/brw_ir_fs.h
> b/src/mesa/drivers/dri/i965/brw_ir_fs.h
> index cdb4464..6a93e81 100644
> --- a/src/mesa/drivers/dri/i965/brw_ir_fs.h
> +++ b/src/mesa/drivers/dri/i965/brw_ir_fs.h
> @@ -289,22 +289,20 @@ public:
> */
> uint8_t exec_size;
>
> + /**
> + * Channel group from the hardware execution and predication mask that
> + * should be applied to the instruction. The subset of channel enable
> + * signals (calculated from the EU control flow and predication state)
> + * given by [group, group + exec_size[ will be used to mask GRF writes
> and
>
If you're intending to indicate a half-open interval, I believe the
notation more people will be familiar with is "[group, group + exec_size)"
> + * any other side effects of the instruction.
> + */
> + uint8_t group;
> +
> bool eot:1;
> - bool force_sechalf:1;
> bool pi_noperspective:1; /**< Pixel interpolator noperspective flag
> */
> };
>
> /**
> - * Set second-half quarter control on \p inst.
> - */
> -static inline fs_inst *
> -set_sechalf(fs_inst *inst)
> -{
> - inst->force_sechalf = true;
> - return inst;
> -}
> -
> -/**
> * Make the execution of \p inst dependent on the evaluation of a possibly
> * inverted predicate.
> */
> --
> 2.7.3
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20160524/12d69743/attachment-0001.html>
More information about the mesa-dev
mailing list