[Mesa-stable] [PATCH v3 01/48] intel/fs: Pass builders instead of blocks into emit_[un]zip
Andres Gomez
agomez at igalia.com
Tue Nov 14 14:33:26 UTC 2017
Jason, after checking more in detail these 3 nominated commits from
this series:
fcd4adb9d08094520fb8d118d3448b04c6ec1fd1 intel/fs: Pass builders instead of blocks into emit_[un]zip.
0d905597fe2997c89022c76cdf84dc4fba5eb055 intel/fs: Be more explicit about our placement of [un]zip.
6c00240bc650805e0b66aa6e17dbe69bbe41e446 intel/fs: Don't stomp f0.1 in SIMD16 ballot.
They seem more like doing a code refactoring than fixes for specific
problems (?)
WDYT?
On Wed, 2017-10-25 at 16:25 -0700, Jason Ekstrand wrote:
> This makes it far more explicit where we're inserting the instructions
> rather than the magic "before and after" stuff that the emit_[un]zip
> helpers did based on block and inst.
>
> Cc: mesa-stable at lists.freedesktop.org
> ---
> src/intel/compiler/brw_fs.cpp | 50 ++++++++++++++++++++++++-------------------
> 1 file changed, 28 insertions(+), 22 deletions(-)
>
> diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
> index 4616529..49ca58d 100644
> --- a/src/intel/compiler/brw_fs.cpp
> +++ b/src/intel/compiler/brw_fs.cpp
> @@ -5025,8 +5025,7 @@ needs_src_copy(const fs_builder &lbld, const fs_inst *inst, unsigned i)
> * will be emitted before the given \p inst in \p block.
> */
> static fs_reg
> -emit_unzip(const fs_builder &lbld, bblock_t *block, fs_inst *inst,
> - unsigned i)
> +emit_unzip(const fs_builder &lbld, fs_inst *inst, unsigned i)
> {
> /* Specified channel group from the source region. */
> const fs_reg src = horiz_offset(inst->src[i], lbld.group());
> @@ -5041,8 +5040,7 @@ emit_unzip(const fs_builder &lbld, bblock_t *block, fs_inst *inst,
> const fs_reg tmp = lbld.vgrf(inst->src[i].type, inst->components_read(i));
>
> for (unsigned k = 0; k < inst->components_read(i); ++k)
> - cbld.at(block, inst)
> - .MOV(offset(tmp, lbld, k), offset(src, inst->exec_size, k));
> + cbld.MOV(offset(tmp, lbld, k), offset(src, inst->exec_size, k));
>
> return tmp;
>
> @@ -5112,36 +5110,43 @@ needs_dst_copy(const fs_builder &lbld, const fs_inst *inst)
> * be emitted around the given \p inst in \p block.
> */
> static fs_reg
> -emit_zip(const fs_builder &lbld, bblock_t *block, fs_inst *inst)
> +emit_zip(const fs_builder &lbld_before, const fs_builder &lbld_after,
> + fs_inst *inst)
> {
> - /* Builder of the right width to perform the copy avoiding uninitialized
> - * data if the lowered execution size is greater than the original
> - * execution size of the instruction.
> - */
> - const fs_builder cbld = lbld.group(MIN2(lbld.dispatch_width(),
> - inst->exec_size), 0);
> + assert(lbld_before.dispatch_width() == lbld_after.dispatch_width());
> + assert(lbld_before.group() == lbld_after.group());
>
> /* Specified channel group from the destination region. */
> - const fs_reg dst = horiz_offset(inst->dst, lbld.group());
> + const fs_reg dst = horiz_offset(inst->dst, lbld_after.group());
> const unsigned dst_size = inst->size_written /
> inst->dst.component_size(inst->exec_size);
>
> - if (needs_dst_copy(lbld, inst)) {
> - const fs_reg tmp = lbld.vgrf(inst->dst.type, dst_size);
> + if (needs_dst_copy(lbld_after, inst)) {
> + const fs_reg tmp = lbld_after.vgrf(inst->dst.type, dst_size);
>
> if (inst->predicate) {
> /* Handle predication by copying the original contents of
> * the destination into the temporary before emitting the
> * lowered instruction.
> */
> - for (unsigned k = 0; k < dst_size; ++k)
> - cbld.at(block, inst)
> - .MOV(offset(tmp, lbld, k), offset(dst, inst->exec_size, k));
> + for (unsigned k = 0; k < dst_size; ++k) {
> + lbld_before.group(MIN2(lbld_before.dispatch_width(),
> + inst->exec_size), 0)
> + .MOV(offset(tmp, lbld_before, k),
> + offset(dst, inst->exec_size, k));
> + }
> }
>
> - for (unsigned k = 0; k < dst_size; ++k)
> - cbld.at(block, inst->next)
> - .MOV(offset(dst, inst->exec_size, k), offset(tmp, lbld, k));
> + for (unsigned k = 0; k < dst_size; ++k) {
> + /* Use a builder of the right width to perform the copy avoiding
> + * uninitialized data if the lowered execution size is greater than
> + * the original execution size of the instruction.
> + */
> + lbld_after.group(MIN2(lbld_after.dispatch_width(),
> + inst->exec_size), 0)
> + .MOV(offset(dst, inst->exec_size, k),
> + offset(tmp, lbld_after, k));
> + }
>
> return tmp;
>
> @@ -5197,9 +5202,10 @@ fs_visitor::lower_simd_width()
> const fs_builder lbld = ibld.group(lower_width, i);
>
> for (unsigned j = 0; j < inst->sources; j++)
> - split_inst.src[j] = emit_unzip(lbld, block, inst, j);
> + split_inst.src[j] = emit_unzip(lbld.at(block, inst), inst, j);
>
> - split_inst.dst = emit_zip(lbld, block, inst);
> + split_inst.dst = emit_zip(lbld.at(block, inst),
> + lbld.at(block, inst->next), inst);
> split_inst.size_written =
> split_inst.dst.component_size(lower_width) * dst_size;
>
--
Br,
Andres
More information about the mesa-stable
mailing list