[Mesa-dev] [PATCH 21/21] i965/fs: Expose arbitrary channel execution groups to the IR.
Francisco Jerez
currojerez at riseup.net
Tue May 24 07:18:58 UTC 2016
This generalizes the current fs_inst::force_sechalf flag to allow
specifying channel enable groups other than 0 or 8. At some point it
will likely make sense to fix the vec4 generator to support arbitrary
execution groups and then move the definition of fs_inst::group into
backend_instruction (e.g. so we can do FP64 in the VEC4 back-end).
---
src/mesa/drivers/dri/i965/brw_fs.cpp | 20 ++++++++------------
src/mesa/drivers/dri/i965/brw_fs_builder.h | 14 +++++++++++---
src/mesa/drivers/dri/i965/brw_fs_cse.cpp | 4 ++--
src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 7 ++++---
src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp | 2 +-
src/mesa/drivers/dri/i965/brw_ir_fs.h | 20 +++++++++-----------
6 files changed, 35 insertions(+), 32 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index a59cd3c..92caeaa 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -3638,7 +3638,7 @@ fs_visitor::lower_integer_multiplication()
mul->src[1].stride *= 2;
} else if (devinfo->gen == 7 && !devinfo->is_haswell &&
- inst->force_sechalf) {
+ inst->group > 0) {
/* Among other things the quarter control bits influence which
* accumulator register is used by the hardware for instructions
* that access the accumulator implicitly (e.g. MACH). A
@@ -3655,7 +3655,7 @@ fs_visitor::lower_integer_multiplication()
* to get the result masked correctly according to the current
* channel enables.
*/
- mach->force_sechalf = false;
+ mach->group = 0;
mach->force_writemask_all = true;
mach->dst = ibld.vgrf(inst->dst.type);
ibld.MOV(inst->dst, mach->dst);
@@ -3791,8 +3791,8 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst,
sample_mask.stride *= 2;
bld.exec_all().annotate("FB write oMask")
- .MOV(half(retype(sources[length], BRW_REGISTER_TYPE_UW),
- inst->force_sechalf),
+ .MOV(horiz_offset(retype(sources[length], BRW_REGISTER_TYPE_UW),
+ inst->group),
sample_mask);
length++;
}
@@ -5017,10 +5017,10 @@ fs_visitor::lower_simd_width()
* execution size of the builder to the highest of both for now so
* we're sure that both cases can be handled.
*/
+ const unsigned max_width = MAX2(inst->exec_size, lower_width);
const fs_builder ibld = bld.at(block, inst)
.exec_all(inst->force_writemask_all)
- .group(MAX2(inst->exec_size, lower_width),
- inst->force_sechalf);
+ .group(max_width, inst->group / max_width);
/* Split the copies in chunks of the execution width of either the
* original or the lowered instruction, whichever is lower.
@@ -5352,12 +5352,8 @@ fs_visitor::dump_instruction(backend_instruction *be_inst, FILE *file)
if (inst->force_writemask_all)
fprintf(file, "NoMask ");
- if (dispatch_width == 16 && inst->exec_size == 8) {
- if (inst->force_sechalf)
- fprintf(file, "2ndhalf ");
- else
- fprintf(file, "1sthalf ");
- }
+ if (inst->exec_size != dispatch_width)
+ fprintf(file, "group%d ", inst->group);
fprintf(file, "\n");
}
diff --git a/src/mesa/drivers/dri/i965/brw_fs_builder.h b/src/mesa/drivers/dri/i965/brw_fs_builder.h
index b50dda4..c1d13a2 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_builder.h
+++ b/src/mesa/drivers/dri/i965/brw_fs_builder.h
@@ -72,7 +72,7 @@ namespace brw {
fs_builder(backend_shader *shader, bblock_t *block, fs_inst *inst) :
shader(shader), block(block), cursor(inst),
_dispatch_width(inst->exec_size),
- _group(inst->force_sechalf ? 8 : 0),
+ _group(inst->group),
force_writemask_all(inst->force_writemask_all)
{
annotation.str = inst->annotation;
@@ -168,6 +168,15 @@ namespace brw {
}
/**
+ * Get the channel group in use.
+ */
+ unsigned
+ group() const
+ {
+ return _group;
+ }
+
+ /**
* Allocate a virtual register of natural vector size (one for this IR)
* and SIMD width. \p n gives the amount of space to allocate in
* dispatch_width units (which is just enough space for one logical
@@ -353,9 +362,8 @@ namespace brw {
assert(inst->exec_size <= 32);
assert(inst->exec_size == dispatch_width() ||
force_writemask_all);
- assert(_group == 0 || _group == 8);
- inst->force_sechalf = (_group == 8);
+ inst->group = _group;
inst->force_writemask_all = force_writemask_all;
inst->annotation = annotation.str;
inst->ir = annotation.ir;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
index 9c39106..159bf5d 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
@@ -162,7 +162,7 @@ instructions_match(fs_inst *a, fs_inst *b, bool *negate)
return a->opcode == b->opcode &&
a->force_writemask_all == b->force_writemask_all &&
a->exec_size == b->exec_size &&
- a->force_sechalf == b->force_sechalf &&
+ a->group == b->group &&
a->saturate == b->saturate &&
a->predicate == b->predicate &&
a->predicate_inverse == b->predicate_inverse &&
@@ -215,7 +215,7 @@ create_copy_instr(const fs_builder &bld, fs_inst *inst, fs_reg src, bool negate)
copy = bld.LOAD_PAYLOAD(inst->dst, payload, sources, header_size);
} else {
copy = bld.MOV(inst->dst, src);
- copy->force_sechalf = inst->force_sechalf;
+ copy->group = inst->group;
copy->force_writemask_all = inst->force_writemask_all;
copy->src[0].negate = negate;
}
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index 804c639..914ec9b 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -212,7 +212,7 @@ fs_generator::fire_fb_write(fs_inst *inst,
if (inst->opcode == FS_OPCODE_REP_FB_WRITE)
msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED;
else if (prog_data->dual_src_blend) {
- if (!inst->force_sechalf)
+ if (!inst->group)
msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01;
else
msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN23;
@@ -1071,7 +1071,7 @@ fs_generator::generate_scratch_write(fs_inst *inst, struct brw_reg src)
brw_set_default_compression(p, lower_size > 8);
for (unsigned i = 0; i < inst->exec_size / lower_size; i++) {
- brw_set_default_group(p, (inst->force_sechalf ? 8 : 0) + lower_size * i);
+ brw_set_default_group(p, inst->group + lower_size * i);
brw_MOV(p, brw_uvec_mrf(lower_size, inst->base_mrf + 1, 0),
retype(offset(src, block_size * i), BRW_REGISTER_TYPE_UD));
@@ -1615,7 +1615,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
const bool compressed =
inst->dst.component_size(inst->exec_size) > REG_SIZE;
brw_set_default_compression(p, compressed);
- brw_set_default_group(p, inst->force_sechalf ? 8 : 0);
+ brw_set_default_group(p, inst->group);
for (unsigned int i = 0; i < inst->sources; i++) {
src[i] = brw_reg_from_fs_reg(inst, &inst->src[i], devinfo->gen,
@@ -1643,6 +1643,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
brw_set_default_exec_size(p, cvt(inst->exec_size) - 1);
assert(inst->force_writemask_all || inst->exec_size >= 8);
+ assert(inst->force_writemask_all || inst->group % inst->exec_size == 0);
assert(inst->base_mrf + inst->mlen <= BRW_MAX_MRF(devinfo->gen));
assert(inst->mlen <= BRW_MAX_MSG_LENGTH);
diff --git a/src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp b/src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp
index 8613725..8cd897f 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp
@@ -163,7 +163,7 @@ fs_visitor::opt_peephole_sel()
/* Check that the MOVs are the right form. */
if (!then_mov[i]->dst.equals(else_mov[i]->dst) ||
then_mov[i]->exec_size != else_mov[i]->exec_size ||
- then_mov[i]->force_sechalf != else_mov[i]->force_sechalf ||
+ then_mov[i]->group != else_mov[i]->group ||
then_mov[i]->force_writemask_all != else_mov[i]->force_writemask_all ||
then_mov[i]->is_partial_write() ||
else_mov[i]->is_partial_write() ||
diff --git a/src/mesa/drivers/dri/i965/brw_ir_fs.h b/src/mesa/drivers/dri/i965/brw_ir_fs.h
index cdb4464..6a93e81 100644
--- a/src/mesa/drivers/dri/i965/brw_ir_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_ir_fs.h
@@ -289,22 +289,20 @@ public:
*/
uint8_t exec_size;
+ /**
+ * Channel group from the hardware execution and predication mask that
+ * should be applied to the instruction. The subset of channel enable
+ * signals (calculated from the EU control flow and predication state)
+ * given by [group, group + exec_size[ will be used to mask GRF writes and
+ * any other side effects of the instruction.
+ */
+ uint8_t group;
+
bool eot:1;
- bool force_sechalf:1;
bool pi_noperspective:1; /**< Pixel interpolator noperspective flag */
};
/**
- * Set second-half quarter control on \p inst.
- */
-static inline fs_inst *
-set_sechalf(fs_inst *inst)
-{
- inst->force_sechalf = true;
- return inst;
-}
-
-/**
* Make the execution of \p inst dependent on the evaluation of a possibly
* inverted predicate.
*/
--
2.7.3
More information about the mesa-dev
mailing list