[Mesa-dev] [PATCH 2/5] i965: Turn BRW_MAX_MRF into a macro that accepts a hardware generation
Iago Toral Quiroga
itoral at igalia.com
Fri Sep 18 01:08:49 PDT 2015
There are some bug reports about shaders failing to compile in gen6
because MRF 14 is used when we need to spill. For example:
https://bugs.freedesktop.org/show_bug.cgi?id=86469
https://bugs.freedesktop.org/show_bug.cgi?id=90631
Discussion in bugzilla pointed to the fact that gen6 might actually have
24 MRF registers available instead of 16, so we could use other MRF
registers and avoid these conflicts (we still need to investigate why
some shaders need up to MRF 14 anyway, since this is not expected).
Notice that the hardware docs are not clear about this fact:
SNB PRM Vol4 Part2's "Table 5-4. MRF Registers Available in Device
Hardware" says "Number per Thread" - "24 registers"
However, SNB PRM Vol4 Part1, 1.6.1 Message Register File (MRF) says:
"Normal threads should construct their messages in m1..m15. (...)
Regardless of actual hardware implementation, the thread should
not assume th at MRF addresses above m15 wrap to legal MRF registers."
Therefore experimentation was necessary to evaluate if we had these extra
MRF registers available or not. This was tested in gen6 using MRF
registers 21..23 for spilling and doing a full piglit run (all.py) forcing
spilling of everything on the FS backend. It was also tested by doing
spilling of everything on both the FS and the VS backends with a piglit run
of shader.py. In both cases no regressions were observed. In fact, many of
these tests where helped in the cases where we forced spilling, since that
triggered the same underlying problem described in the bug reports. Here are
some results using INTEL_DEBUG=spill_fs,spill_vec4 for a shader.py run on
gen6 hardware:
Using MRFs 13..15 for spilling:
crash: 2, fail: 113, pass: 6621, skip: 5461
Using MRFs 21..23 for spilling:
crash: 2, fail: 12, pass: 6722, skip: 5461
This patch sets the ground for later patches to implement spilling
using MRF registers 21..23 in gen6.
---
src/mesa/drivers/dri/i965/brw_eu_emit.c | 6 +++---
src/mesa/drivers/dri/i965/brw_fs.cpp | 4 ++--
src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 12 ++++++------
src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp | 16 ++++++++--------
src/mesa/drivers/dri/i965/brw_ir_vec4.h | 2 +-
src/mesa/drivers/dri/i965/brw_reg.h | 2 +-
src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp | 4 ++--
src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 10 +++++-----
8 files changed, 28 insertions(+), 28 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index 23a120e..6a4e316 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -147,7 +147,7 @@ brw_set_dest(struct brw_codegen *p, brw_inst *inst, struct brw_reg dest)
const struct brw_device_info *devinfo = p->devinfo;
if (dest.file == BRW_MESSAGE_REGISTER_FILE)
- assert(dest.nr < BRW_MAX_MRF);
+ assert(dest.nr < BRW_MAX_MRF(devinfo->gen));
else if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE)
assert(dest.nr < 128);
@@ -311,7 +311,7 @@ brw_set_src0(struct brw_codegen *p, brw_inst *inst, struct brw_reg reg)
const struct brw_device_info *devinfo = p->devinfo;
if (reg.file == BRW_MESSAGE_REGISTER_FILE)
- assert(reg.nr < BRW_MAX_MRF);
+ assert(reg.nr < BRW_MAX_MRF(devinfo->gen));
else if (reg.file != BRW_ARCHITECTURE_REGISTER_FILE)
assert(reg.nr < 128);
@@ -2485,7 +2485,7 @@ void brw_urb_WRITE(struct brw_codegen *p,
insn = next_insn(p, BRW_OPCODE_SEND);
- assert(msg_length < BRW_MAX_MRF);
+ assert(msg_length < BRW_MAX_MRF(devinfo->gen));
brw_set_dest(p, insn, dest);
brw_set_src0(p, insn, src0);
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index b4d0567..225a312 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -2795,7 +2795,7 @@ fs_visitor::insert_gen4_pre_send_dependency_workarounds(bblock_t *block,
{
int write_len = inst->regs_written;
int first_write_grf = inst->dst.reg;
- bool needs_dep[BRW_MAX_MRF];
+ bool needs_dep[BRW_MAX_MRF(devinfo->gen)];
assert(write_len < (int)sizeof(needs_dep) - 1);
memset(needs_dep, false, sizeof(needs_dep));
@@ -2866,7 +2866,7 @@ fs_visitor::insert_gen4_post_send_dependency_workarounds(bblock_t *block, fs_ins
{
int write_len = inst->regs_written;
int first_write_grf = inst->dst.reg;
- bool needs_dep[BRW_MAX_MRF];
+ bool needs_dep[BRW_MAX_MRF(devinfo->gen)];
assert(write_len < (int)sizeof(needs_dep) - 1);
memset(needs_dep, false, sizeof(needs_dep));
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index a84cfc9..6cbd42f 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -48,13 +48,13 @@ static uint32_t brw_file_from_reg(fs_reg *reg)
}
static struct brw_reg
-brw_reg_from_fs_reg(fs_inst *inst, fs_reg *reg)
+brw_reg_from_fs_reg(fs_inst *inst, fs_reg *reg, unsigned gen)
{
struct brw_reg brw_reg;
switch (reg->file) {
case MRF:
- assert((reg->reg & ~(1 << 7)) < BRW_MAX_MRF);
+ assert((reg->reg & ~(1 << 7)) < BRW_MAX_MRF(gen));
/* Fallthrough */
case GRF:
if (reg->stride == 0) {
@@ -420,7 +420,7 @@ fs_generator::generate_blorp_fb_write(fs_inst *inst)
brw_fb_WRITE(p,
16 /* dispatch_width */,
brw_message_reg(inst->base_mrf),
- brw_reg_from_fs_reg(inst, &inst->src[0]),
+ brw_reg_from_fs_reg(inst, &inst->src[0], devinfo->gen),
BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE,
inst->target,
inst->mlen,
@@ -1538,7 +1538,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
annotate(p->devinfo, &annotation, cfg, inst, p->next_insn_offset);
for (unsigned int i = 0; i < inst->sources; i++) {
- src[i] = brw_reg_from_fs_reg(inst, &inst->src[i]);
+ src[i] = brw_reg_from_fs_reg(inst, &inst->src[i], devinfo->gen);
/* The accumulator result appears to get used for the
* conditional modifier generation. When negating a UD
@@ -1550,7 +1550,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
inst->src[i].type != BRW_REGISTER_TYPE_UD ||
!inst->src[i].negate);
}
- dst = brw_reg_from_fs_reg(inst, &inst->dst);
+ dst = brw_reg_from_fs_reg(inst, &inst->dst, devinfo->gen);
brw_set_default_predicate_control(p, inst->predicate);
brw_set_default_predicate_inverse(p, inst->predicate_inverse);
@@ -1560,7 +1560,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
brw_set_default_acc_write_control(p, inst->writes_accumulator);
brw_set_default_exec_size(p, cvt(inst->exec_size) - 1);
- assert(inst->base_mrf + inst->mlen <= BRW_MAX_MRF);
+ assert(inst->base_mrf + inst->mlen <= BRW_MAX_MRF(devinfo->gen));
switch (inst->exec_size) {
case 1:
diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
index 570b4fe..21fb3de 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
@@ -478,7 +478,7 @@ get_used_mrfs(fs_visitor *v, bool *mrf_used)
{
int reg_width = v->dispatch_width / 8;
- memset(mrf_used, 0, BRW_MAX_MRF * sizeof(bool));
+ memset(mrf_used, 0, BRW_MAX_MRF(v->devinfo->gen) * sizeof(bool));
foreach_block_and_inst(block, fs_inst, inst, v->cfg) {
if (inst->dst.file == MRF) {
@@ -509,11 +509,11 @@ static void
setup_mrf_hack_interference(fs_visitor *v, struct ra_graph *g,
int first_mrf_node, int *first_used_mrf)
{
- bool mrf_used[BRW_MAX_MRF];
+ bool mrf_used[BRW_MAX_MRF(v->devinfo->gen)];
get_used_mrfs(v, mrf_used);
- *first_used_mrf = BRW_MAX_MRF;
- for (int i = 0; i < BRW_MAX_MRF; i++) {
+ *first_used_mrf = BRW_MAX_MRF(v->devinfo->gen);
+ for (int i = 0; i < BRW_MAX_MRF(v->devinfo->gen); i++) {
/* Mark each MRF reg node as being allocated to its physical register.
*
* The alternative would be to have per-physical-register classes, which
@@ -593,7 +593,7 @@ fs_visitor::assign_regs(bool allow_spilling)
setup_payload_interference(g, payload_node_count, first_payload_node);
if (devinfo->gen >= 7) {
- int first_used_mrf = BRW_MAX_MRF;
+ int first_used_mrf = BRW_MAX_MRF(devinfo->gen);
setup_mrf_hack_interference(this, g, first_mrf_hack_node,
&first_used_mrf);
@@ -616,7 +616,7 @@ fs_visitor::assign_regs(bool allow_spilling)
* register early enough in the register file that we don't
* conflict with any used MRF hack registers.
*/
- reg -= BRW_MAX_MRF - first_used_mrf;
+ reg -= BRW_MAX_MRF(devinfo->gen) - first_used_mrf;
ra_set_node_reg(g, inst->src[0].reg, reg);
break;
@@ -853,10 +853,10 @@ fs_visitor::spill_reg(int spill_reg)
* SIMD16 mode, because we'd stomp the FB writes.
*/
if (!spilled_any_registers) {
- bool mrf_used[BRW_MAX_MRF];
+ bool mrf_used[BRW_MAX_MRF(devinfo->gen)];
get_used_mrfs(this, mrf_used);
- for (int i = spill_base_mrf; i < BRW_MAX_MRF; i++) {
+ for (int i = spill_base_mrf; i < BRW_MAX_MRF(devinfo->gen); i++) {
if (mrf_used[i]) {
fail("Register spilling not supported with m%d used", i);
return;
diff --git a/src/mesa/drivers/dri/i965/brw_ir_vec4.h b/src/mesa/drivers/dri/i965/brw_ir_vec4.h
index 966a410..6e8b161 100644
--- a/src/mesa/drivers/dri/i965/brw_ir_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_ir_vec4.h
@@ -161,7 +161,7 @@ public:
const src_reg &src1 = src_reg(),
const src_reg &src2 = src_reg());
- struct brw_reg get_dst(void);
+ struct brw_reg get_dst(unsigned gen);
struct brw_reg get_src(const struct brw_vue_prog_data *prog_data, int i);
dst_reg dst;
diff --git a/src/mesa/drivers/dri/i965/brw_reg.h b/src/mesa/drivers/dri/i965/brw_reg.h
index 06d9269..87e7e01 100644
--- a/src/mesa/drivers/dri/i965/brw_reg.h
+++ b/src/mesa/drivers/dri/i965/brw_reg.h
@@ -70,7 +70,7 @@ struct brw_device_info;
#define GEN7_MRF_HACK_START 112
/** Number of message register file registers */
-#define BRW_MAX_MRF 16
+#define BRW_MAX_MRF(gen) (gen == 6 ? 24 : 16)
#define BRW_SWIZZLE4(a,b,c,d) (((a)<<0) | ((b)<<2) | ((c)<<4) | ((d)<<6))
#define BRW_GET_SWZ(swz, idx) (((swz) >> ((idx)*2)) & 0x3)
diff --git a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
index b49961f..4e43e5c 100644
--- a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
+++ b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
@@ -762,7 +762,7 @@ fs_instruction_scheduler::calculate_deps()
* GRF registers.
*/
schedule_node *last_grf_write[grf_count * 16];
- schedule_node *last_mrf_write[BRW_MAX_MRF];
+ schedule_node *last_mrf_write[BRW_MAX_MRF(v->devinfo->gen)];
schedule_node *last_conditional_mod[2] = { NULL, NULL };
schedule_node *last_accumulator_write = NULL;
/* Fixed HW registers are assumed to be separate from the virtual
@@ -1035,7 +1035,7 @@ void
vec4_instruction_scheduler::calculate_deps()
{
schedule_node *last_grf_write[grf_count];
- schedule_node *last_mrf_write[BRW_MAX_MRF];
+ schedule_node *last_mrf_write[BRW_MAX_MRF(v->devinfo->gen)];
schedule_node *last_conditional_mod = NULL;
schedule_node *last_accumulator_write = NULL;
/* Fixed HW registers are assumed to be separate from the virtual
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
index 0a95530..d5943d2 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
@@ -34,7 +34,7 @@ extern "C" {
namespace brw {
struct brw_reg
-vec4_instruction::get_dst(void)
+vec4_instruction::get_dst(unsigned gen)
{
struct brw_reg brw_reg;
@@ -46,7 +46,7 @@ vec4_instruction::get_dst(void)
break;
case MRF:
- assert(((dst.reg + dst.reg_offset) & ~(1 << 7)) < BRW_MAX_MRF);
+ assert(((dst.reg + dst.reg_offset) & ~(1 << 7)) < BRW_MAX_MRF(gen));
brw_reg = brw_message_reg(dst.reg + dst.reg_offset);
brw_reg = retype(brw_reg, dst.type);
brw_reg.dw1.bits.writemask = dst.writemask;
@@ -490,7 +490,7 @@ vec4_generator::generate_gs_urb_write_allocate(vec4_instruction *inst)
brw_push_insn_state(p);
brw_set_default_access_mode(p, BRW_ALIGN_1);
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
- brw_MOV(p, get_element_ud(inst->get_dst(), 0),
+ brw_MOV(p, get_element_ud(inst->get_dst(devinfo->gen), 0),
get_element_ud(inst->get_src(this->prog_data, 0), 0));
brw_set_default_access_mode(p, BRW_ALIGN_16);
brw_pop_insn_state(p);
@@ -1126,7 +1126,7 @@ vec4_generator::generate_code(const cfg_t *cfg)
for (unsigned int i = 0; i < 3; i++) {
src[i] = inst->get_src(this->prog_data, i);
}
- dst = inst->get_dst();
+ dst = inst->get_dst(devinfo->gen);
brw_set_default_predicate_control(p, inst->predicate);
brw_set_default_predicate_inverse(p, inst->predicate_inverse);
@@ -1135,7 +1135,7 @@ vec4_generator::generate_code(const cfg_t *cfg)
brw_set_default_mask_control(p, inst->force_writemask_all);
brw_set_default_acc_write_control(p, inst->writes_accumulator);
- assert(inst->base_mrf + inst->mlen <= BRW_MAX_MRF);
+ assert(inst->base_mrf + inst->mlen <= BRW_MAX_MRF(devinfo->gen));
unsigned pre_emit_nr_insn = p->nr_insn;
--
1.9.1
More information about the mesa-dev
mailing list