[Mesa-dev] [PATCH] r600g/sb: implement r600 gpr index workaround.
Dave Airlie
airlied at gmail.com
Mon Dec 8 22:56:57 PST 2014
From: Dave Airlie <airlied at redhat.com>
r600, rv610 and rv630 all have a bug in their GPR indexing, caused by
the way the hw automatically inserts accesses to PV.
If the base GPR of an indexed src is the same as a dst GPR written
in the previous group, the hw will read PV instead of reading
the indexed GPR correctly.
The workaround is to insert a NOP group before the affected group when this is detected.
This is only half the fix: there is also a problem where the
dst GPR is indexed and a subsequent src reads it. That
will be addressed next.
Signed-off-by: Dave Airlie <airlied at redhat.com>
---
src/gallium/drivers/r600/sb/sb_bc.h | 2 ++
src/gallium/drivers/r600/sb/sb_bc_finalize.cpp | 50 +++++++++++++++++++++-----
src/gallium/drivers/r600/sb/sb_context.cpp | 2 ++
src/gallium/drivers/r600/sb/sb_ir.h | 1 +
src/gallium/drivers/r600/sb/sb_pass.h | 5 +--
5 files changed, 50 insertions(+), 10 deletions(-)
diff --git a/src/gallium/drivers/r600/sb/sb_bc.h b/src/gallium/drivers/r600/sb/sb_bc.h
index d03da98..6d3dc4d 100644
--- a/src/gallium/drivers/r600/sb/sb_bc.h
+++ b/src/gallium/drivers/r600/sb/sb_bc.h
@@ -616,6 +616,8 @@ public:
unsigned num_slots;
bool uses_mova_gpr;
+ bool r6xx_gpr_index_workaround;
+
bool stack_workaround_8xx;
bool stack_workaround_9xx;
diff --git a/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp b/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp
index 3f362c4..c55d2cf 100644
--- a/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp
+++ b/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp
@@ -38,6 +38,18 @@
namespace r600_sb {
+void bc_finalizer::insert_rv6xx_load_ar_workaround(alu_group_node *b4) {
+ // r6xx GPR-index workaround: build a fresh ALU group holding a single NOP and place it before b4.
+ alu_group_node *g = sh.create_alu_group();
+ alu_node *a = sh.create_alu();
+ // The NOP is the only instruction in the new group, so it is also flagged as the group's last one.
+ a->bc.set_op(ALU_OP0_NOP);
+ a->bc.last = 1;
+ // Attach the NOP to the new group, then link that group in ahead of b4.
+ g->push_back(a);
+ b4->insert_before(g);
+}
+
int bc_finalizer::run() {
run_on(sh.root);
@@ -211,12 +223,12 @@ void bc_finalizer::finalize_if(region_node* r) {
}
void bc_finalizer::run_on(container_node* c) {
-
+ node *prev_node = NULL;
for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) {
node *n = *I;
if (n->is_alu_group()) {
- finalize_alu_group(static_cast<alu_group_node*>(n));
+ finalize_alu_group(static_cast<alu_group_node*>(n), prev_node);
} else {
if (n->is_alu_clause()) {
cf_node *c = static_cast<cf_node*>(n);
@@ -251,19 +263,26 @@ void bc_finalizer::run_on(container_node* c) {
if (n->is_container())
run_on(static_cast<container_node*>(n));
}
+ prev_node = n;
}
}
-void bc_finalizer::finalize_alu_group(alu_group_node* g) {
+void bc_finalizer::finalize_alu_group(alu_group_node* g, node *prev_node) {
alu_node *last = NULL;
+ alu_group_node *prev_g = NULL;
+ bool add_nop = false;
+ if (prev_node && prev_node->is_alu_group()) {
+ prev_g = static_cast<alu_group_node*>(prev_node);
+ }
+ for (int i = 0; i < 5; i++)
+ g->dst_slot_regs[i] = -1;
for (node_iterator I = g->begin(), E = g->end(); I != E; ++I) {
alu_node *n = static_cast<alu_node*>(*I);
unsigned slot = n->bc.slot;
-
value *d = n->dst.empty() ? NULL : n->dst[0];
-
+ bool local_nop;
if (d && d->is_special_reg()) {
assert(n->bc.op_ptr->flags & AF_MOVA);
d = NULL;
@@ -286,6 +305,7 @@ void bc_finalizer::finalize_alu_group(alu_group_node* g) {
n->bc.dst_rel = 0;
}
+ g->dst_slot_regs[slot] = n->bc.dst_gpr;
n->bc.write_mask = d != NULL;
n->bc.last = 0;
@@ -299,17 +319,24 @@ void bc_finalizer::finalize_alu_group(alu_group_node* g) {
update_ngpr(n->bc.dst_gpr);
- finalize_alu_src(g, n);
+ local_nop = finalize_alu_src(g, n, prev_g);
+ if (local_nop)
+ add_nop = true;
last = n;
}
+ if (add_nop) {
+ if (sh.get_ctx().r6xx_gpr_index_workaround) {
+ insert_rv6xx_load_ar_workaround(g);
+ }
+ }
last->bc.last = 1;
}
-void bc_finalizer::finalize_alu_src(alu_group_node* g, alu_node* a) {
+bool bc_finalizer::finalize_alu_src(alu_group_node* g, alu_node* a, alu_group_node *prev) {
vvec &sv = a->src;
-
+ bool add_nop = false;
FBC_DUMP(
sblog << "finalize_alu_src: ";
dump::dump_op(a);
@@ -336,6 +363,12 @@ void bc_finalizer::finalize_alu_src(alu_group_node* g, alu_node* a) {
if (!v->rel->is_const()) {
src.rel = 1;
update_ngpr(v->array->gpr.sel() + v->array->array_size -1);
+ if (prev) {
+ for (int i = 0; i < 5; i++) {
+ if (prev->dst_slot_regs[i] == src.sel)
+ add_nop = true;
+ }
+ }
} else
src.rel = 0;
@@ -398,6 +431,7 @@ void bc_finalizer::finalize_alu_src(alu_group_node* g, alu_node* a) {
while (si < 3) {
a->bc.src[si++].sel = 0;
}
+ return add_nop;
}
void bc_finalizer::copy_fetch_src(fetch_node &dst, fetch_node &src, unsigned arg_start)
diff --git a/src/gallium/drivers/r600/sb/sb_context.cpp b/src/gallium/drivers/r600/sb/sb_context.cpp
index 8e11428..5dba85b 100644
--- a/src/gallium/drivers/r600/sb/sb_context.cpp
+++ b/src/gallium/drivers/r600/sb/sb_context.cpp
@@ -61,6 +61,8 @@ int sb_context::init(r600_isa *isa, sb_hw_chip chip, sb_hw_class cclass) {
uses_mova_gpr = is_r600() && chip != HW_CHIP_RV670;
+ r6xx_gpr_index_workaround = is_r600() && chip != HW_CHIP_RV670 && chip != HW_CHIP_RS780 && chip != HW_CHIP_RS880;
+
switch (chip) {
case HW_CHIP_RV610:
case HW_CHIP_RS780:
diff --git a/src/gallium/drivers/r600/sb/sb_ir.h b/src/gallium/drivers/r600/sb/sb_ir.h
index 711c2eb..cab097d 100644
--- a/src/gallium/drivers/r600/sb/sb_ir.h
+++ b/src/gallium/drivers/r600/sb/sb_ir.h
@@ -960,6 +960,7 @@ public:
return F - literals.begin();
}
+ int dst_slot_regs[5];
friend class shader;
};
diff --git a/src/gallium/drivers/r600/sb/sb_pass.h b/src/gallium/drivers/r600/sb/sb_pass.h
index 812d14a..0346df1 100644
--- a/src/gallium/drivers/r600/sb/sb_pass.h
+++ b/src/gallium/drivers/r600/sb/sb_pass.h
@@ -695,8 +695,9 @@ public:
void run_on(container_node *c);
- void finalize_alu_group(alu_group_node *g);
- void finalize_alu_src(alu_group_node *g, alu_node *a);
+ void insert_rv6xx_load_ar_workaround(alu_group_node *b4);
+ void finalize_alu_group(alu_group_node *g, node *prev_node);
+ bool finalize_alu_src(alu_group_node *g, alu_node *a, alu_group_node *prev_node);
void emit_set_grad(fetch_node* f);
void finalize_fetch(fetch_node *f);
--
2.1.0
More information about the mesa-dev
mailing list