[Mesa-dev] [PATCH] r600g: move barrier and end_of_program bits from output to cf struct (v2)
Dave Airlie
airlied at gmail.com
Thu Jan 30 18:11:03 PST 2014
From: Vadim Girlin <vadimgirlin at gmail.com>
v2: fix regression on r600 NOP instructions.
Signed-off-by: Vadim Girlin <vadimgirlin at gmail.com>
Signed-off-by: Dave Airlie <airlied at redhat.com>
Fix the regression introduced by moving the end_of_program (EOP) bit.
---
src/gallium/drivers/r600/eg_asm.c | 10 ++++++----
src/gallium/drivers/r600/r600_asm.c | 24 +++++++++++++-----------
src/gallium/drivers/r600/r600_asm.h | 4 ++--
src/gallium/drivers/r600/r600_shader.c | 26 +++++++++++++-------------
4 files changed, 34 insertions(+), 30 deletions(-)
diff --git a/src/gallium/drivers/r600/eg_asm.c b/src/gallium/drivers/r600/eg_asm.c
index fffc436..42e78c0 100644
--- a/src/gallium/drivers/r600/eg_asm.c
+++ b/src/gallium/drivers/r600/eg_asm.c
@@ -86,11 +86,11 @@ int eg_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode_cf *cf)
S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y(cf->output.swizzle_y) |
S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z(cf->output.swizzle_z) |
S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W(cf->output.swizzle_w) |
- S_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(cf->output.barrier) |
+ S_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(cf->barrier) |
S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(opcode);
if (bc->chip_class == EVERGREEN) /* no EOP on cayman */
- bc->bytecode[id] |= S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->output.end_of_program);
+ bc->bytecode[id] |= S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->end_of_program);
id++;
} else if (cfop->flags & CF_STRM) {
/* MEM_STREAM instructions */
@@ -99,12 +99,12 @@ int eg_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode_cf *cf)
S_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(cf->output.array_base) |
S_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(cf->output.type);
bc->bytecode[id] = S_SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT(cf->output.burst_count - 1) |
- S_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(cf->output.barrier) |
+ S_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(cf->barrier) |
S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(opcode) |
S_SQ_CF_ALLOC_EXPORT_WORD1_BUF_COMP_MASK(cf->output.comp_mask) |
S_SQ_CF_ALLOC_EXPORT_WORD1_BUF_ARRAY_SIZE(cf->output.array_size);
if (bc->chip_class == EVERGREEN) /* no EOP on cayman */
- bc->bytecode[id] |= S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->output.end_of_program);
+ bc->bytecode[id] |= S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->end_of_program);
id++;
} else {
/* branch, loop, call, return instructions */
@@ -118,6 +118,7 @@ int eg_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode_cf *cf)
return 0;
}
+#if 0
void eg_bytecode_export_read(struct r600_bytecode *bc,
struct r600_bytecode_output *output, uint32_t word0, uint32_t word1)
{
@@ -138,3 +139,4 @@ void eg_bytecode_export_read(struct r600_bytecode *bc,
output->array_size = G_SQ_CF_ALLOC_EXPORT_WORD1_BUF_ARRAY_SIZE(word1);
output->comp_mask = G_SQ_CF_ALLOC_EXPORT_WORD1_BUF_COMP_MASK(word1);
}
+#endif
diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c
index 86f79e2..7f7731c 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -193,7 +193,6 @@ int r600_bytecode_add_output(struct r600_bytecode *bc,
if ((output->gpr + output->burst_count) == bc->cf_last->output.gpr &&
(output->array_base + output->burst_count) == bc->cf_last->output.array_base) {
- bc->cf_last->output.end_of_program |= output->end_of_program;
bc->cf_last->op = bc->cf_last->output.op = output->op;
bc->cf_last->output.gpr = output->gpr;
bc->cf_last->output.array_base = output->array_base;
@@ -203,7 +202,6 @@ int r600_bytecode_add_output(struct r600_bytecode *bc,
} else if (output->gpr == (bc->cf_last->output.gpr + bc->cf_last->output.burst_count) &&
output->array_base == (bc->cf_last->output.array_base + bc->cf_last->output.burst_count)) {
- bc->cf_last->output.end_of_program |= output->end_of_program;
bc->cf_last->op = bc->cf_last->output.op = output->op;
bc->cf_last->output.burst_count += output->burst_count;
return 0;
@@ -215,6 +213,7 @@ int r600_bytecode_add_output(struct r600_bytecode *bc,
return r;
bc->cf_last->op = output->op;
memcpy(&bc->cf_last->output, output, sizeof(struct r600_bytecode_output));
+ bc->cf_last->barrier = 1;
return 0;
}
@@ -1532,18 +1531,18 @@ static int r600_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode
S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y(cf->output.swizzle_y) |
S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z(cf->output.swizzle_z) |
S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W(cf->output.swizzle_w) |
- S_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(cf->output.barrier) |
+ S_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(cf->barrier) |
S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(opcode) |
- S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->output.end_of_program);
+ S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->end_of_program);
} else if (cfop->flags & CF_STRM) {
bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR(cf->output.gpr) |
S_SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE(cf->output.elem_size) |
S_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(cf->output.array_base) |
S_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(cf->output.type);
bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT(cf->output.burst_count - 1) |
- S_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(cf->output.barrier) |
+ S_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(cf->barrier) |
S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(opcode) |
- S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->output.end_of_program) |
+ S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->end_of_program) |
S_SQ_CF_ALLOC_EXPORT_WORD1_BUF_ARRAY_SIZE(cf->output.array_size) |
S_SQ_CF_ALLOC_EXPORT_WORD1_BUF_COMP_MASK(cf->output.comp_mask);
} else {
@@ -1551,7 +1550,8 @@ static int r600_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode
bc->bytecode[id++] = S_SQ_CF_WORD1_CF_INST(opcode) |
S_SQ_CF_WORD1_BARRIER(1) |
S_SQ_CF_WORD1_COND(cf->cond) |
- S_SQ_CF_WORD1_POP_COUNT(cf->pop_count);
+ S_SQ_CF_WORD1_POP_COUNT(cf->pop_count) |
+ S_SQ_CF_WORD1_END_OF_PROGRAM(cf->end_of_program);
}
return 0;
}
@@ -1932,9 +1932,9 @@ void r600_bytecode_disasm(struct r600_bytecode *bc)
print_indent(o, 67);
fprintf(stderr, " ES:%X ", cf->output.elem_size);
- if (!cf->output.barrier)
+ if (!cf->barrier)
fprintf(stderr, "NO_BARRIER ");
- if (cf->output.end_of_program)
+ if (cf->end_of_program)
fprintf(stderr, "EOP ");
fprintf(stderr, "\n");
} else if (r600_isa_cf(cf->op)->flags & CF_STRM) {
@@ -1968,9 +1968,9 @@ void r600_bytecode_disasm(struct r600_bytecode *bc)
fprintf(stderr, " ES:%i ", cf->output.elem_size);
if (cf->output.array_size != 0xFFF)
fprintf(stderr, "AS:%i ", cf->output.array_size);
- if (!cf->output.barrier)
+ if (!cf->barrier)
fprintf(stderr, "NO_BARRIER ");
- if (cf->output.end_of_program)
+ if (cf->end_of_program)
fprintf(stderr, "EOP ");
fprintf(stderr, "\n");
} else {
@@ -2486,6 +2486,7 @@ void r600_bytecode_alu_read(struct r600_bytecode *bc,
}
}
+#if 0
void r600_bytecode_export_read(struct r600_bytecode *bc,
struct r600_bytecode_output *output, uint32_t word0, uint32_t word1)
{
@@ -2506,3 +2507,4 @@ void r600_bytecode_export_read(struct r600_bytecode *bc,
output->array_size = G_SQ_CF_ALLOC_EXPORT_WORD1_BUF_ARRAY_SIZE(word1);
output->comp_mask = G_SQ_CF_ALLOC_EXPORT_WORD1_BUF_COMP_MASK(word1);
}
+#endif
diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h
index 82c6c8d..3bfbcb2 100644
--- a/src/gallium/drivers/r600/r600_asm.h
+++ b/src/gallium/drivers/r600/r600_asm.h
@@ -115,7 +115,6 @@ struct r600_bytecode_output {
unsigned array_size;
unsigned comp_mask;
unsigned type;
- unsigned end_of_program;
unsigned op;
@@ -126,7 +125,6 @@ struct r600_bytecode_output {
unsigned swizzle_z;
unsigned swizzle_w;
unsigned burst_count;
- unsigned barrier;
};
struct r600_bytecode_kcache {
@@ -148,6 +146,8 @@ struct r600_bytecode_cf {
struct r600_bytecode_kcache kcache[4];
unsigned r6xx_uses_waterfall;
unsigned eg_alu_extended;
+ unsigned barrier;
+ unsigned end_of_program;
struct list_head alu;
struct list_head tex;
struct list_head vtx;
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index 5fd445e..32d2aa7 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -939,7 +939,6 @@ static int emit_streamout(struct r600_shader_ctx *ctx, struct pipe_stream_output
output.array_base = so->output[i].dst_offset - so->output[i].start_component;
output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE;
output.burst_count = 1;
- output.barrier = 1;
/* array_size is an upper limit for the burst_count
* with MEM_STREAM instructions */
output.array_size = 0xFFF;
@@ -1384,7 +1383,6 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen,
output[j].swizzle_z = 2;
output[j].swizzle_w = 3;
output[j].burst_count = 1;
- output[j].barrier = 1;
output[j].type = -1;
output[j].op = CF_OP_EXPORT;
switch (ctx.type) {
@@ -1445,7 +1443,6 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen,
output[j].swizzle_z = 2;
output[j].swizzle_w = key.alpha_to_one ? 5 : 3;
output[j].burst_count = 1;
- output[j].barrier = 1;
output[j].array_base = k;
output[j].op = CF_OP_EXPORT;
output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
@@ -1492,7 +1489,6 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen,
output[j].swizzle_z = 7;
output[j].swizzle_w = 7;
output[j].burst_count = 1;
- output[j].barrier = 1;
output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
output[j].array_base = next_pos_base;
output[j].op = CF_OP_EXPORT;
@@ -1509,7 +1505,6 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen,
output[j].swizzle_z = 7;
output[j].swizzle_w = 7;
output[j].burst_count = 1;
- output[j].barrier = 1;
output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
output[j].array_base = 0;
output[j].op = CF_OP_EXPORT;
@@ -1526,7 +1521,6 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen,
output[j].swizzle_z = 7;
output[j].swizzle_w = 7;
output[j].burst_count = 1;
- output[j].barrier = 1;
output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
output[j].array_base = 0;
output[j].op = CF_OP_EXPORT;
@@ -1537,11 +1531,6 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen,
/* set export done on last export of each type */
for (i = noutput - 1, output_done = 0; i >= 0; i--) {
- if (ctx.bc->chip_class < CAYMAN) {
- if (i == (noutput - 1)) {
- output[i].end_of_program = 1;
- }
- }
if (!(output_done & (1 << output[i].type))) {
output_done |= (1 << output[i].type);
output[i].op = CF_OP_EXPORT_DONE;
@@ -1555,9 +1544,20 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen,
goto out_err;
}
}
+
/* add program end */
- if (!use_llvm && ctx.bc->chip_class == CAYMAN)
- cm_bytecode_add_cf_end(ctx.bc);
+ if (!use_llvm) {
+ if (ctx.bc->chip_class == CAYMAN)
+ cm_bytecode_add_cf_end(ctx.bc);
+ else {
+ const struct cf_op_info *last = r600_isa_cf(ctx.bc->cf_last->op);
+
+ if (last->flags & CF_CLAUSE)
+ r600_bytecode_add_cfinst(ctx.bc, CF_OP_NOP);
+
+ ctx.bc->cf_last->end_of_program = 1;
+ }
+ }
/* check GPR limit - we have 124 = 128 - 4
* (4 are reserved as alu clause temporary registers) */
--
1.8.3.1
More information about the mesa-dev mailing list