[Mesa-dev] [PATCH] r600g: don't reserve more stack space than required v2
Vadim Girlin
vadimgirlin at gmail.com
Sun Feb 17 22:49:02 PST 2013
Overcautious stack reservation caused significant loss of performance.
v2: fix stack depth computation
Signed-off-by: Vadim Girlin <vadimgirlin at gmail.com>
---
src/gallium/drivers/r600/r600_asm.c | 4 +--
src/gallium/drivers/r600/r600_asm.h | 10 ++----
src/gallium/drivers/r600/r600_shader.c | 64 ++++++++++++----------------------
3 files changed, 27 insertions(+), 51 deletions(-)
diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c
index 3632aa5..e891921 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -1524,8 +1524,8 @@ int r600_bytecode_build(struct r600_bytecode *bc)
unsigned addr;
int i, r;
- if (bc->callstack[0].max > 0)
- bc->nstack = ((bc->callstack[0].max + 3) >> 2) + 2;
+ if (bc->max_stack_depth > 0)
+ bc->nstack = (bc->max_stack_depth >> 2) + 1;
if (bc->type == TGSI_PROCESSOR_VERTEX && !bc->nstack) {
bc->nstack = 1;
}
diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h
index 03cd238..93f2dc9 100644
--- a/src/gallium/drivers/r600/r600_asm.h
+++ b/src/gallium/drivers/r600/r600_asm.h
@@ -173,12 +173,6 @@ struct r600_cf_stack_entry {
};
#define SQ_MAX_CALL_DEPTH 0x00000020
-struct r600_cf_callstack {
- unsigned fc_sp_before_entry;
- int sub_desc_index;
- int current;
- int max;
-};
#define AR_HANDLE_NORMAL 0
#define AR_HANDLE_RV6XX 1 /* except RV670 */
@@ -199,8 +193,8 @@ struct r600_bytecode {
uint32_t *bytecode;
uint32_t fc_sp;
struct r600_cf_stack_entry fc_stack[32];
- unsigned call_sp;
- struct r600_cf_callstack callstack[SQ_MAX_CALL_DEPTH];
+ int stack_depth;
+ int max_stack_depth;
unsigned ar_loaded;
unsigned ar_reg;
unsigned ar_chan;
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index 59a7f92..77c8c77 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -234,7 +234,7 @@ struct r600_shader_tgsi_instruction {
static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[], cm_shader_tgsi_instruction[];
static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx);
-static inline void callstack_check_depth(struct r600_shader_ctx *ctx, unsigned reason, unsigned check_max_only);
+static inline void callstack_push(struct r600_shader_ctx *ctx, unsigned reason);
static void fc_pushlevel(struct r600_shader_ctx *ctx, int type);
static int tgsi_else(struct r600_shader_ctx *ctx);
static int tgsi_endif(struct r600_shader_ctx *ctx);
@@ -412,7 +412,7 @@ static void llvm_if(struct r600_shader_ctx *ctx)
{
r600_bytecode_add_cfinst(ctx->bc, CF_OP_JUMP);
fc_pushlevel(ctx, FC_IF);
- callstack_check_depth(ctx, FC_PUSH_VPM, 0);
+ callstack_push(ctx, FC_PUSH_VPM);
}
static void r600_break_from_byte_stream(struct r600_shader_ctx *ctx)
@@ -5521,62 +5521,45 @@ static int pops(struct r600_shader_ctx *ctx, int pops)
return 0;
}
-static inline void callstack_decrease_current(struct r600_shader_ctx *ctx, unsigned reason)
+static inline void callstack_pop(struct r600_shader_ctx *ctx, unsigned reason)
{
switch(reason) {
case FC_PUSH_VPM:
- ctx->bc->callstack[ctx->bc->call_sp].current--;
+ ctx->bc->stack_depth--;
break;
case FC_PUSH_WQM:
case FC_LOOP:
- ctx->bc->callstack[ctx->bc->call_sp].current -= 4;
+ ctx->bc->stack_depth -= 4;
break;
case FC_REP:
/* TOODO : for 16 vp asic should -= 2; */
- ctx->bc->callstack[ctx->bc->call_sp].current --;
+ ctx->bc->stack_depth--;
break;
}
+
+ assert(ctx->bc->stack_depth >= 0);
}
-static inline void callstack_check_depth(struct r600_shader_ctx *ctx, unsigned reason, unsigned check_max_only)
+static inline void callstack_push(struct r600_shader_ctx *ctx, unsigned reason)
{
- if (check_max_only) {
- int diff;
- switch (reason) {
- case FC_PUSH_VPM:
- diff = 1;
- break;
- case FC_PUSH_WQM:
- diff = 4;
- break;
- default:
- assert(0);
- diff = 0;
- }
- if ((ctx->bc->callstack[ctx->bc->call_sp].current + diff) >
- ctx->bc->callstack[ctx->bc->call_sp].max) {
- ctx->bc->callstack[ctx->bc->call_sp].max =
- ctx->bc->callstack[ctx->bc->call_sp].current + diff;
- }
- return;
- }
+ int diff;
switch (reason) {
case FC_PUSH_VPM:
- ctx->bc->callstack[ctx->bc->call_sp].current++;
+ diff = 1;
break;
case FC_PUSH_WQM:
case FC_LOOP:
- ctx->bc->callstack[ctx->bc->call_sp].current += 4;
- break;
- case FC_REP:
- ctx->bc->callstack[ctx->bc->call_sp].current++;
+ diff = 4;
break;
+ default:
+ assert(0);
+ diff = 0;
}
- if ((ctx->bc->callstack[ctx->bc->call_sp].current) >
- ctx->bc->callstack[ctx->bc->call_sp].max) {
- ctx->bc->callstack[ctx->bc->call_sp].max =
- ctx->bc->callstack[ctx->bc->call_sp].current;
+ ctx->bc->stack_depth += diff;
+
+ if (ctx->bc->stack_depth > ctx->bc->max_stack_depth) {
+ ctx->bc->max_stack_depth = ctx->bc->stack_depth;
}
}
@@ -5664,7 +5647,7 @@ static int tgsi_if(struct r600_shader_ctx *ctx)
fc_pushlevel(ctx, FC_IF);
- callstack_check_depth(ctx, FC_PUSH_VPM, 0);
+ callstack_push(ctx, FC_PUSH_VPM);
return 0;
}
@@ -5694,7 +5677,7 @@ static int tgsi_endif(struct r600_shader_ctx *ctx)
}
fc_poplevel(ctx);
- callstack_decrease_current(ctx, FC_PUSH_VPM);
+ callstack_pop(ctx, FC_PUSH_VPM);
return 0;
}
@@ -5707,7 +5690,7 @@ static int tgsi_bgnloop(struct r600_shader_ctx *ctx)
fc_pushlevel(ctx, FC_LOOP);
/* check stack depth */
- callstack_check_depth(ctx, FC_LOOP, 0);
+ callstack_push(ctx, FC_LOOP);
return 0;
}
@@ -5736,7 +5719,7 @@ static int tgsi_endloop(struct r600_shader_ctx *ctx)
}
/* XXX add LOOPRET support */
fc_poplevel(ctx);
- callstack_decrease_current(ctx, FC_LOOP);
+ callstack_pop(ctx, FC_LOOP);
return 0;
}
@@ -5759,7 +5742,6 @@ static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx)
fc_set_mid(ctx, fscp);
- callstack_check_depth(ctx, FC_PUSH_VPM, 1);
return 0;
}
--
1.8.1.2
More information about the mesa-dev mailing list