[Mesa-dev] [PATCH 5/5] gallivm: implement switch opcode
sroland at vmware.com
sroland at vmware.com
Thu Apr 18 19:13:58 PDT 2013
From: Roland Scheidegger <sroland at vmware.com>
This should be able to handle everything that makes switch tricky to implement.
Fallthroughs, most notably into and out of default, should be handled
correctly, but the handling is quite a mess.
If we see largely unoptimized switches in the wild, we should probably think
about some "real" switch optimization pass, e.g. things like this:
switch
case1
someinst
brk
case2
default
case3
someinst
brk
case4
someinst
endswitch
are legal, but the pointless case2/case3 statements not only cause extra
condition evaluation but also turn this into a "fake" fallthrough case (because
the mask for case2 has already been updated when default is encountered), which
requires executing the code twice.
---
src/gallium/auxiliary/gallivm/lp_bld_tgsi.h | 17 ++
src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c | 2 +
src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c | 320 +++++++++++++++++++-
3 files changed, 327 insertions(+), 12 deletions(-)
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
index 175b6a9..a3cc76e 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
@@ -253,6 +253,23 @@ struct lp_exec_mask {
int cond_stack_size;
LLVMValueRef cond_mask;
+ boolean break_type_stack[LP_MAX_TGSI_NESTING];
+ boolean break_type;
+
+ struct {
+ LLVMValueRef switch_val;
+ LLVMValueRef switch_mask;
+ LLVMValueRef switch_mask_default;
+ boolean switch_in_default;
+ unsigned switch_pc;
+ } switch_stack[LP_MAX_TGSI_NESTING];
+ int switch_stack_size;
+ LLVMValueRef switch_val;
+ LLVMValueRef switch_mask;
+ LLVMValueRef switch_mask_default;
+ boolean switch_in_default;
+ unsigned switch_pc;
+
LLVMBasicBlockRef loop_block;
LLVMValueRef cont_mask;
LLVMValueRef break_mask;
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
index e1c362b..dc7c090 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
@@ -865,6 +865,8 @@ lp_set_default_actions(struct lp_build_tgsi_context * bld_base)
bld_base->op_actions[TGSI_OPCODE_XPD] = xpd_action;
bld_base->op_actions[TGSI_OPCODE_BREAKC].fetch_args = scalar_unary_fetch_args;
+ bld_base->op_actions[TGSI_OPCODE_SWITCH].fetch_args = scalar_unary_fetch_args;
+ bld_base->op_actions[TGSI_OPCODE_CASE].fetch_args = scalar_unary_fetch_args;
bld_base->op_actions[TGSI_OPCODE_COS].fetch_args = scalar_unary_fetch_args;
bld_base->op_actions[TGSI_OPCODE_EX2].fetch_args = scalar_unary_fetch_args;
bld_base->op_actions[TGSI_OPCODE_IF].fetch_args = scalar_unary_fetch_args;
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
index 680a9c1..251b5c5 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
@@ -78,9 +78,11 @@ static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context
mask->cond_stack_size = 0;
mask->loop_stack_size = 0;
mask->call_stack_size = 0;
+ mask->switch_stack_size = 0;
mask->int_vec_type = lp_build_int_vec_type(bld->gallivm, mask->bld->type);
- mask->exec_mask = mask->ret_mask = mask->break_mask = mask->cont_mask = mask->cond_mask =
+ mask->exec_mask = mask->ret_mask = mask->break_mask = mask->cont_mask =
+ mask->cond_mask = mask->switch_mask =
LLVMConstAllOnes(mask->int_vec_type);
mask->loop_limiter = lp_build_alloca(bld->gallivm, int_type, "looplimiter");
@@ -110,6 +112,13 @@ static void lp_exec_mask_update(struct lp_exec_mask *mask)
} else
mask->exec_mask = mask->cond_mask;
+ if (mask->switch_stack_size) {
+ mask->exec_mask = LLVMBuildAnd(builder,
+ mask->exec_mask,
+ mask->switch_mask,
+ "switchmask");
+ }
+
if (mask->call_stack_size || mask->ret_in_main) {
mask->exec_mask = LLVMBuildAnd(builder,
mask->exec_mask,
@@ -120,6 +129,7 @@ static void lp_exec_mask_update(struct lp_exec_mask *mask)
mask->has_mask = (mask->cond_stack_size > 0 ||
mask->loop_stack_size > 0 ||
mask->call_stack_size > 0 ||
+ mask->switch_stack_size > 0 ||
mask->ret_in_main);
}
@@ -181,6 +191,10 @@ static void lp_exec_bgnloop(struct lp_exec_mask *mask)
assert(mask->loop_stack_size < LP_MAX_TGSI_NESTING);
+ mask->break_type_stack[mask->loop_stack_size + mask->switch_stack_size] =
+ mask->break_type;
+ mask->break_type = 0;
+
mask->loop_stack[mask->loop_stack_size].loop_block = mask->loop_block;
mask->loop_stack[mask->loop_stack_size].cont_mask = mask->cont_mask;
mask->loop_stack[mask->loop_stack_size].break_mask = mask->break_mask;
@@ -200,16 +214,51 @@ static void lp_exec_bgnloop(struct lp_exec_mask *mask)
lp_exec_mask_update(mask);
}
-static void lp_exec_break(struct lp_exec_mask *mask)
+static void lp_exec_break(struct lp_exec_mask *mask,
+ struct lp_build_tgsi_context * bld_base) /* BRK: disable the active channels in the innermost loop or switch; may redirect bld_base->pc */
{
LLVMBuilderRef builder = mask->bld->gallivm->builder;
- LLVMValueRef exec_mask = LLVMBuildNot(builder,
- mask->exec_mask,
- "break");
- mask->break_mask = LLVMBuildAnd(builder,
- mask->break_mask,
- exec_mask, "break_full");
+ if (mask->break_type == 0) { /* 0 is set by lp_exec_bgnloop: this break leaves a loop */
+ LLVMValueRef exec_mask = LLVMBuildNot(builder,
+ mask->exec_mask,
+ "break");
+
+ mask->break_mask = LLVMBuildAnd(builder,
+ mask->break_mask,
+ exec_mask, "break_full"); /* clear the currently executing channels from break_mask */
+ }
+ else { /* 1 is set by lp_exec_switch: this break leaves a switch */
+ unsigned opcode = bld_base->instructions[bld_base->pc + 1].Instruction.Opcode;
+ boolean break_always = (opcode == TGSI_OPCODE_ENDSWITCH ||
+ opcode == TGSI_OPCODE_CASE); /* treated as unconditional when the opcode read at pc + 1 is CASE or ENDSWITCH */
+
+
+ if (mask->switch_in_default) {
+ /*
+ * Stop default execution, but only if this break is unconditional
+ * (break_always). The condition here is not perfect since dead code
+ * after a break is allowed, but it should be sufficient since false
+ * negatives are just unoptimized - so we don't have to pre-evaluate that.
+ */
+ if(break_always && mask->switch_pc) {
+ bld_base->pc = mask->switch_pc; /* jump to the saved switch_pc; the mask update below is skipped */
+ return;
+ }
+ }
+
+ if (break_always) {
+ mask->switch_mask = LLVMConstNull(mask->bld->int_vec_type); /* unconditional: clear the whole switch mask */
+ }
+ else {
+ LLVMValueRef exec_mask = LLVMBuildNot(builder,
+ mask->exec_mask,
+ "break");
+ mask->switch_mask = LLVMBuildAnd(builder,
+ mask->switch_mask,
+ exec_mask, "break_switch"); /* clear only the currently executing channels */
+ }
+ }
lp_exec_mask_update(mask);
}
@@ -223,9 +272,16 @@ static void lp_exec_break_condition(struct lp_exec_mask *mask,
cond, "cond_mask");
cond_mask = LLVMBuildNot(builder, cond_mask, "break_cond");
- mask->break_mask = LLVMBuildAnd(builder,
- mask->break_mask,
- cond_mask, "breakc_full");
+ if (mask->break_type == 0) {
+ mask->break_mask = LLVMBuildAnd(builder,
+ mask->break_mask,
+ cond_mask, "breakc_full");
+ }
+ else {
+ mask->switch_mask = LLVMBuildAnd(builder,
+ mask->switch_mask,
+ cond_mask, "breakc_switch");
+ }
lp_exec_mask_update(mask);
}
@@ -312,10 +368,201 @@ static void lp_exec_endloop(struct gallivm_state *gallivm,
mask->cont_mask = mask->loop_stack[mask->loop_stack_size].cont_mask;
mask->break_mask = mask->loop_stack[mask->loop_stack_size].break_mask;
mask->break_var = mask->loop_stack[mask->loop_stack_size].break_var;
+ mask->break_type = mask->break_type_stack[mask->loop_stack_size + mask->switch_stack_size];
+
+ lp_exec_mask_update(mask);
+}
+
+static void lp_exec_switch(struct lp_exec_mask *mask,
+ LLVMValueRef switchval) /* SWITCH: save the enclosing switch state and start a new switch on switchval */
+{
+ mask->break_type_stack[mask->loop_stack_size + mask->switch_stack_size] = /* slot index is the combined loop + switch depth */
+ mask->break_type;
+ mask->break_type = 1; /* lp_exec_break / lp_exec_break_condition now act on switch_mask instead of break_mask */
+
+ mask->switch_stack[mask->switch_stack_size].switch_val = mask->switch_val; /* NOTE(review): no depth assert before this push, unlike lp_exec_bgnloop */
+ mask->switch_stack[mask->switch_stack_size].switch_mask = mask->switch_mask;
+ mask->switch_stack[mask->switch_stack_size].switch_mask_default = mask->switch_mask_default;
+ mask->switch_stack[mask->switch_stack_size].switch_in_default = mask->switch_in_default;
+ mask->switch_stack[mask->switch_stack_size].switch_pc = mask->switch_pc;
+ mask->switch_stack_size++;
+
+ mask->switch_val = switchval;
+ mask->switch_mask = LLVMConstNull(mask->int_vec_type); /* no channel is enabled until lp_exec_case / lp_exec_default updates the mask */
+ mask->switch_mask_default = LLVMConstNull(mask->int_vec_type); /* lp_exec_case ORs every case match into this */
+ mask->switch_in_default = false;
+ mask->switch_pc = 0; /* 0 means no default pc has been recorded */
lp_exec_mask_update(mask);
}
+static void lp_exec_endswitch(struct lp_exec_mask *mask,
+ struct lp_build_tgsi_context * bld_base) /* ENDSWITCH: jump back to a pending deferred default, otherwise pop the switch state */
+{
+ LLVMBuilderRef builder = mask->bld->gallivm->builder;
+
+ /* Check whether there is a deferred default; if so, execute it now. */
+ if (mask->switch_pc && !mask->switch_in_default) {
+ LLVMValueRef prevmask, defaultmask;
+ unsigned tmp_pc;
+ prevmask = mask->switch_stack[mask->switch_stack_size - 1].switch_mask; /* switch_mask of the enclosing level, saved by lp_exec_switch */
+ defaultmask = LLVMBuildNot(builder, mask->switch_mask_default, "sw_default_mask"); /* channels that matched no case */
+ mask->switch_mask = LLVMBuildAnd(builder, prevmask, defaultmask, "sw_mask");
+ mask->switch_in_default = true;
+
+ lp_exec_mask_update(mask);
+
+ assert(bld_base->instructions[mask->switch_pc - 1].Instruction.Opcode ==
+ TGSI_OPCODE_DEFAULT);
+
+ tmp_pc = bld_base->pc;
+ bld_base->pc = mask->switch_pc; /* continue at the pc recorded by lp_exec_default */
+ /*
+ * Re-purpose switch_pc to point back here, since execution of the
+ * deferred default stops after the next break.
+ */
+ mask->switch_pc = tmp_pc - 1;
+
+ return; /* the switch state is not popped yet; that happens once ENDSWITCH is reached again */
+ }
+
+ else if (mask->switch_pc && mask->switch_in_default) {
+ assert(bld_base->pc == mask->switch_pc + 1); /* sanity check against the switch_pc stored in the branch above */
+ }
+
+ mask->switch_stack_size--; /* pop: restore the enclosing switch state */
+ mask->switch_val = mask->switch_stack[mask->switch_stack_size].switch_val;
+ mask->switch_mask = mask->switch_stack[mask->switch_stack_size].switch_mask;
+ mask->switch_mask_default = mask->switch_stack[mask->switch_stack_size].switch_mask_default;
+ mask->switch_in_default = mask->switch_stack[mask->switch_stack_size].switch_in_default;
+ mask->switch_pc = mask->switch_stack[mask->switch_stack_size].switch_pc;
+
+ mask->break_type = mask->break_type_stack[mask->loop_stack_size + mask->switch_stack_size]; /* same slot lp_exec_switch saved into */
+
+ lp_exec_mask_update(mask);
+}
+
+static void lp_exec_case(struct lp_exec_mask *mask,
+ LLVMValueRef caseval) /* CASE: add the channels whose switch value equals caseval to the switch mask */
+{
+ LLVMBuilderRef builder = mask->bld->gallivm->builder;
+
+ LLVMValueRef casemask, prevmask;
+
+ /* Skipping the case mask evaluation while in default is NOT optional (not in all cases anyway). */
+ if (!mask->switch_in_default) {
+ prevmask = mask->switch_stack[mask->switch_stack_size - 1].switch_mask; /* switch_mask of the enclosing level */
+ casemask = lp_build_cmp(mask->bld, PIPE_FUNC_EQUAL, caseval, mask->switch_val);
+ mask->switch_mask_default = LLVMBuildOr(builder, casemask,
+ mask->switch_mask_default, "sw_default_mask"); /* remember the match; default later uses the inverse of this */
+ casemask = LLVMBuildOr(builder, casemask, mask->switch_mask, ""); /* keep channels that are already active (fallthrough) */
+ mask->switch_mask = LLVMBuildAnd(builder, casemask, prevmask, "sw_mask");
+
+ lp_exec_mask_update(mask);
+ }
+}
+
+
+static boolean default_analyse_is_last(struct lp_exec_mask *mask,
+ struct lp_build_tgsi_context * bld_base,
+ int *default_pc_start) /* out: pc - 1 of the CASE or ENDSWITCH that ends the scan */
+{ /* Scans forward from bld_base->pc; returns true if ENDSWITCH of this switch is reached before another CASE of it. */
+ unsigned pc = bld_base->pc;
+ unsigned curr_switch_stack = mask->switch_stack_size; /* nesting depth tracked while scanning */
+
+ /* skip over case statements which are together with default */
+ while (bld_base->instructions[pc].Instruction.Opcode == TGSI_OPCODE_CASE) { /* NOTE(review): no bound check against num_instructions in this loop */
+ pc++;
+ }
+
+ while (pc != -1 && pc < bld_base->num_instructions) {
+ unsigned opcode = bld_base->instructions[pc].Instruction.Opcode;
+ switch (opcode) {
+ case TGSI_OPCODE_CASE:
+ if (curr_switch_stack == mask->switch_stack_size) { /* a later CASE at this switch's depth: default is not last */
+ *default_pc_start = pc - 1;
+ return false;
+ }
+ break;
+ case TGSI_OPCODE_SWITCH:
+ curr_switch_stack++; /* nested switch: its CASE/ENDSWITCH are at a deeper level and do not end the scan */
+ break;
+ case TGSI_OPCODE_ENDSWITCH:
+ if (curr_switch_stack == mask->switch_stack_size) { /* end of this switch reached first: default is last */
+ *default_pc_start = pc - 1;
+ return true;
+ }
+ curr_switch_stack--;
+ break;
+ }
+ pc++;
+ }
+ /* should never arrive here; *default_pc_start is left unwritten on this path */
+ assert(0);
+ return true;
+}
+
+static void lp_exec_default(struct lp_exec_mask *mask,
+ struct lp_build_tgsi_context * bld_base) /* DEFAULT: handle it in place if it is last, otherwise record its pc for lp_exec_endswitch */
+{
+ LLVMBuilderRef builder = mask->bld->gallivm->builder;
+
+ int default_exec_pc;
+ boolean default_is_last;
+
+ /*
+ * This is a messy opcode, because it may not always be at the end and
+ * there can be fallthrough into and out of it.
+ */
+
+ default_is_last = default_analyse_is_last(mask, bld_base, &default_exec_pc);
+ /*
+ * If it is the last statement in the switch (note that case statements
+ * appearing "at the same time" as default don't change that) everything is
+ * just fine: update the switch mask and go on. This means we can handle
+ * default with fallthrough INTO it without overhead, if it is last.
+ */
+ if (default_is_last) {
+ LLVMValueRef prevmask, defaultmask;
+ prevmask = mask->switch_stack[mask->switch_stack_size - 1].switch_mask; /* switch_mask of the enclosing level */
+ defaultmask = LLVMBuildNot(builder, mask->switch_mask_default, "sw_default_mask"); /* channels that matched no case */
+ defaultmask = LLVMBuildOr(builder, defaultmask, mask->switch_mask, ""); /* plus channels falling through into default */
+ mask->switch_mask = LLVMBuildAnd(builder, prevmask, defaultmask, "sw_mask");
+ mask->switch_in_default = true;
+
+ lp_exec_mask_update(mask);
+ }
+ else {
+ /*
+ * Technically, a "case" immediately before default isn't really a
+ * fallthrough, but we must count it as one since the masks are already
+ * updated (a switch optimizer pass could drop such case statements).
+ * NOTE(review): lp_exec_endswitch asserts instructions[switch_pc - 1]
+ * is DEFAULT and switch_pc is this same pc; confirm that pc - 1 here
+ * really addresses the instruction preceding DEFAULT.
+ */
+ unsigned opcode = bld_base->instructions[bld_base->pc - 1].Instruction.Opcode;
+ boolean ft_into = (opcode != TGSI_OPCODE_BRK &&
+ opcode != TGSI_OPCODE_SWITCH); /* fallthrough only if preceded by neither BRK nor SWITCH */
+ /*
+ * If it is not the last statement and there was no fallthrough into it,
+ * we record the PC and continue execution at the next case (again, those
+ * cases encountered at the same time don't count). At endswitch
+ * time, we update switchmask, and go back executing the code we skipped
+ * until the next break (possibly re-executing some code with changed mask
+ * if there was a fallthrough out of default).
+ * Finally, if it is not the last statement and there was a fallthrough
+ * into it, do the same as in the former case, except instead of skipping
+ * the code just execute it without updating the mask, then go back and re-execute.
+ */
+ mask->switch_pc = bld_base->pc; /* non-zero switch_pc tells lp_exec_endswitch that a default is pending */
+ if (!ft_into) {
+ bld_base->pc = default_exec_pc; /* skip the default body for now; value comes from default_analyse_is_last */
+ }
+ }
+}
+
+
/* stores val into an address pointed to by dst.
* mask->exec_mask is used to figure out which bits of val
* should be stored into the address
@@ -369,6 +616,7 @@ static void lp_exec_mask_ret(struct lp_exec_mask *mask, int *pc)
if (mask->cond_stack_size == 0 &&
mask->loop_stack_size == 0 &&
+ mask->switch_stack_size == 0 &&
mask->call_stack_size == 0) {
/* returning from main() */
*pc = -1;
@@ -2407,7 +2655,7 @@ brk_emit(
{
struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
- lp_exec_break(&bld->exec_mask);
+ lp_exec_break(&bld->exec_mask, bld_base);
}
static void
@@ -2458,6 +2706,50 @@ uif_emit(
}
static void
+case_emit(
+ const struct lp_build_tgsi_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{ /* Emit callback registered for TGSI_OPCODE_CASE; action is not used here. */
+ struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
+
+ lp_exec_case(&bld->exec_mask, emit_data->args[0]); /* args[0] is the case value (fetch_args for CASE is scalar_unary_fetch_args) */
+}
+
+static void
+default_emit(
+ const struct lp_build_tgsi_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{ /* Emit callback registered for TGSI_OPCODE_DEFAULT; action and emit_data are not used here. */
+ struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
+
+ lp_exec_default(&bld->exec_mask, bld_base); /* bld_base is passed because lp_exec_default reads and may rewrite bld_base->pc */
+}
+
+static void
+switch_emit(
+ const struct lp_build_tgsi_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{ /* Emit callback registered for TGSI_OPCODE_SWITCH; action is not used here. */
+ struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
+
+ lp_exec_switch(&bld->exec_mask, emit_data->args[0]); /* args[0] is the switch value (fetch_args for SWITCH is scalar_unary_fetch_args) */
+}
+
+static void
+endswitch_emit(
+ const struct lp_build_tgsi_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{ /* Emit callback registered for TGSI_OPCODE_ENDSWITCH; action and emit_data are not used here. */
+ struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
+
+ lp_exec_endswitch(&bld->exec_mask, bld_base); /* bld_base is passed because lp_exec_endswitch reads and may rewrite bld_base->pc */
+}
+
+static void
bgnloop_emit(
const struct lp_build_tgsi_action * action,
struct lp_build_tgsi_context * bld_base,
@@ -2794,13 +3086,16 @@ lp_build_tgsi_soa(struct gallivm_state *gallivm,
bld.bld_base.op_actions[TGSI_OPCODE_BRK].emit = brk_emit;
bld.bld_base.op_actions[TGSI_OPCODE_BREAKC].emit = breakc_emit;
bld.bld_base.op_actions[TGSI_OPCODE_CAL].emit = cal_emit;
+ bld.bld_base.op_actions[TGSI_OPCODE_CASE].emit = case_emit;
bld.bld_base.op_actions[TGSI_OPCODE_CONT].emit = cont_emit;
bld.bld_base.op_actions[TGSI_OPCODE_DDX].emit = ddx_emit;
bld.bld_base.op_actions[TGSI_OPCODE_DDY].emit = ddy_emit;
+ bld.bld_base.op_actions[TGSI_OPCODE_DEFAULT].emit = default_emit;
bld.bld_base.op_actions[TGSI_OPCODE_ELSE].emit = else_emit;
bld.bld_base.op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit;
bld.bld_base.op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit;
bld.bld_base.op_actions[TGSI_OPCODE_ENDSUB].emit = endsub_emit;
+ bld.bld_base.op_actions[TGSI_OPCODE_ENDSWITCH].emit = endswitch_emit;
bld.bld_base.op_actions[TGSI_OPCODE_IF].emit = if_emit;
bld.bld_base.op_actions[TGSI_OPCODE_UIF].emit = uif_emit;
bld.bld_base.op_actions[TGSI_OPCODE_KIL].emit = kil_emit;
@@ -2808,6 +3103,7 @@ lp_build_tgsi_soa(struct gallivm_state *gallivm,
bld.bld_base.op_actions[TGSI_OPCODE_NRM].emit = nrm_emit;
bld.bld_base.op_actions[TGSI_OPCODE_NRM4].emit = nrm_emit;
bld.bld_base.op_actions[TGSI_OPCODE_RET].emit = ret_emit;
+ bld.bld_base.op_actions[TGSI_OPCODE_SWITCH].emit = switch_emit;
bld.bld_base.op_actions[TGSI_OPCODE_TEX].emit = tex_emit;
bld.bld_base.op_actions[TGSI_OPCODE_TXB].emit = txb_emit;
bld.bld_base.op_actions[TGSI_OPCODE_TXD].emit = txd_emit;
--
1.7.9.5
More information about the mesa-dev
mailing list