[Mesa-dev] [PATCH] gallivm: fix opcode and function nesting

Roland Scheidegger sroland at vmware.com
Wed Jan 29 09:41:23 PST 2014


On 28.01.2014 23:08, Zack Rusin wrote:
> gallivm soa code supported only a single level of nesting for
> control flow opcodes (if, switch, loops...) but the d3d10 spec
> clearly states that those are nested within functions. To support
> nesting of conditionals inside functions we need to store the
> nesting data inside function contexts and keep a stack of those.
> Furthermore we make sure that if nesting for subroutines is deeper
> than 32 then we simply ignore all subsequent 'call' invocations.
Hmm, I thought nesting worked just fine, except that with just one shared
stack we'd have needed a much larger one. Wasn't that the case? (Not
arguing against using per-function stacks, just curious.)
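Just to make sure I'm reading the new layout right, roughly (this is only
a sketch of my understanding, not code from the patch; the *_sketch names
are mine and the LLVMValueRef members are left out):

/* Sketch (my reading, not from the patch): nesting state becomes
 * per-function, while the current LLVMValueRef masks (exec/cond/
 * break/cont/switch/ret) stay in lp_exec_mask itself. With a single
 * shared stack the worst case would have been on the order of
 * LP_MAX_TGSI_NESTING * LP_MAX_NUM_FUNCS entries; per-function
 * contexts cap each subroutine at LP_MAX_TGSI_NESTING. */
#define LP_MAX_TGSI_NESTING 16   /* stand-in value for this sketch */
#define LP_MAX_NUM_FUNCS    33   /* 32 subroutine levels + main, per the patch */

struct function_ctx_sketch {
   int pc;                 /* where to resume after ENDSUB */
   int cond_stack_size;    /* per-function IF nesting */
   int loop_stack_size;    /* per-function LOOP nesting */
   int switch_stack_size;  /* per-function SWITCH nesting */
};

struct exec_mask_sketch {
   struct function_ctx_sketch *function_stack; /* LP_MAX_NUM_FUNCS entries */
   int function_stack_size;                    /* slot 0 is main() */
};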


> 
> Signed-off-by: Zack Rusin <zackr at vmware.com>
> ---
>  src/gallium/auxiliary/gallivm/lp_bld_tgsi.h     |  72 ++---
>  src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c | 377 ++++++++++++++++--------
>  2 files changed, 292 insertions(+), 157 deletions(-)
> 
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
> index 4f988b8..839ab85 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
> @@ -260,49 +260,51 @@ struct lp_exec_mask {
>  
>     LLVMTypeRef int_vec_type;
>  
> -   LLVMValueRef cond_stack[LP_MAX_TGSI_NESTING];
> -   int cond_stack_size;
> -   LLVMValueRef cond_mask;
> -
> -   /* keep track if break belongs to switch or loop */
> -   enum lp_exec_mask_break_type break_type_stack[LP_MAX_TGSI_NESTING];
> -   enum lp_exec_mask_break_type break_type;
> +   LLVMValueRef exec_mask;
>  
> -   struct {
> -      LLVMValueRef switch_val;
> -      LLVMValueRef switch_mask;
> -      LLVMValueRef switch_mask_default;
> -      boolean switch_in_default;
> -      unsigned switch_pc;
> -   } switch_stack[LP_MAX_TGSI_NESTING];
> -   int switch_stack_size;
> -   LLVMValueRef switch_val;
> +   LLVMValueRef ret_mask;
> +   LLVMValueRef cond_mask;
>     LLVMValueRef switch_mask;         /* current switch exec mask */
> -   LLVMValueRef switch_mask_default; /* reverse of switch mask used for default */
> -   boolean switch_in_default;        /* if switch exec is currently in default */
> -   unsigned switch_pc;               /* when used points to default or endswitch-1 */
> -
> -   LLVMBasicBlockRef loop_block;
>     LLVMValueRef cont_mask;
>     LLVMValueRef break_mask;
> -   LLVMValueRef break_var;
> -   struct {
> -      LLVMBasicBlockRef loop_block;
> -      LLVMValueRef cont_mask;
> -      LLVMValueRef break_mask;
> -      LLVMValueRef break_var;
> -   } loop_stack[LP_MAX_TGSI_NESTING];
> -   int loop_stack_size;
>  
> -   LLVMValueRef ret_mask;
> -   struct {
> +   struct function_ctx {
>        int pc;
>        LLVMValueRef ret_mask;
> -   } call_stack[LP_MAX_TGSI_NESTING];
> -   int call_stack_size;
>  
> -   LLVMValueRef exec_mask;
> -   LLVMValueRef loop_limiter;
> +      LLVMValueRef cond_stack[LP_MAX_TGSI_NESTING];
> +      int cond_stack_size;
> +
> +      /* keep track if break belongs to switch or loop */
> +      enum lp_exec_mask_break_type break_type_stack[LP_MAX_TGSI_NESTING];
> +      enum lp_exec_mask_break_type break_type;
> +
> +      struct {
> +         LLVMValueRef switch_val;
> +         LLVMValueRef switch_mask;
> +         LLVMValueRef switch_mask_default;
> +         boolean switch_in_default;
> +         unsigned switch_pc;
> +      } switch_stack[LP_MAX_TGSI_NESTING];
> +      int switch_stack_size;
> +      LLVMValueRef switch_val;
> +      LLVMValueRef switch_mask_default; /* reverse of switch mask used for default */
> +      boolean switch_in_default;        /* if switch exec is currently in default */
> +      unsigned switch_pc;               /* when used points to default or endswitch-1 */
> +
> +      LLVMValueRef loop_limiter;
> +      LLVMBasicBlockRef loop_block;
> +      LLVMValueRef break_var;
> +      struct {
> +         LLVMBasicBlockRef loop_block;
> +         LLVMValueRef cont_mask;
> +         LLVMValueRef break_mask;
> +         LLVMValueRef break_var;
> +      } loop_stack[LP_MAX_TGSI_NESTING];
> +      int loop_stack_size;
> +
> +   } *function_stack;
> +   int function_stack_size;
>  };
>  
>  struct lp_build_tgsi_inst_list
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
> index f01b50c..52e1b51 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
> @@ -66,6 +66,10 @@
>  #include "lp_bld_sample.h"
>  #include "lp_bld_struct.h"
>  
> +/* SM 4.0 says that subroutines can nest 32 deep and 
> + * we need one more for our main function */
> +#define LP_MAX_NUM_FUNCS 33
> +
>  #define DUMP_GS_EMITS 0
>  
>  /*
> @@ -98,38 +102,108 @@ emit_dump_reg(struct gallivm_state *gallivm,
>     lp_build_print_value(gallivm, buf, value);
>  }
>  
> +static INLINE struct function_ctx *
> +func_ctx(struct lp_exec_mask *mask)
> +{
> +   assert(mask->function_stack_size > 0);
> +   assert(mask->function_stack_size <= LP_MAX_NUM_FUNCS);
> +   return &mask->function_stack[mask->function_stack_size - 1];
> +}
>  
> -static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld)
> +static INLINE boolean
> +mask_has_loop(struct lp_exec_mask *mask)
>  {
> -   LLVMTypeRef int_type = LLVMInt32TypeInContext(bld->gallivm->context);
> -   LLVMBuilderRef builder = bld->gallivm->builder;
> +   int i;
> +   for (i = mask->function_stack_size - 1; i >= 0; --i) {
> +      const struct function_ctx *ctx = &mask->function_stack[i];
> +      if (ctx->loop_stack_size > 0)
> +         return TRUE;
> +   }
> +   return FALSE;
> +}
> +
> +static INLINE boolean
> +mask_has_switch(struct lp_exec_mask *mask)
> +{
> +   int i;
> +   for (i = mask->function_stack_size - 1; i >= 0; --i) {
> +      const struct function_ctx *ctx = &mask->function_stack[i];
> +      if (ctx->switch_stack_size > 0)
> +         return TRUE;
> +   }
> +   return FALSE;
> +}
> +
> +static INLINE boolean
> +mask_has_cond(struct lp_exec_mask *mask)
> +{
> +   int i;
> +   for (i = mask->function_stack_size - 1; i >= 0; --i) {
> +      const struct function_ctx *ctx = &mask->function_stack[i];
> +      if (ctx->cond_stack_size > 0)
> +         return TRUE;
> +   }
> +   return FALSE;
> +}
>  
> +
> +static void
> +lp_exec_mask_function_init(struct lp_exec_mask *mask, int function_idx)
> +{
> +   LLVMTypeRef int_type = LLVMInt32TypeInContext(mask->bld->gallivm->context);
> +   LLVMBuilderRef builder = mask->bld->gallivm->builder;
> +   struct function_ctx *ctx =  &mask->function_stack[function_idx];
> +
> +   ctx->cond_stack_size = 0;
> +   ctx->loop_stack_size = 0;
> +   ctx->switch_stack_size = 0;
> +
> +   if (function_idx == 0) {
> +      ctx->ret_mask = mask->ret_mask;
> +   }
> +
> +   ctx->loop_limiter = lp_build_alloca(mask->bld->gallivm,
> +                                       int_type, "looplimiter");
> +   LLVMBuildStore(
> +      builder,
> +      LLVMConstInt(int_type, LP_MAX_TGSI_LOOP_ITERATIONS, false),
> +      ctx->loop_limiter);
> +}
> +
> +static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld)
> +{
>     mask->bld = bld;
>     mask->has_mask = FALSE;
>     mask->ret_in_main = FALSE;
> -   mask->cond_stack_size = 0;
> -   mask->loop_stack_size = 0;
> -   mask->call_stack_size = 0;
> -   mask->switch_stack_size = 0;
> +   /* For the main function */
> +   mask->function_stack_size = 1;
>  
>     mask->int_vec_type = lp_build_int_vec_type(bld->gallivm, mask->bld->type);
>     mask->exec_mask = mask->ret_mask = mask->break_mask = mask->cont_mask =
>           mask->cond_mask = mask->switch_mask =
>           LLVMConstAllOnes(mask->int_vec_type);
>  
> -   mask->loop_limiter = lp_build_alloca(bld->gallivm, int_type, "looplimiter");
> +   mask->function_stack = CALLOC(LP_MAX_NUM_FUNCS,
> +                                 sizeof(mask->function_stack[0]));
> +   lp_exec_mask_function_init(mask, 0);
> +}
>  
> -   LLVMBuildStore(
> -      builder,
> -      LLVMConstInt(int_type, LP_MAX_TGSI_LOOP_ITERATIONS, false),
> -      mask->loop_limiter);
> +static void
> +lp_exec_mask_fini(struct lp_exec_mask *mask)
> +{
> +   FREE(mask->function_stack);
>  }
>  
>  static void lp_exec_mask_update(struct lp_exec_mask *mask)
>  {
>     LLVMBuilderRef builder = mask->bld->gallivm->builder;
> +   boolean has_loop_mask = mask_has_loop(mask);
> +   boolean has_cond_mask = mask_has_cond(mask);
> +   boolean has_switch_mask = mask_has_switch(mask);
> +   boolean has_ret_mask = mask->function_stack_size > 1 ||
> +         mask->ret_in_main;
>  
> -   if (mask->loop_stack_size) {
> +   if (has_loop_mask) {
>        /*for loops we need to update the entire mask at runtime */
>        LLVMValueRef tmp;
>        assert(mask->break_mask);
> @@ -144,37 +218,40 @@ static void lp_exec_mask_update(struct lp_exec_mask *mask)
>     } else
>        mask->exec_mask = mask->cond_mask;
>  
> -   if (mask->switch_stack_size) {
> +   if (has_switch_mask) {
>        mask->exec_mask = LLVMBuildAnd(builder,
>                                       mask->exec_mask,
>                                       mask->switch_mask,
>                                       "switchmask");
>     }
>  
> -   if (mask->call_stack_size || mask->ret_in_main) {
> +   if (has_ret_mask) {
>        mask->exec_mask = LLVMBuildAnd(builder,
>                                       mask->exec_mask,
>                                       mask->ret_mask,
>                                       "callmask");
>     }
>  
> -   mask->has_mask = (mask->cond_stack_size > 0 ||
> -                     mask->loop_stack_size > 0 ||
> -                     mask->call_stack_size > 0 ||
> -                     mask->switch_stack_size > 0 ||
> -                     mask->ret_in_main);
> +   mask->has_mask = (has_cond_mask ||
> +                     has_loop_mask ||
> +                     has_switch_mask ||
> +                     has_ret_mask);
>  }
>  
>  static void lp_exec_mask_cond_push(struct lp_exec_mask *mask,
>                                     LLVMValueRef val)
>  {
>     LLVMBuilderRef builder = mask->bld->gallivm->builder;
> +   struct function_ctx *ctx = func_ctx(mask);
>  
> -   assert(mask->cond_stack_size < LP_MAX_TGSI_NESTING);
> -   if (mask->cond_stack_size == 0) {
> +   if (ctx->cond_stack_size >= LP_MAX_TGSI_NESTING) {
> +      ctx->cond_stack_size++;
> +      return;
> +   }
> +   if (ctx->cond_stack_size == 0 && mask->function_stack_size == 1) {
>        assert(mask->cond_mask == LLVMConstAllOnes(mask->int_vec_type));
>     }
> -   mask->cond_stack[mask->cond_stack_size++] = mask->cond_mask;
> +   ctx->cond_stack[ctx->cond_stack_size++] = mask->cond_mask;
>     assert(LLVMTypeOf(val) == mask->int_vec_type);
>     mask->cond_mask = LLVMBuildAnd(builder,
>                                    mask->cond_mask,
> @@ -186,12 +263,15 @@ static void lp_exec_mask_cond_push(struct lp_exec_mask *mask,
>  static void lp_exec_mask_cond_invert(struct lp_exec_mask *mask)
>  {
>     LLVMBuilderRef builder = mask->bld->gallivm->builder;
> +   struct function_ctx *ctx = func_ctx(mask);
>     LLVMValueRef prev_mask;
>     LLVMValueRef inv_mask;
>  
> -   assert(mask->cond_stack_size);
> -   prev_mask = mask->cond_stack[mask->cond_stack_size - 1];
> -   if (mask->cond_stack_size == 1) {
> +   assert(ctx->cond_stack_size);
> +   if (ctx->cond_stack_size >= LP_MAX_TGSI_NESTING)
> +      return;
> +   prev_mask = ctx->cond_stack[ctx->cond_stack_size - 1];
> +   if (ctx->cond_stack_size == 1 && mask->function_stack_size == 1) {
>        assert(prev_mask == LLVMConstAllOnes(mask->int_vec_type));
>     }
>  
> @@ -205,43 +285,44 @@ static void lp_exec_mask_cond_invert(struct lp_exec_mask *mask)
>  
>  static void lp_exec_mask_cond_pop(struct lp_exec_mask *mask)
>  {
> -   assert(mask->cond_stack_size);
> -   mask->cond_mask = mask->cond_stack[--mask->cond_stack_size];
> +   struct function_ctx *ctx = func_ctx(mask);
> +   assert(ctx->cond_stack_size);
> +   --ctx->cond_stack_size;
> +   if (ctx->cond_stack_size >= LP_MAX_TGSI_NESTING)
> +      return;
> +   mask->cond_mask = ctx->cond_stack[ctx->cond_stack_size];
>     lp_exec_mask_update(mask);
>  }
>  
>  static void lp_exec_bgnloop(struct lp_exec_mask *mask)
>  {
>     LLVMBuilderRef builder = mask->bld->gallivm->builder;
> +   struct function_ctx *ctx = func_ctx(mask);
>  
> -   if (mask->loop_stack_size == 0) {
> -      assert(mask->loop_block == NULL);
> -      assert(mask->cont_mask == LLVMConstAllOnes(mask->int_vec_type));
> -      assert(mask->break_mask == LLVMConstAllOnes(mask->int_vec_type));
> -      assert(mask->break_var == NULL);
> +   if (ctx->loop_stack_size >= LP_MAX_TGSI_NESTING) {
> +      ++ctx->loop_stack_size;
> +      return;
>     }
>  
> -   assert(mask->loop_stack_size < LP_MAX_TGSI_NESTING);
> +   ctx->break_type_stack[ctx->loop_stack_size + ctx->switch_stack_size] =
> +      ctx->break_type;
> +   ctx->break_type = LP_EXEC_MASK_BREAK_TYPE_LOOP;
>  
> -   mask->break_type_stack[mask->loop_stack_size + mask->switch_stack_size] =
> -      mask->break_type;
> -   mask->break_type = LP_EXEC_MASK_BREAK_TYPE_LOOP;
> +   ctx->loop_stack[ctx->loop_stack_size].loop_block = ctx->loop_block;
> +   ctx->loop_stack[ctx->loop_stack_size].cont_mask = mask->cont_mask;
> +   ctx->loop_stack[ctx->loop_stack_size].break_mask = mask->break_mask;
I'm confused about why some of these assignments here use the variables
from ctx and some from mask.

> +   ctx->loop_stack[ctx->loop_stack_size].break_var = ctx->break_var;
> +   ++ctx->loop_stack_size;
>  
> -   mask->loop_stack[mask->loop_stack_size].loop_block = mask->loop_block;
> -   mask->loop_stack[mask->loop_stack_size].cont_mask = mask->cont_mask;
> -   mask->loop_stack[mask->loop_stack_size].break_mask = mask->break_mask;
> -   mask->loop_stack[mask->loop_stack_size].break_var = mask->break_var;
> -   ++mask->loop_stack_size;
> +   ctx->break_var = lp_build_alloca(mask->bld->gallivm, mask->int_vec_type, "");
> +   LLVMBuildStore(builder, mask->break_mask, ctx->break_var);
>  
> -   mask->break_var = lp_build_alloca(mask->bld->gallivm, mask->int_vec_type, "");
> -   LLVMBuildStore(builder, mask->break_mask, mask->break_var);
> +   ctx->loop_block = lp_build_insert_new_block(mask->bld->gallivm, "bgnloop");
>  
> -   mask->loop_block = lp_build_insert_new_block(mask->bld->gallivm, "bgnloop");
> +   LLVMBuildBr(builder, ctx->loop_block);
> +   LLVMPositionBuilderAtEnd(builder, ctx->loop_block);
>  
> -   LLVMBuildBr(builder, mask->loop_block);
> -   LLVMPositionBuilderAtEnd(builder, mask->loop_block);
> -
> -   mask->break_mask = LLVMBuildLoad(builder, mask->break_var, "");
> +   mask->break_mask = LLVMBuildLoad(builder, ctx->break_var, "");
>  
>     lp_exec_mask_update(mask);
>  }
> @@ -250,8 +331,9 @@ static void lp_exec_break(struct lp_exec_mask *mask,
>                            struct lp_build_tgsi_context * bld_base)
>  {
>     LLVMBuilderRef builder = mask->bld->gallivm->builder;
> +   struct function_ctx *ctx = func_ctx(mask);
>  
> -   if (mask->break_type == LP_EXEC_MASK_BREAK_TYPE_LOOP) {
> +   if (ctx->break_type == LP_EXEC_MASK_BREAK_TYPE_LOOP) {
>        LLVMValueRef exec_mask = LLVMBuildNot(builder,
>                                              mask->exec_mask,
>                                              "break");
> @@ -266,15 +348,15 @@ static void lp_exec_break(struct lp_exec_mask *mask,
>                                opcode == TGSI_OPCODE_CASE);
>  
>  
> -      if (mask->switch_in_default) {
> +      if (ctx->switch_in_default) {
>           /*
>            * stop default execution but only if this is an unconditional switch.
>            * (The condition here is not perfect since dead code after break is
>            * allowed but should be sufficient since false negatives are just
>            * unoptimized - so we don't have to pre-evaluate that).
>            */
> -         if(break_always && mask->switch_pc) {
> -            bld_base->pc = mask->switch_pc;
> +         if(break_always && ctx->switch_pc) {
> +            bld_base->pc = ctx->switch_pc;
>              return;
>           }
>        }
> @@ -299,12 +381,13 @@ static void lp_exec_break_condition(struct lp_exec_mask *mask,
>                                      LLVMValueRef cond)
>  {
>     LLVMBuilderRef builder = mask->bld->gallivm->builder;
> +   struct function_ctx *ctx = func_ctx(mask);
>     LLVMValueRef cond_mask = LLVMBuildAnd(builder,
>                                           mask->exec_mask,
>                                           cond, "cond_mask");
>     cond_mask = LLVMBuildNot(builder, cond_mask, "break_cond");
>  
> -   if (mask->break_type == LP_EXEC_MASK_BREAK_TYPE_LOOP) {
> +   if (ctx->break_type == LP_EXEC_MASK_BREAK_TYPE_LOOP) {
>        mask->break_mask = LLVMBuildAnd(builder,
>                                        mask->break_mask,
>                                        cond_mask, "breakc_full");
> @@ -337,6 +420,7 @@ static void lp_exec_endloop(struct gallivm_state *gallivm,
>                              struct lp_exec_mask *mask)
>  {
>     LLVMBuilderRef builder = mask->bld->gallivm->builder;
> +   struct function_ctx *ctx = func_ctx(mask);
>     LLVMBasicBlockRef endloop;
>     LLVMTypeRef int_type = LLVMInt32TypeInContext(mask->bld->gallivm->context);
>     LLVMTypeRef reg_type = LLVMIntTypeInContext(gallivm->context,
> @@ -346,21 +430,27 @@ static void lp_exec_endloop(struct gallivm_state *gallivm,
>  
>     assert(mask->break_mask);
>  
> +   
> +   assert(ctx->loop_stack_size);
> +   if (ctx->loop_stack_size > LP_MAX_TGSI_NESTING) {
> +      --ctx->loop_stack_size;
> +      return;
> +   }
> +
>     /*
>      * Restore the cont_mask, but don't pop
>      */
> -   assert(mask->loop_stack_size);
> -   mask->cont_mask = mask->loop_stack[mask->loop_stack_size - 1].cont_mask;
> +   mask->cont_mask = ctx->loop_stack[ctx->loop_stack_size - 1].cont_mask;
>     lp_exec_mask_update(mask);
>  
>     /*
>      * Unlike the continue mask, the break_mask must be preserved across loop
>      * iterations
>      */
> -   LLVMBuildStore(builder, mask->break_mask, mask->break_var);
> +   LLVMBuildStore(builder, mask->break_mask, ctx->break_var);
>  
>     /* Decrement the loop limiter */
> -   limiter = LLVMBuildLoad(builder, mask->loop_limiter, "");
> +   limiter = LLVMBuildLoad(builder, ctx->loop_limiter, "");
>  
>     limiter = LLVMBuildSub(
>        builder,
> @@ -368,7 +458,7 @@ static void lp_exec_endloop(struct gallivm_state *gallivm,
>        LLVMConstInt(int_type, 1, false),
>        "");
>  
> -   LLVMBuildStore(builder, limiter, mask->loop_limiter);
> +   LLVMBuildStore(builder, limiter, ctx->loop_limiter);
>  
>     /* i1cond = (mask != 0) */
>     i1cond = LLVMBuildICmp(
> @@ -390,17 +480,18 @@ static void lp_exec_endloop(struct gallivm_state *gallivm,
>     endloop = lp_build_insert_new_block(mask->bld->gallivm, "endloop");
>  
>     LLVMBuildCondBr(builder,
> -                   icond, mask->loop_block, endloop);
> +                   icond, ctx->loop_block, endloop);
>  
>     LLVMPositionBuilderAtEnd(builder, endloop);
>  
> -   assert(mask->loop_stack_size);
> -   --mask->loop_stack_size;
> -   mask->loop_block = mask->loop_stack[mask->loop_stack_size].loop_block;
> -   mask->cont_mask = mask->loop_stack[mask->loop_stack_size].cont_mask;
> -   mask->break_mask = mask->loop_stack[mask->loop_stack_size].break_mask;
> -   mask->break_var = mask->loop_stack[mask->loop_stack_size].break_var;
> -   mask->break_type = mask->break_type_stack[mask->loop_stack_size + mask->switch_stack_size];
> +   assert(ctx->loop_stack_size);
> +   --ctx->loop_stack_size;
> +   mask->cont_mask = ctx->loop_stack[ctx->loop_stack_size].cont_mask;
> +   mask->break_mask = ctx->loop_stack[ctx->loop_stack_size].break_mask;
> +   ctx->loop_block = ctx->loop_stack[ctx->loop_stack_size].loop_block;
> +   ctx->break_var = ctx->loop_stack[ctx->loop_stack_size].break_var;
> +   ctx->break_type = ctx->break_type_stack[ctx->loop_stack_size +
> +         ctx->switch_stack_size];
>  
>     lp_exec_mask_update(mask);
>  }
> @@ -408,22 +499,30 @@ static void lp_exec_endloop(struct gallivm_state *gallivm,
>  static void lp_exec_switch(struct lp_exec_mask *mask,
>                             LLVMValueRef switchval)
>  {
> -   mask->break_type_stack[mask->loop_stack_size + mask->switch_stack_size] =
> -      mask->break_type;
> -   mask->break_type = LP_EXEC_MASK_BREAK_TYPE_SWITCH;
> +   struct function_ctx *ctx = func_ctx(mask);
> +
> +   if (ctx->switch_stack_size >= LP_MAX_TGSI_NESTING ||
> +       ctx->loop_stack_size > LP_MAX_TGSI_NESTING) {
> +      ctx->switch_stack_size++;
> +      return;
> +   }
> +
> +   ctx->break_type_stack[ctx->loop_stack_size + ctx->switch_stack_size] =
> +      ctx->break_type;
> +   ctx->break_type = LP_EXEC_MASK_BREAK_TYPE_SWITCH;
>  
> -   mask->switch_stack[mask->switch_stack_size].switch_val = mask->switch_val;
> -   mask->switch_stack[mask->switch_stack_size].switch_mask = mask->switch_mask;
> -   mask->switch_stack[mask->switch_stack_size].switch_mask_default = mask->switch_mask_default;
> -   mask->switch_stack[mask->switch_stack_size].switch_in_default = mask->switch_in_default;
> -   mask->switch_stack[mask->switch_stack_size].switch_pc = mask->switch_pc;
> -   mask->switch_stack_size++;
> +   ctx->switch_stack[ctx->switch_stack_size].switch_mask = mask->switch_mask;
> +   ctx->switch_stack[ctx->switch_stack_size].switch_val = ctx->switch_val;
> +   ctx->switch_stack[ctx->switch_stack_size].switch_mask_default = ctx->switch_mask_default;
> +   ctx->switch_stack[ctx->switch_stack_size].switch_in_default = ctx->switch_in_default;
> +   ctx->switch_stack[ctx->switch_stack_size].switch_pc = ctx->switch_pc;
> +   ctx->switch_stack_size++;
>  
> -   mask->switch_val = switchval;
>     mask->switch_mask = LLVMConstNull(mask->int_vec_type);
Again, why does this assign the mask value and not the ctx value?
(There's more similar stuff below.)

> -   mask->switch_mask_default = LLVMConstNull(mask->int_vec_type);
> -   mask->switch_in_default = false;
> -   mask->switch_pc = 0;
> +   ctx->switch_val = switchval;
> +   ctx->switch_mask_default = LLVMConstNull(mask->int_vec_type);
> +   ctx->switch_in_default = false;
> +   ctx->switch_pc = 0;
>  
>     lp_exec_mask_update(mask);
>  }
> @@ -432,44 +531,50 @@ static void lp_exec_endswitch(struct lp_exec_mask *mask,
>                                struct lp_build_tgsi_context * bld_base)
>  {
>     LLVMBuilderRef builder = mask->bld->gallivm->builder;
> +   struct function_ctx *ctx = func_ctx(mask);
> +
> +   if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
> +      ctx->switch_stack_size--;
> +      return;
> +   }
>  
>     /* check if there's deferred default if so do it now */
> -   if (mask->switch_pc && !mask->switch_in_default) {
> +   if (ctx->switch_pc && !ctx->switch_in_default) {
>        LLVMValueRef prevmask, defaultmask;
>        unsigned tmp_pc;
> -      prevmask = mask->switch_stack[mask->switch_stack_size - 1].switch_mask;
> -      defaultmask = LLVMBuildNot(builder, mask->switch_mask_default, "sw_default_mask");
> +      prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask;
> +      defaultmask = LLVMBuildNot(builder, ctx->switch_mask_default, "sw_default_mask");
>        mask->switch_mask = LLVMBuildAnd(builder, prevmask, defaultmask, "sw_mask");
> -      mask->switch_in_default = true;
> +      ctx->switch_in_default = true;
>  
>        lp_exec_mask_update(mask);
>  
> -      assert(bld_base->instructions[mask->switch_pc - 1].Instruction.Opcode ==
> +      assert(bld_base->instructions[ctx->switch_pc - 1].Instruction.Opcode ==
>               TGSI_OPCODE_DEFAULT);
>  
>        tmp_pc = bld_base->pc;
> -      bld_base->pc = mask->switch_pc;
> +      bld_base->pc = ctx->switch_pc;
>        /*
>         * re-purpose switch_pc to point to here again, since we stop execution of
>         * the deferred default after next break.
>         */
> -      mask->switch_pc = tmp_pc - 1;
> +      ctx->switch_pc = tmp_pc - 1;
>  
>        return;
>     }
>  
> -   else if (mask->switch_pc && mask->switch_in_default) {
> -      assert(bld_base->pc == mask->switch_pc + 1);
> +   else if (ctx->switch_pc && ctx->switch_in_default) {
> +      assert(bld_base->pc == ctx->switch_pc + 1);
>     }
>  
> -   mask->switch_stack_size--;
> -   mask->switch_val = mask->switch_stack[mask->switch_stack_size].switch_val;
> -   mask->switch_mask = mask->switch_stack[mask->switch_stack_size].switch_mask;
> -   mask->switch_mask_default = mask->switch_stack[mask->switch_stack_size].switch_mask_default;
> -   mask->switch_in_default = mask->switch_stack[mask->switch_stack_size].switch_in_default;
> -   mask->switch_pc = mask->switch_stack[mask->switch_stack_size].switch_pc;
> +   ctx->switch_stack_size--;
> +   mask->switch_mask = ctx->switch_stack[ctx->switch_stack_size].switch_mask;
> +   ctx->switch_val = ctx->switch_stack[ctx->switch_stack_size].switch_val;
> +   ctx->switch_mask_default = ctx->switch_stack[ctx->switch_stack_size].switch_mask_default;
> +   ctx->switch_in_default = ctx->switch_stack[ctx->switch_stack_size].switch_in_default;
> +   ctx->switch_pc = ctx->switch_stack[ctx->switch_stack_size].switch_pc;
>  
> -   mask->break_type = mask->break_type_stack[mask->loop_stack_size + mask->switch_stack_size];
> +   ctx->break_type = ctx->break_type_stack[ctx->loop_stack_size + ctx->switch_stack_size];
>  
>     lp_exec_mask_update(mask);
>  }
> @@ -478,15 +583,20 @@ static void lp_exec_case(struct lp_exec_mask *mask,
>                           LLVMValueRef caseval)
>  {
>     LLVMBuilderRef builder = mask->bld->gallivm->builder;
> +   struct function_ctx *ctx = func_ctx(mask);
>  
>     LLVMValueRef casemask, prevmask;
>  
> +   if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
> +      return;
> +   }
> +
>     /* skipping case mask evaluation here is NOT optional (not in all cases anyway). */
> -   if (!mask->switch_in_default) {
> -      prevmask = mask->switch_stack[mask->switch_stack_size - 1].switch_mask;
> -      casemask = lp_build_cmp(mask->bld, PIPE_FUNC_EQUAL, caseval, mask->switch_val);
> -      mask->switch_mask_default = LLVMBuildOr(builder, casemask,
> -                                              mask->switch_mask_default, "sw_default_mask");
> +   if (!ctx->switch_in_default) {
> +      prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask;
> +      casemask = lp_build_cmp(mask->bld, PIPE_FUNC_EQUAL, caseval, ctx->switch_val);
> +      ctx->switch_mask_default = LLVMBuildOr(builder, casemask,
> +                                             ctx->switch_mask_default, "sw_default_mask");
>        casemask = LLVMBuildOr(builder, casemask, mask->switch_mask, "");
>        mask->switch_mask = LLVMBuildAnd(builder, casemask, prevmask, "sw_mask");
>  
> @@ -506,7 +616,12 @@ static boolean default_analyse_is_last(struct lp_exec_mask *mask,
>                                         int *default_pc_start)
>  {
>     unsigned pc = bld_base->pc;
> -   unsigned curr_switch_stack = mask->switch_stack_size;
> +   struct function_ctx *ctx = func_ctx(mask);
> +   unsigned curr_switch_stack = ctx->switch_stack_size;
> +
> +   if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
> +      return false;
> +   }
>  
>     /* skip over case statements which are together with default */
>     while (bld_base->instructions[pc].Instruction.Opcode == TGSI_OPCODE_CASE) {
> @@ -517,7 +632,7 @@ static boolean default_analyse_is_last(struct lp_exec_mask *mask,
>        unsigned opcode = bld_base->instructions[pc].Instruction.Opcode;
>        switch (opcode) {
>        case TGSI_OPCODE_CASE:
> -         if (curr_switch_stack == mask->switch_stack_size) {
> +         if (curr_switch_stack == ctx->switch_stack_size) {
>              *default_pc_start = pc - 1;
>              return false;
>           }
> @@ -526,7 +641,7 @@ static boolean default_analyse_is_last(struct lp_exec_mask *mask,
>           curr_switch_stack++;
>           break;
>        case TGSI_OPCODE_ENDSWITCH:
> -         if (curr_switch_stack == mask->switch_stack_size) {
> +         if (curr_switch_stack == ctx->switch_stack_size) {
>              *default_pc_start = pc - 1;
>              return true;
>           }
> @@ -544,10 +659,15 @@ static void lp_exec_default(struct lp_exec_mask *mask,
>                              struct lp_build_tgsi_context * bld_base)
>  {
>     LLVMBuilderRef builder = mask->bld->gallivm->builder;
> +   struct function_ctx *ctx = func_ctx(mask);
>  
>     int default_exec_pc;
>     boolean default_is_last;
>  
> +   if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
> +      return;
> +   }
> +
>     /*
>      * This is a messy opcode, because it may not be always at the end and
>      * there can be fallthrough in and out of it.
> @@ -562,11 +682,11 @@ static void lp_exec_default(struct lp_exec_mask *mask,
>      */
>     if (default_is_last) {
>        LLVMValueRef prevmask, defaultmask;
> -      prevmask = mask->switch_stack[mask->switch_stack_size - 1].switch_mask;
> -      defaultmask = LLVMBuildNot(builder, mask->switch_mask_default, "sw_default_mask");
> +      prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask;
> +      defaultmask = LLVMBuildNot(builder, ctx->switch_mask_default, "sw_default_mask");
>        defaultmask = LLVMBuildOr(builder, defaultmask, mask->switch_mask, "");
>        mask->switch_mask = LLVMBuildAnd(builder, prevmask, defaultmask, "sw_mask");
> -      mask->switch_in_default = true;
> +      ctx->switch_in_default = true;
>  
>        lp_exec_mask_update(mask);
>     }
> @@ -593,7 +713,7 @@ static void lp_exec_default(struct lp_exec_mask *mask,
>         * do the same as with the former case, except instead of skipping the code
>         * just execute it without updating the mask, then go back and re-execute.
>         */
> -      mask->switch_pc = bld_base->pc;
> +      ctx->switch_pc = bld_base->pc;
>        if (!ft_into) {
>           bld_base->pc = default_exec_pc;
>        }
> @@ -641,28 +761,33 @@ static void lp_exec_mask_call(struct lp_exec_mask *mask,
>                                int func,
>                                int *pc)
>  {
> -   assert(mask->call_stack_size < LP_MAX_TGSI_NESTING);
> -   mask->call_stack[mask->call_stack_size].pc = *pc;
> -   mask->call_stack[mask->call_stack_size].ret_mask = mask->ret_mask;
> -   mask->call_stack_size++;
> +   if (mask->function_stack_size >= LP_MAX_NUM_FUNCS) {
> +      return;
> +   }
> +
> +   lp_exec_mask_function_init(mask, mask->function_stack_size);
> +   mask->function_stack[mask->function_stack_size].pc = *pc;
> +   mask->function_stack[mask->function_stack_size].ret_mask = mask->ret_mask;
> +   mask->function_stack_size++;
>     *pc = func;
>  }
>  
>  static void lp_exec_mask_ret(struct lp_exec_mask *mask, int *pc)
>  {
>     LLVMBuilderRef builder = mask->bld->gallivm->builder;
> +   struct function_ctx *ctx = func_ctx(mask);
>     LLVMValueRef exec_mask;
>  
> -   if (mask->cond_stack_size == 0 &&
> -       mask->loop_stack_size == 0 &&
> -       mask->switch_stack_size == 0 &&
> -       mask->call_stack_size == 0) {
> +   if (ctx->cond_stack_size == 0 &&
> +       ctx->loop_stack_size == 0 &&
> +       ctx->switch_stack_size == 0 &&
> +       mask->function_stack_size == 1) {
>        /* returning from main() */
>        *pc = -1;
>        return;
>     }
>  
> -   if (mask->call_stack_size == 0) {
> +   if (mask->function_stack_size == 1) {
>        /*
>         * This requires special handling since we need to ensure
>         * we don't drop the mask even if we have no call stack
> @@ -688,10 +813,17 @@ static void lp_exec_mask_bgnsub(struct lp_exec_mask *mask)
>  
>  static void lp_exec_mask_endsub(struct lp_exec_mask *mask, int *pc)
>  {
> -   assert(mask->call_stack_size);
> -   mask->call_stack_size--;
> -   *pc = mask->call_stack[mask->call_stack_size].pc;
> -   mask->ret_mask = mask->call_stack[mask->call_stack_size].ret_mask;
> +   struct function_ctx *ctx;
> +
> +   assert(mask->function_stack_size > 1);
> +   assert(mask->function_stack_size <= LP_MAX_NUM_FUNCS);
> +
> +   ctx = func_ctx(mask);
> +   mask->function_stack_size--;
> +
> +   *pc = ctx->pc;
> +   mask->ret_mask = ctx->ret_mask;
> +
>     lp_exec_mask_update(mask);
>  }
>  
> @@ -1522,7 +1654,7 @@ emit_store_chan(
>        assert(dtype == TGSI_TYPE_FLOAT ||
>               dtype == TGSI_TYPE_UNTYPED);
>        value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
> -      /* This will give -1.0 for NaN which is probably not what we want. */
> +      /* This will give -1.0 for NaN which is probably not what we want. */
>        value = lp_build_max_ext(float_bld, value,
>                                 lp_build_const_vec(gallivm, float_bld->type, -1.0),
>                                 GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN);
> @@ -3569,4 +3701,5 @@ lp_build_tgsi_soa(struct gallivm_state *gallivm,
>        LLVMDumpModule(module);
>  
>     }
> +   lp_exec_mask_fini(&bld.exec_mask);
>  }
> 

As mentioned inline, I don't quite get when values are kept in mask and
when in ctx. This may well be correct, as this is tricky stuff and the
diff is difficult to follow.
Otherwise this looks good to me. Would this also help if we switched to
not always inlining all functions?

Roland

