[Mesa-dev] [PATCH 1/2] nir: Use alloca instead of variable length arrays.

Fri Feb 27 06:04:47 PST 2015

On 26/02/15 18:07, Brian Paul wrote:
> On 02/26/2015 09:51 AM, Jose Fonseca wrote:
>> This is to enable the code to build with -Werror=vla in the short term,
>> and enable the code to build with MSVC2013 soon after.
>> ---
>>   include/c99_alloca.h                 | 45
>> ++++++++++++++++++++++++++++++++++++
>>   src/glsl/nir/nir_from_ssa.c          | 19 +++++++--------
>>   src/glsl/nir/nir_live_variables.c    |  5 ++--
>>   src/glsl/nir/nir_lower_vars_to_ssa.c | 13 +++++++----
>>   4 files changed, 66 insertions(+), 16 deletions(-)
>>   create mode 100644 include/c99_alloca.h
>>
>> diff --git a/include/c99_alloca.h b/include/c99_alloca.h
>> new file mode 100644
>> index 0000000..6d96d06
>> --- /dev/null
>> +++ b/include/c99_alloca.h
>> @@ -0,0 +1,45 @@
>> +/**************************************************************************
>>
>> + *
>> + * Copyright 2015 VMware, Inc.
>> + * All Rights Reserved.
>> + *
>> + * Permission is hereby granted, free of charge, to any person
>> obtaining a
>> + * copy of this software and associated documentation files (the
>> + * "Software"), to deal in the Software without restriction, including
>> + * without limitation the rights to use, copy, modify, merge, publish,
>> + * distribute, sub license, and/or sell copies of the Software, and to
>> + * permit persons to whom the Software is furnished to do so, subject to
>> + * the following conditions:
>> + *
>> + * The above copyright notice and this permission notice (including the
>> + * next paragraph) shall be included in all copies or substantial
>> portions
>> + * of the Software.
>> + *
>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
>> EXPRESS
>> + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
>> + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
>> NON-INFRINGEMENT.
>> + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
>> + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
>> CONTRACT,
>> + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
>> + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
>> + *
>> +
>> **************************************************************************/
>>
>> +
>> +#ifndef _C99_ALLOCA_H_
>> +#define _C99_ALLOCA_H_
>> +
>> +
>> +#if defined(_MSC_VER)
>> +
>> +#  include <malloc.h>
>> +
>> +#  define alloca _alloca
>> +
>> +#else /* !defined(_MSC_VER) */
>> +
>> +#  include <alloca.h>
>> +
>> +#endif /* !defined(_MSC_VER) */
>> +
>> +
>> +#endif
>> diff --git a/src/glsl/nir/nir_from_ssa.c b/src/glsl/nir/nir_from_ssa.c
>> index c695c95..66339f3 100644
>> --- a/src/glsl/nir/nir_from_ssa.c
>> +++ b/src/glsl/nir/nir_from_ssa.c
>> @@ -26,6 +26,7 @@
>>    */
>>
>>   #include "nir.h"
>> +#include "c99_alloca.h"
>>
>>   /*
>>    * This file implements an out-of-SSA pass as described in "Revisiting
>> @@ -181,7 +182,7 @@ merge_merge_sets(merge_set *a, merge_set *b)
>>   static bool
>>   merge_sets_interfere(merge_set *a, merge_set *b)
>>   {
>> -   merge_node *dom[a->size + b->size];
>> +   merge_node **dom = alloca((a->size + b->size) * sizeof *dom);
>>      int dom_idx = -1;
>>
>>      struct exec_node *an = exec_list_get_head(&a->nodes);
>> @@ -673,21 +674,21 @@ resolve_parallel_copy(nir_parallel_copy_instr
>> *pcopy,
>>      }
>>
>>      /* The register/source corresponding to the given index */
>> -   nir_src values[num_copies * 2];
>> -   memset(values, 0, sizeof values);
>> +   nir_src *values = alloca(num_copies * 2 * sizeof *values);
>> +   memset(values, 0, num_copies * 2 * sizeof *values);
>>
>>      /* The current location of a given piece of data */
>> -   int loc[num_copies * 2];
>> +   int *loc = alloca(num_copies * 2 * sizeof *loc);
>>
>>      /* The piece of data that the given piece of data is to be copied
>> from */
>> -   int pred[num_copies * 2];
>> +   int *pred = alloca(num_copies * 2 * sizeof *pred);
>>
>>      /* Initialize loc and pred.  We will use -1 for "null" */
>> -   memset(loc, -1, sizeof loc);
>> -   memset(pred, -1, sizeof pred);
>> +   memset(loc, -1, num_copies * 2 * sizeof *loc);
>> +   memset(pred, -1, num_copies * 2 * sizeof *pred);
>>
>>      /* The destinations we have yet to properly fill */
>> -   int to_do[num_copies * 2];
>> +   int *to_do = alloca(num_copies * 2 * sizeof *to_do);
>>      int to_do_idx = -1;
>>
>>      /* Now we set everything up:
>> @@ -737,7 +738,7 @@ resolve_parallel_copy(nir_parallel_copy_instr *pcopy,
>>      }
>>
>>      /* Currently empty destinations we can go ahead and fill */
>> -   int ready[num_copies * 2];
>> +   int *ready = alloca(num_copies * 2 * sizeof *ready);
>>      int ready_idx = -1;
>>
>>      /* Mark the ones that are ready for copying.  We know an index is a
>> diff --git a/src/glsl/nir/nir_live_variables.c
>> b/src/glsl/nir/nir_live_variables.c
>> index 7402dc0..b57ca3a 100644
>> --- a/src/glsl/nir/nir_live_variables.c
>> +++ b/src/glsl/nir/nir_live_variables.c
>> @@ -26,6 +26,7 @@
>>
>>   #include "nir.h"
>>   #include "nir_worklist.h"
>> +#include "c99_alloca.h"
>>
>>   /*
>>    * Basic liveness analysis.  This works only in SSA form.
>> @@ -130,8 +131,8 @@ static bool
>>   propagate_across_edge(nir_block *pred, nir_block *succ,
>>                         struct live_variables_state *state)
>>   {
>> -   BITSET_WORD live[state->bitset_words];
>> -   memcpy(live, succ->live_in, sizeof live);
>> +   BITSET_WORD *live = alloca(state->bitset_words * sizeof *live);
>> +   memcpy(live, succ->live_in, state->bitset_words * sizeof *live);
>>
>>      nir_foreach_instr(succ, instr) {
>>         if (instr->type != nir_instr_type_phi)
>> diff --git a/src/glsl/nir/nir_lower_vars_to_ssa.c
>> b/src/glsl/nir/nir_lower_vars_to_ssa.c
>> index 8af7530..f54d1b7 100644
>> --- a/src/glsl/nir/nir_lower_vars_to_ssa.c
>> +++ b/src/glsl/nir/nir_lower_vars_to_ssa.c
>> @@ -27,6 +27,9 @@
>>
>>   #include "nir.h"
>>
>> +#include "c99_alloca.h"
>> +
>> +
>>   struct deref_node {
>>      struct deref_node *parent;
>>      const struct glsl_type *type;
>> @@ -899,8 +902,8 @@ rename_variables_block(nir_block *block, struct
>> lower_variables_state *state)
>>   static void
>>   insert_phi_nodes(struct lower_variables_state *state)
>>   {
>> -   unsigned work[state->impl->num_blocks];
>> -   unsigned has_already[state->impl->num_blocks];
>> +   unsigned *work = alloca(state->impl->num_blocks * sizeof *work);
>> +   unsigned *has_already = alloca(state->impl->num_blocks * sizeof
>> *has_already);
>>
>>      /*
>>       * Since the work flags already prevent us from inserting a node
>> that has
>> @@ -910,10 +913,10 @@ insert_phi_nodes(struct lower_variables_state
>> *state)
>>       * function. So all we need to handle W is an array and a pointer
>> to the
>>       * next element to be inserted and the next element to be removed.
>>       */
>> -   nir_block *W[state->impl->num_blocks];
>> +   nir_block **W = alloca(state->impl->num_blocks * sizeof *W);
>>
>> -   memset(work, 0, sizeof work);
>> -   memset(has_already, 0, sizeof has_already);
>> +   memset(work, 0, state->impl->num_blocks * sizeof *work);
>> +   memset(has_already, 0, state->impl->num_blocks * sizeof
>> *has_already);
>>
>>      unsigned w_start, w_end;
>>      unsigned iter_count = 0;
>>
>
> Looks OK to me.
>
> One thing I might have done would be instead of:
>
> unsigned *work = alloca(state->impl->num_blocks * sizeof *work);
> ...
> memset(work, 0, state->impl->num_blocks * sizeof *work);
>
> do
>
> const int work_size = state->impl->num_blocks * sizeof *work;
> unsigned *work = alloca(work_size);
 > ...
 > memset(work, 0, work_size);

It's not a bad idea.  The snafu is that `work` must be declared before 
word_size so that `sizeof *work` is valid.

That or we must use the actual type of `*work`, but that is more 
dangerous as it's very easy to get it wrong, particularly when dealing 
with pointers of pointers.

So this would need to look like

   unsigned *work;
   const size_t work_size = state->impl->num_blocks * sizeof *work;
   work = alloca(work_size);
   ...
   memset(work, 0, work_size);

I'll post this in a follow-up review.

> ...
> memset(work, 0, work_size);
>
>
> AFAIK, there's no zeroing version of alloca().

Yes, I also searched.  And unfortunately inline functions can't be used 
due to alloca semantics.  I'm not sure if there's any C-preprocessor magic.

Anyway, depending on the maximum size of these alloca arrays, we might 
actually need to use malloc, as Windows stack is 1MB by default, plus 
the OpenGL driver only gets to use whatever stack is left from the 
application.

If using C++ on NIR is acceptable, std::vector<> would be a nice 
solution overall: practically the same syntax/semantics as C99 VLAs, 
plus no risk of stack overflow.

>
> Reviewed-by: Brian Paul <brianp at vmware.com>
>

Thanks.

Jose