[Mesa-dev] [PATCH 1/2] nir: Use alloca instead of variable length arrays.

Mon Mar 2 08:02:05 PST 2015

On 27/02/15 16:15, Brian Paul wrote:
> On 02/27/2015 09:10 AM, Ian Romanick wrote:
>> On 02/26/2015 10:07 AM, Brian Paul wrote:
>>> On 02/26/2015 09:51 AM, Jose Fonseca wrote:
>>>> This is to enable the code to build with -Werror=vla in the short term,
>>>> and enable the code to build with MSVC2013 soon after.
>>>> ---
>>>>    include/c99_alloca.h                 | 45
>>>> ++++++++++++++++++++++++++++++++++++
>>>>    src/glsl/nir/nir_from_ssa.c          | 19 +++++++--------
>>>>    src/glsl/nir/nir_live_variables.c    |  5 ++--
>>>>    src/glsl/nir/nir_lower_vars_to_ssa.c | 13 +++++++----
>>>>    4 files changed, 66 insertions(+), 16 deletions(-)
>>>>    create mode 100644 include/c99_alloca.h
>>>>
>>>> diff --git a/include/c99_alloca.h b/include/c99_alloca.h
>>>> new file mode 100644
>>>> index 0000000..6d96d06
>>>> --- /dev/null
>>>> +++ b/include/c99_alloca.h
>>>> @@ -0,0 +1,45 @@
>>>> +/**************************************************************************
>>>>
>>>>
>>>> + *
>>>> + * Copyright 2015 VMware, Inc.
>>>> + * All Rights Reserved.
>>>> + *
>>>> + * Permission is hereby granted, free of charge, to any person
>>>> obtaining a
>>>> + * copy of this software and associated documentation files (the
>>>> + * "Software"), to deal in the Software without restriction, including
>>>> + * without limitation the rights to use, copy, modify, merge, publish,
>>>> + * distribute, sub license, and/or sell copies of the Software, and to
>>>> + * permit persons to whom the Software is furnished to do so,
>>>> subject to
>>>> + * the following conditions:
>>>> + *
>>>> + * The above copyright notice and this permission notice (including
>>>> the
>>>> + * next paragraph) shall be included in all copies or substantial
>>>> portions
>>>> + * of the Software.
>>>> + *
>>>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
>>>> EXPRESS
>>>> + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
>>>> + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
>>>> NON-INFRINGEMENT.
>>>> + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
>>>> + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
>>>> CONTRACT,
>>>> + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
>>>> + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
>>>> + *
>>>> +
>>>> **************************************************************************/
>>>>
>>>>
>>>> +
>>>> +#ifndef _C99_ALLOCA_H_
>>>> +#define _C99_ALLOCA_H_
>>>> +
>>>> +
>>>> +#if defined(_MSC_VER)
>>>> +
>>>> +#  include <malloc.h>
>>>> +
>>>> +#  define alloca _alloca
>>>> +
>>>> +#else /* !defined(_MSC_VER) */
>>>> +
>>>> +#  include <alloca.h>
>>>> +
>>>> +#endif /* !defined(_MSC_VER) */
>>>> +
>>>> +
>>>> +#endif
>>>> diff --git a/src/glsl/nir/nir_from_ssa.c b/src/glsl/nir/nir_from_ssa.c
>>>> index c695c95..66339f3 100644
>>>> --- a/src/glsl/nir/nir_from_ssa.c
>>>> +++ b/src/glsl/nir/nir_from_ssa.c
>>>> @@ -26,6 +26,7 @@
>>>>     */
>>>>
>>>>    #include "nir.h"
>>>> +#include "c99_alloca.h"
>>>>
>>>>    /*
>>>>     * This file implements an out-of-SSA pass as described in
>>>> "Revisiting
>>>> @@ -181,7 +182,7 @@ merge_merge_sets(merge_set *a, merge_set *b)
>>>>    static bool
>>>>    merge_sets_interfere(merge_set *a, merge_set *b)
>>>>    {
>>>> -   merge_node *dom[a->size + b->size];
>>>> +   merge_node **dom = alloca((a->size + b->size) * sizeof *dom);
>>>>       int dom_idx = -1;
>>>>
>>>>       struct exec_node *an = exec_list_get_head(&a->nodes);
>>>> @@ -673,21 +674,21 @@ resolve_parallel_copy(nir_parallel_copy_instr
>>>> *pcopy,
>>>>       }
>>>>
>>>>       /* The register/source corresponding to the given index */
>>>> -   nir_src values[num_copies * 2];
>>>> -   memset(values, 0, sizeof values);
>>>> +   nir_src *values = alloca(num_copies * 2 * sizeof *values);
>>>> +   memset(values, 0, num_copies * 2 * sizeof *values);
>>>>
>>>>       /* The current location of a given piece of data */
>>>> -   int loc[num_copies * 2];
>>>> +   int *loc = alloca(num_copies * 2 * sizeof *loc);
>>>>
>>>>       /* The piece of data that the given piece of data is to be copied
>>>> from */
>>>> -   int pred[num_copies * 2];
>>>> +   int *pred = alloca(num_copies * 2 * sizeof *pred);
>>>>
>>>>       /* Initialize loc and pred.  We will use -1 for "null" */
>>>> -   memset(loc, -1, sizeof loc);
>>>> -   memset(pred, -1, sizeof pred);
>>>> +   memset(loc, -1, num_copies * 2 * sizeof *loc);
>>>> +   memset(pred, -1, num_copies * 2 * sizeof *pred);
>>>>
>>>>       /* The destinations we have yet to properly fill */
>>>> -   int to_do[num_copies * 2];
>>>> +   int *to_do = alloca(num_copies * 2 * sizeof *to_do);
>>>>       int to_do_idx = -1;
>>>>
>>>>       /* Now we set everything up:
>>>> @@ -737,7 +738,7 @@ resolve_parallel_copy(nir_parallel_copy_instr
>>>> *pcopy,
>>>>       }
>>>>
>>>>       /* Currently empty destinations we can go ahead and fill */
>>>> -   int ready[num_copies * 2];
>>>> +   int *ready = alloca(num_copies * 2 * sizeof *ready);
>>>>       int ready_idx = -1;
>>>>
>>>>       /* Mark the ones that are ready for copying.  We know an index
>>>> is a
>>>> diff --git a/src/glsl/nir/nir_live_variables.c
>>>> b/src/glsl/nir/nir_live_variables.c
>>>> index 7402dc0..b57ca3a 100644
>>>> --- a/src/glsl/nir/nir_live_variables.c
>>>> +++ b/src/glsl/nir/nir_live_variables.c
>>>> @@ -26,6 +26,7 @@
>>>>
>>>>    #include "nir.h"
>>>>    #include "nir_worklist.h"
>>>> +#include "c99_alloca.h"
>>>>
>>>>    /*
>>>>     * Basic liveness analysis.  This works only in SSA form.
>>>> @@ -130,8 +131,8 @@ static bool
>>>>    propagate_across_edge(nir_block *pred, nir_block *succ,
>>>>                          struct live_variables_state *state)
>>>>    {
>>>> -   BITSET_WORD live[state->bitset_words];
>>>> -   memcpy(live, succ->live_in, sizeof live);
>>>> +   BITSET_WORD *live = alloca(state->bitset_words * sizeof *live);
>>>> +   memcpy(live, succ->live_in, state->bitset_words * sizeof *live);
>>>>
>>>>       nir_foreach_instr(succ, instr) {
>>>>          if (instr->type != nir_instr_type_phi)
>>>> diff --git a/src/glsl/nir/nir_lower_vars_to_ssa.c
>>>> b/src/glsl/nir/nir_lower_vars_to_ssa.c
>>>> index 8af7530..f54d1b7 100644
>>>> --- a/src/glsl/nir/nir_lower_vars_to_ssa.c
>>>> +++ b/src/glsl/nir/nir_lower_vars_to_ssa.c
>>>> @@ -27,6 +27,9 @@
>>>>
>>>>    #include "nir.h"
>>>>
>>>> +#include "c99_alloca.h"
>>>> +
>>>> +
>>>>    struct deref_node {
>>>>       struct deref_node *parent;
>>>>       const struct glsl_type *type;
>>>> @@ -899,8 +902,8 @@ rename_variables_block(nir_block *block, struct
>>>> lower_variables_state *state)
>>>>    static void
>>>>    insert_phi_nodes(struct lower_variables_state *state)
>>>>    {
>>>> -   unsigned work[state->impl->num_blocks];
>>>> -   unsigned has_already[state->impl->num_blocks];
>>>> +   unsigned *work = alloca(state->impl->num_blocks * sizeof *work);
>>>> +   unsigned *has_already = alloca(state->impl->num_blocks * sizeof
>>>> *has_already);
>>>>
>>>>       /*
>>>>        * Since the work flags already prevent us from inserting a node
>>>> that has
>>>> @@ -910,10 +913,10 @@ insert_phi_nodes(struct lower_variables_state
>>>> *state)
>>>>        * function. So all we need to handle W is an array and a pointer
>>>> to the
>>>>        * next element to be inserted and the next element to be
>>>> removed.
>>>>        */
>>>> -   nir_block *W[state->impl->num_blocks];
>>>> +   nir_block **W = alloca(state->impl->num_blocks * sizeof *W);
>>>>
>>>> -   memset(work, 0, sizeof work);
>>>> -   memset(has_already, 0, sizeof has_already);
>>>> +   memset(work, 0, state->impl->num_blocks * sizeof *work);
>>>> +   memset(has_already, 0, state->impl->num_blocks * sizeof
>>>> *has_already);
>>>>
>>>>       unsigned w_start, w_end;
>>>>       unsigned iter_count = 0;
>>>>
>>>
>>> Looks OK to me.
>>>
>>> One thing I might have done would be instead of:
>>>
>>> unsigned *work = alloca(state->impl->num_blocks * sizeof *work);
>>> ...
>>> memset(work, 0, state->impl->num_blocks * sizeof *work);
>>>
>>> do
>>>
>>> const int work_size = state->impl->num_blocks * sizeof *work;
>>> unsigned *work = alloca(work_size);
>>> ...
>>> memset(work, 0, work_size);
>>>
>>>
>>> AFAIK, there's no zeroing version of alloca().
>>
>> It occurs to me that this may be the only reasonable use-case in all
>> history of memset returning the pointer that you pass to it:
>>
>>      unsigned *work = memset(alloca(work_size), 0, work_size);
>
> That's cool!

Yes, indeed, that can save another source line.

FWIW, I also tried to prototype a few macros for dealing with VLAs:

    http://cgit.freedesktop.org/~jrfonseca/mesa/commit/?h=nir-vla

On one hand, they save quite a bit of typing, even compared with vanilla
C99 VLAs.  On the other hand, being macros, they are always a bit magical
and tricky to read.

I only prototyped this on a single source file.  But if you like this
direction, I can move the macros to a header and use them in all places in
NIR where VLAs are needed.

Let me know what you guys want to do.

Jose