[Mesa-dev] [PATCH] glsl: Avoid massive ralloc_strndup overhead in S-Expression parsing.

Paul Berry stereotype441 at gmail.com
Mon Jul 18 11:52:41 PDT 2011


On 15 July 2011 03:16, Kenneth Graunke <kenneth at whitecape.org> wrote:
> When parsing S-Expressions, we need to store nul-terminated strings for
> Symbol nodes.  Prior to this patch, we called ralloc_strndup each time
> we constructed a new s_symbol.  It turns out that this is obscenely
> expensive.
>
> Instead, copy the whole buffer before parsing and overwrite it to
> contain \0 bytes at the appropriate locations.  Since atoms are
> separated by whitespace, (), or ;, we can safely overwrite the character
> after a Symbol.  While much of the buffer may be unused, copying the
> whole buffer is simple and guaranteed to provide enough space.
>
> Prior to this, running piglit-run.py -t glsl tests/quick.tests with GLSL
> 1.30 enabled took just over 10 minutes on my machine.  Now it takes 5.
>
> Signed-off-by: Kenneth Graunke <kenneth at whitecape.org>
> ---
>  src/glsl/s_expression.cpp |   64 ++++++++++++++++++++++++++++++++------------
>  src/glsl/s_expression.h   |    2 +-
>  2 files changed, 47 insertions(+), 19 deletions(-)
>
> Seriously!  50% faster piglit runs!  I should've done this ages ago.
>
> diff --git a/src/glsl/s_expression.cpp b/src/glsl/s_expression.cpp
> index a922a50..e704a3b 100644
> --- a/src/glsl/s_expression.cpp
> +++ b/src/glsl/s_expression.cpp
> @@ -25,10 +25,13 @@
>  #include <assert.h>
>  #include "s_expression.h"
>
> -s_symbol::s_symbol(const char *tmp, size_t n)
> +s_symbol::s_symbol(const char *str, size_t n)
>  {
> -   this->str = ralloc_strndup (this, tmp, n);
> -   assert(this->str != NULL);
> +   /* Assume the given string is already nul-terminated and in memory that
> +    * will live as long as this node.
> +    */
> +   assert(str[n] == '\0');
> +   this->str = str;
>  }
>
>  s_list::s_list()
> @@ -36,22 +39,26 @@ s_list::s_list()
>  }
>
>  static void
> -skip_whitespace(const char *& src)
> +skip_whitespace(const char *&src, char *&symbol_buffer)
>  {
> -   src += strspn(src, " \v\t\r\n");
> +   size_t n = strspn(src, " \v\t\r\n");
> +   src += n;
> +   symbol_buffer += n;
>    /* Also skip Scheme-style comments: semi-colon 'til end of line */
>    if (src[0] == ';') {
> -      src += strcspn(src, "\n");
> -      skip_whitespace(src);
> +      n = strcspn(src, "\n");
> +      src += n;
> +      symbol_buffer += n;
> +      skip_whitespace(src, symbol_buffer);
>    }
>  }
>
>  static s_expression *
> -read_atom(void *ctx, const char *& src)
> +read_atom(void *ctx, const char *&src, char *&symbol_buffer)
>  {
>    s_expression *expr = NULL;
>
> -   skip_whitespace(src);
> +   skip_whitespace(src, symbol_buffer);
>
>    size_t n = strcspn(src, "( \v\t\r\n);");
>    if (n == 0)
> @@ -70,44 +77,65 @@ read_atom(void *ctx, const char *& src)
>         expr = new(ctx) s_int(i);
>    } else {
>       // Not a number; return a symbol.
> -      expr = new(ctx) s_symbol(src, n);
> +      symbol_buffer[n] = '\0';
> +      expr = new(ctx) s_symbol(symbol_buffer, n);
>    }
>
>    src += n;
> +   symbol_buffer += n;
>
>    return expr;
>  }
>
> -s_expression *
> -s_expression::read_expression(void *ctx, const char *&src)
> +static s_expression *
> +__read_expression(void *ctx, const char *&src, char *&symbol_buffer)
>  {
> -   assert(src != NULL);
> -
> -   s_expression *atom = read_atom(ctx, src);
> +   s_expression *atom = read_atom(ctx, src, symbol_buffer);
>    if (atom != NULL)
>       return atom;
>
> -   skip_whitespace(src);
> +   skip_whitespace(src, symbol_buffer);
>    if (src[0] == '(') {
>       ++src;
> +      ++symbol_buffer;
>
>       s_list *list = new(ctx) s_list;
>       s_expression *expr;
>
> -      while ((expr = read_expression(ctx, src)) != NULL) {
> +      while ((expr = __read_expression(ctx, src, symbol_buffer)) != NULL) {
>         list->subexpressions.push_tail(expr);
>       }
> -      skip_whitespace(src);
> +      skip_whitespace(src, symbol_buffer);
>       if (src[0] != ')') {
>         printf("Unclosed expression (check your parenthesis).\n");
>         return NULL;
>       }
>       ++src;
> +      ++symbol_buffer;
>       return list;
>    }
>    return NULL;
>  }
>
> +s_expression *
> +s_expression::read_expression(void *ctx, const char *&src)
> +{
> +   assert(src != NULL);
> +
> +   /* When we encounter a Symbol, we need to save a nul-terminated copy of
> +    * the string.  However, ralloc_strndup'ing every individual Symbol is
> +    * extremely expensive.  We could avoid this by simply overwriting the
> +    * next character (guaranteed to be whitespace, parens, or semicolon) with
> +    * a nul-byte.  But overwriting non-whitespace would mess up parsing.
> +    *
> +    * So, just copy the whole buffer ahead of time.  Walk both, leaving the
> +    * original source string unmodified, and altering the copy to contain the
> +    * necessary nul-bytes whenever we encounter a symbol.
> +    */
> +   char *symbol_buffer = ralloc_strdup(ctx, src);
> +   return __read_expression(ctx, src, symbol_buffer);
> +}
> +
>  void s_int::print()
>  {
>    printf("%d", this->val);
> diff --git a/src/glsl/s_expression.h b/src/glsl/s_expression.h
> index c9dc676..642af19 100644
> --- a/src/glsl/s_expression.h
> +++ b/src/glsl/s_expression.h
> @@ -129,7 +129,7 @@ public:
>    void print();
>
>  private:
> -   char *str;
> +   const char *str;
>  };
>
>  /* Lists of expressions: (expr1 ... exprN) */
> --
> 1.7.6
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
>

Reviewed-by: Paul Berry <stereotype441 at gmail.com>


More information about the mesa-dev mailing list