[Mesa-dev] [PATCH] glsl: Avoid massive ralloc_strndup overhead in S-Expression parsing.
Paul Berry
stereotype441 at gmail.com
Mon Jul 18 11:52:41 PDT 2011
On 15 July 2011 03:16, Kenneth Graunke <kenneth at whitecape.org> wrote:
> When parsing S-Expressions, we need to store nul-terminated strings for
> Symbol nodes. Prior to this patch, we called ralloc_strndup each time
> we constructed a new s_symbol. It turns out that this is obscenely
> expensive.
>
> Instead, copy the whole buffer before parsing and overwrite it to
> contain \0 bytes at the appropriate locations. Since atoms are
> separated by whitespace, (), or ;, we can safely overwrite the character
> after a Symbol. While much of the buffer may be unused, copying the
> whole buffer is simple and guaranteed to provide enough space.
>
> Prior to this, running piglit-run.py -t glsl tests/quick.tests with GLSL
> 1.30 enabled took just over 10 minutes on my machine. Now it takes 5.
>
> Signed-off-by: Kenneth Graunke <kenneth at whitecape.org>
> ---
> src/glsl/s_expression.cpp | 64 ++++++++++++++++++++++++++++++++------------
> src/glsl/s_expression.h | 2 +-
> 2 files changed, 47 insertions(+), 19 deletions(-)
>
> Seriously! 50% faster piglit runs! I should've done this ages ago.
>
> diff --git a/src/glsl/s_expression.cpp b/src/glsl/s_expression.cpp
> index a922a50..e704a3b 100644
> --- a/src/glsl/s_expression.cpp
> +++ b/src/glsl/s_expression.cpp
> @@ -25,10 +25,13 @@
> #include <assert.h>
> #include "s_expression.h"
>
> -s_symbol::s_symbol(const char *tmp, size_t n)
> +s_symbol::s_symbol(const char *str, size_t n)
> {
> - this->str = ralloc_strndup (this, tmp, n);
> - assert(this->str != NULL);
> + /* Assume the given string is already nul-terminated and in memory that
> + * will live as long as this node.
> + */
> + assert(str[n] == '\0');
> + this->str = str;
> }
>
> s_list::s_list()
> @@ -36,22 +39,26 @@ s_list::s_list()
> }
>
> static void
> -skip_whitespace(const char *& src)
> +skip_whitespace(const char *&src, char *&symbol_buffer)
> {
> - src += strspn(src, " \v\t\r\n");
> + size_t n = strspn(src, " \v\t\r\n");
> + src += n;
> + symbol_buffer += n;
> /* Also skip Scheme-style comments: semi-colon 'til end of line */
> if (src[0] == ';') {
> - src += strcspn(src, "\n");
> - skip_whitespace(src);
> + n = strcspn(src, "\n");
> + src += n;
> + symbol_buffer += n;
> + skip_whitespace(src, symbol_buffer);
> }
> }
>
> static s_expression *
> -read_atom(void *ctx, const char *& src)
> +read_atom(void *ctx, const char *&src, char *&symbol_buffer)
> {
> s_expression *expr = NULL;
>
> - skip_whitespace(src);
> + skip_whitespace(src, symbol_buffer);
>
> size_t n = strcspn(src, "( \v\t\r\n);");
> if (n == 0)
> @@ -70,44 +77,65 @@ read_atom(void *ctx, const char *& src)
> expr = new(ctx) s_int(i);
> } else {
> // Not a number; return a symbol.
> - expr = new(ctx) s_symbol(src, n);
> + symbol_buffer[n] = '\0';
> + expr = new(ctx) s_symbol(symbol_buffer, n);
> }
>
> src += n;
> + symbol_buffer += n;
>
> return expr;
> }
>
> -s_expression *
> -s_expression::read_expression(void *ctx, const char *&src)
> +static s_expression *
> +__read_expression(void *ctx, const char *&src, char *&symbol_buffer)
> {
> - assert(src != NULL);
> -
> - s_expression *atom = read_atom(ctx, src);
> + s_expression *atom = read_atom(ctx, src, symbol_buffer);
> if (atom != NULL)
> return atom;
>
> - skip_whitespace(src);
> + skip_whitespace(src, symbol_buffer);
> if (src[0] == '(') {
> ++src;
> + ++symbol_buffer;
>
> s_list *list = new(ctx) s_list;
> s_expression *expr;
>
> - while ((expr = read_expression(ctx, src)) != NULL) {
> + while ((expr = __read_expression(ctx, src, symbol_buffer)) != NULL) {
> list->subexpressions.push_tail(expr);
> }
> - skip_whitespace(src);
> + skip_whitespace(src, symbol_buffer);
> if (src[0] != ')') {
> printf("Unclosed expression (check your parenthesis).\n");
> return NULL;
> }
> ++src;
> + ++symbol_buffer;
> return list;
> }
> return NULL;
> }
>
> +s_expression *
> +s_expression::read_expression(void *ctx, const char *&src)
> +{
> + assert(src != NULL);
> +
> + /* When we encounter a Symbol, we need to save a nul-terminated copy of
> + * the string. However, ralloc_strndup'ing every individual Symbol is
> + * extremely expensive. We could avoid this by simply overwriting the
> + * next character (guaranteed to be whitespace, parens, or semicolon) with
> + * a nul-byte. But overwriting non-whitespace would mess up parsing.
> + *
> + * So, just copy the whole buffer ahead of time. Walk both, leaving the
> + * original source string unmodified, and altering the copy to contain the
> + * necessary nul-bytes whenever we encounter a symbol.
> + */
> + char *symbol_buffer = ralloc_strdup(ctx, src);
> + return __read_expression(ctx, src, symbol_buffer);
> +}
> +
> void s_int::print()
> {
> printf("%d", this->val);
> diff --git a/src/glsl/s_expression.h b/src/glsl/s_expression.h
> index c9dc676..642af19 100644
> --- a/src/glsl/s_expression.h
> +++ b/src/glsl/s_expression.h
> @@ -129,7 +129,7 @@ public:
> void print();
>
> private:
> - char *str;
> + const char *str;
> };
>
> /* Lists of expressions: (expr1 ... exprN) */
> --
> 1.7.6
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
Reviewed-by: Paul Berry <stereotype441 at gmail.com>
More information about the mesa-dev
mailing list