[Mesa-dev] [PATCH 25/26] glsl: Keep common variable names in static storage

Dylan Baker baker.dylan.c at gmail.com
Mon Jul 21 13:48:31 PDT 2014


On Monday, July 14, 2014 03:48:57 PM Ian Romanick wrote:
> From: Ian Romanick <ian.d.romanick at intel.com>
> 
> Valgrind massif results for a trimmed apitrace of dota2:
> 
>                   n        time(i)         total(B)   useful-heap(B)
> extra-heap(B)    stacks(B) Before (32-bit): 52 40,521,071,734      
> 66,157,928       61,054,870     5,103,058            0 After  (32-bit): 79
> 40,523,892,028       65,999,288       60,937,396     5,061,892            0
> 
> Before (64-bit): 48 37,089,379,412       92,630,712       85,454,539    
> 7,176,173            0 After  (64-bit): 71 37,091,183,117       92,433,072 
>      85,309,100     7,123,972            0
> 
> A real savings of 114KiB on 32-bit and 142KiB on 64-bit.
> 
> Signed-off-by: Ian Romanick <ian.d.romanick at intel.com>
> ---
>  src/glsl/.gitignore               |   1 +
>  src/glsl/Makefile.am              |  10 ++
>  src/glsl/common_variable_names.py | 220
> ++++++++++++++++++++++++++++++++++++++ src/glsl/ir.cpp                   | 
> 32 ++++--
>  src/glsl/ir.h                     |   9 +-
>  5 files changed, 256 insertions(+), 16 deletions(-)
>  create mode 100644 src/glsl/common_variable_names.py
> 
> diff --git a/src/glsl/.gitignore b/src/glsl/.gitignore
> index 43720f6..16e7e81 100644
> --- a/src/glsl/.gitignore
> +++ b/src/glsl/.gitignore
> @@ -1,3 +1,4 @@
> +get_static_name.h
>  glsl_compiler
>  glsl_lexer.cpp
>  glsl_parser.cpp
> diff --git a/src/glsl/Makefile.am b/src/glsl/Makefile.am
> index 00261fd..c9de9f8 100644
> --- a/src/glsl/Makefile.am
> +++ b/src/glsl/Makefile.am
> @@ -149,6 +149,15 @@ glsl_test_SOURCES = \
> 
>  glsl_test_LDADD = libglsl.la
> 
> +# I don't understand why automake recognizes that glsl_parser_extras.lo
> +# depends on glsl_parser.h, but it cannot recognize that ir.lo depends on
> +# get_static_name.h.
> +
> +ir.lo: get_static_name.h
> +
> +get_static_name.h: common_variable_names.py
> +	$(PYTHON2) $(GLSL_SRCDIR)/common_variable_names.py >
> $(GLSL_BUILDDIR)/get_static_name.h +
>  # We write our own rules for yacc and lex below. We'd rather use automake,
>  # but automake makes it especially difficult for a number of reasons:
>  #
> @@ -205,6 +214,7 @@ BUILT_SOURCES =						\
>  	glcpp/glcpp-parse.c				\
>  	glcpp/glcpp-lex.c
>  CLEANFILES =						\
> +	get_static_name.h				\
>  	glcpp/glcpp-parse.h				\
>  	glsl_parser.h					\
>  	$(BUILT_SOURCES)
> diff --git a/src/glsl/common_variable_names.py
> b/src/glsl/common_variable_names.py new file mode 100644
> index 0000000..7435a12
> --- /dev/null
> +++ b/src/glsl/common_variable_names.py
> @@ -0,0 +1,220 @@
> +# Copyright (c) 2014 Intel Corporation
> +#
> +# Permission is hereby granted, free of charge, to any person obtaining a
> +# copy of this software and associated documentation files (the
> "Software"), +# to deal in the Software without restriction, including
> without limitation +# the rights to use, copy, modify, merge, publish,
> distribute, sublicense, +# and/or sell copies of the Software, and to
> permit persons to whom the +# Software is furnished to do so, subject to
> the following conditions: +#
> +# The above copyright notice and this permission notice (including the next
> +# paragraph) shall be included in all copies or substantial portions of
> the +# Software.
> +#
> +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
> OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
> IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
> CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
> TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE
> SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE.
> +
> +common_names = [

Python style is to use all caps for constants

> +    "atomic_temp_var",
> +    "color",
> +    "diffuse",
> +    "factor",
> +    "fucxadd",
> +    "fucxmul",
> +    "gl_ClipVertex",
> +    "gl_Color",
> +    "gl_FogFragCoord",
> +    "gl_FragColor",
> +    "gl_FragData",
> +    "gl_FragCoord",
> +    "gl_FrontColor",
> +    "gl_FrontFacing",
> +    "gl_FrontSecondaryColor",
> +    "gl_in_TexCoord0",
> +    "gl_ModelViewProjectionMatrixTranspose",
> +    "gl_MultiTexCoord0",
> +    "gl_out_FragData0",
> +    "gl_out_TexCoord0",
> +    "gl_Position",
> +    "gl_TexCoord",
> +    "gl_Vertex",
> +    "io_vLink0",
> +    "oT0",
> +    "oT1",
> +    "oT2",
> +    "oT3",
> +    "oT4",
> +    "oT5",
> +    "oT6",
> +    "oT7",
> +    "pos",
> +    "r10",
> +    "r11",
> +    "r12",
> +    "sampler0",
> +    "sampler1",
> +    "sampler2",
> +    "sampler3",
> +    "sampler4",
> +    "sampler5",
> +    "sampler6",
> +    "sampler7",
> +    "sampler8",
> +    "va_r",
> +    "vcbones",
> +    "vcscreen",
> +    "vLit",
> +    "vTempPos",
> +    "v_vColor",
> +    "v_vNormal",
> +    "v_vPositionView",
> +    "v_vTexcoordLight"
> +    ]

also, generally in python you would pull the last ] in to the same level as 
the assignment, or append it to the last line. I think in this case dedenting 
would be more readable

For mako, one of the cool things is that mako is pretty flexable about how it 
deals with indentation of control flow, so it's quite valid to indent mako say 
2 spaces, and the code it generates 4 spaces, so that control flow and 
rendered lines aren't at the same level

example:
template = """
% for x in xrange(5)
  % if x % 2 == 0:
    ${x}
  % endif
% endfor
""

> +
> +template = """static const char static_names[] =
> +   "compiler_temp\\0"
> +    % for name in common_names:
> +   "${name}\\0"
> +    % endfor
> +   ;
> +
> +    % for s in sized_lists:
> +        % if len(sized_lists[s]) != 1:
> +static const ${index_type} table_${s}[] = {
> +        % for name in sized_lists[s]:
> +   ${location_dict[name]},
> +        % endfor
> +};


Python has some wonderful methods for iterating dictionaries:
I would write this as:
% for key, names in sized_lists.iteritems():
  % if len(names) != 1:
    static const ${index_type} table_${key}[] = {
	 % for name in names:
      ${location_dict[name]}
	 % endfor
};
  %endif
	...


> +
> +        % endif
> +    % endfor
> +#ifdef DEBUG
> +/**
> + * Slowly search the table for the specified string.
> + *
> + * Used for debugging the code generator.
> + */
> +static bool
> +name_really_is_not_in_static_names(const char *name)
> +{
> +   const char *ptr = static_names + strlen("compiler_temp") + 1;
> +
> +   assert(ptr[0] != 0);
> +
> +   while (ptr[0] != 0) {
> +      const unsigned len = strlen(ptr);
> +
> +      if (strcmp(name, ptr) == 0)
> +         return false;
> +
> +      ptr += len + 1;
> +   }
> +
> +   return true;
> +}
> +#endif /* DEBUG */
> +
> +/**
> + * Search the static name table for the specified name
> + *
> + * \\warning
> + * As the name implies, \\b do \\b not call this function directly. 
> Instead, + * call \\c get_static_name.
> + */
> +static const char *
> +get_static_name_do_not_call(const char *name)
> +{
> +   const unsigned len = strlen(name);
> +   const ${index_type} *table = NULL;
> +   unsigned table_size = 0;
> +
> +   assert(strcmp("compiler_temp", &static_names[0]) == 0);
> +   assert(len >= (sizeof(void *) - 1));
> +
> +   switch (len) {
> +    % for s in sized_lists:

same thing here

> +   case ${s}:
> +        % if len(sized_lists[s]) != 1:
> +      table = table_${s};
> +      table_size = ARRAY_SIZE(table_${s});
> +      break;
> +        % else:
> +      /* ${sized_lists[s][0]} */
> +      return strcmp(name,
> &static_names[${location_dict[sized_lists[s][0]]}]) == 0 +         ?
> &static_names[${location_dict[sized_lists[s][0]]}] : NULL; +        % endif
> +    % endfor
> +   default:
> +      return NULL;
> +   }
> +
> +   /* The tables are pretty small, so the extra overhead of using bsearch
> is +    * likely to be much slower than the open-coded linear search. +   
> */
> +   for (unsigned i = 0; i < table_size; i++) {
> +      if (strcmp(name, &static_names[table[i]]) == 0)
> +         return &static_names[table[i]];
> +   }
> +
> +   return NULL;
> +}
> +
> +/**
> + * Search the static name table for the specified name
> + *
> + * In debug builds, this will perform additional sanity checking.
> + */
> +static const char *
> +get_static_name(const char *name)
> +{
> +#ifdef DEBUG
> +   const char *const ptr = get_static_name_do_not_call(name);
> +
> +   if (ptr != NULL) {
> +      assert(strcmp(ptr, name) == 0);
> +      return ptr;
> +   }
> +
> +   assert(name_really_is_not_in_static_names(name));
> +   return NULL;
> +#else
> +   return get_static_name_do_not_call(name);
> +#endif
> +}
> +"""
> +
> +common_names.sort()

you could replace this with a sort in the assignment.
common_names = sorted([
	'some',
	'strings',
	'in',
	'a',
	'list',
])

> +
> +# Partiation the list of names into sublists of names with the same length
> +sized_lists = {}
> +for name in common_names:
> +    if len(name) in sized_lists:
> +        sized_lists[len(name)].append(name)
> +    else:
> +        sized_lists[len(name)] = [name]

a shorter way to write this: 

sized_lists = {}
for name in common_names:
	sized_lists.setdefault(len(name), default=[]).append(name)

> +
> +# Create a dictionary that matches each name with the location of its first
> +# character in the static string table.
> +location_dict = {}
> +base = len("compiler_temp") + 1
> +for name in common_names:
> +    location_dict[name] = base
> +    base = base + len(name) + 1

it's quite safe to combine the last two lines if you like

> +
> +if base < (1 << 8):
> +   index_type = "uint8_t"
> +elif base < (1 << 16):
> +   index_type = "uint16_t"
> +else:
> +   index_type = "uint32_t"
> +
> +from mako.template import Template
> +print Template(template).render(location_dict = location_dict,
> +                                sized_lists = sized_lists,
> +                                common_names = common_names,
> +                                index_type = index_type)

Generally you don't put spaces around the '=' in keyword arguments

you also probably want to add 'from __future__ import print_function' and use 
print(Template...)

This is good for python3 compatability

> diff --git a/src/glsl/ir.cpp b/src/glsl/ir.cpp
> index 4b22439..543718d 100644
> --- a/src/glsl/ir.cpp
> +++ b/src/glsl/ir.cpp
> @@ -1532,10 +1532,15 @@ ir_swizzle::variable_referenced() const
>     return this->val->variable_referenced();
>  }
> 
> +#include "get_static_name.h"
> 
> -bool ir_variable::temporaries_allocate_names = false;
> +/* Note: static_names is defined in get_static_name.h.
> + */
> +const char *const ir_variable::static_names_begin = &static_names[0];
> +const char *const ir_variable::static_names_end =
> +   &static_names[ARRAY_SIZE(static_names)];
> 
> -const char ir_variable::tmp_name[] = "compiler_temp";
> +bool ir_variable::temporaries_allocate_names = false;
> 
>  ir_variable::ir_variable(const struct glsl_type *type, const char *name,
>  			 ir_variable_mode mode)
> @@ -1546,26 +1551,31 @@ ir_variable::ir_variable(const struct glsl_type
> *type, const char *name, if (mode == ir_var_temporary &&
> !ir_variable::temporaries_allocate_names) name = NULL;
> 
> -   /* The ir_variable clone method may call this constructor with name set
> to -    * tmp_name.
> -    */
>     assert(name != NULL
> 
>            || mode == ir_var_temporary
>            || mode == ir_var_function_in
>            || mode == ir_var_function_out
>            || mode == ir_var_function_inout);
> 
> -   assert(name != ir_variable::tmp_name
> -          || mode == ir_var_temporary);
> -   if (mode == ir_var_temporary
> -       && (name == NULL || name == ir_variable::tmp_name)) {
> -      this->name = ir_variable::tmp_name;
> +
> +   /* If the supplied name is in the table of statically allocated names,
> just +    * re-use the pointer.  Don't bother searching the table for it. 
> This can +    * occur when called by the ir_variable clone method.
> +    */
> +   if (name >= ir_variable::static_names_begin
> +       && name < ir_variable::static_names_end) {
> +      this->name = name;
> +   } else if (mode == ir_var_temporary && name == NULL) {
> +      this->name = ir_variable::static_names_begin;
>     } else if (name == NULL) {
>        this->padding[0] = 0;
>        this->name = (char *) this->padding;
>     } else if (strlen(name) < sizeof(this->padding)) {
>        this->name = strcpy((char *) this->padding, name);
>     } else {
> -      this->name = ralloc_strdup(this, name);
> +      const char *const static_name = get_static_name(name);
> +
> +      this->name = (static_name == NULL)
> +         ? ralloc_strdup(this, name) : static_name;
>     }
> 
>     this->u.max_ifc_array_access = NULL;
> diff --git a/src/glsl/ir.h b/src/glsl/ir.h
> index 770fe60..4088f80 100644
> --- a/src/glsl/ir.h
> +++ b/src/glsl/ir.h
> @@ -598,7 +598,8 @@ public:
> 
>     inline bool is_name_ralloced() const
>     {
> -      return this->name != ir_variable::tmp_name
> +      return !(this->name >= ir_variable::static_names_begin
> +               && this->name < ir_variable::static_names_end)
>           && this->name != (char *) this->padding;
>     }
> 
> @@ -909,10 +910,8 @@ private:
>      */
>     const glsl_type *interface_type;
> 
> -   /**
> -    * Name used for anonymous compiler temporaries
> -    */
> -   static const char tmp_name[];
> +   static const char *const static_names_begin;
> +   static const char *const static_names_end;
> 
>  public:
>     /**
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 473 bytes
Desc: This is a digitally signed message part.
URL: <http://lists.freedesktop.org/archives/mesa-dev/attachments/20140721/34e5aada/attachment-0001.sig>


More information about the mesa-dev mailing list