[Mesa-dev] [PATCH 03/12] glcpp: Use Bloom filter before identifier search

Vladislav Egorov vegorov180 at gmail.com
Sat Jan 7 19:02:04 UTC 2017


Absolute majority of identifiers in shaders are not macro references.
Many shaders don't use preprocessing macros at all. Almost all
queries to parser->defines hash-table will be unsuccessful. Note
that all predefined macros start either with "GL_" or with "__".
Moreover, even user-defined macros are usually use upper case, while
ordinary identifiers use lower case. It's easy to detect macros just
looking on the first two characters of their name. So it makes sense
to make a quick identifier lookup in a Bloom filter before making
relatively expensive hash-table search call.

Many schemes of Bloom are possible, but it seems that two mini 64 bit
Bloom tables -- one for the 1st identifier's char, one for the second
identifier's char -- with one trivial hash-function (6 lowest bits of
the char) work very well. Considering that Latin-1 alphabetic chars
and _ occupy positions from 65 to 122, every character will have its
bit position in 64-bit word, distinguishing lower and upper case.

Some games do things like "#define float4 float", this scheme would
not help here, it barely improves heavy users of preprocessor like
Talos Principle, but in general it works pretty well. In my shader-db
it eliminates 90%+ hash table queries during preprocessing stage.
One 64-bit Bloom table working on only 1st or only 2nd char will
eliminate ~70-80% queries.

For simplicity use the same scheme for both 32-bit and 64-bit archs.
32-bit archs still have pretty efficients ways to do 64-bit shifts.
---
 src/compiler/glsl/glcpp/glcpp-parse.y | 24 ++++++++++++++++++------
 src/compiler/glsl/glcpp/glcpp.h       | 10 ++++++++++
 2 files changed, 28 insertions(+), 6 deletions(-)

diff --git a/src/compiler/glsl/glcpp/glcpp-parse.y b/src/compiler/glsl/glcpp/glcpp-parse.y
index 4780c9f..47bf1e1 100644
--- a/src/compiler/glsl/glcpp/glcpp-parse.y
+++ b/src/compiler/glsl/glcpp/glcpp-parse.y
@@ -1345,6 +1345,11 @@ glcpp_parser_create(glcpp_extension_iterator extensions, void *state, gl_api api
    glcpp_lex_init_extra (parser, &parser->scanner);
    parser->defines = _mesa_hash_table_create(NULL, _mesa_key_hash_string,
                                              _mesa_key_string_equal);
+   parser->defines_bloom0 = 0;
+   parser->defines_bloom1 = 0;
+   GLCPP_DEFINES_BLOOM_PUT(parser, "__LINE__");
+   GLCPP_DEFINES_BLOOM_PUT(parser, "__FILE__");
+
    parser->linalloc = linear_alloc_parent(parser, 0);
    parser->active = NULL;
    parser->lexing_directive = 0;
@@ -1832,6 +1837,9 @@ _glcpp_parser_expand_node(glcpp_parser_t *parser, token_node_t *node,
    *last = node;
    identifier = token->value.str;
 
+   if (!GLCPP_DEFINES_BLOOM_GET(parser, identifier))
+      return NULL;
+
    /* Special handling for __LINE__ and __FILE__, (not through
     * the hash table). */
    if (*identifier == '_') {
@@ -2118,6 +2126,7 @@ _define_object_macro(glcpp_parser_t *parser, YYLTYPE *loc,
       glcpp_error (loc, parser, "Redefinition of macro %s\n",  identifier);
    }
 
+   GLCPP_DEFINES_BLOOM_PUT(parser, identifier);
    _mesa_hash_table_insert (parser->defines, identifier, macro);
 }
 
@@ -2153,6 +2162,7 @@ _define_function_macro(glcpp_parser_t *parser, YYLTYPE *loc,
       glcpp_error (loc, parser, "Redefinition of macro %s\n", identifier);
    }
 
+   GLCPP_DEFINES_BLOOM_PUT(parser, identifier);
    _mesa_hash_table_insert(parser->defines, identifier, macro);
 }
 
@@ -2199,12 +2209,14 @@ glcpp_parser_lex(YYSTYPE *yylval, YYLTYPE *yylloc, glcpp_parser_t *parser)
                ret == ENDIF || ret == HASH_TOKEN) {
          parser->in_control_line = 1;
       } else if (ret == IDENTIFIER) {
-         struct hash_entry *entry = _mesa_hash_table_search(parser->defines,
-                                                            yylval->str);
-         macro_t *macro = entry ? entry->data : NULL;
-         if (macro && macro->is_function) {
-            parser->newline_as_space = 1;
-            parser->paren_count = 0;
+         if (GLCPP_DEFINES_BLOOM_GET(parser, yylval->str)) {
+            struct hash_entry *entry = _mesa_hash_table_search(parser->defines,
+                                                               yylval->str);
+            macro_t *macro = entry ? entry->data : NULL;
+            if (macro && macro->is_function) {
+               parser->newline_as_space = 1;
+               parser->paren_count = 0;
+            }
          }
       }
 
diff --git a/src/compiler/glsl/glcpp/glcpp.h b/src/compiler/glsl/glcpp/glcpp.h
index 068d83b..a9c08e6 100644
--- a/src/compiler/glsl/glcpp/glcpp.h
+++ b/src/compiler/glsl/glcpp/glcpp.h
@@ -35,6 +35,14 @@
 
 #define yyscan_t void*
 
+#define GLCPP_DEFINES_BLOOM_GET(parser, identifier) \
+   (((((parser)->defines_bloom0) >> ((identifier)[0] & 63)) & 1) & \
+    ((((parser)->defines_bloom1) >> ((identifier)[1] & 63)) & 1))
+
+#define GLCPP_DEFINES_BLOOM_PUT(parser, identifier) \
+   (parser)->defines_bloom0 |= (uint64_t)1 << ((identifier)[0] & 63); \
+   (parser)->defines_bloom1 |= (uint64_t)1 << ((identifier)[1] & 63);
+
 /* Some data types used for parser values. */
 
 typedef struct expression_value {
@@ -191,6 +199,8 @@ struct glcpp_parser {
 	void *linalloc;
 	yyscan_t scanner;
 	struct hash_table *defines;
+	uint64_t defines_bloom0;
+	uint64_t defines_bloom1;
 	active_list_t *active;
 	int lexing_directive;
 	int lexing_version_directive;
-- 
2.7.4



More information about the mesa-dev mailing list