[Mesa-dev] [PATCH 1/3] i965: rename brw_state_cache.c -> brw_program_cache.c

Fri Nov 11 04:12:57 UTC 2016

---
 src/mesa/drivers/dri/i965/Makefile.sources    |   2 +-
 src/mesa/drivers/dri/i965/brw_context.h       |   2 +-
 src/mesa/drivers/dri/i965/brw_program_cache.c | 446 ++++++++++++++++++++++++++
 src/mesa/drivers/dri/i965/brw_state.h         |   2 +-
 src/mesa/drivers/dri/i965/brw_state_cache.c   | 446 --------------------------
 5 files changed, 449 insertions(+), 449 deletions(-)
 create mode 100644 src/mesa/drivers/dri/i965/brw_program_cache.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_state_cache.c

diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources
index 30648e3..1c33ea5 100644
--- a/src/mesa/drivers/dri/i965/Makefile.sources
+++ b/src/mesa/drivers/dri/i965/Makefile.sources
@@ -139,6 +139,7 @@ i965_FILES = \
 	brw_pipe_control.c \
 	brw_program.c \
 	brw_program.h \
+	brw_program_cache.c \
 	brw_primitive_restart.c \
 	brw_queryobj.c \
 	brw_reset.c \
@@ -148,7 +149,6 @@ i965_FILES = \
 	brw_sf.h \
 	brw_sf_state.c \
 	brw_state_batch.c \
-	brw_state_cache.c \
 	brw_state_dump.c \
 	brw_state.h \
 	brw_state_upload.c \
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 310372a..3a88e56 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -178,7 +178,7 @@ enum brw_cache_id {
 };
 
 enum brw_state_id {
-   /* brw_cache_ids must come first - see brw_state_cache.c */
+   /* brw_cache_ids must come first - see brw_program_cache.c */
    BRW_STATE_URB_FENCE = BRW_MAX_CACHE,
    BRW_STATE_FRAGMENT_PROGRAM,
    BRW_STATE_GEOMETRY_PROGRAM,
diff --git a/src/mesa/drivers/dri/i965/brw_program_cache.c b/src/mesa/drivers/dri/i965/brw_program_cache.c
new file mode 100644
index 0000000..4a67b96
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_program_cache.c
@@ -0,0 +1,446 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keithw at vmware.com>
+  */
+
+/** @file brw_program_cache.c
+ *
+ * This file implements a simple program cache for 965.  The consumers can
+ *  query the hash table of programs using a cache_id and program key, and
+ * receive the corresponding program buffer object (plus associated auxiliary
+ *  data) in return.  Objects in the cache may not have relocations
+ * (pointers to other BOs) in them.
+ *
+ * The inner workings are a simple hash table based on a CRC of the
+ * key data.
+ *
+ * Replacement is not implemented.  Instead, when the cache gets too
+ * big we throw out all of the cache data and let it get regenerated.
+ */
+
+#include "main/imports.h"
+#include "intel_batchbuffer.h"
+#include "brw_state.h"
+#include "brw_vs.h"
+#include "brw_wm.h"
+#include "brw_gs.h"
+#include "brw_cs.h"
+#include "brw_program.h"
+
+#define FILE_DEBUG_FLAG DEBUG_STATE
+
+static GLuint
+hash_key(struct brw_cache_item *item)
+{
+   GLuint *ikey = (GLuint *)item->key;
+   GLuint hash = item->cache_id, i;
+
+   assert(item->key_size % 4 == 0);
+
+   /* I'm sure this can be improved on:
+    */
+   for (i = 0; i < item->key_size/4; i++) {
+      hash ^= ikey[i];
+      hash = (hash << 5) | (hash >> 27);
+   }
+
+   return hash;
+}
+
+static int
+brw_cache_item_equals(const struct brw_cache_item *a,
+		      const struct brw_cache_item *b)
+{
+   return a->cache_id == b->cache_id &&
+      a->hash == b->hash &&
+      a->key_size == b->key_size &&
+      (memcmp(a->key, b->key, a->key_size) == 0);
+}
+
+static struct brw_cache_item *
+search_cache(struct brw_cache *cache, GLuint hash,
+	     struct brw_cache_item *lookup)
+{
+   struct brw_cache_item *c;
+
+#if 0
+   int bucketcount = 0;
+
+   for (c = cache->items[hash % cache->size]; c; c = c->next)
+      bucketcount++;
+
+   fprintf(stderr, "bucket %d/%d = %d/%d items\n", hash % cache->size,
+	   cache->size, bucketcount, cache->n_items);
+#endif
+
+   for (c = cache->items[hash % cache->size]; c; c = c->next) {
+      if (brw_cache_item_equals(lookup, c))
+	 return c;
+   }
+
+   return NULL;
+}
+
+
+static void
+rehash(struct brw_cache *cache)
+{
+   struct brw_cache_item **items;
+   struct brw_cache_item *c, *next;
+   GLuint size, i;
+
+   size = cache->size * 3;
+   items = calloc(size, sizeof(*items));
+
+   for (i = 0; i < cache->size; i++)
+      for (c = cache->items[i]; c; c = next) {
+	 next = c->next;
+	 c->next = items[c->hash % size];
+	 items[c->hash % size] = c;
+      }
+
+   free(cache->items);
+   cache->items = items;
+   cache->size = size;
+}
+
+
+/**
+ * Returns the buffer object matching cache_id and key, or NULL.
+ */
+bool
+brw_search_cache(struct brw_cache *cache,
+                 enum brw_cache_id cache_id,
+                 const void *key, GLuint key_size,
+                 uint32_t *inout_offset, void *inout_aux)
+{
+   struct brw_context *brw = cache->brw;
+   struct brw_cache_item *item;
+   struct brw_cache_item lookup;
+   GLuint hash;
+
+   lookup.cache_id = cache_id;
+   lookup.key = key;
+   lookup.key_size = key_size;
+   hash = hash_key(&lookup);
+   lookup.hash = hash;
+
+   item = search_cache(cache, hash, &lookup);
+
+   if (item == NULL)
+      return false;
+
+   void *aux = ((char *) item->key) + item->key_size;
+
+   if (item->offset != *inout_offset || aux != *((void **) inout_aux)) {
+      brw->ctx.NewDriverState |= (1 << cache_id);
+      *inout_offset = item->offset;
+      *((void **) inout_aux) = aux;
+   }
+
+   return true;
+}
+
+static void
+brw_cache_new_bo(struct brw_cache *cache, uint32_t new_size)
+{
+   struct brw_context *brw = cache->brw;
+   drm_intel_bo *new_bo;
+
+   new_bo = drm_intel_bo_alloc(brw->bufmgr, "program cache", new_size, 64);
+   if (brw->has_llc)
+      drm_intel_gem_bo_map_unsynchronized(new_bo);
+
+   /* Copy any existing data that needs to be saved. */
+   if (cache->next_offset != 0) {
+      if (brw->has_llc) {
+         memcpy(new_bo->virtual, cache->bo->virtual, cache->next_offset);
+      } else {
+         drm_intel_bo_map(cache->bo, false);
+         drm_intel_bo_subdata(new_bo, 0, cache->next_offset,
+                              cache->bo->virtual);
+         drm_intel_bo_unmap(cache->bo);
+      }
+   }
+
+   if (brw->has_llc)
+      drm_intel_bo_unmap(cache->bo);
+   drm_intel_bo_unreference(cache->bo);
+   cache->bo = new_bo;
+   cache->bo_used_by_gpu = false;
+
+   /* Since we have a new BO in place, we need to signal the units
+    * that depend on it (state base address on gen5+, or unit state before).
+    */
+   brw->ctx.NewDriverState |= BRW_NEW_PROGRAM_CACHE;
+   brw->batch.state_base_address_emitted = false;
+}
+
+/**
+ * Attempts to find an item in the cache with identical data.
+ */
+static const struct brw_cache_item *
+brw_lookup_prog(const struct brw_cache *cache,
+                enum brw_cache_id cache_id,
+                const void *data, unsigned data_size)
+{
+   const struct brw_context *brw = cache->brw;
+   unsigned i;
+   const struct brw_cache_item *item;
+
+   for (i = 0; i < cache->size; i++) {
+      for (item = cache->items[i]; item; item = item->next) {
+	 int ret;
+
+	 if (item->cache_id != cache_id || item->size != data_size)
+	    continue;
+
+         if (!brw->has_llc)
+            drm_intel_bo_map(cache->bo, false);
+	 ret = memcmp(cache->bo->virtual + item->offset, data, item->size);
+         if (!brw->has_llc)
+            drm_intel_bo_unmap(cache->bo);
+	 if (ret)
+	    continue;
+
+	 return item;
+      }
+   }
+
+   return NULL;
+}
+
+static uint32_t
+brw_alloc_item_data(struct brw_cache *cache, uint32_t size)
+{
+   uint32_t offset;
+   struct brw_context *brw = cache->brw;
+
+   /* Allocate space in the cache BO for our new program. */
+   if (cache->next_offset + size > cache->bo->size) {
+      uint32_t new_size = cache->bo->size * 2;
+
+      while (cache->next_offset + size > new_size)
+	 new_size *= 2;
+
+      brw_cache_new_bo(cache, new_size);
+   }
+
+   /* If we would block on writing to an in-use program BO, just
+    * recreate it.
+    */
+   if (!brw->has_llc && cache->bo_used_by_gpu) {
+      perf_debug("Copying busy program cache buffer.\n");
+      brw_cache_new_bo(cache, cache->bo->size);
+   }
+
+   offset = cache->next_offset;
+
+   /* Programs are always 64-byte aligned, so set up the next one now */
+   cache->next_offset = ALIGN(offset + size, 64);
+
+   return offset;
+}
+
+void
+brw_upload_cache(struct brw_cache *cache,
+		 enum brw_cache_id cache_id,
+		 const void *key,
+		 GLuint key_size,
+		 const void *data,
+		 GLuint data_size,
+		 const void *aux,
+		 GLuint aux_size,
+		 uint32_t *out_offset,
+		 void *out_aux)
+{
+   struct brw_context *brw = cache->brw;
+   struct brw_cache_item *item = CALLOC_STRUCT(brw_cache_item);
+   const struct brw_cache_item *matching_data =
+      brw_lookup_prog(cache, cache_id, data, data_size);
+   GLuint hash;
+   void *tmp;
+
+   item->cache_id = cache_id;
+   item->size = data_size;
+   item->key = key;
+   item->key_size = key_size;
+   item->aux_size = aux_size;
+   hash = hash_key(item);
+   item->hash = hash;
+
+   /* If we can find a matching prog in the cache already, then reuse the
+    * existing stuff without creating new copy into the underlying buffer
+    * object. This is notably useful for programs generating shaders at
+    * runtime, where multiple shaders may compile to the same thing in our
+    * backend.
+    */
+   if (matching_data) {
+      item->offset = matching_data->offset;
+   } else {
+      item->offset = brw_alloc_item_data(cache, data_size);
+
+      /* Copy data to the buffer */
+      if (brw->has_llc) {
+         memcpy((char *)cache->bo->virtual + item->offset, data, data_size);
+      } else {
+         drm_intel_bo_subdata(cache->bo, item->offset, data_size, data);
+      }
+   }
+
+   /* Set up the memory containing the key and aux_data */
+   tmp = malloc(key_size + aux_size);
+
+   memcpy(tmp, key, key_size);
+   memcpy(tmp + key_size, aux, aux_size);
+
+   item->key = tmp;
+
+   if (cache->n_items > cache->size * 1.5f)
+      rehash(cache);
+
+   hash %= cache->size;
+   item->next = cache->items[hash];
+   cache->items[hash] = item;
+   cache->n_items++;
+
+   *out_offset = item->offset;
+   *(void **)out_aux = (void *)((char *)item->key + item->key_size);
+   cache->brw->ctx.NewDriverState |= 1 << cache_id;
+}
+
+void
+brw_init_caches(struct brw_context *brw)
+{
+   struct brw_cache *cache = &brw->cache;
+
+   cache->brw = brw;
+
+   cache->size = 7;
+   cache->n_items = 0;
+   cache->items =
+      calloc(cache->size, sizeof(struct brw_cache_item *));
+
+   cache->bo = drm_intel_bo_alloc(brw->bufmgr,
+				  "program cache",
+				  4096, 64);
+   if (brw->has_llc)
+      drm_intel_gem_bo_map_unsynchronized(cache->bo);
+}
+
+static void
+brw_clear_cache(struct brw_context *brw, struct brw_cache *cache)
+{
+   struct brw_cache_item *c, *next;
+   GLuint i;
+
+   DBG("%s\n", __func__);
+
+   for (i = 0; i < cache->size; i++) {
+      for (c = cache->items[i]; c; c = next) {
+	 next = c->next;
+         if (c->cache_id == BRW_CACHE_VS_PROG ||
+             c->cache_id == BRW_CACHE_TCS_PROG ||
+             c->cache_id == BRW_CACHE_TES_PROG ||
+             c->cache_id == BRW_CACHE_GS_PROG ||
+             c->cache_id == BRW_CACHE_FS_PROG ||
+             c->cache_id == BRW_CACHE_CS_PROG) {
+            const void *item_aux = c->key + c->key_size;
+            brw_stage_prog_data_free(item_aux);
+         }
+	 free((void *)c->key);
+	 free(c);
+      }
+      cache->items[i] = NULL;
+   }
+
+   cache->n_items = 0;
+
+   /* Start putting programs into the start of the BO again, since
+    * we'll never find the old results.
+    */
+   cache->next_offset = 0;
+
+   /* We need to make sure that the programs get regenerated, since
+    * any offsets leftover in brw_context will no longer be valid.
+    */
+   brw->NewGLState = ~0;
+   brw->ctx.NewDriverState = ~0ull;
+   brw->state.pipelines[BRW_RENDER_PIPELINE].mesa = ~0;
+   brw->state.pipelines[BRW_RENDER_PIPELINE].brw = ~0ull;
+   brw->state.pipelines[BRW_COMPUTE_PIPELINE].mesa = ~0;
+   brw->state.pipelines[BRW_COMPUTE_PIPELINE].brw = ~0ull;
+
+   /* Also, NULL out any stale program pointers. */
+   brw->vs.base.prog_data = NULL;
+   brw->tcs.base.prog_data = NULL;
+   brw->tes.base.prog_data = NULL;
+   brw->gs.base.prog_data = NULL;
+   brw->wm.base.prog_data = NULL;
+   brw->cs.base.prog_data = NULL;
+
+   intel_batchbuffer_flush(brw);
+}
+
+void
+brw_state_cache_check_size(struct brw_context *brw)
+{
+   /* un-tuned guess.  Each object is generally a page, so 2000 of them is 8 MB of
+    * state cache.
+    */
+   if (brw->cache.n_items > 2000) {
+      perf_debug("Exceeded state cache size limit.  Clearing the set "
+                 "of compiled programs, which will trigger recompiles\n");
+      brw_clear_cache(brw, &brw->cache);
+   }
+}
+
+
+static void
+brw_destroy_cache(struct brw_context *brw, struct brw_cache *cache)
+{
+
+   DBG("%s\n", __func__);
+
+   if (brw->has_llc)
+      drm_intel_bo_unmap(cache->bo);
+   drm_intel_bo_unreference(cache->bo);
+   cache->bo = NULL;
+   brw_clear_cache(brw, cache);
+   free(cache->items);
+   cache->items = NULL;
+   cache->size = 0;
+}
+
+
+void
+brw_destroy_caches(struct brw_context *brw)
+{
+   brw_destroy_cache(brw, &brw->cache);
+}
diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h
index 841c3de..38c2cbc 100644
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@@ -217,7 +217,7 @@ brw_select_pipeline(struct brw_context *brw, enum brw_pipeline pipeline)
 }
 
 /***********************************************************************
- * brw_state_cache.c
+ * brw_program_cache.c
  */
 
 void brw_upload_cache(struct brw_cache *cache,
diff --git a/src/mesa/drivers/dri/i965/brw_state_cache.c b/src/mesa/drivers/dri/i965/brw_state_cache.c
deleted file mode 100644
index e8e71ab..0000000
--- a/src/mesa/drivers/dri/i965/brw_state_cache.c
+++ /dev/null
@@ -1,446 +0,0 @@
-/*
- Copyright (C) Intel Corp.  2006.  All Rights Reserved.
- Intel funded Tungsten Graphics to
- develop this 3D driver.
-
- Permission is hereby granted, free of charge, to any person obtaining
- a copy of this software and associated documentation files (the
- "Software"), to deal in the Software without restriction, including
- without limitation the rights to use, copy, modify, merge, publish,
- distribute, sublicense, and/or sell copies of the Software, and to
- permit persons to whom the Software is furnished to do so, subject to
- the following conditions:
-
- The above copyright notice and this permission notice (including the
- next paragraph) shall be included in all copies or substantial
- portions of the Software.
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
- LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
- **********************************************************************/
- /*
-  * Authors:
-  *   Keith Whitwell <keithw at vmware.com>
-  */
-
-/** @file brw_state_cache.c
- *
- * This file implements a simple static state cache for 965.  The
- * consumers can query the hash table of state using a cache_id,
- * opaque key data, and receive the corresponding state buffer object
- * of state (plus associated auxiliary data) in return.  Objects in
- * the cache may not have relocations (pointers to other BOs) in them.
- *
- * The inner workings are a simple hash table based on a CRC of the
- * key data.
- *
- * Replacement is not implemented.  Instead, when the cache gets too
- * big we throw out all of the cache data and let it get regenerated.
- */
-
-#include "main/imports.h"
-#include "intel_batchbuffer.h"
-#include "brw_state.h"
-#include "brw_vs.h"
-#include "brw_wm.h"
-#include "brw_gs.h"
-#include "brw_cs.h"
-#include "brw_program.h"
-
-#define FILE_DEBUG_FLAG DEBUG_STATE
-
-static GLuint
-hash_key(struct brw_cache_item *item)
-{
-   GLuint *ikey = (GLuint *)item->key;
-   GLuint hash = item->cache_id, i;
-
-   assert(item->key_size % 4 == 0);
-
-   /* I'm sure this can be improved on:
-    */
-   for (i = 0; i < item->key_size/4; i++) {
-      hash ^= ikey[i];
-      hash = (hash << 5) | (hash >> 27);
-   }
-
-   return hash;
-}
-
-static int
-brw_cache_item_equals(const struct brw_cache_item *a,
-		      const struct brw_cache_item *b)
-{
-   return a->cache_id == b->cache_id &&
-      a->hash == b->hash &&
-      a->key_size == b->key_size &&
-      (memcmp(a->key, b->key, a->key_size) == 0);
-}
-
-static struct brw_cache_item *
-search_cache(struct brw_cache *cache, GLuint hash,
-	     struct brw_cache_item *lookup)
-{
-   struct brw_cache_item *c;
-
-#if 0
-   int bucketcount = 0;
-
-   for (c = cache->items[hash % cache->size]; c; c = c->next)
-      bucketcount++;
-
-   fprintf(stderr, "bucket %d/%d = %d/%d items\n", hash % cache->size,
-	   cache->size, bucketcount, cache->n_items);
-#endif
-
-   for (c = cache->items[hash % cache->size]; c; c = c->next) {
-      if (brw_cache_item_equals(lookup, c))
-	 return c;
-   }
-
-   return NULL;
-}
-
-
-static void
-rehash(struct brw_cache *cache)
-{
-   struct brw_cache_item **items;
-   struct brw_cache_item *c, *next;
-   GLuint size, i;
-
-   size = cache->size * 3;
-   items = calloc(size, sizeof(*items));
-
-   for (i = 0; i < cache->size; i++)
-      for (c = cache->items[i]; c; c = next) {
-	 next = c->next;
-	 c->next = items[c->hash % size];
-	 items[c->hash % size] = c;
-      }
-
-   free(cache->items);
-   cache->items = items;
-   cache->size = size;
-}
-
-
-/**
- * Returns the buffer object matching cache_id and key, or NULL.
- */
-bool
-brw_search_cache(struct brw_cache *cache,
-                 enum brw_cache_id cache_id,
-                 const void *key, GLuint key_size,
-                 uint32_t *inout_offset, void *inout_aux)
-{
-   struct brw_context *brw = cache->brw;
-   struct brw_cache_item *item;
-   struct brw_cache_item lookup;
-   GLuint hash;
-
-   lookup.cache_id = cache_id;
-   lookup.key = key;
-   lookup.key_size = key_size;
-   hash = hash_key(&lookup);
-   lookup.hash = hash;
-
-   item = search_cache(cache, hash, &lookup);
-
-   if (item == NULL)
-      return false;
-
-   void *aux = ((char *) item->key) + item->key_size;
-
-   if (item->offset != *inout_offset || aux != *((void **) inout_aux)) {
-      brw->ctx.NewDriverState |= (1 << cache_id);
-      *inout_offset = item->offset;
-      *((void **) inout_aux) = aux;
-   }
-
-   return true;
-}
-
-static void
-brw_cache_new_bo(struct brw_cache *cache, uint32_t new_size)
-{
-   struct brw_context *brw = cache->brw;
-   drm_intel_bo *new_bo;
-
-   new_bo = drm_intel_bo_alloc(brw->bufmgr, "program cache", new_size, 64);
-   if (brw->has_llc)
-      drm_intel_gem_bo_map_unsynchronized(new_bo);
-
-   /* Copy any existing data that needs to be saved. */
-   if (cache->next_offset != 0) {
-      if (brw->has_llc) {
-         memcpy(new_bo->virtual, cache->bo->virtual, cache->next_offset);
-      } else {
-         drm_intel_bo_map(cache->bo, false);
-         drm_intel_bo_subdata(new_bo, 0, cache->next_offset,
-                              cache->bo->virtual);
-         drm_intel_bo_unmap(cache->bo);
-      }
-   }
-
-   if (brw->has_llc)
-      drm_intel_bo_unmap(cache->bo);
-   drm_intel_bo_unreference(cache->bo);
-   cache->bo = new_bo;
-   cache->bo_used_by_gpu = false;
-
-   /* Since we have a new BO in place, we need to signal the units
-    * that depend on it (state base address on gen5+, or unit state before).
-    */
-   brw->ctx.NewDriverState |= BRW_NEW_PROGRAM_CACHE;
-   brw->batch.state_base_address_emitted = false;
-}
-
-/**
- * Attempts to find an item in the cache with identical data.
- */
-static const struct brw_cache_item *
-brw_lookup_prog(const struct brw_cache *cache,
-                enum brw_cache_id cache_id,
-                const void *data, unsigned data_size)
-{
-   const struct brw_context *brw = cache->brw;
-   unsigned i;
-   const struct brw_cache_item *item;
-
-   for (i = 0; i < cache->size; i++) {
-      for (item = cache->items[i]; item; item = item->next) {
-	 int ret;
-
-	 if (item->cache_id != cache_id || item->size != data_size)
-	    continue;
-
-         if (!brw->has_llc)
-            drm_intel_bo_map(cache->bo, false);
-	 ret = memcmp(cache->bo->virtual + item->offset, data, item->size);
-         if (!brw->has_llc)
-            drm_intel_bo_unmap(cache->bo);
-	 if (ret)
-	    continue;
-
-	 return item;
-      }
-   }
-
-   return NULL;
-}
-
-static uint32_t
-brw_alloc_item_data(struct brw_cache *cache, uint32_t size)
-{
-   uint32_t offset;
-   struct brw_context *brw = cache->brw;
-
-   /* Allocate space in the cache BO for our new program. */
-   if (cache->next_offset + size > cache->bo->size) {
-      uint32_t new_size = cache->bo->size * 2;
-
-      while (cache->next_offset + size > new_size)
-	 new_size *= 2;
-
-      brw_cache_new_bo(cache, new_size);
-   }
-
-   /* If we would block on writing to an in-use program BO, just
-    * recreate it.
-    */
-   if (!brw->has_llc && cache->bo_used_by_gpu) {
-      perf_debug("Copying busy program cache buffer.\n");
-      brw_cache_new_bo(cache, cache->bo->size);
-   }
-
-   offset = cache->next_offset;
-
-   /* Programs are always 64-byte aligned, so set up the next one now */
-   cache->next_offset = ALIGN(offset + size, 64);
-
-   return offset;
-}
-
-void
-brw_upload_cache(struct brw_cache *cache,
-		 enum brw_cache_id cache_id,
-		 const void *key,
-		 GLuint key_size,
-		 const void *data,
-		 GLuint data_size,
-		 const void *aux,
-		 GLuint aux_size,
-		 uint32_t *out_offset,
-		 void *out_aux)
-{
-   struct brw_context *brw = cache->brw;
-   struct brw_cache_item *item = CALLOC_STRUCT(brw_cache_item);
-   const struct brw_cache_item *matching_data =
-      brw_lookup_prog(cache, cache_id, data, data_size);
-   GLuint hash;
-   void *tmp;
-
-   item->cache_id = cache_id;
-   item->size = data_size;
-   item->key = key;
-   item->key_size = key_size;
-   item->aux_size = aux_size;
-   hash = hash_key(item);
-   item->hash = hash;
-
-   /* If we can find a matching prog in the cache already, then reuse the
-    * existing stuff without creating new copy into the underlying buffer
-    * object. This is notably useful for programs generating shaders at
-    * runtime, where multiple shaders may compile to the same thing in our
-    * backend.
-    */
-   if (matching_data) {
-      item->offset = matching_data->offset;
-   } else {
-      item->offset = brw_alloc_item_data(cache, data_size);
-
-      /* Copy data to the buffer */
-      if (brw->has_llc) {
-         memcpy((char *)cache->bo->virtual + item->offset, data, data_size);
-      } else {
-         drm_intel_bo_subdata(cache->bo, item->offset, data_size, data);
-      }
-   }
-
-   /* Set up the memory containing the key and aux_data */
-   tmp = malloc(key_size + aux_size);
-
-   memcpy(tmp, key, key_size);
-   memcpy(tmp + key_size, aux, aux_size);
-
-   item->key = tmp;
-
-   if (cache->n_items > cache->size * 1.5f)
-      rehash(cache);
-
-   hash %= cache->size;
-   item->next = cache->items[hash];
-   cache->items[hash] = item;
-   cache->n_items++;
-
-   *out_offset = item->offset;
-   *(void **)out_aux = (void *)((char *)item->key + item->key_size);
-   cache->brw->ctx.NewDriverState |= 1 << cache_id;
-}
-
-void
-brw_init_caches(struct brw_context *brw)
-{
-   struct brw_cache *cache = &brw->cache;
-
-   cache->brw = brw;
-
-   cache->size = 7;
-   cache->n_items = 0;
-   cache->items =
-      calloc(cache->size, sizeof(struct brw_cache_item *));
-
-   cache->bo = drm_intel_bo_alloc(brw->bufmgr,
-				  "program cache",
-				  4096, 64);
-   if (brw->has_llc)
-      drm_intel_gem_bo_map_unsynchronized(cache->bo);
-}
-
-static void
-brw_clear_cache(struct brw_context *brw, struct brw_cache *cache)
-{
-   struct brw_cache_item *c, *next;
-   GLuint i;
-
-   DBG("%s\n", __func__);
-
-   for (i = 0; i < cache->size; i++) {
-      for (c = cache->items[i]; c; c = next) {
-	 next = c->next;
-         if (c->cache_id == BRW_CACHE_VS_PROG ||
-             c->cache_id == BRW_CACHE_TCS_PROG ||
-             c->cache_id == BRW_CACHE_TES_PROG ||
-             c->cache_id == BRW_CACHE_GS_PROG ||
-             c->cache_id == BRW_CACHE_FS_PROG ||
-             c->cache_id == BRW_CACHE_CS_PROG) {
-            const void *item_aux = c->key + c->key_size;
-            brw_stage_prog_data_free(item_aux);
-         }
-	 free((void *)c->key);
-	 free(c);
-      }
-      cache->items[i] = NULL;
-   }
-
-   cache->n_items = 0;
-
-   /* Start putting programs into the start of the BO again, since
-    * we'll never find the old results.
-    */
-   cache->next_offset = 0;
-
-   /* We need to make sure that the programs get regenerated, since
-    * any offsets leftover in brw_context will no longer be valid.
-    */
-   brw->NewGLState = ~0;
-   brw->ctx.NewDriverState = ~0ull;
-   brw->state.pipelines[BRW_RENDER_PIPELINE].mesa = ~0;
-   brw->state.pipelines[BRW_RENDER_PIPELINE].brw = ~0ull;
-   brw->state.pipelines[BRW_COMPUTE_PIPELINE].mesa = ~0;
-   brw->state.pipelines[BRW_COMPUTE_PIPELINE].brw = ~0ull;
-
-   /* Also, NULL out any stale program pointers. */
-   brw->vs.base.prog_data = NULL;
-   brw->tcs.base.prog_data = NULL;
-   brw->tes.base.prog_data = NULL;
-   brw->gs.base.prog_data = NULL;
-   brw->wm.base.prog_data = NULL;
-   brw->cs.base.prog_data = NULL;
-
-   intel_batchbuffer_flush(brw);
-}
-
-void
-brw_state_cache_check_size(struct brw_context *brw)
-{
-   /* un-tuned guess.  Each object is generally a page, so 2000 of them is 8 MB of
-    * state cache.
-    */
-   if (brw->cache.n_items > 2000) {
-      perf_debug("Exceeded state cache size limit.  Clearing the set "
-                 "of compiled programs, which will trigger recompiles\n");
-      brw_clear_cache(brw, &brw->cache);
-   }
-}
-
-
-static void
-brw_destroy_cache(struct brw_context *brw, struct brw_cache *cache)
-{
-
-   DBG("%s\n", __func__);
-
-   if (brw->has_llc)
-      drm_intel_bo_unmap(cache->bo);
-   drm_intel_bo_unreference(cache->bo);
-   cache->bo = NULL;
-   brw_clear_cache(brw, cache);
-   free(cache->items);
-   cache->items = NULL;
-   cache->size = 0;
-}
-
-
-void
-brw_destroy_caches(struct brw_context *brw)
-{
-   brw_destroy_cache(brw, &brw->cache);
-}
-- 
2.7.4