[Mesa-dev] [PATCH v2 6/6] anv: Rework pipeline caching

Sat Aug 27 01:21:04 UTC 2016

The original pipeline cache that Kristian wrote was based on a now-false
premise that the shaders can be stored in the pipeline cache.  The Vulkan
1.0 spec explicitly states that the pipeline cache object is transient and
you are allowed to delete it after using it to create a pipeline with no
ill effects.  As nice as Kristian's design was, it doesn't jibe with the
expectation provided by the Vulkan spec.

The new pipeline cache uses reference-counted anv_shader_bin objects that
are backed by a large state pool.  The cache itself is just a hash table
mapping key hashes to anv_shader_bin objects.  This has the added
advantage of removing one more hand-rolled hash table from mesa.

v2: Don't bake key or prog_data sizes in.  Instead, let the caller specify
    them.  We need this for blorp which uses a different set of keys and
    prog_data objects from the rest of Vulkan.

Signed-off-by: Jason Ekstrand <jason at jlekstrand.net>
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=97476
---
 src/intel/vulkan/anv_cmd_buffer.c     |   6 +-
 src/intel/vulkan/anv_device.c         |   1 -
 src/intel/vulkan/anv_pipeline.c       | 140 ++++-----
 src/intel/vulkan/anv_pipeline_cache.c | 515 ++++++++++++----------------------
 src/intel/vulkan/anv_private.h        |  69 ++---
 src/intel/vulkan/genX_l3.c            |   3 +-
 src/intel/vulkan/genX_pipeline.c      |   3 +-
 src/intel/vulkan/genX_pipeline_util.h |   2 +-
 8 files changed, 289 insertions(+), 450 deletions(-)

diff --git a/src/intel/vulkan/anv_cmd_buffer.c b/src/intel/vulkan/anv_cmd_buffer.c
index 6c082aa..9dedde8 100644
--- a/src/intel/vulkan/anv_cmd_buffer.c
+++ b/src/intel/vulkan/anv_cmd_buffer.c
@@ -757,7 +757,7 @@ anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
       return VK_SUCCESS;
    }
 
-   struct anv_pipeline_bind_map *map = &pipeline->bindings[stage];
+   struct anv_pipeline_bind_map *map = &pipeline->shaders[stage]->bind_map;
    if (bias + map->surface_count == 0) {
       *bt_state = (struct anv_state) { 0, };
       return VK_SUCCESS;
@@ -922,7 +922,7 @@ anv_cmd_buffer_emit_samplers(struct anv_cmd_buffer *cmd_buffer,
       return VK_SUCCESS;
    }
 
-   struct anv_pipeline_bind_map *map = &pipeline->bindings[stage];
+   struct anv_pipeline_bind_map *map = &pipeline->shaders[stage]->bind_map;
    if (map->sampler_count == 0) {
       *state = (struct anv_state) { 0, };
       return VK_SUCCESS;
@@ -1096,7 +1096,7 @@ anv_cmd_buffer_push_constants(struct anv_cmd_buffer *cmd_buffer,
    struct anv_push_constants *data =
       cmd_buffer->state.push_constants[stage];
    const struct brw_stage_prog_data *prog_data =
-      cmd_buffer->state.pipeline->prog_data[stage];
+      anv_shader_bin_get_prog_data(cmd_buffer->state.pipeline->shaders[stage]);
 
    /* If we don't actually have any push constants, bail. */
    if (data == NULL || prog_data == NULL || prog_data->nr_params == 0)
diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
index 765dc6e..cf63993 100644
--- a/src/intel/vulkan/anv_device.c
+++ b/src/intel/vulkan/anv_device.c
@@ -881,7 +881,6 @@ VkResult anv_CreateDevice(
    anv_block_pool_init(&device->instruction_block_pool, device, 128 * 1024);
    anv_state_pool_init(&device->instruction_state_pool,
                        &device->instruction_block_pool);
-   anv_pipeline_cache_init(&device->default_pipeline_cache, device);
 
    anv_block_pool_init(&device->surface_state_block_pool, device, 4096);
 
diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c
index 933e45b..b336a58 100644
--- a/src/intel/vulkan/anv_pipeline.c
+++ b/src/intel/vulkan/anv_pipeline.c
@@ -204,6 +204,12 @@ void anv_DestroyPipeline(
                          pAllocator ? pAllocator : &device->alloc);
    if (pipeline->blend_state.map)
       anv_state_pool_free(&device->dynamic_state_pool, pipeline->blend_state);
+
+   for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
+      if (pipeline->shaders[s])
+         anv_shader_bin_unref(device, pipeline->shaders[s]);
+   }
+
    anv_free2(&device->alloc, pAllocator, pipeline);
 }
 
@@ -393,15 +399,34 @@ anv_fill_binding_table(struct brw_stage_prog_data *prog_data, unsigned bias)
    prog_data->binding_table.image_start = bias;
 }
 
+static struct anv_shader_bin *
+anv_pipeline_upload_kernel(struct anv_pipeline *pipeline,
+                           struct anv_pipeline_cache *cache,
+                           const void *key_data, uint32_t key_size,
+                           const void *kernel_data, uint32_t kernel_size,
+                           const void *prog_data, uint32_t prog_data_size,
+                           const struct anv_pipeline_bind_map *bind_map)
+{
+   if (cache) {
+      return anv_pipeline_cache_upload_kernel(cache, key_data, key_size,
+                                              kernel_data, kernel_size,
+                                              prog_data, prog_data_size,
+                                              bind_map);
+   } else {
+      return anv_shader_bin_create(pipeline->device, key_data, key_size,
+                                   kernel_data, kernel_size,
+                                   prog_data, prog_data_size, bind_map);
+   }
+}
+
+
 static void
 anv_pipeline_add_compiled_stage(struct anv_pipeline *pipeline,
                                 gl_shader_stage stage,
-                                const struct brw_stage_prog_data *prog_data,
-                                struct anv_pipeline_bind_map *map)
+                                struct anv_shader_bin *shader)
 {
-   pipeline->prog_data[stage] = prog_data;
+   pipeline->shaders[stage] = shader;
    pipeline->active_stages |= mesa_to_vk_shader_stage(stage);
-   pipeline->bindings[stage] = *map;
 }
 
 static VkResult
@@ -414,21 +439,20 @@ anv_pipeline_compile_vs(struct anv_pipeline *pipeline,
 {
    const struct brw_compiler *compiler =
       pipeline->device->instance->physicalDevice.compiler;
-   const struct brw_stage_prog_data *stage_prog_data;
    struct anv_pipeline_bind_map map;
    struct brw_vs_prog_key key;
-   uint32_t kernel = NO_KERNEL;
+   struct anv_shader_bin *bin = NULL;
    unsigned char sha1[20];
 
    populate_vs_prog_key(&pipeline->device->info, &key);
 
-   if (module->size > 0) {
+   if (cache) {
       anv_hash_shader(sha1, &key, sizeof(key), module, entrypoint,
                       pipeline->layout, spec_info);
-      kernel = anv_pipeline_cache_search(cache, sha1, &stage_prog_data, &map);
+      bin = anv_pipeline_cache_search(cache, sha1, 20);
    }
 
-   if (kernel == NO_KERNEL) {
+   if (bin == NULL) {
       struct brw_vs_prog_data prog_data = { 0, };
       struct anv_pipeline_binding surface_to_descriptor[256];
       struct anv_pipeline_binding sampler_to_descriptor[256];
@@ -467,28 +491,25 @@ anv_pipeline_compile_vs(struct anv_pipeline *pipeline,
          return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
       }
 
-      stage_prog_data = &prog_data.base.base;
-      kernel = anv_pipeline_cache_upload_kernel(cache,
-                                                module->size > 0 ? sha1 : NULL,
-                                                shader_code, code_size,
-                                                &stage_prog_data, sizeof(prog_data),
-                                                &map);
+      bin = anv_pipeline_upload_kernel(pipeline, cache, sha1, 20,
+                                       shader_code, code_size,
+                                       &prog_data, sizeof(prog_data), &map);
+
       ralloc_free(mem_ctx);
    }
 
    const struct brw_vs_prog_data *vs_prog_data =
-      (const struct brw_vs_prog_data *) stage_prog_data;
+      (const struct brw_vs_prog_data *)anv_shader_bin_get_prog_data(bin);
 
    if (vs_prog_data->base.dispatch_mode == DISPATCH_MODE_SIMD8) {
-      pipeline->vs_simd8 = kernel;
+      pipeline->vs_simd8 = bin->kernel.offset;
       pipeline->vs_vec4 = NO_KERNEL;
    } else {
       pipeline->vs_simd8 = NO_KERNEL;
-      pipeline->vs_vec4 = kernel;
+      pipeline->vs_vec4 = bin->kernel.offset;
    }
 
-   anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_VERTEX,
-                                   stage_prog_data, &map);
+   anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_VERTEX, bin);
 
    return VK_SUCCESS;
 }
@@ -503,21 +524,20 @@ anv_pipeline_compile_gs(struct anv_pipeline *pipeline,
 {
    const struct brw_compiler *compiler =
       pipeline->device->instance->physicalDevice.compiler;
-   const struct brw_stage_prog_data *stage_prog_data;
    struct anv_pipeline_bind_map map;
    struct brw_gs_prog_key key;
-   uint32_t kernel = NO_KERNEL;
+   struct anv_shader_bin *bin = NULL;
    unsigned char sha1[20];
 
    populate_gs_prog_key(&pipeline->device->info, &key);
 
-   if (module->size > 0) {
+   if (cache) {
       anv_hash_shader(sha1, &key, sizeof(key), module, entrypoint,
                       pipeline->layout, spec_info);
-      kernel = anv_pipeline_cache_search(cache, sha1, &stage_prog_data, &map);
+      bin = anv_pipeline_cache_search(cache, sha1, 20);
    }
 
-   if (kernel == NO_KERNEL) {
+   if (bin == NULL) {
       struct brw_gs_prog_data prog_data = { 0, };
       struct anv_pipeline_binding surface_to_descriptor[256];
       struct anv_pipeline_binding sampler_to_descriptor[256];
@@ -555,20 +575,16 @@ anv_pipeline_compile_gs(struct anv_pipeline *pipeline,
       }
 
       /* TODO: SIMD8 GS */
-      stage_prog_data = &prog_data.base.base;
-      kernel = anv_pipeline_cache_upload_kernel(cache,
-                                                module->size > 0 ? sha1 : NULL,
-                                                shader_code, code_size,
-                                                &stage_prog_data, sizeof(prog_data),
-                                                &map);
+      bin = anv_pipeline_upload_kernel(pipeline, cache, sha1, 20,
+                                       shader_code, code_size,
+                                       &prog_data, sizeof(prog_data), &map);
 
       ralloc_free(mem_ctx);
    }
 
-   pipeline->gs_kernel = kernel;
+   pipeline->gs_kernel = bin->kernel.offset;
 
-   anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_GEOMETRY,
-                                   stage_prog_data, &map);
+   anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_GEOMETRY, bin);
 
    return VK_SUCCESS;
 }
@@ -584,21 +600,20 @@ anv_pipeline_compile_fs(struct anv_pipeline *pipeline,
 {
    const struct brw_compiler *compiler =
       pipeline->device->instance->physicalDevice.compiler;
-   const struct brw_stage_prog_data *stage_prog_data;
    struct anv_pipeline_bind_map map;
    struct brw_wm_prog_key key;
+   struct anv_shader_bin *bin = NULL;
    unsigned char sha1[20];
 
    populate_wm_prog_key(&pipeline->device->info, info, extra, &key);
 
-   if (module->size > 0) {
+   if (cache) {
       anv_hash_shader(sha1, &key, sizeof(key), module, entrypoint,
                       pipeline->layout, spec_info);
-      pipeline->ps_ksp0 =
-         anv_pipeline_cache_search(cache, sha1, &stage_prog_data, &map);
+      bin = anv_pipeline_cache_search(cache, sha1, 20);
    }
 
-   if (pipeline->ps_ksp0 == NO_KERNEL) {
+   if (bin == NULL) {
       struct brw_wm_prog_data prog_data = { 0, };
       struct anv_pipeline_binding surface_to_descriptor[256];
       struct anv_pipeline_binding sampler_to_descriptor[256];
@@ -687,19 +702,16 @@ anv_pipeline_compile_fs(struct anv_pipeline *pipeline,
          return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
       }
 
-      stage_prog_data = &prog_data.base;
-      pipeline->ps_ksp0 =
-         anv_pipeline_cache_upload_kernel(cache,
-                                          module->size > 0 ? sha1 : NULL,
-                                          shader_code, code_size,
-                                                &stage_prog_data, sizeof(prog_data),
-                                                &map);
+      bin = anv_pipeline_upload_kernel(pipeline, cache, sha1, 20,
+                                       shader_code, code_size,
+                                       &prog_data, sizeof(prog_data), &map);
 
       ralloc_free(mem_ctx);
    }
 
-   anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_FRAGMENT,
-                                   stage_prog_data, &map);
+   pipeline->ps_ksp0 = bin->kernel.offset;
+
+   anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_FRAGMENT, bin);
 
    return VK_SUCCESS;
 }
@@ -714,21 +726,20 @@ anv_pipeline_compile_cs(struct anv_pipeline *pipeline,
 {
    const struct brw_compiler *compiler =
       pipeline->device->instance->physicalDevice.compiler;
-   const struct brw_stage_prog_data *stage_prog_data;
    struct anv_pipeline_bind_map map;
    struct brw_cs_prog_key key;
-   uint32_t kernel = NO_KERNEL;
+   struct anv_shader_bin *bin = NULL;
    unsigned char sha1[20];
 
    populate_cs_prog_key(&pipeline->device->info, &key);
 
-   if (module->size > 0) {
+   if (cache) {
       anv_hash_shader(sha1, &key, sizeof(key), module, entrypoint,
                       pipeline->layout, spec_info);
-      kernel = anv_pipeline_cache_search(cache, sha1, &stage_prog_data, &map);
+      bin = anv_pipeline_cache_search(cache, sha1, 20);
    }
 
-   if (module->size == 0 || kernel == NO_KERNEL) {
+   if (bin == NULL) {
       struct brw_cs_prog_data prog_data = { 0, };
       struct anv_pipeline_binding surface_to_descriptor[256];
       struct anv_pipeline_binding sampler_to_descriptor[256];
@@ -760,20 +771,16 @@ anv_pipeline_compile_cs(struct anv_pipeline *pipeline,
          return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
       }
 
-      stage_prog_data = &prog_data.base;
-      kernel = anv_pipeline_cache_upload_kernel(cache,
-                                                module->size > 0 ? sha1 : NULL,
-                                                shader_code, code_size,
-                                                &stage_prog_data, sizeof(prog_data),
-                                                &map);
+      bin = anv_pipeline_upload_kernel(pipeline, cache, sha1, 20,
+                                       shader_code, code_size,
+                                       &prog_data, sizeof(prog_data), &map);
 
       ralloc_free(mem_ctx);
    }
 
-   pipeline->cs_simd = kernel;
+   pipeline->cs_simd = bin->kernel.offset;
 
-   anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_COMPUTE,
-                                   stage_prog_data, &map);
+   anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_COMPUTE, bin);
 
    return VK_SUCCESS;
 }
@@ -1167,8 +1174,7 @@ anv_pipeline_init(struct anv_pipeline *pipeline,
    /* When we free the pipeline, we detect stages based on the NULL status
     * of various prog_data pointers.  Make them NULL by default.
     */
-   memset(pipeline->prog_data, 0, sizeof(pipeline->prog_data));
-   memset(pipeline->bindings, 0, sizeof(pipeline->bindings));
+   memset(pipeline->shaders, 0, sizeof(pipeline->shaders));
 
    pipeline->vs_simd8 = NO_KERNEL;
    pipeline->vs_vec4 = NO_KERNEL;
@@ -1283,9 +1289,6 @@ anv_graphics_pipeline_create(
    ANV_FROM_HANDLE(anv_device, device, _device);
    ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache);
 
-   if (cache == NULL)
-      cache = &device->default_pipeline_cache;
-
    switch (device->info.gen) {
    case 7:
       if (device->info.is_haswell)
@@ -1339,9 +1342,6 @@ static VkResult anv_compute_pipeline_create(
    ANV_FROM_HANDLE(anv_device, device, _device);
    ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache);
 
-   if (cache == NULL)
-      cache = &device->default_pipeline_cache;
-
    switch (device->info.gen) {
    case 7:
       if (device->info.is_haswell)
diff --git a/src/intel/vulkan/anv_pipeline_cache.c b/src/intel/vulkan/anv_pipeline_cache.c
index abca9fe..fd3f6ba 100644
--- a/src/intel/vulkan/anv_pipeline_cache.c
+++ b/src/intel/vulkan/anv_pipeline_cache.c
@@ -22,6 +22,7 @@
  */
 
 #include "util/mesa-sha1.h"
+#include "util/hash_table.h"
 #include "util/debug.h"
 #include "anv_private.h"
 
@@ -148,69 +149,6 @@ anv_shader_bin_write_data(const struct anv_shader_bin *shader, void *data)
  */
 
 void
-anv_pipeline_cache_init(struct anv_pipeline_cache *cache,
-                        struct anv_device *device)
-{
-   cache->device = device;
-   anv_state_stream_init(&cache->program_stream,
-                         &device->instruction_block_pool);
-   pthread_mutex_init(&cache->mutex, NULL);
-
-   cache->kernel_count = 0;
-   cache->total_size = 0;
-   cache->table_size = 1024;
-   const size_t byte_size = cache->table_size * sizeof(cache->hash_table[0]);
-   cache->hash_table = malloc(byte_size);
-
-   /* We don't consider allocation failure fatal, we just start with a 0-sized
-    * cache. */
-   if (cache->hash_table == NULL ||
-       !env_var_as_boolean("ANV_ENABLE_PIPELINE_CACHE", true))
-      cache->table_size = 0;
-   else
-      memset(cache->hash_table, 0xff, byte_size);
-}
-
-void
-anv_pipeline_cache_finish(struct anv_pipeline_cache *cache)
-{
-   anv_state_stream_finish(&cache->program_stream);
-   pthread_mutex_destroy(&cache->mutex);
-   free(cache->hash_table);
-}
-
-struct cache_entry {
-   unsigned char sha1[20];
-   uint32_t prog_data_size;
-   uint32_t kernel_size;
-   uint32_t surface_count;
-   uint32_t sampler_count;
-   uint32_t image_count;
-
-   char prog_data[0];
-
-   /* kernel follows prog_data at next 64 byte aligned address */
-};
-
-static uint32_t
-entry_size(struct cache_entry *entry)
-{
-   /* This returns the number of bytes needed to serialize an entry, which
-    * doesn't include the alignment padding bytes.
-    */
-
-   struct brw_stage_prog_data *prog_data = (void *)entry->prog_data;
-   const uint32_t param_size =
-      prog_data->nr_params * sizeof(*prog_data->param);
-
-   const uint32_t map_size =
-      entry->surface_count * sizeof(struct anv_pipeline_binding) +
-      entry->sampler_count * sizeof(struct anv_pipeline_binding);
-
-   return sizeof(*entry) + entry->prog_data_size + param_size + map_size;
-}
-
-void
 anv_hash_shader(unsigned char *hash, const void *key, size_t key_size,
                 struct anv_shader_module *module,
                 const char *entrypoint,
@@ -236,221 +174,94 @@ anv_hash_shader(unsigned char *hash, const void *key, size_t key_size,
    _mesa_sha1_final(ctx, hash);
 }
 
-static uint32_t
-anv_pipeline_cache_search_unlocked(struct anv_pipeline_cache *cache,
-                                   const unsigned char *sha1,
-                                   const struct brw_stage_prog_data **prog_data,
-                                   struct anv_pipeline_bind_map *map)
+static struct anv_shader_bin *
+anv_pipeline_cache_search_locked(struct anv_pipeline_cache *cache,
+                                 const void *key_data, uint32_t key_size)
 {
-   const uint32_t mask = cache->table_size - 1;
-   const uint32_t start = (*(uint32_t *) sha1);
-
-   for (uint32_t i = 0; i < cache->table_size; i++) {
-      const uint32_t index = (start + i) & mask;
-      const uint32_t offset = cache->hash_table[index];
-
-      if (offset == ~0)
-         return NO_KERNEL;
-
-      struct cache_entry *entry =
-         cache->program_stream.block_pool->map + offset;
-      if (memcmp(entry->sha1, sha1, sizeof(entry->sha1)) == 0) {
-         if (prog_data) {
-            assert(map);
-            void *p = entry->prog_data;
-            *prog_data = p;
-            p += entry->prog_data_size;
-            p += (*prog_data)->nr_params * sizeof(*(*prog_data)->param);
-            map->surface_count = entry->surface_count;
-            map->sampler_count = entry->sampler_count;
-            map->image_count = entry->image_count;
-            map->surface_to_descriptor = p;
-            p += map->surface_count * sizeof(struct anv_pipeline_binding);
-            map->sampler_to_descriptor = p;
-         }
-
-         return offset + align_u32(entry_size(entry), 64);
-      }
-   }
+   uint32_t vla[1 + DIV_ROUND_UP(key_size, sizeof(uint32_t))];
+   struct shader_bin_key *key = (void *)vla;
+   key->size = key_size;
+   memcpy(key->data, key_data, key_size);
 
-   /* This can happen if the pipeline cache is disabled via
-    * ANV_ENABLE_PIPELINE_CACHE=false
-    */
-   return NO_KERNEL;
+   struct hash_entry *entry = _mesa_hash_table_search(cache->cache, key);
+   if (entry)
+      return entry->data;
+   else
+      return NULL;
 }
 
-uint32_t
+struct anv_shader_bin *
 anv_pipeline_cache_search(struct anv_pipeline_cache *cache,
-                          const unsigned char *sha1,
-                          const struct brw_stage_prog_data **prog_data,
-                          struct anv_pipeline_bind_map *map)
+                          const void *key_data, uint32_t key_size)
 {
-   uint32_t kernel;
+   if (!cache->cache)
+      return NULL;
 
    pthread_mutex_lock(&cache->mutex);
 
-   kernel = anv_pipeline_cache_search_unlocked(cache, sha1, prog_data, map);
+   struct anv_shader_bin *shader =
+      anv_pipeline_cache_search_locked(cache, key_data, key_size);
 
    pthread_mutex_unlock(&cache->mutex);
 
-   return kernel;
-}
-
-static void
-anv_pipeline_cache_set_entry(struct anv_pipeline_cache *cache,
-                             struct cache_entry *entry, uint32_t entry_offset)
-{
-   const uint32_t mask = cache->table_size - 1;
-   const uint32_t start = (*(uint32_t *) entry->sha1);
-
-   /* We'll always be able to insert when we get here. */
-   assert(cache->kernel_count < cache->table_size / 2);
-
-   for (uint32_t i = 0; i < cache->table_size; i++) {
-      const uint32_t index = (start + i) & mask;
-      if (cache->hash_table[index] == ~0) {
-         cache->hash_table[index] = entry_offset;
-         break;
-      }
-   }
+   /* We increment refcount before handing it to the caller */
+   if (shader)
+      anv_shader_bin_ref(shader);
 
-   cache->total_size += entry_size(entry) + entry->kernel_size;
-   cache->kernel_count++;
+   return shader;
 }
 
-static VkResult
-anv_pipeline_cache_grow(struct anv_pipeline_cache *cache)
+static struct anv_shader_bin *
+anv_pipeline_cache_add_shader(struct anv_pipeline_cache *cache,
+                              const void *key_data, uint32_t key_size,
+                              const void *kernel_data, uint32_t kernel_size,
+                              const void *prog_data, uint32_t prog_data_size,
+                              const struct anv_pipeline_bind_map *bind_map)
 {
-   const uint32_t table_size = cache->table_size * 2;
-   const uint32_t old_table_size = cache->table_size;
-   const size_t byte_size = table_size * sizeof(cache->hash_table[0]);
-   uint32_t *table;
-   uint32_t *old_table = cache->hash_table;
-
-   table = malloc(byte_size);
-   if (table == NULL)
-      return VK_ERROR_OUT_OF_HOST_MEMORY;
-
-   cache->hash_table = table;
-   cache->table_size = table_size;
-   cache->kernel_count = 0;
-   cache->total_size = 0;
-
-   memset(cache->hash_table, 0xff, byte_size);
-   for (uint32_t i = 0; i < old_table_size; i++) {
-      const uint32_t offset = old_table[i];
-      if (offset == ~0)
-         continue;
-
-      struct cache_entry *entry =
-         cache->program_stream.block_pool->map + offset;
-      anv_pipeline_cache_set_entry(cache, entry, offset);
-   }
-
-   free(old_table);
+   struct anv_shader_bin *shader =
+      anv_pipeline_cache_search_locked(cache, key_data, key_size);
+   if (shader)
+      return shader;
+
+   struct anv_shader_bin *bin =
+      anv_shader_bin_create(cache->device, key_data, key_size,
+                            kernel_data, kernel_size,
+                            prog_data, prog_data_size, bind_map);
+   if (!bin)
+      return NULL;
 
-   return VK_SUCCESS;
-}
+   _mesa_hash_table_insert(cache->cache, anv_shader_bin_get_key(bin), bin);
 
-static void
-anv_pipeline_cache_add_entry(struct anv_pipeline_cache *cache,
-                             struct cache_entry *entry, uint32_t entry_offset)
-{
-   if (cache->kernel_count == cache->table_size / 2)
-      anv_pipeline_cache_grow(cache);
-
-   /* Failing to grow that hash table isn't fatal, but may mean we don't
-    * have enough space to add this new kernel. Only add it if there's room.
-    */
-   if (cache->kernel_count < cache->table_size / 2)
-      anv_pipeline_cache_set_entry(cache, entry, entry_offset);
+   return bin;
 }
 
-uint32_t
+struct anv_shader_bin *
 anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache,
-                                 const unsigned char *sha1,
-                                 const void *kernel, size_t kernel_size,
-                                 const struct brw_stage_prog_data **prog_data,
-                                 size_t prog_data_size,
-                                 struct anv_pipeline_bind_map *map)
+                                 const void *key_data, uint32_t key_size,
+                                 const void *kernel_data, uint32_t kernel_size,
+                                 const void *prog_data, uint32_t prog_data_size,
+                                 const struct anv_pipeline_bind_map *bind_map)
 {
-   pthread_mutex_lock(&cache->mutex);
-
-   /* Before uploading, check again that another thread didn't upload this
-    * shader while we were compiling it.
-    */
-   if (sha1) {
-      uint32_t cached_kernel =
-         anv_pipeline_cache_search_unlocked(cache, sha1, prog_data, map);
-      if (cached_kernel != NO_KERNEL) {
-         pthread_mutex_unlock(&cache->mutex);
-         return cached_kernel;
-      }
-   }
-
-   struct cache_entry *entry;
-
-   assert((*prog_data)->nr_pull_params == 0);
-   assert((*prog_data)->nr_image_params == 0);
-
-   const uint32_t param_size =
-      (*prog_data)->nr_params * sizeof(*(*prog_data)->param);
-
-   const uint32_t map_size =
-      map->surface_count * sizeof(struct anv_pipeline_binding) +
-      map->sampler_count * sizeof(struct anv_pipeline_binding);
-
-   const uint32_t preamble_size =
-      align_u32(sizeof(*entry) + prog_data_size + param_size + map_size, 64);
+   if (cache->cache) {
+      pthread_mutex_lock(&cache->mutex);
 
-   const uint32_t size = preamble_size + kernel_size;
+      struct anv_shader_bin *bin =
+         anv_pipeline_cache_add_shader(cache, key_data, key_size,
+                                       kernel_data, kernel_size,
+                                       prog_data, prog_data_size, bind_map);
 
-   assert(size < cache->program_stream.block_pool->block_size);
-   const struct anv_state state =
-      anv_state_stream_alloc(&cache->program_stream, size, 64);
+      pthread_mutex_unlock(&cache->mutex);
 
-   entry = state.map;
-   entry->prog_data_size = prog_data_size;
-   entry->surface_count = map->surface_count;
-   entry->sampler_count = map->sampler_count;
-   entry->image_count = map->image_count;
-   entry->kernel_size = kernel_size;
+      /* We increment refcount before handing it to the caller */
+      anv_shader_bin_ref(bin);
 
-   void *p = entry->prog_data;
-   memcpy(p, *prog_data, prog_data_size);
-   p += prog_data_size;
-
-   memcpy(p, (*prog_data)->param, param_size);
-   ((struct brw_stage_prog_data *)entry->prog_data)->param = p;
-   p += param_size;
-
-   memcpy(p, map->surface_to_descriptor,
-          map->surface_count * sizeof(struct anv_pipeline_binding));
-   map->surface_to_descriptor = p;
-   p += map->surface_count * sizeof(struct anv_pipeline_binding);
-
-   memcpy(p, map->sampler_to_descriptor,
-          map->sampler_count * sizeof(struct anv_pipeline_binding));
-   map->sampler_to_descriptor = p;
-
-   if (sha1) {
-      assert(anv_pipeline_cache_search_unlocked(cache, sha1,
-                                                NULL, NULL) == NO_KERNEL);
-
-      memcpy(entry->sha1, sha1, sizeof(entry->sha1));
-      anv_pipeline_cache_add_entry(cache, entry, state.offset);
+      return bin;
+   } else {
+      /* In this case, we're not caching it so the caller owns it entirely */
+      return anv_shader_bin_create(cache->device, key_data, key_size,
+                                   kernel_data, kernel_size,
+                                   prog_data, prog_data_size, bind_map);
    }
-
-   pthread_mutex_unlock(&cache->mutex);
-
-   memcpy(state.map + preamble_size, kernel, kernel_size);
-
-   if (!cache->device->info.has_llc)
-      anv_state_clflush(state);
-
-   *prog_data = (const struct brw_stage_prog_data *) entry->prog_data;
-
-   return state.offset + preamble_size;
 }
 
 struct cache_header {
@@ -469,6 +280,9 @@ anv_pipeline_cache_load(struct anv_pipeline_cache *cache,
    struct cache_header header;
    uint8_t uuid[VK_UUID_SIZE];
 
+   if (cache->cache == NULL)
+      return;
+
    if (size < sizeof(header))
       return;
    memcpy(&header, data, sizeof(header));
@@ -484,48 +298,70 @@ anv_pipeline_cache_load(struct anv_pipeline_cache *cache,
    if (memcmp(header.uuid, uuid, VK_UUID_SIZE) != 0)
       return;
 
-   void *end = (void *) data + size;
-   void *p = (void *) data + header.header_size;
-
-   while (p < end) {
-      struct cache_entry *entry = p;
-
-      void *data = entry->prog_data;
-
-      /* Make a copy of prog_data so that it's mutable */
-      uint8_t prog_data_tmp[512];
-      assert(entry->prog_data_size <= sizeof(prog_data_tmp));
-      memcpy(prog_data_tmp, data, entry->prog_data_size);
-      struct brw_stage_prog_data *prog_data = (void *)prog_data_tmp;
-      data += entry->prog_data_size;
-
-      prog_data->param = data;
-      data += prog_data->nr_params * sizeof(*prog_data->param);
-
-      struct anv_pipeline_binding *surface_to_descriptor = data;
-      data += entry->surface_count * sizeof(struct anv_pipeline_binding);
-      struct anv_pipeline_binding *sampler_to_descriptor = data;
-      data += entry->sampler_count * sizeof(struct anv_pipeline_binding);
-      void *kernel = data;
-
-      struct anv_pipeline_bind_map map = {
-         .surface_count = entry->surface_count,
-         .sampler_count = entry->sampler_count,
-         .image_count = entry->image_count,
-         .surface_to_descriptor = surface_to_descriptor,
-         .sampler_to_descriptor = sampler_to_descriptor
-      };
-
-      const struct brw_stage_prog_data *const_prog_data = prog_data;
-
-      anv_pipeline_cache_upload_kernel(cache, entry->sha1,
-                                       kernel, entry->kernel_size,
-                                       &const_prog_data,
-                                       entry->prog_data_size, &map);
-      p = kernel + entry->kernel_size;
+   const void *end = data + size;
+   const void *p = data + header.header_size;
+
+   /* Count is the total number of valid entries */
+   uint32_t count;
+   if (p + sizeof(count) >= end)
+      return;
+   memcpy(&count, p, sizeof(count));
+   p += align_u32(sizeof(count), 8);
+
+   for (uint32_t i = 0; i < count; i++) {
+      struct anv_shader_bin bin;
+      if (p + sizeof(bin) > end)
+         break;
+      memcpy(&bin, p, sizeof(bin));
+      p += align_u32(sizeof(struct anv_shader_bin), 8);
+
+      const void *prog_data = p;
+      p += align_u32(bin.prog_data_size, 8);
+
+      struct shader_bin_key key;
+      if (p + sizeof(key) > end)
+         break;
+      memcpy(&key, p, sizeof(key));
+      const void *key_data = p + sizeof(key);
+      p += align_u32(sizeof(key) + key.size, 8);
+
+      /* We're going to memcpy this so getting rid of const is fine */
+      struct anv_pipeline_binding *bindings = (void *)p;
+      p += align_u32((bin.bind_map.surface_count + bin.bind_map.sampler_count) *
+                     sizeof(struct anv_pipeline_binding), 8);
+      bin.bind_map.surface_to_descriptor = bindings;
+      bin.bind_map.sampler_to_descriptor = bindings + bin.bind_map.surface_count;
+
+      const void *kernel_data = p;
+      p += align_u32(bin.kernel_size, 8);
+
+      if (p > end)
+         break;
+
+      anv_pipeline_cache_add_shader(cache, key_data, key.size,
+                                    kernel_data, bin.kernel_size,
+                                    prog_data, bin.prog_data_size,
+                                    &bin.bind_map);
    }
 }
 
+static uint32_t
+sha1_hash_func(const void *void_key)
+{
+   const struct shader_bin_key *key = void_key;
+   return _mesa_hash_data(key->data, key->size);
+}
+
+static bool
+sha1_compare_func(const void *void_a, const void *void_b)
+{
+   const struct shader_bin_key *a = void_a, *b = void_b;
+   if (a->size != b->size)
+      return false;
+
+   return memcmp(a->data, b->data, a->size) == 0;
+}
+
 VkResult anv_CreatePipelineCache(
     VkDevice                                    _device,
     const VkPipelineCacheCreateInfo*            pCreateInfo,
@@ -544,7 +380,15 @@ VkResult anv_CreatePipelineCache(
    if (cache == NULL)
       return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
 
-   anv_pipeline_cache_init(cache, device);
+   cache->device = device;
+   pthread_mutex_init(&cache->mutex, NULL);
+
+   if (env_var_as_boolean("ANV_ENABLE_PIPELINE_CACHE", true)) {
+      cache->cache = _mesa_hash_table_create(NULL, sha1_hash_func,
+                                             sha1_compare_func);
+   } else {
+      cache->cache = NULL;
+   }
 
    if (pCreateInfo->initialDataSize > 0)
       anv_pipeline_cache_load(cache,
@@ -564,7 +408,19 @@ void anv_DestroyPipelineCache(
    ANV_FROM_HANDLE(anv_device, device, _device);
    ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache);
 
-   anv_pipeline_cache_finish(cache);
+   pthread_mutex_destroy(&cache->mutex);
+
+   if (cache->cache) {
+      /* This is a bit unfortunate.  In order to keep things from randomly
+       * going away, the shader cache has to hold a reference to all shader
+       * binaries it contains.  We unref them when we destroy the cache.
+       */
+      struct hash_entry *entry;
+      hash_table_foreach(cache->cache, entry)
+         anv_shader_bin_unref(device, entry->data);
+
+      _mesa_hash_table_destroy(cache->cache, NULL);
+   }
 
    anv_free2(&device->alloc, pAllocator, cache);
 }
@@ -579,9 +435,16 @@ VkResult anv_GetPipelineCacheData(
    ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache);
    struct cache_header *header;
 
-   const size_t size = sizeof(*header) + cache->total_size;
-
    if (pData == NULL) {
+      size_t size = align_u32(sizeof(*header), 8) +
+                    align_u32(sizeof(uint32_t), 8);
+
+      if (cache->cache) {
+         struct hash_entry *entry;
+         hash_table_foreach(cache->cache, entry)
+            size += anv_shader_bin_data_size(entry->data);
+      }
+
       *pDataSize = size;
       return VK_SUCCESS;
    }
@@ -598,25 +461,25 @@ VkResult anv_GetPipelineCacheData(
    header->vendor_id = 0x8086;
    header->device_id = device->chipset_id;
    anv_device_get_cache_uuid(header->uuid);
-   p += header->header_size;
-
-   struct cache_entry *entry;
-   for (uint32_t i = 0; i < cache->table_size; i++) {
-      if (cache->hash_table[i] == ~0)
-         continue;
+   p += align_u32(header->header_size, 8);
 
-      entry = cache->program_stream.block_pool->map + cache->hash_table[i];
-      const uint32_t size = entry_size(entry);
-      if (end < p + size + entry->kernel_size)
-         break;
+   uint32_t *count = p;
+   p += align_u32(sizeof(*count), 8);
+   *count = 0;
 
-      memcpy(p, entry, size);
-      p += size;
+   if (cache->cache) {
+      struct hash_entry *entry;
+      hash_table_foreach(cache->cache, entry) {
+         struct anv_shader_bin *shader = entry->data;
+         size_t data_size = anv_shader_bin_data_size(entry->data);
+         if (p + data_size > end)
+            break;
 
-      void *kernel = (void *) entry + align_u32(size, 64);
+         anv_shader_bin_write_data(shader, p);
+         p += data_size;
 
-      memcpy(p, kernel, entry->kernel_size);
-      p += entry->kernel_size;
+         (*count)++;
+      }
    }
 
    *pDataSize = p - pData;
@@ -624,25 +487,6 @@ VkResult anv_GetPipelineCacheData(
    return VK_SUCCESS;
 }
 
-static void
-anv_pipeline_cache_merge(struct anv_pipeline_cache *dst,
-                         struct anv_pipeline_cache *src)
-{
-   for (uint32_t i = 0; i < src->table_size; i++) {
-      const uint32_t offset = src->hash_table[i];
-      if (offset == ~0)
-         continue;
-
-      struct cache_entry *entry =
-         src->program_stream.block_pool->map + offset;
-
-      if (anv_pipeline_cache_search(dst, entry->sha1, NULL, NULL) != NO_KERNEL)
-         continue;
-
-      anv_pipeline_cache_add_entry(dst, entry, offset);
-   }
-}
-
 VkResult anv_MergePipelineCaches(
     VkDevice                                    _device,
     VkPipelineCache                             destCache,
@@ -651,10 +495,23 @@ VkResult anv_MergePipelineCaches(
 {
    ANV_FROM_HANDLE(anv_pipeline_cache, dst, destCache);
 
+   if (!dst->cache)
+      return VK_SUCCESS;
+
    for (uint32_t i = 0; i < srcCacheCount; i++) {
       ANV_FROM_HANDLE(anv_pipeline_cache, src, pSrcCaches[i]);
+      if (!src->cache)
+         continue;
 
-      anv_pipeline_cache_merge(dst, src);
+      struct hash_entry *entry;
+      hash_table_foreach(src->cache, entry) {
+         struct anv_shader_bin *bin = entry->data;
+         if (_mesa_hash_table_search(dst->cache, anv_shader_bin_get_key(bin)))
+            continue;
+
+         anv_shader_bin_ref(bin);
+         _mesa_hash_table_insert(dst->cache, anv_shader_bin_get_key(bin), bin);
+      }
    }
 
    return VK_SUCCESS;
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index 4b44597..491d695 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -658,31 +658,22 @@ struct anv_queue {
 
 struct anv_pipeline_cache {
    struct anv_device *                          device;
-   struct anv_state_stream                      program_stream;
    pthread_mutex_t                              mutex;
 
-   uint32_t                                     total_size;
-   uint32_t                                     table_size;
-   uint32_t                                     kernel_count;
-   uint32_t *                                   hash_table;
+   struct hash_table *                          cache;
 };
 
 struct anv_pipeline_bind_map;
 
-void anv_pipeline_cache_init(struct anv_pipeline_cache *cache,
-                             struct anv_device *device);
-void anv_pipeline_cache_finish(struct anv_pipeline_cache *cache);
-uint32_t anv_pipeline_cache_search(struct anv_pipeline_cache *cache,
-                                   const unsigned char *sha1,
-                                   const struct brw_stage_prog_data **prog_data,
-                                   struct anv_pipeline_bind_map *map);
-uint32_t anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache,
-                                          const unsigned char *sha1,
-                                          const void *kernel,
-                                          size_t kernel_size,
-                                          const struct brw_stage_prog_data **prog_data,
-                                          size_t prog_data_size,
-                                          struct anv_pipeline_bind_map *map);
+struct anv_shader_bin *
+anv_pipeline_cache_search(struct anv_pipeline_cache *cache,
+                          const void *key, uint32_t key_size);
+struct anv_shader_bin *
+anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache,
+                                 const void *key_data, uint32_t key_size,
+                                 const void *kernel_data, uint32_t kernel_size,
+                                 const void *prog_data, uint32_t prog_data_size,
+                                 const struct anv_pipeline_bind_map *bind_map);
 
 struct anv_device {
     VK_LOADER_DATA                              _loader_data;
@@ -705,7 +696,6 @@ struct anv_device {
 
     struct anv_block_pool                       instruction_block_pool;
     struct anv_state_pool                       instruction_state_pool;
-    struct anv_pipeline_cache                   default_pipeline_cache;
 
     struct anv_block_pool                       surface_state_block_pool;
     struct anv_state_pool                       surface_state_pool;
@@ -1519,12 +1509,12 @@ struct anv_pipeline {
    struct anv_dynamic_state                     dynamic_state;
 
    struct anv_pipeline_layout *                 layout;
-   struct anv_pipeline_bind_map                 bindings[MESA_SHADER_STAGES];
 
    bool                                         use_repclear;
    bool                                         needs_data_cache;
 
-   const struct brw_stage_prog_data *           prog_data[MESA_SHADER_STAGES];
+   struct anv_shader_bin *                      shaders[MESA_SHADER_STAGES];
+
    struct {
       uint32_t                                  start[MESA_SHADER_GEOMETRY + 1];
       uint32_t                                  size[MESA_SHADER_GEOMETRY + 1];
@@ -1574,29 +1564,22 @@ anv_pipeline_has_stage(const struct anv_pipeline *pipeline,
    return (pipeline->active_stages & mesa_to_vk_shader_stage(stage)) != 0;
 }
 
-static inline const struct brw_vs_prog_data *
-get_vs_prog_data(struct anv_pipeline *pipeline)
-{
-   return (const struct brw_vs_prog_data *) pipeline->prog_data[MESA_SHADER_VERTEX];
-}
-
-static inline const struct brw_gs_prog_data *
-get_gs_prog_data(struct anv_pipeline *pipeline)
-{
-   return (const struct brw_gs_prog_data *) pipeline->prog_data[MESA_SHADER_GEOMETRY];
-}
-
-static inline const struct brw_wm_prog_data *
-get_wm_prog_data(struct anv_pipeline *pipeline)
-{
-   return (const struct brw_wm_prog_data *) pipeline->prog_data[MESA_SHADER_FRAGMENT];
+#define ANV_DECL_GET_PROG_DATA_FUNC(prefix, stage)                   \
+static inline const struct brw_##prefix##_prog_data *                \
+get_##prefix##_prog_data(struct anv_pipeline *pipeline)              \
+{                                                                    \
+   if (anv_pipeline_has_stage(pipeline, stage)) {                    \
+      return (const struct brw_##prefix##_prog_data *)               \
+             anv_shader_bin_get_prog_data(pipeline->shaders[stage]); \
+   } else {                                                          \
+      return NULL;                                                   \
+   }                                                                 \
 }
 
-static inline const struct brw_cs_prog_data *
-get_cs_prog_data(struct anv_pipeline *pipeline)
-{
-   return (const struct brw_cs_prog_data *) pipeline->prog_data[MESA_SHADER_COMPUTE];
-}
+ANV_DECL_GET_PROG_DATA_FUNC(vs, MESA_SHADER_VERTEX)
+ANV_DECL_GET_PROG_DATA_FUNC(gs, MESA_SHADER_GEOMETRY)
+ANV_DECL_GET_PROG_DATA_FUNC(wm, MESA_SHADER_FRAGMENT)
+ANV_DECL_GET_PROG_DATA_FUNC(cs, MESA_SHADER_COMPUTE)
 
 struct anv_graphics_pipeline_create_info {
    /**
diff --git a/src/intel/vulkan/genX_l3.c b/src/intel/vulkan/genX_l3.c
index 8b3b8ac..a74071c 100644
--- a/src/intel/vulkan/genX_l3.c
+++ b/src/intel/vulkan/genX_l3.c
@@ -318,7 +318,8 @@ get_pipeline_state_l3_weights(const struct anv_pipeline *pipeline)
       if (!anv_pipeline_has_stage(pipeline, i))
          continue;
 
-      const struct brw_stage_prog_data *prog_data = pipeline->prog_data[i];
+      const struct brw_stage_prog_data *prog_data =
+         anv_shader_bin_get_prog_data(pipeline->shaders[i]);
 
       needs_dc |= pipeline->needs_data_cache;
       needs_slm |= prog_data->total_shared;
diff --git a/src/intel/vulkan/genX_pipeline.c b/src/intel/vulkan/genX_pipeline.c
index 5cbcfd2..5ea5f9b 100644
--- a/src/intel/vulkan/genX_pipeline.c
+++ b/src/intel/vulkan/genX_pipeline.c
@@ -63,8 +63,7 @@ genX(compute_pipeline_create)(
    /* When we free the pipeline, we detect stages based on the NULL status
     * of various prog_data pointers.  Make them NULL by default.
     */
-   memset(pipeline->prog_data, 0, sizeof(pipeline->prog_data));
-   memset(pipeline->bindings, 0, sizeof(pipeline->bindings));
+   memset(pipeline->shaders, 0, sizeof(pipeline->shaders));
 
    pipeline->vs_simd8 = NO_KERNEL;
    pipeline->vs_vec4 = NO_KERNEL;
diff --git a/src/intel/vulkan/genX_pipeline_util.h b/src/intel/vulkan/genX_pipeline_util.h
index 62fd01c..6518fae 100644
--- a/src/intel/vulkan/genX_pipeline_util.h
+++ b/src/intel/vulkan/genX_pipeline_util.h
@@ -671,7 +671,7 @@ emit_cb_state(struct anv_pipeline *pipeline,
    uint32_t surface_count = 0;
    struct anv_pipeline_bind_map *map;
    if (anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) {
-      map = &pipeline->bindings[MESA_SHADER_FRAGMENT];
+      map = &pipeline->shaders[MESA_SHADER_FRAGMENT]->bind_map;
       surface_count = map->surface_count;
    }
 
-- 
2.5.0.400.gff86faf