[Mesa-dev] [PATCH 6/6] anv: Rework pipeline caching

Thu Aug 25 22:28:18 UTC 2016

The original pipeline cache that Kristian wrote was based on a now-false
premise that the shaders can be stored in the pipeline cache.  The Vulkan
1.0 spec explicitly states that the pipeline cache object is transient and
you are allowed to delete it after using it to create a pipeline with no
ill effects.  As nice as Kristian's design was, it doesn't jibe with the
expectation provided by the Vulkan spec.

The new pipeline cache uses reference-counted anv_shader_bin objects that
are backed by a large state pool.  The cache itself is just a hash table
mapping SHA1 hashes to anv_shader_bin objects.  This has the added
advantage of removing one more hand-rolled hash table from mesa.

Signed-off-by: Jason Ekstrand <jason at jlekstrand.net>
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=97476
---
 src/intel/vulkan/anv_cmd_buffer.c     |   6 +-
 src/intel/vulkan/anv_device.c         |   1 -
 src/intel/vulkan/anv_pipeline.c       | 145 +++++-----
 src/intel/vulkan/anv_pipeline_cache.c | 487 ++++++++++++----------------------
 src/intel/vulkan/anv_private.h        |  56 ++--
 src/intel/vulkan/genX_l3.c            |   3 +-
 src/intel/vulkan/genX_pipeline.c      |   3 +-
 src/intel/vulkan/genX_pipeline_util.h |   2 +-
 8 files changed, 277 insertions(+), 426 deletions(-)

diff --git a/src/intel/vulkan/anv_cmd_buffer.c b/src/intel/vulkan/anv_cmd_buffer.c
index 6c082aa..d27c707 100644
--- a/src/intel/vulkan/anv_cmd_buffer.c
+++ b/src/intel/vulkan/anv_cmd_buffer.c
@@ -757,7 +757,7 @@ anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
       return VK_SUCCESS;
    }
 
-   struct anv_pipeline_bind_map *map = &pipeline->bindings[stage];
+   struct anv_pipeline_bind_map *map = &pipeline->shaders[stage]->bind_map;
    if (bias + map->surface_count == 0) {
       *bt_state = (struct anv_state) { 0, };
       return VK_SUCCESS;
@@ -922,7 +922,7 @@ anv_cmd_buffer_emit_samplers(struct anv_cmd_buffer *cmd_buffer,
       return VK_SUCCESS;
    }
 
-   struct anv_pipeline_bind_map *map = &pipeline->bindings[stage];
+   struct anv_pipeline_bind_map *map = &pipeline->shaders[stage]->bind_map;
    if (map->sampler_count == 0) {
       *state = (struct anv_state) { 0, };
       return VK_SUCCESS;
@@ -1096,7 +1096,7 @@ anv_cmd_buffer_push_constants(struct anv_cmd_buffer *cmd_buffer,
    struct anv_push_constants *data =
       cmd_buffer->state.push_constants[stage];
    const struct brw_stage_prog_data *prog_data =
-      cmd_buffer->state.pipeline->prog_data[stage];
+      &cmd_buffer->state.pipeline->shaders[stage]->prog_data.base;
 
    /* If we don't actually have any push constants, bail. */
    if (data == NULL || prog_data == NULL || prog_data->nr_params == 0)
diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
index 765dc6e..cf63993 100644
--- a/src/intel/vulkan/anv_device.c
+++ b/src/intel/vulkan/anv_device.c
@@ -881,7 +881,6 @@ VkResult anv_CreateDevice(
    anv_block_pool_init(&device->instruction_block_pool, device, 128 * 1024);
    anv_state_pool_init(&device->instruction_state_pool,
                        &device->instruction_block_pool);
-   anv_pipeline_cache_init(&device->default_pipeline_cache, device);
 
    anv_block_pool_init(&device->surface_state_block_pool, device, 4096);
 
diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c
index 933e45b..af1393d 100644
--- a/src/intel/vulkan/anv_pipeline.c
+++ b/src/intel/vulkan/anv_pipeline.c
@@ -204,6 +204,12 @@ void anv_DestroyPipeline(
                          pAllocator ? pAllocator : &device->alloc);
    if (pipeline->blend_state.map)
       anv_state_pool_free(&device->dynamic_state_pool, pipeline->blend_state);
+
+   for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
+      if (pipeline->shaders[s])
+         anv_shader_bin_unref(device, pipeline->shaders[s]);
+   }
+
    anv_free2(&device->alloc, pAllocator, pipeline);
 }
 
@@ -393,15 +399,34 @@ anv_fill_binding_table(struct brw_stage_prog_data *prog_data, unsigned bias)
    prog_data->binding_table.image_start = bias;
 }
 
+static struct anv_shader_bin *
+anv_pipeline_upload_kernel(struct anv_pipeline *pipeline,
+                           struct anv_pipeline_cache *cache,
+                           gl_shader_stage stage,
+                           const unsigned char *src_sha1,
+                           const void *kernel,
+                           size_t kernel_size,
+                           const struct brw_stage_prog_data *prog_data,
+                           const struct anv_pipeline_bind_map *bind_map)
+{
+   if (cache) {
+      return anv_pipeline_cache_upload_kernel(cache, stage, src_sha1,
+                                              kernel, kernel_size,
+                                              prog_data, bind_map);
+   } else {
+      return anv_shader_bin_create(pipeline->device, stage, src_sha1,
+                                   kernel, kernel_size, prog_data, bind_map);
+   }
+}
+
+
 static void
 anv_pipeline_add_compiled_stage(struct anv_pipeline *pipeline,
                                 gl_shader_stage stage,
-                                const struct brw_stage_prog_data *prog_data,
-                                struct anv_pipeline_bind_map *map)
+                                struct anv_shader_bin *shader)
 {
-   pipeline->prog_data[stage] = prog_data;
+   pipeline->shaders[stage] = shader;
    pipeline->active_stages |= mesa_to_vk_shader_stage(stage);
-   pipeline->bindings[stage] = *map;
 }
 
 static VkResult
@@ -414,21 +439,20 @@ anv_pipeline_compile_vs(struct anv_pipeline *pipeline,
 {
    const struct brw_compiler *compiler =
       pipeline->device->instance->physicalDevice.compiler;
-   const struct brw_stage_prog_data *stage_prog_data;
    struct anv_pipeline_bind_map map;
    struct brw_vs_prog_key key;
-   uint32_t kernel = NO_KERNEL;
+   struct anv_shader_bin *bin = NULL;
    unsigned char sha1[20];
 
    populate_vs_prog_key(&pipeline->device->info, &key);
 
-   if (module->size > 0) {
+   if (cache) {
       anv_hash_shader(sha1, &key, sizeof(key), module, entrypoint,
                       pipeline->layout, spec_info);
-      kernel = anv_pipeline_cache_search(cache, sha1, &stage_prog_data, &map);
+      bin = anv_pipeline_cache_search(cache, sha1);
    }
 
-   if (kernel == NO_KERNEL) {
+   if (bin == NULL) {
       struct brw_vs_prog_data prog_data = { 0, };
       struct anv_pipeline_binding surface_to_descriptor[256];
       struct anv_pipeline_binding sampler_to_descriptor[256];
@@ -467,28 +491,25 @@ anv_pipeline_compile_vs(struct anv_pipeline *pipeline,
          return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
       }
 
-      stage_prog_data = &prog_data.base.base;
-      kernel = anv_pipeline_cache_upload_kernel(cache,
-                                                module->size > 0 ? sha1 : NULL,
-                                                shader_code, code_size,
-                                                &stage_prog_data, sizeof(prog_data),
-                                                &map);
+      bin = anv_pipeline_upload_kernel(pipeline, cache,
+                                       MESA_SHADER_VERTEX, sha1,
+                                       shader_code, code_size,
+                                       &prog_data.base.base, &map);
+
       ralloc_free(mem_ctx);
    }
 
-   const struct brw_vs_prog_data *vs_prog_data =
-      (const struct brw_vs_prog_data *) stage_prog_data;
+   const struct brw_vs_prog_data *vs_prog_data = &bin->prog_data.vs;
 
    if (vs_prog_data->base.dispatch_mode == DISPATCH_MODE_SIMD8) {
-      pipeline->vs_simd8 = kernel;
+      pipeline->vs_simd8 = bin->kernel.offset;
       pipeline->vs_vec4 = NO_KERNEL;
    } else {
       pipeline->vs_simd8 = NO_KERNEL;
-      pipeline->vs_vec4 = kernel;
+      pipeline->vs_vec4 = bin->kernel.offset;
    }
 
-   anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_VERTEX,
-                                   stage_prog_data, &map);
+   anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_VERTEX, bin);
 
    return VK_SUCCESS;
 }
@@ -503,21 +524,20 @@ anv_pipeline_compile_gs(struct anv_pipeline *pipeline,
 {
    const struct brw_compiler *compiler =
       pipeline->device->instance->physicalDevice.compiler;
-   const struct brw_stage_prog_data *stage_prog_data;
    struct anv_pipeline_bind_map map;
    struct brw_gs_prog_key key;
-   uint32_t kernel = NO_KERNEL;
+   struct anv_shader_bin *bin = NULL;
    unsigned char sha1[20];
 
    populate_gs_prog_key(&pipeline->device->info, &key);
 
-   if (module->size > 0) {
+   if (cache) {
       anv_hash_shader(sha1, &key, sizeof(key), module, entrypoint,
                       pipeline->layout, spec_info);
-      kernel = anv_pipeline_cache_search(cache, sha1, &stage_prog_data, &map);
+      bin = anv_pipeline_cache_search(cache, sha1);
    }
 
-   if (kernel == NO_KERNEL) {
+   if (bin == NULL) {
       struct brw_gs_prog_data prog_data = { 0, };
       struct anv_pipeline_binding surface_to_descriptor[256];
       struct anv_pipeline_binding sampler_to_descriptor[256];
@@ -555,20 +575,17 @@ anv_pipeline_compile_gs(struct anv_pipeline *pipeline,
       }
 
       /* TODO: SIMD8 GS */
-      stage_prog_data = &prog_data.base.base;
-      kernel = anv_pipeline_cache_upload_kernel(cache,
-                                                module->size > 0 ? sha1 : NULL,
-                                                shader_code, code_size,
-                                                &stage_prog_data, sizeof(prog_data),
-                                                &map);
+      bin = anv_pipeline_upload_kernel(pipeline, cache,
+                                       MESA_SHADER_GEOMETRY, sha1,
+                                       shader_code, code_size,
+                                       &prog_data.base.base, &map);
 
       ralloc_free(mem_ctx);
    }
 
-   pipeline->gs_kernel = kernel;
+   pipeline->gs_kernel = bin->kernel.offset;
 
-   anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_GEOMETRY,
-                                   stage_prog_data, &map);
+   anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_GEOMETRY, bin);
 
    return VK_SUCCESS;
 }
@@ -584,21 +601,20 @@ anv_pipeline_compile_fs(struct anv_pipeline *pipeline,
 {
    const struct brw_compiler *compiler =
       pipeline->device->instance->physicalDevice.compiler;
-   const struct brw_stage_prog_data *stage_prog_data;
    struct anv_pipeline_bind_map map;
    struct brw_wm_prog_key key;
+   struct anv_shader_bin *bin = NULL;
    unsigned char sha1[20];
 
    populate_wm_prog_key(&pipeline->device->info, info, extra, &key);
 
-   if (module->size > 0) {
+   if (cache) {
       anv_hash_shader(sha1, &key, sizeof(key), module, entrypoint,
                       pipeline->layout, spec_info);
-      pipeline->ps_ksp0 =
-         anv_pipeline_cache_search(cache, sha1, &stage_prog_data, &map);
+      bin = anv_pipeline_cache_search(cache, sha1);
    }
 
-   if (pipeline->ps_ksp0 == NO_KERNEL) {
+   if (bin == NULL) {
       struct brw_wm_prog_data prog_data = { 0, };
       struct anv_pipeline_binding surface_to_descriptor[256];
       struct anv_pipeline_binding sampler_to_descriptor[256];
@@ -687,19 +703,17 @@ anv_pipeline_compile_fs(struct anv_pipeline *pipeline,
          return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
       }
 
-      stage_prog_data = &prog_data.base;
-      pipeline->ps_ksp0 =
-         anv_pipeline_cache_upload_kernel(cache,
-                                          module->size > 0 ? sha1 : NULL,
-                                          shader_code, code_size,
-                                                &stage_prog_data, sizeof(prog_data),
-                                                &map);
+      bin = anv_pipeline_upload_kernel(pipeline, cache,
+                                       MESA_SHADER_FRAGMENT, sha1,
+                                       shader_code, code_size,
+                                       &prog_data.base, &map);
 
       ralloc_free(mem_ctx);
    }
 
-   anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_FRAGMENT,
-                                   stage_prog_data, &map);
+   pipeline->ps_ksp0 = bin->kernel.offset;
+
+   anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_FRAGMENT, bin);
 
    return VK_SUCCESS;
 }
@@ -714,21 +728,20 @@ anv_pipeline_compile_cs(struct anv_pipeline *pipeline,
 {
    const struct brw_compiler *compiler =
       pipeline->device->instance->physicalDevice.compiler;
-   const struct brw_stage_prog_data *stage_prog_data;
    struct anv_pipeline_bind_map map;
    struct brw_cs_prog_key key;
-   uint32_t kernel = NO_KERNEL;
+   struct anv_shader_bin *bin = NULL;
    unsigned char sha1[20];
 
    populate_cs_prog_key(&pipeline->device->info, &key);
 
-   if (module->size > 0) {
+   if (cache) {
       anv_hash_shader(sha1, &key, sizeof(key), module, entrypoint,
                       pipeline->layout, spec_info);
-      kernel = anv_pipeline_cache_search(cache, sha1, &stage_prog_data, &map);
+      bin = anv_pipeline_cache_search(cache, sha1);
    }
 
-   if (module->size == 0 || kernel == NO_KERNEL) {
+   if (bin == NULL) {
       struct brw_cs_prog_data prog_data = { 0, };
       struct anv_pipeline_binding surface_to_descriptor[256];
       struct anv_pipeline_binding sampler_to_descriptor[256];
@@ -760,20 +773,17 @@ anv_pipeline_compile_cs(struct anv_pipeline *pipeline,
          return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
       }
 
-      stage_prog_data = &prog_data.base;
-      kernel = anv_pipeline_cache_upload_kernel(cache,
-                                                module->size > 0 ? sha1 : NULL,
-                                                shader_code, code_size,
-                                                &stage_prog_data, sizeof(prog_data),
-                                                &map);
+      bin = anv_pipeline_upload_kernel(pipeline, cache,
+                                       MESA_SHADER_COMPUTE, sha1,
+                                       shader_code, code_size,
+                                       &prog_data.base, &map);
 
       ralloc_free(mem_ctx);
    }
 
-   pipeline->cs_simd = kernel;
+   pipeline->cs_simd = bin->kernel.offset;
 
-   anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_COMPUTE,
-                                   stage_prog_data, &map);
+   anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_COMPUTE, bin);
 
    return VK_SUCCESS;
 }
@@ -1167,8 +1177,7 @@ anv_pipeline_init(struct anv_pipeline *pipeline,
    /* When we free the pipeline, we detect stages based on the NULL status
     * of various prog_data pointers.  Make them NULL by default.
     */
-   memset(pipeline->prog_data, 0, sizeof(pipeline->prog_data));
-   memset(pipeline->bindings, 0, sizeof(pipeline->bindings));
+   memset(pipeline->shaders, 0, sizeof(pipeline->shaders));
 
    pipeline->vs_simd8 = NO_KERNEL;
    pipeline->vs_vec4 = NO_KERNEL;
@@ -1283,9 +1292,6 @@ anv_graphics_pipeline_create(
    ANV_FROM_HANDLE(anv_device, device, _device);
    ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache);
 
-   if (cache == NULL)
-      cache = &device->default_pipeline_cache;
-
    switch (device->info.gen) {
    case 7:
       if (device->info.is_haswell)
@@ -1339,9 +1345,6 @@ static VkResult anv_compute_pipeline_create(
    ANV_FROM_HANDLE(anv_device, device, _device);
    ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache);
 
-   if (cache == NULL)
-      cache = &device->default_pipeline_cache;
-
    switch (device->info.gen) {
    case 7:
       if (device->info.is_haswell)
diff --git a/src/intel/vulkan/anv_pipeline_cache.c b/src/intel/vulkan/anv_pipeline_cache.c
index a2b9d3b..c57be52 100644
--- a/src/intel/vulkan/anv_pipeline_cache.c
+++ b/src/intel/vulkan/anv_pipeline_cache.c
@@ -22,6 +22,7 @@
  */
 
 #include "util/mesa-sha1.h"
+#include "util/hash_table.h"
 #include "util/debug.h"
 #include "anv_private.h"
 
@@ -124,69 +125,6 @@ anv_shader_bin_destroy(struct anv_device *device,
  */
 
 void
-anv_pipeline_cache_init(struct anv_pipeline_cache *cache,
-                        struct anv_device *device)
-{
-   cache->device = device;
-   anv_state_stream_init(&cache->program_stream,
-                         &device->instruction_block_pool);
-   pthread_mutex_init(&cache->mutex, NULL);
-
-   cache->kernel_count = 0;
-   cache->total_size = 0;
-   cache->table_size = 1024;
-   const size_t byte_size = cache->table_size * sizeof(cache->hash_table[0]);
-   cache->hash_table = malloc(byte_size);
-
-   /* We don't consider allocation failure fatal, we just start with a 0-sized
-    * cache. */
-   if (cache->hash_table == NULL ||
-       !env_var_as_boolean("ANV_ENABLE_PIPELINE_CACHE", true))
-      cache->table_size = 0;
-   else
-      memset(cache->hash_table, 0xff, byte_size);
-}
-
-void
-anv_pipeline_cache_finish(struct anv_pipeline_cache *cache)
-{
-   anv_state_stream_finish(&cache->program_stream);
-   pthread_mutex_destroy(&cache->mutex);
-   free(cache->hash_table);
-}
-
-struct cache_entry {
-   unsigned char sha1[20];
-   uint32_t prog_data_size;
-   uint32_t kernel_size;
-   uint32_t surface_count;
-   uint32_t sampler_count;
-   uint32_t image_count;
-
-   char prog_data[0];
-
-   /* kernel follows prog_data at next 64 byte aligned address */
-};
-
-static uint32_t
-entry_size(struct cache_entry *entry)
-{
-   /* This returns the number of bytes needed to serialize an entry, which
-    * doesn't include the alignment padding bytes.
-    */
-
-   struct brw_stage_prog_data *prog_data = (void *)entry->prog_data;
-   const uint32_t param_size =
-      prog_data->nr_params * sizeof(*prog_data->param);
-
-   const uint32_t map_size =
-      entry->surface_count * sizeof(struct anv_pipeline_binding) +
-      entry->sampler_count * sizeof(struct anv_pipeline_binding);
-
-   return sizeof(*entry) + entry->prog_data_size + param_size + map_size;
-}
-
-void
 anv_hash_shader(unsigned char *hash, const void *key, size_t key_size,
                 struct anv_shader_module *module,
                 const char *entrypoint,
@@ -212,221 +150,98 @@ anv_hash_shader(unsigned char *hash, const void *key, size_t key_size,
    _mesa_sha1_final(ctx, hash);
 }
 
-static uint32_t
+static struct anv_shader_bin *
 anv_pipeline_cache_search_unlocked(struct anv_pipeline_cache *cache,
-                                   const unsigned char *sha1,
-                                   const struct brw_stage_prog_data **prog_data,
-                                   struct anv_pipeline_bind_map *map)
+                                   const unsigned char *sha1)
 {
-   const uint32_t mask = cache->table_size - 1;
-   const uint32_t start = (*(uint32_t *) sha1);
-
-   for (uint32_t i = 0; i < cache->table_size; i++) {
-      const uint32_t index = (start + i) & mask;
-      const uint32_t offset = cache->hash_table[index];
-
-      if (offset == ~0)
-         return NO_KERNEL;
-
-      struct cache_entry *entry =
-         cache->program_stream.block_pool->map + offset;
-      if (memcmp(entry->sha1, sha1, sizeof(entry->sha1)) == 0) {
-         if (prog_data) {
-            assert(map);
-            void *p = entry->prog_data;
-            *prog_data = p;
-            p += entry->prog_data_size;
-            p += (*prog_data)->nr_params * sizeof(*(*prog_data)->param);
-            map->surface_count = entry->surface_count;
-            map->sampler_count = entry->sampler_count;
-            map->image_count = entry->image_count;
-            map->surface_to_descriptor = p;
-            p += map->surface_count * sizeof(struct anv_pipeline_binding);
-            map->sampler_to_descriptor = p;
-         }
-
-         return offset + align_u32(entry_size(entry), 64);
-      }
-   }
+   if (cache->cache == NULL)
+      return NULL;
 
-   /* This can happen if the pipeline cache is disabled via
-    * ANV_ENABLE_PIPELINE_CACHE=false
-    */
-   return NO_KERNEL;
+   struct hash_entry *entry = _mesa_hash_table_search(cache->cache, sha1);
+   if (entry)
+      return entry->data;
+   else
+      return NULL;
 }
 
-uint32_t
+struct anv_shader_bin *
 anv_pipeline_cache_search(struct anv_pipeline_cache *cache,
-                          const unsigned char *sha1,
-                          const struct brw_stage_prog_data **prog_data,
-                          struct anv_pipeline_bind_map *map)
+                          const unsigned char *sha1)
 {
-   uint32_t kernel;
+   struct anv_shader_bin *bin;
 
    pthread_mutex_lock(&cache->mutex);
 
-   kernel = anv_pipeline_cache_search_unlocked(cache, sha1, prog_data, map);
+   bin = anv_pipeline_cache_search_unlocked(cache, sha1);
 
    pthread_mutex_unlock(&cache->mutex);
 
-   return kernel;
-}
-
-static void
-anv_pipeline_cache_set_entry(struct anv_pipeline_cache *cache,
-                             struct cache_entry *entry, uint32_t entry_offset)
-{
-   const uint32_t mask = cache->table_size - 1;
-   const uint32_t start = (*(uint32_t *) entry->sha1);
-
-   /* We'll always be able to insert when we get here. */
-   assert(cache->kernel_count < cache->table_size / 2);
-
-   for (uint32_t i = 0; i < cache->table_size; i++) {
-      const uint32_t index = (start + i) & mask;
-      if (cache->hash_table[index] == ~0) {
-         cache->hash_table[index] = entry_offset;
-         break;
-      }
-   }
-
-   cache->total_size += entry_size(entry) + entry->kernel_size;
-   cache->kernel_count++;
-}
-
-static VkResult
-anv_pipeline_cache_grow(struct anv_pipeline_cache *cache)
-{
-   const uint32_t table_size = cache->table_size * 2;
-   const uint32_t old_table_size = cache->table_size;
-   const size_t byte_size = table_size * sizeof(cache->hash_table[0]);
-   uint32_t *table;
-   uint32_t *old_table = cache->hash_table;
-
-   table = malloc(byte_size);
-   if (table == NULL)
-      return VK_ERROR_OUT_OF_HOST_MEMORY;
-
-   cache->hash_table = table;
-   cache->table_size = table_size;
-   cache->kernel_count = 0;
-   cache->total_size = 0;
-
-   memset(cache->hash_table, 0xff, byte_size);
-   for (uint32_t i = 0; i < old_table_size; i++) {
-      const uint32_t offset = old_table[i];
-      if (offset == ~0)
-         continue;
-
-      struct cache_entry *entry =
-         cache->program_stream.block_pool->map + offset;
-      anv_pipeline_cache_set_entry(cache, entry, offset);
-   }
-
-   free(old_table);
+   /* We increment refcount before handing it to the caller */
+   if (bin)
+      anv_shader_bin_ref(bin);
 
-   return VK_SUCCESS;
+   return bin;
 }
 
-static void
-anv_pipeline_cache_add_entry(struct anv_pipeline_cache *cache,
-                             struct cache_entry *entry, uint32_t entry_offset)
+static struct anv_shader_bin *
+anv_pipeline_cache_add_kernel(struct anv_pipeline_cache *cache,
+                              gl_shader_stage stage,
+                              const unsigned char *src_sha1,
+                              const void *kernel,
+                              size_t kernel_size,
+                              const struct brw_stage_prog_data *prog_data,
+                              const struct anv_pipeline_bind_map *bind_map)
 {
-   if (cache->kernel_count == cache->table_size / 2)
-      anv_pipeline_cache_grow(cache);
-
-   /* Failing to grow that hash table isn't fatal, but may mean we don't
-    * have enough space to add this new kernel. Only add it if there's room.
-    */
-   if (cache->kernel_count < cache->table_size / 2)
-      anv_pipeline_cache_set_entry(cache, entry, entry_offset);
-}
-
-uint32_t
-anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache,
-                                 const unsigned char *sha1,
-                                 const void *kernel, size_t kernel_size,
-                                 const struct brw_stage_prog_data **prog_data,
-                                 size_t prog_data_size,
-                                 struct anv_pipeline_bind_map *map)
-{
-   pthread_mutex_lock(&cache->mutex);
+   assert(cache->cache);
 
    /* Before uploading, check again that another thread didn't upload this
     * shader while we were compiling it.
     */
-   if (sha1) {
-      uint32_t cached_kernel =
-         anv_pipeline_cache_search_unlocked(cache, sha1, prog_data, map);
-      if (cached_kernel != NO_KERNEL) {
-         pthread_mutex_unlock(&cache->mutex);
-         return cached_kernel;
-      }
+   if (src_sha1) {
+      struct anv_shader_bin *cached =
+         anv_pipeline_cache_search_unlocked(cache, src_sha1);
+      if (cached)
+         return cached;
    }
 
-   struct cache_entry *entry;
-
-   assert((*prog_data)->nr_pull_params == 0);
-   assert((*prog_data)->nr_image_params == 0);
-
-   const uint32_t param_size =
-      (*prog_data)->nr_params * sizeof(*(*prog_data)->param);
-
-   const uint32_t map_size =
-      map->surface_count * sizeof(struct anv_pipeline_binding) +
-      map->sampler_count * sizeof(struct anv_pipeline_binding);
-
-   const uint32_t preamble_size =
-      align_u32(sizeof(*entry) + prog_data_size + param_size + map_size, 64);
-
-   const uint32_t size = preamble_size + kernel_size;
-
-   assert(size < cache->program_stream.block_pool->block_size);
-   const struct anv_state state =
-      anv_state_stream_alloc(&cache->program_stream, size, 64);
+   struct anv_shader_bin *bin =
+      anv_shader_bin_create(cache->device, stage, src_sha1,
+                            kernel, kernel_size, prog_data, bind_map);
+   if (!bin)
+      return NULL;
 
-   entry = state.map;
-   entry->prog_data_size = prog_data_size;
-   entry->surface_count = map->surface_count;
-   entry->sampler_count = map->sampler_count;
-   entry->image_count = map->image_count;
-   entry->kernel_size = kernel_size;
+   _mesa_hash_table_insert(cache->cache, bin->src_sha1, bin);
 
-   void *p = entry->prog_data;
-   memcpy(p, *prog_data, prog_data_size);
-   p += prog_data_size;
+   return bin;
+}
 
-   memcpy(p, (*prog_data)->param, param_size);
-   ((struct brw_stage_prog_data *)entry->prog_data)->param = p;
-   p += param_size;
+struct anv_shader_bin *
+anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache,
+                                 gl_shader_stage stage,
+                                 const unsigned char *src_sha1,
+                                 const void *kernel,
+                                 size_t kernel_size,
+                                 const struct brw_stage_prog_data *prog_data,
+                                 const struct anv_pipeline_bind_map *bind_map)
+{
+   if (cache->cache) {
+      pthread_mutex_lock(&cache->mutex);
 
-   memcpy(p, map->surface_to_descriptor,
-          map->surface_count * sizeof(struct anv_pipeline_binding));
-   map->surface_to_descriptor = p;
-   p += map->surface_count * sizeof(struct anv_pipeline_binding);
+      struct anv_shader_bin *bin =
+         anv_pipeline_cache_add_kernel(cache, stage, src_sha1,
+                                       kernel, kernel_size, prog_data, bind_map);
 
-   memcpy(p, map->sampler_to_descriptor,
-          map->sampler_count * sizeof(struct anv_pipeline_binding));
-   map->sampler_to_descriptor = p;
+      pthread_mutex_unlock(&cache->mutex);
 
-   if (sha1) {
-      assert(anv_pipeline_cache_search_unlocked(cache, sha1,
-                                                NULL, NULL) == NO_KERNEL);
+      /* We increment refcount before handing it to the caller */
+      anv_shader_bin_ref(bin);
 
-      memcpy(entry->sha1, sha1, sizeof(entry->sha1));
-      anv_pipeline_cache_add_entry(cache, entry, state.offset);
+      return bin;
+   } else {
+      /* In this case, we're not caching it so the caller owns it entirely */
+      return anv_shader_bin_create(cache->device, stage, src_sha1,
+                                   kernel, kernel_size, prog_data, bind_map);
    }
-
-   pthread_mutex_unlock(&cache->mutex);
-
-   memcpy(state.map + preamble_size, kernel, kernel_size);
-
-   if (!cache->device->info.has_llc)
-      anv_state_clflush(state);
-
-   *prog_data = (const struct brw_stage_prog_data *) entry->prog_data;
-
-   return state.offset + preamble_size;
 }
 
 struct cache_header {
@@ -445,6 +260,9 @@ anv_pipeline_cache_load(struct anv_pipeline_cache *cache,
    struct cache_header header;
    uint8_t uuid[VK_UUID_SIZE];
 
+   if (cache->cache == NULL)
+      return;
+
    if (size < sizeof(header))
       return;
    memcpy(&header, data, sizeof(header));
@@ -463,45 +281,47 @@ anv_pipeline_cache_load(struct anv_pipeline_cache *cache,
    void *end = (void *) data + size;
    void *p = (void *) data + header.header_size;
 
-   while (p < end) {
-      struct cache_entry *entry = p;
-
-      void *data = entry->prog_data;
-
-      /* Make a copy of prog_data so that it's mutable */
-      uint8_t prog_data_tmp[512];
-      assert(entry->prog_data_size <= sizeof(prog_data_tmp));
-      memcpy(prog_data_tmp, data, entry->prog_data_size);
-      struct brw_stage_prog_data *prog_data = (void *)prog_data_tmp;
-      data += entry->prog_data_size;
-
-      prog_data->param = data;
-      data += prog_data->nr_params * sizeof(*prog_data->param);
-
-      struct anv_pipeline_binding *surface_to_descriptor = data;
-      data += entry->surface_count * sizeof(struct anv_pipeline_binding);
-      struct anv_pipeline_binding *sampler_to_descriptor = data;
-      data += entry->sampler_count * sizeof(struct anv_pipeline_binding);
-      void *kernel = data;
-
-      struct anv_pipeline_bind_map map = {
-         .surface_count = entry->surface_count,
-         .sampler_count = entry->sampler_count,
-         .image_count = entry->image_count,
-         .surface_to_descriptor = surface_to_descriptor,
-         .sampler_to_descriptor = sampler_to_descriptor
-      };
-
-      const struct brw_stage_prog_data *const_prog_data = prog_data;
-
-      anv_pipeline_cache_upload_kernel(cache, entry->sha1,
-                                       kernel, entry->kernel_size,
-                                       &const_prog_data,
-                                       entry->prog_data_size, &map);
-      p = kernel + entry->kernel_size;
+   /* Count is the total number of valid entries */
+   uint32_t count;
+   if (p + sizeof(count) >= end)
+      return;
+   memcpy(&count, p, sizeof(count));
+   p += align_u32(sizeof(count), 8);
+
+   for (uint32_t i = 0; i < count; i++) {
+      struct anv_shader_bin *entry = p;
+      if (p + sizeof(entry) >= end)
+         return;
+      const size_t entry_size = anv_shader_bin_size(&entry->bind_map);
+
+      struct anv_pipeline_bind_map bind_map = entry->bind_map;
+      bind_map.surface_to_descriptor = entry->bindings;
+      bind_map.sampler_to_descriptor = entry->bindings + bind_map.surface_count;
+      p += align_u32(entry_size, 8);
+
+      void *kernel = p;
+      p += align_u32(entry->kernel_size, 8);
+      if (p >= end)
+         break;
+
+      anv_pipeline_cache_add_kernel(cache, entry->stage, entry->src_sha1,
+                                    kernel, entry->kernel_size,
+                                    &entry->prog_data.base, &bind_map);
    }
 }
 
+static uint32_t
+sha1_hash_func(const void *key)
+{
+   return _mesa_hash_data(key, 20);
+}
+
+static bool
+sha1_compare_func(const void *a, const void *b)
+{
+   return memcmp(a, b, 20) == 0;
+}
+
 VkResult anv_CreatePipelineCache(
     VkDevice                                    _device,
     const VkPipelineCacheCreateInfo*            pCreateInfo,
@@ -520,7 +340,15 @@ VkResult anv_CreatePipelineCache(
    if (cache == NULL)
       return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
 
-   anv_pipeline_cache_init(cache, device);
+   cache->device = device;
+   pthread_mutex_init(&cache->mutex, NULL);
+
+   if (env_var_as_boolean("ANV_ENABLE_PIPELINE_CACHE", true)) {
+      cache->cache = _mesa_hash_table_create(NULL, sha1_hash_func,
+                                             sha1_compare_func);
+   } else {
+      cache->cache = NULL;
+   }
 
    if (pCreateInfo->initialDataSize > 0)
       anv_pipeline_cache_load(cache,
@@ -540,7 +368,19 @@ void anv_DestroyPipelineCache(
    ANV_FROM_HANDLE(anv_device, device, _device);
    ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache);
 
-   anv_pipeline_cache_finish(cache);
+   pthread_mutex_destroy(&cache->mutex);
+
+   if (cache->cache) {
+      /* This is a bit unfortunate.  In order to keep things from randomly
+       * going away, the shader cache has to hold a reference to all shader
+       * binaries it contains.  We unref them when we destroy the cache.
+       */
+      struct hash_entry *entry;
+      hash_table_foreach(cache->cache, entry)
+         anv_shader_bin_unref(device, entry->data);
+
+      _mesa_hash_table_destroy(cache->cache, NULL);
+   }
 
    anv_free2(&device->alloc, pAllocator, cache);
 }
@@ -555,9 +395,18 @@ VkResult anv_GetPipelineCacheData(
    ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache);
    struct cache_header *header;
 
-   const size_t size = sizeof(*header) + cache->total_size;
-
    if (pData == NULL) {
+      /* Mirror the write path: count and every blob are padded to 8 bytes. */
+      size_t size = sizeof(*header) + align_u32(sizeof(uint32_t), 8);
+      if (cache->cache) {
+         struct hash_entry *entry;
+         hash_table_foreach(cache->cache, entry) {
+            struct anv_shader_bin *bin = entry->data;
+            size += align_u32(anv_shader_bin_size(&bin->bind_map), 8);
+            size += align_u32(bin->kernel_size, 8);
+         }
+      }
+
       *pDataSize = size;
       return VK_SUCCESS;
    }
@@ -576,23 +425,25 @@ VkResult anv_GetPipelineCacheData(
    anv_device_get_cache_uuid(header->uuid);
    p += header->header_size;
 
-   struct cache_entry *entry;
-   for (uint32_t i = 0; i < cache->table_size; i++) {
-      if (cache->hash_table[i] == ~0)
-         continue;
+   uint32_t *count = p;
+   p += align_u32(sizeof(*count), 8);
+   *count = 0;
 
-      entry = cache->program_stream.block_pool->map + cache->hash_table[i];
-      const uint32_t size = entry_size(entry);
-      if (end < p + size + entry->kernel_size)
-         break;
-
-      memcpy(p, entry, size);
-      p += size;
+   if (cache->cache) {
+      struct hash_entry *entry;
+      hash_table_foreach(cache->cache, entry) {
+         struct anv_shader_bin *bin = entry->data;
+         const size_t bin_size = anv_shader_bin_size(&bin->bind_map);
+         if (p + align_u32(bin_size, 8) + align_u32(bin->kernel_size, 8) > end)
+            break;
 
-      void *kernel = (void *) entry + align_u32(size, 64);
+         memcpy(p, bin, bin_size);
+         p += align_u32(bin_size, 8);
+         memcpy(p, bin->kernel.map, bin->kernel_size);
+         p += align_u32(bin->kernel_size, 8);
 
-      memcpy(p, kernel, entry->kernel_size);
-      p += entry->kernel_size;
+         (*count)++;
+      }
    }
 
    *pDataSize = p - pData;
@@ -600,25 +451,6 @@ VkResult anv_GetPipelineCacheData(
    return VK_SUCCESS;
 }
 
-static void
-anv_pipeline_cache_merge(struct anv_pipeline_cache *dst,
-                         struct anv_pipeline_cache *src)
-{
-   for (uint32_t i = 0; i < src->table_size; i++) {
-      const uint32_t offset = src->hash_table[i];
-      if (offset == ~0)
-         continue;
-
-      struct cache_entry *entry =
-         src->program_stream.block_pool->map + offset;
-
-      if (anv_pipeline_cache_search(dst, entry->sha1, NULL, NULL) != NO_KERNEL)
-         continue;
-
-      anv_pipeline_cache_add_entry(dst, entry, offset);
-   }
-}
-
 VkResult anv_MergePipelineCaches(
     VkDevice                                    _device,
     VkPipelineCache                             destCache,
@@ -627,10 +459,23 @@ VkResult anv_MergePipelineCaches(
 {
    ANV_FROM_HANDLE(anv_pipeline_cache, dst, destCache);
 
+   if (!dst->cache)
+      return VK_SUCCESS;
+
    for (uint32_t i = 0; i < srcCacheCount; i++) {
       ANV_FROM_HANDLE(anv_pipeline_cache, src, pSrcCaches[i]);
+      if (!src->cache)
+         continue;
+
+      struct hash_entry *entry;
+      hash_table_foreach(src->cache, entry) {
+         struct anv_shader_bin *bin = entry->data;
+         if (_mesa_hash_table_search(dst->cache, bin->src_sha1))
+            continue;
 
-      anv_pipeline_cache_merge(dst, src);
+         anv_shader_bin_ref(bin);
+         _mesa_hash_table_insert(dst->cache, bin->src_sha1, bin);
+      }
    }
 
    return VK_SUCCESS;
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index 84d1031..d03a6ce 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -658,31 +658,24 @@ struct anv_queue {
 
 struct anv_pipeline_cache {
    struct anv_device *                          device;
-   struct anv_state_stream                      program_stream;
    pthread_mutex_t                              mutex;
 
-   uint32_t                                     total_size;
-   uint32_t                                     table_size;
-   uint32_t                                     kernel_count;
-   uint32_t *                                   hash_table;
+   struct hash_table *                          cache;
 };
 
 struct anv_pipeline_bind_map;
 
-void anv_pipeline_cache_init(struct anv_pipeline_cache *cache,
-                             struct anv_device *device);
-void anv_pipeline_cache_finish(struct anv_pipeline_cache *cache);
-uint32_t anv_pipeline_cache_search(struct anv_pipeline_cache *cache,
-                                   const unsigned char *sha1,
-                                   const struct brw_stage_prog_data **prog_data,
-                                   struct anv_pipeline_bind_map *map);
-uint32_t anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache,
-                                          const unsigned char *sha1,
-                                          const void *kernel,
-                                          size_t kernel_size,
-                                          const struct brw_stage_prog_data **prog_data,
-                                          size_t prog_data_size,
-                                          struct anv_pipeline_bind_map *map);
+struct anv_shader_bin *
+anv_pipeline_cache_search(struct anv_pipeline_cache *cache,
+                          const unsigned char *sha1);
+struct anv_shader_bin *
+anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache,
+                                 gl_shader_stage stage,
+                                 const unsigned char *src_sha1,
+                                 const void *kernel,
+                                 size_t kernel_size,
+                                 const struct brw_stage_prog_data *prog_data,
+                                 const struct anv_pipeline_bind_map *bind_map);
 
 struct anv_device {
     VK_LOADER_DATA                              _loader_data;
@@ -705,7 +698,6 @@ struct anv_device {
 
     struct anv_block_pool                       instruction_block_pool;
     struct anv_state_pool                       instruction_state_pool;
-    struct anv_pipeline_cache                   default_pipeline_cache;
 
     struct anv_block_pool                       surface_state_block_pool;
     struct anv_state_pool                       surface_state_pool;
@@ -1525,12 +1517,12 @@ struct anv_pipeline {
    struct anv_dynamic_state                     dynamic_state;
 
    struct anv_pipeline_layout *                 layout;
-   struct anv_pipeline_bind_map                 bindings[MESA_SHADER_STAGES];
 
    bool                                         use_repclear;
    bool                                         needs_data_cache;
 
-   const struct brw_stage_prog_data *           prog_data[MESA_SHADER_STAGES];
+   struct anv_shader_bin *                      shaders[MESA_SHADER_STAGES];
+
    struct {
       uint32_t                                  start[MESA_SHADER_GEOMETRY + 1];
       uint32_t                                  size[MESA_SHADER_GEOMETRY + 1];
@@ -1583,25 +1575,37 @@ anv_pipeline_has_stage(const struct anv_pipeline *pipeline,
 static inline const struct brw_vs_prog_data *
 get_vs_prog_data(struct anv_pipeline *pipeline)
 {
-   return (const struct brw_vs_prog_data *) pipeline->prog_data[MESA_SHADER_VERTEX];
+   if (anv_pipeline_has_stage(pipeline, MESA_SHADER_VERTEX))
+      return &pipeline->shaders[MESA_SHADER_VERTEX]->prog_data.vs;
+   else
+      return NULL;
 }
 
 static inline const struct brw_gs_prog_data *
 get_gs_prog_data(struct anv_pipeline *pipeline)
 {
-   return (const struct brw_gs_prog_data *) pipeline->prog_data[MESA_SHADER_GEOMETRY];
+   if (anv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY))
+      return &pipeline->shaders[MESA_SHADER_GEOMETRY]->prog_data.gs;
+   else
+      return NULL;
 }
 
 static inline const struct brw_wm_prog_data *
 get_wm_prog_data(struct anv_pipeline *pipeline)
 {
-   return (const struct brw_wm_prog_data *) pipeline->prog_data[MESA_SHADER_FRAGMENT];
+   if (anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT))
+      return &pipeline->shaders[MESA_SHADER_FRAGMENT]->prog_data.fs;
+   else
+      return NULL;
 }
 
 static inline const struct brw_cs_prog_data *
 get_cs_prog_data(struct anv_pipeline *pipeline)
 {
-   return (const struct brw_cs_prog_data *) pipeline->prog_data[MESA_SHADER_COMPUTE];
+   if (anv_pipeline_has_stage(pipeline, MESA_SHADER_COMPUTE))
+      return &pipeline->shaders[MESA_SHADER_COMPUTE]->prog_data.cs;
+   else
+      return NULL;
 }
 
 struct anv_graphics_pipeline_create_info {
diff --git a/src/intel/vulkan/genX_l3.c b/src/intel/vulkan/genX_l3.c
index 8b3b8ac..2833403 100644
--- a/src/intel/vulkan/genX_l3.c
+++ b/src/intel/vulkan/genX_l3.c
@@ -318,7 +318,8 @@ get_pipeline_state_l3_weights(const struct anv_pipeline *pipeline)
       if (!anv_pipeline_has_stage(pipeline, i))
          continue;
 
-      const struct brw_stage_prog_data *prog_data = pipeline->prog_data[i];
+      const struct brw_stage_prog_data *prog_data =
+         &pipeline->shaders[i]->prog_data.base;
 
       needs_dc |= pipeline->needs_data_cache;
       needs_slm |= prog_data->total_shared;
diff --git a/src/intel/vulkan/genX_pipeline.c b/src/intel/vulkan/genX_pipeline.c
index 5cbcfd2..5ea5f9b 100644
--- a/src/intel/vulkan/genX_pipeline.c
+++ b/src/intel/vulkan/genX_pipeline.c
@@ -63,8 +63,7 @@ genX(compute_pipeline_create)(
    /* When we free the pipeline, we detect stages based on the NULL status
     * of various prog_data pointers.  Make them NULL by default.
     */
-   memset(pipeline->prog_data, 0, sizeof(pipeline->prog_data));
-   memset(pipeline->bindings, 0, sizeof(pipeline->bindings));
+   memset(pipeline->shaders, 0, sizeof(pipeline->shaders));
 
    pipeline->vs_simd8 = NO_KERNEL;
    pipeline->vs_vec4 = NO_KERNEL;
diff --git a/src/intel/vulkan/genX_pipeline_util.h b/src/intel/vulkan/genX_pipeline_util.h
index 62fd01c..6518fae 100644
--- a/src/intel/vulkan/genX_pipeline_util.h
+++ b/src/intel/vulkan/genX_pipeline_util.h
@@ -671,7 +671,7 @@ emit_cb_state(struct anv_pipeline *pipeline,
    uint32_t surface_count = 0;
    struct anv_pipeline_bind_map *map;
    if (anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) {
-      map = &pipeline->bindings[MESA_SHADER_FRAGMENT];
+      map = &pipeline->shaders[MESA_SHADER_FRAGMENT]->bind_map;
       surface_count = map->surface_count;
    }
 
-- 
2.5.0.400.gff86faf