[Mesa-dev] [PATCH 11/65] i965: add initial implementation of on disk shader cache

Timothy Arceri timothy.arceri at collabora.com
Fri Apr 29 13:33:10 UTC 2016


From: Carl Worth <cworth at cworth.org>

This uses the recently-added cache.c to write out the final, compiled,
linked binaries for the vertex and fragment shader programs.

This is the initial implementation done by Carl with a few fixes. Further
improvements will be made in following commits.

Signed-off-by: Timothy Arceri <timothy.arceri at collabora.com>
---
 src/mesa/drivers/dri/i965/Makefile.sources   |   1 +
 src/mesa/drivers/dri/i965/brw_shader_cache.c | 369 +++++++++++++++++++++++++++
 src/mesa/drivers/dri/i965/brw_state.h        |   7 +
 3 files changed, 377 insertions(+)
 create mode 100644 src/mesa/drivers/dri/i965/brw_shader_cache.c

diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources
index 441d727..782eb3f 100644
--- a/src/mesa/drivers/dri/i965/Makefile.sources
+++ b/src/mesa/drivers/dri/i965/Makefile.sources
@@ -156,6 +156,7 @@ i965_FILES = \
 	brw_sf_emit.c \
 	brw_sf.h \
 	brw_sf_state.c \
+	brw_shader_cache.c \
 	brw_state_batch.c \
 	brw_state_cache.c \
 	brw_state_dump.c \
diff --git a/src/mesa/drivers/dri/i965/brw_shader_cache.c b/src/mesa/drivers/dri/i965/brw_shader_cache.c
new file mode 100644
index 0000000..3e5314d
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_shader_cache.c
@@ -0,0 +1,369 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <util/macros.h>
+#include <util/mesa-sha1.h>
+#include <main/mtypes.h>
+#include <compiler/glsl/glsl_parser_extras.h>
+#include <compiler/glsl/ir_uniform.h>
+#include <compiler/glsl/cache.h>
+#include <compiler/glsl/blob.h>
+
+#include "brw_state.h"
+#include "brw_wm.h"
+#include "brw_vs.h"
+#include "brw_context.h"
+
+/**
+ * Try to satisfy the current VS/FS pair from the on-disk shader cache.
+ *
+ * The lookup key (brw->binary_sha1) is the SHA-1 of a manifest naming the
+ * program's SHA-1 and the SHA-1s of the current wm/vs keys. On a hit the blob
+ * is fully parsed and validated before brw_upload_cache() sees either stage.
+ */
+void
+upload_cached_program(struct brw_context *brw)
+{
+   char sha1_buf[41];
+   size_t size, vs_program_size, wm_program_size;
+   uint8_t *buffer, *vs_program, *wm_program;
+   struct blob_reader binary;
+   struct gl_shader_program *prog;
+   struct brw_wm_prog_key wm_key;
+   struct brw_vs_prog_key vs_key;
+   unsigned char sha1[20];
+   char manifest[256];
+   int offset = 0;
+   struct program_cache *cache;
+   struct brw_vs_prog_data *vs_prog_data;
+   struct brw_wm_prog_data *wm_prog_data;
+   struct brw_stage_prog_data *prog_data;
+   size_t vs_prog_data_size, wm_prog_data_size;
+   intptr_t parameter_values_base, uniform_data_slots_base;
+   void *local = NULL;
+   uint32_t i, nr_params, nr_pull_params;
+
+   cache = brw->ctx.Cache;
+   if (cache == NULL)
+      return;
+
+   prog = brw->ctx.Shader.ActiveProgram;
+   if (prog == NULL)
+      return;
+
+   offset += snprintf(manifest + offset, sizeof(manifest) - offset,
+                      "program: %s\n", _mesa_sha1_format(sha1_buf, prog->sha1));
+
+   brw_wm_populate_key(brw, &wm_key);
+   _mesa_sha1_compute(&wm_key, sizeof wm_key, sha1);
+   offset += snprintf(manifest + offset, sizeof(manifest) - offset,
+                      "wm_key: %s\n", _mesa_sha1_format(sha1_buf, sha1));
+
+   brw_vs_populate_key(brw, &vs_key);
+   _mesa_sha1_compute(&vs_key, sizeof vs_key, sha1);
+   offset += snprintf(manifest + offset, sizeof(manifest) - offset,
+                      "vs_key: %s\n", _mesa_sha1_format(sha1_buf, sha1));
+
+   _mesa_sha1_compute(manifest, strlen(manifest), brw->binary_sha1);
+
+   buffer = cache_get(cache, brw->binary_sha1, &size);
+   if (buffer == NULL)
+      goto FAIL;
+
+   printf("populating bo cache with binary: %s\n",
+          _mesa_sha1_format(sha1_buf, brw->binary_sha1));
+
+   /* Allocate only on a cache hit so misses and early returns leak nothing. */
+   local = ralloc_context(NULL);
+   blob_reader_init(&binary, buffer, size);
+
+   /* Read VS program from blob. */
+   vs_program_size = blob_read_uint32(&binary);
+   vs_program = blob_read_bytes(&binary, vs_program_size);
+
+   /* Read VS program_data from blob and fixup params pointers. */
+   vs_prog_data_size = blob_read_uint32(&binary);
+   if (vs_prog_data_size != sizeof *vs_prog_data)
+      goto FAIL;
+
+   /* blob_read_bytes() returns NULL on a short blob; never dereference it. */
+   vs_prog_data = blob_read_bytes(&binary, vs_prog_data_size);
+   if (vs_prog_data == NULL)
+      goto FAIL;
+   prog_data = &vs_prog_data->base.base;
+
+   parameter_values_base = blob_read_intptr(&binary);
+   uniform_data_slots_base = blob_read_intptr(&binary);
+
+   nr_params = blob_read_uint32(&binary);
+   if (nr_params != prog_data->nr_params)
+      goto FAIL;
+
+   prog_data->param = rzalloc_array(local, const gl_constant_value *,
+                                    nr_params);
+   printf("Allocating %u prog_data->params (%p)\n",
+          prog_data->nr_params, (void *) prog_data->param);
+
+   for (i = 0; i < nr_params; i++) {
+      intptr_t param = blob_read_intptr(&binary);
+      ptrdiff_t p_offset, u_offset;
+      struct gl_program_parameter_list *param_list =
+         prog->_LinkedShaders[MESA_SHADER_VERTEX]->Program->Parameters;
+
+      p_offset = (param - parameter_values_base) / sizeof(gl_constant_value);
+      u_offset = (param - uniform_data_slots_base) / sizeof(gl_constant_value);
+      if (p_offset >= 0 && p_offset < 4 * param_list->NumParameters) {
+         prog_data->param[i] =
+            ((gl_constant_value *) param_list->ParameterValues) + p_offset;
+      } else if (u_offset >= 0 && u_offset < prog->NumUniformDataSlots) {
+         prog_data->param[i] = prog->UniformDataSlots + u_offset;
+      } else {
+         printf("Error: Failed to fixup vs pointer value %p\n", (void *) param);
+         goto FAIL;
+      }
+   }
+
+   nr_pull_params = blob_read_uint32(&binary);
+   if (nr_pull_params != prog_data->nr_pull_params)
+      goto FAIL;
+
+   prog_data->pull_param = rzalloc_array(local, const gl_constant_value *,
+                                         nr_pull_params);
+
+   /* FIXME: We need to fixup pull_params pointers here. */
+   for (i = 0; i < nr_pull_params; i++)
+      (void) blob_read_intptr(&binary);
+
+   /* Read WM program from blob. */
+   wm_program_size = blob_read_uint32(&binary);
+   wm_program = blob_read_bytes(&binary, wm_program_size);
+
+   /* Read WM program_data from blob and fixup params pointers. */
+   wm_prog_data_size = blob_read_uint32(&binary);
+   if (wm_prog_data_size != sizeof *wm_prog_data)
+      goto FAIL;
+
+   wm_prog_data = blob_read_bytes(&binary, wm_prog_data_size);
+   if (wm_prog_data == NULL)
+      goto FAIL;
+   prog_data = &wm_prog_data->base;
+
+   parameter_values_base = blob_read_intptr(&binary);
+
+   nr_params = blob_read_uint32(&binary);
+   if (nr_params != prog_data->nr_params)
+      goto FAIL;
+
+   prog_data->param = rzalloc_array(local, const gl_constant_value *,
+                                    nr_params);
+   printf("Allocating %u prog_data->params (%p)\n",
+          prog_data->nr_params, (void *) prog_data->param);
+
+   for (i = 0; i < nr_params; i++) {
+      intptr_t param = blob_read_intptr(&binary);
+      ptrdiff_t p_offset, u_offset;
+      struct gl_program_parameter_list *param_list =
+         prog->_LinkedShaders[MESA_SHADER_FRAGMENT]->Program->Parameters;
+
+      p_offset = (param - parameter_values_base) / sizeof(gl_constant_value);
+      u_offset = (param - uniform_data_slots_base) / sizeof(gl_constant_value);
+      if (p_offset >= 0 && p_offset < 4 * param_list->NumParameters) {
+         prog_data->param[i] =
+            ((gl_constant_value *) param_list->ParameterValues) + p_offset;
+      } else if (u_offset >= 0 && u_offset < prog->NumUniformDataSlots) {
+         prog_data->param[i] = prog->UniformDataSlots + u_offset;
+      } else {
+         printf("Error: Failed to fixup fs pointer value %p\n", (void *) param);
+         goto FAIL;
+      }
+   }
+
+   nr_pull_params = blob_read_uint32(&binary);
+   if (nr_pull_params != prog_data->nr_pull_params)
+      goto FAIL;
+
+   prog_data->pull_param = rzalloc_array(local, const gl_constant_value *,
+                                         nr_pull_params);
+
+   /* FIXME: We need to fixup pull_params pointers here. */
+   for (i = 0; i < nr_pull_params; i++)
+      (void) blob_read_intptr(&binary);
+
+   if (binary.current != binary.end || binary.overrun) {
+      printf ("Error reading program from cache (did not read every byte "
+              "written)\n");
+      goto FAIL;
+   }
+
+   /* The whole blob checked out; only now publish both stages. */
+   struct brw_vertex_program *vp =
+      (struct brw_vertex_program *)brw->vertex_program;
+   brw_upload_cache(&brw->cache, BRW_CACHE_VS_PROG,
+                    &vs_key, sizeof(struct brw_vs_prog_key),
+                    vs_program, vs_program_size,
+                    vs_prog_data, vs_prog_data_size,
+                    &brw->vs.base.prog_offset, &brw->vs.prog_data, vp);
+
+   struct brw_fragment_program *wp =
+      (struct brw_fragment_program *)brw->fragment_program;
+   brw_upload_cache(&brw->cache, BRW_CACHE_FS_PROG,
+                    &wm_key, sizeof(struct brw_wm_prog_key),
+                    wm_program, wm_program_size,
+                    wm_prog_data, wm_prog_data_size,
+                    &brw->wm.base.prog_offset, &brw->wm.prog_data, wp);
+
+   printf ("%s: Successfully read every byte written!\n", __FUNCTION__);
+   prog->program_written_to_cache = true;
+
+   /* FIXME: `local` is leaked on success because the prog_data handed to
+    * brw_upload_cache() points into it. We really want a context that lives
+    * across both upload_cached_program and write_cached_program. Ken suggests
+    * rewriting brw_state_upload.c:brw_upload_state() to pull the common atoms
+    * out of its loop and call these caching functions around them explicitly.
+    */
+   free(buffer);
+   return;
+
+FAIL:
+   /* Nothing has been uploaded yet, so freeing the fixup arrays is safe. */
+   prog->program_written_to_cache = false;
+   ralloc_free(local);
+   printf("FIXME: May need to fallback to compile from source here...\n");
+   free(buffer);
+}
+
+/**
+ * Serialize the current VS/FS binaries and prog_data into the shader cache
+ * under brw->binary_sha1, in the layout upload_cached_program() reads back.
+ * Nothing is stored (and the program stays unmarked, so a later call retries)
+ * if a kernel cannot be copied out of the program cache BO.
+ */
+void
+write_cached_program(struct brw_context *brw)
+{
+   struct blob *binary;
+   uint8_t *blob_cursor;
+   size_t vs_program_size, wm_program_size;
+   uint32_t i, nr_params, nr_pull_params;
+   struct gl_shader_program *prog;
+   struct program_cache *cache;
+   char buf[41];
+
+   cache = brw->ctx.Cache;
+   if (cache == NULL)
+      return;
+
+   prog = brw->ctx.Shader.ActiveProgram;
+   if (prog == NULL)
+      return;
+
+   if (prog->program_written_to_cache)
+      return;
+
+   binary = blob_create (NULL);
+   if (binary == NULL)
+      return;
+
+   /* Write VS program to blob. */
+   vs_program_size = brw->vs.prog_data->program_size;
+   blob_write_uint32(binary, vs_program_size);
+   blob_cursor = blob_reserve_bytes(binary, vs_program_size);
+   if (blob_cursor == NULL ||
+       drm_intel_bo_get_subdata(brw->cache.bo, brw->vs.base.prog_offset,
+                                vs_program_size, blob_cursor) != 0)
+      goto out;
+
+   /* Write VS program_data to blob. */
+   blob_write_uint32(binary, sizeof *brw->vs.prog_data);
+   blob_write_bytes(binary, brw->vs.prog_data, sizeof *brw->vs.prog_data);
+
+   /* Include variable-length params from end of brw_stage_prog_data as well.
+    *
+    * Before either the params or pull_params arrays, write the addresses of
+    * the ParameterValues and UniformDataSlots storage. Each params pointer
+    * points into one of those blocks, so the loader can turn it into an
+    * offset and rebase it onto the storage of the process reading the cache.
+    */
+   blob_write_intptr(binary,
+                     (intptr_t) prog->_LinkedShaders[MESA_SHADER_VERTEX]->
+                      Program->Parameters->ParameterValues);
+   blob_write_intptr(binary, (intptr_t) prog->UniformDataSlots);
+
+   nr_params = brw->vs.prog_data->base.base.nr_params;
+   blob_write_uint32(binary, nr_params);
+
+   for (i = 0; i < nr_params; i++) {
+      blob_write_intptr(binary,
+                        (intptr_t) brw->vs.prog_data->base.base.param[i]);
+   }
+
+   nr_pull_params = brw->vs.prog_data->base.base.nr_pull_params;
+   blob_write_uint32(binary, nr_pull_params);
+
+   for (i = 0; i < nr_pull_params; i++) {
+      blob_write_intptr(binary,
+                        (intptr_t) brw->vs.prog_data->base.base.pull_param[i]);
+   }
+
+   /* Write WM program to blob. */
+   wm_program_size = brw->wm.prog_data->program_size;
+   blob_write_uint32(binary, wm_program_size);
+   blob_cursor = blob_reserve_bytes(binary, wm_program_size);
+   if (blob_cursor == NULL ||
+       drm_intel_bo_get_subdata(brw->cache.bo, brw->wm.base.prog_offset,
+                                wm_program_size, blob_cursor) != 0)
+      goto out;
+
+   /* Write WM program_data to blob. */
+   blob_write_uint32(binary, sizeof *brw->wm.prog_data);
+   blob_write_bytes(binary, brw->wm.prog_data, sizeof *brw->wm.prog_data);
+
+   /* Variable-length params; the UniformDataSlots base is already written. */
+   blob_write_intptr(binary,
+                     (intptr_t) prog->_LinkedShaders[MESA_SHADER_FRAGMENT]->
+                     Program->Parameters->ParameterValues);
+
+   nr_params = brw->wm.prog_data->base.nr_params;
+   blob_write_uint32(binary, nr_params);
+
+   for (i = 0; i < nr_params; i++) {
+      blob_write_intptr(binary,
+                        (intptr_t) brw->wm.prog_data->base.param[i]);
+   }
+
+   nr_pull_params = brw->wm.prog_data->base.nr_pull_params;
+   blob_write_uint32(binary, nr_pull_params);
+
+   for (i = 0; i < nr_pull_params; i++) {
+      blob_write_intptr(binary,
+                        (intptr_t) brw->wm.prog_data->base.pull_param[i]);
+   }
+
+   printf("putting binary in cache: %s\n",
+          _mesa_sha1_format(buf, brw->binary_sha1));
+   cache_put(cache, brw->binary_sha1, binary->data, binary->size);
+   prog->program_written_to_cache = true;
+
+out:
+   ralloc_free (binary);
+}
diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h
index 071d3ca..0864d0f 100644
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@@ -203,6 +203,13 @@ void gen8_write_pma_stall_bits(struct brw_context *brw,
 /* gen8_misc_state.c */
 void gen8_upload_state_base_address(struct brw_context *brw);
 
+/* brw_shader_cache.c */
+void
+upload_cached_program(struct brw_context *brw);
+
+void
+write_cached_program(struct brw_context *brw);
+
 /***********************************************************************
  * brw_state.c
  */
-- 
2.5.5



More information about the mesa-dev mailing list