[Mesa-dev] [PATCH v2 18/32] i965: add initial implementation of on disk shader cache
Jason Ekstrand
jason at jlekstrand.net
Fri Oct 20 23:36:21 UTC 2017
I've got a meta-comment and then a couple more inline comments below before
I sign-off for the week-end.
Most of my comments on this patch have focussed towards two goals:
1) As close to perfect paridy between read_program_data and
write_program_data as we can manage. They should be almost identical
except for the direction the data flows.
2) Reduced redundancy. There are several places where we pass values or
read/write values that are already found in brw_stage_prog_data. Let's
just use the ones in brw_stage_prog_data. If we can't trust that, then
we're toast anyway.
Hopefully that will make the rest of my comments a bit more understandable.
On Fri, Oct 20, 2017 at 4:30 PM, Jason Ekstrand <jason at jlekstrand.net>
wrote:
> On Fri, Oct 20, 2017 at 4:05 PM, Jason Ekstrand <jason at jlekstrand.net>
> wrote:
>
>> On Wed, Oct 18, 2017 at 10:32 PM, Jordan Justen <
>> jordan.l.justen at intel.com> wrote:
>>
>>> From: Timothy Arceri <timothy.arceri at collabora.com>
>>>
>>> This uses the recently-added disk_cache.c to write out the final
>>> linked binary for vertex and fragment shader programs.
>>>
>>> This is based off the initial implementation done by Carl Worth.
>>>
>>> v2:
>>> * Squash 'i965: add image param shader cache support'
>>> * Squash 'i965: add shader cache support for pull param pointers'
>>> * Sustantially simplified by a rework on top of Jason's 2975e4c56a7a.
>>> * Rename load_program_data to read_program_data. (Jason)
>>>
>>> [jordan.l.justen at intel.com: *_cached_program =>
>>> brw_disk_cache_*_program]
>>> [jordan.l.justen at intel.com: brw_shader_cache.c => brw_disk_cache.c]
>>> [jordan.l.justen at intel.com: don't map to write program when LLC is
>>> present]
>>> [jordan.l.justen at intel.com: set program_written_to_cache on read from
>>> cache]
>>> [jordan.l.justen at intel.com: only try cache when status is
>>> linking_skipped]
>>> [jordan.l.justen at intel.com: rework based on uniforms rework
>>> 2975e4c56a7a]
>>> Signed-off-by: Jordan Justen <jordan.l.justen at intel.com>
>>> ---
>>> src/mesa/drivers/dri/i965/Makefile.sources | 1 +
>>> src/mesa/drivers/dri/i965/brw_disk_cache.c | 357
>>> +++++++++++++++++++++++++++++
>>> src/mesa/drivers/dri/i965/brw_state.h | 5 +
>>> src/mesa/drivers/dri/i965/meson.build | 1 +
>>> 4 files changed, 364 insertions(+)
>>> create mode 100644 src/mesa/drivers/dri/i965/brw_disk_cache.c
>>>
>>> diff --git a/src/mesa/drivers/dri/i965/Makefile.sources
>>> b/src/mesa/drivers/dri/i965/Makefile.sources
>>> index 053d89b81e..2980cdb3c5 100644
>>> --- a/src/mesa/drivers/dri/i965/Makefile.sources
>>> +++ b/src/mesa/drivers/dri/i965/Makefile.sources
>>> @@ -14,6 +14,7 @@ i965_FILES = \
>>> brw_cs.h \
>>> brw_curbe.c \
>>> brw_defines.h \
>>> + brw_disk_cache.c \
>>> brw_draw.c \
>>> brw_draw.h \
>>> brw_draw_upload.c \
>>> diff --git a/src/mesa/drivers/dri/i965/brw_disk_cache.c
>>> b/src/mesa/drivers/dri/i965/brw_disk_cache.c
>>> new file mode 100644
>>> index 0000000000..6fe39a7997
>>> --- /dev/null
>>> +++ b/src/mesa/drivers/dri/i965/brw_disk_cache.c
>>> @@ -0,0 +1,357 @@
>>> +/*
>>> + * Copyright © 2014 Intel Corporation
>>> + *
>>> + * Permission is hereby granted, free of charge, to any person
>>> obtaining a
>>> + * copy of this software and associated documentation files (the
>>> "Software"),
>>> + * to deal in the Software without restriction, including without
>>> limitation
>>> + * the rights to use, copy, modify, merge, publish, distribute,
>>> sublicense,
>>> + * and/or sell copies of the Software, and to permit persons to whom the
>>> + * Software is furnished to do so, subject to the following conditions:
>>> + *
>>> + * The above copyright notice and this permission notice (including the
>>> next
>>> + * paragraph) shall be included in all copies or substantial portions
>>> of the
>>> + * Software.
>>> + *
>>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
>>> EXPRESS OR
>>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
>>> MERCHANTABILITY,
>>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
>>> SHALL
>>> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
>>> OTHER
>>> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
>>> ARISING
>>> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
>>> DEALINGS
>>> + * IN THE SOFTWARE.
>>> + */
>>> +
>>> +#include "compiler/blob.h"
>>> +#include "compiler/glsl/ir_uniform.h"
>>> +#include "compiler/glsl/shader_cache.h"
>>> +#include "main/mtypes.h"
>>> +#include "util/disk_cache.h"
>>> +#include "util/macros.h"
>>> +#include "util/mesa-sha1.h"
>>> +
>>> +#include "brw_context.h"
>>> +#include "brw_state.h"
>>> +#include "brw_vs.h"
>>> +#include "brw_wm.h"
>>> +
>>> +static size_t
>>> +key_size(gl_shader_stage stage)
>>> +{
>>> + switch (stage) {
>>> + case MESA_SHADER_VERTEX:
>>> + return sizeof(struct brw_vs_prog_key);
>>> + case MESA_SHADER_TESS_CTRL:
>>> + return sizeof(struct brw_tcs_prog_key);
>>> + case MESA_SHADER_TESS_EVAL:
>>> + return sizeof(struct brw_tes_prog_key);
>>> + case MESA_SHADER_GEOMETRY:
>>> + return sizeof(struct brw_gs_prog_key);
>>> + case MESA_SHADER_FRAGMENT:
>>> + return sizeof(struct brw_wm_prog_key);
>>> + case MESA_SHADER_COMPUTE:
>>> + return sizeof(struct brw_cs_prog_key);
>>> + default:
>>> + unreachable("Unsupported stage!");
>>> + }
>>> +}
>>>
>>
>> It might be worth putting this in brw_program.h and maybe adding one for
>> prog_data as well.
>>
>>
>>> +
>>> +static void
>>> +gen_shader_sha1(struct brw_context *brw, struct gl_program *prog,
>>> + gl_shader_stage stage, void *key, unsigned char
>>> *out_sha1)
>>> +{
>>> + char sha1_buf[41];
>>> + unsigned char sha1[20];
>>> + char manifest[256];
>>> + int offset = 0;
>>> +
>>> + _mesa_sha1_format(sha1_buf, prog->sh.data->sha1);
>>> + offset += snprintf(manifest, sizeof(manifest), "program: %s\n",
>>> sha1_buf);
>>> +
>>> + _mesa_sha1_compute(key, key_size(stage), sha1);
>>> + _mesa_sha1_format(sha1_buf, sha1);
>>> + offset += snprintf(manifest + offset, sizeof(manifest) - offset,
>>> + "%s_key: %s\n", _mesa_shader_stage_to_abbrev(s
>>> tage),
>>> + sha1_buf);
>>> +
>>> + _mesa_sha1_compute(manifest, strlen(manifest), out_sha1);
>>> +}
>>> +
>>> +static void
>>> +read_program_data(struct gl_program *glprog, struct blob_reader *binary,
>>> + struct brw_stage_prog_data *prog_data,
>>> + struct brw_stage_state *stage_state, struct
>>> gl_context *ctx)
>>>
>>
stage_state is never used in this function, we can get rid of the parameter
>
>> With the simplifications below, this becomes a very short function. Can
>> we either roll it into read_and_upload or pull the rest of the prog_data
>> read code into this function? I'd really like it if read_program_data and
>> write_program_data were almost identical.
>>
>>
>>> +{
>>> + uint32_t nr_params = blob_read_uint32(binary);
>>> + assert(nr_params == prog_data->nr_params);
>>>
>>
>> If we trust prog_data (which we had better be able to do!) this is
>> unnecessary. Same with nr_pull_params.
>>
>>
>>> + assert(!binary->overrun);
>>> +
>>> + prog_data->param = rzalloc_array(NULL, uint32_t, nr_params);
>>> +
>>> + uint32_t nr_image_params = blob_read_uint32(binary);
>>> + assert(nr_image_params == glprog->info.num_images);
>>>
>>
>> I don't think we need this anymore. Let's just drop nr_image_params from
>> read/write_program_data
>>
>>
>>> + assert(!binary->overrun);
>>> +
>>> + for (unsigned i = 0; i < nr_params; i++)
>>> + prog_data->param[i] = blob_read_uint32(binary);
>>> +
>>> + uint32_t nr_pull_params = blob_read_uint32(binary);
>>> + assert(nr_pull_params == prog_data->nr_pull_params);
>>> +
>>> + prog_data->pull_param = rzalloc_array(NULL, uint32_t,
>>> nr_pull_params);
>>> +
>>> + for (unsigned i = 0; i < nr_pull_params; i++)
>>> + prog_data->pull_param[i] = blob_read_uint32(binary);
>>> +
>>> + assert(!binary->overrun);
>>> +}
>>> +
>>> +#define SET_UPLOAD_PRAMS(sh, sh_caps, prog) \
>>> + assert(prog_data_size == sizeof(struct brw_##sh##_prog_data)); \
>>> + sh##_key.program_string_id = prog->id; \
>>> + cache_id = BRW_CACHE_##sh_caps##_PROG; \
>>> + key = &sh##_key; \
>>> + max_threads = devinfo->max_##sh##_threads; \
>>> + stage_state = &brw->sh.base; \
>>> +
>>> +static bool
>>> +read_and_upload(struct brw_context *brw, struct disk_cache *cache,
>>> + struct blob_reader *binary, struct gl_program *prog,
>>> + gl_shader_stage stage)
>>> +{
>>> + const struct gen_device_info *devinfo = &brw->screen->devinfo;
>>> +
>>> + unsigned char binary_sha1[20];
>>> +
>>> + struct brw_wm_prog_key wm_key;
>>> + struct brw_vs_prog_key vs_key;
>>> +
>>> + switch (stage) {
>>> + case MESA_SHADER_VERTEX:
>>> + brw_vs_populate_key(brw, &vs_key);
>>> + /* We don't care what instance of the program it is we only care
>>> if
>>> + * its the correct binary to load so ignore program id for on
>>> disk cache.
>>> + */
>>> + vs_key.program_string_id = 0;
>>> + gen_shader_sha1(brw, prog, stage, &vs_key, binary_sha1);
>>> + break;
>>> + case MESA_SHADER_FRAGMENT:
>>> + brw_wm_populate_key(brw, &wm_key);
>>> + wm_key.program_string_id = 0;
>>> + gen_shader_sha1(brw, prog, stage, &wm_key, binary_sha1);
>>> + break;
>>> + default:
>>> + unreachable("Unsupported stage!");
>>> + }
>>> +
>>> + size_t size;
>>> + uint8_t *buffer = disk_cache_get(cache, binary_sha1, &size);
>>> + if (buffer == NULL) {
>>> + if (brw->ctx._Shader->Flags & GLSL_CACHE_INFO) {
>>> + char sha1_buf[41];
>>> + _mesa_sha1_format(sha1_buf, binary_sha1);
>>> + fprintf(stderr, "No cached %s binary found for: %s\n",
>>> + _mesa_shader_stage_to_abbrev(stage), sha1_buf);
>>> + }
>>> + return false;
>>> + }
>>> +
>>> + if (brw->ctx._Shader->Flags & GLSL_CACHE_INFO) {
>>> + char sha1_buf[41];
>>> + _mesa_sha1_format(sha1_buf, binary_sha1);
>>> + fprintf(stderr, "attempting to populate bo cache with binary:
>>> %s\n",
>>> + sha1_buf);
>>> + }
>>> +
>>> + blob_reader_init(binary, buffer, size);
>>> +
>>> + /* Read shader program from blob. */
>>> + size_t program_size = blob_read_uint32(binary);
>>> + const uint8_t *program = blob_read_bytes(binary, program_size);
>>> +
>>> + /* Read shader program_data from blob. */
>>> + size_t prog_data_size = blob_read_uint32(binary);
>>>
>>
>> assert(prog_data_size == brw_prog_data_size(stage));
>>
>>
>>> + assert(!binary->overrun);
>>> + const void *blob_prog_data = blob_read_bytes(binary, prog_data_size);
>>> + // TODO: fix mem_ctx
>>> + struct brw_stage_prog_data *prog_data =
>>> + ralloc_size(NULL, prog_data_size);
>>> + memcpy(prog_data, blob_prog_data, prog_data_size);
>>>
>>
>> You can just use blob_copy_bytes instead of this.
>>
>>
>>> +
>>> + /* Upload params set by SET_UPLOAD_PRAMS() */
>>> + struct brw_stage_state *stage_state;
>>> + enum brw_cache_id cache_id;
>>> + unsigned max_threads;
>>> + void *key;
>>> +
>>> + switch (stage) {
>>> + case MESA_SHADER_VERTEX: {
>>> + struct brw_program *vp = (struct brw_program *) prog;
>>> + SET_UPLOAD_PRAMS(vs, VS, vp)
>>> + break;
>>> + }
>>> + case MESA_SHADER_FRAGMENT: {
>>> + struct brw_program *wp = (struct brw_program *) prog;
>>> + SET_UPLOAD_PRAMS(wm, FS, wp)
>>> + break;
>>> + }
>>> + default:
>>> + unreachable("Unsupported stage!");
>>> + }
>>> +
>>> + read_program_data(prog, binary, prog_data, stage_state, &brw->ctx);
>>>
>>
Which means there's no reason we need to call read_program_data *after* the
above switch and we can move it up and just roll the other prog_data
reading stuff into it.
> +
>>> + if (binary->current != binary->end || binary->overrun) {
>>> + /* Something very bad has gone wrong discard the item from the
>>> cache and
>>> + * rebuild from source.
>>> + */
>>> + assert(!"Invalid i965 shader disk cache item!");
>>> +
>>> + if (brw->ctx._Shader->Flags & GLSL_CACHE_INFO) {
>>> + fprintf(stderr, "Error reading program from cache (invalid
>>> i965 "
>>> + "cache item)\n");
>>> + }
>>> +
>>> + disk_cache_remove(cache, binary_sha1);
>>> + free(buffer);
>>> + return false;
>>> + }
>>> +
>>> + brw_alloc_stage_scratch(brw, stage_state, prog_data->total_scratch,
>>> + max_threads);
>>> +
>>> + brw_upload_cache(&brw->cache, cache_id, key, key_size(stage),
>>> program,
>>> + program_size, prog_data, prog_data_size,
>>> + &stage_state->prog_offset, &stage_state->prog_data);
>>> +
>>> + prog->program_written_to_cache = true;
>>> +
>>> + free(buffer);
>>> +
>>> + return true;
>>> +}
>>> +
>>> +bool
>>> +brw_disk_cache_upload_program(struct brw_context *brw, gl_shader_stage
>>> stage)
>>> +{
>>> + struct blob_reader binary;
>>> +
>>> + struct disk_cache *cache = brw->ctx.Cache;
>>> + if (cache == NULL)
>>> + return false;
>>> +
>>> + struct gl_program *prog = brw->ctx._Shader->CurrentProgram[stage];
>>> + if (prog == NULL)
>>> + return false;
>>> +
>>> + if (prog->sh.data->LinkStatus != linking_skipped)
>>> + goto FAIL;
>>> +
>>> + if (!read_and_upload(brw, cache, &binary, prog, stage))
>>> + goto FAIL;
>>> +
>>> + if (brw->ctx._Shader->Flags & GLSL_CACHE_INFO) {
>>> + fprintf(stderr, "read gen program from cache\n");
>>> + }
>>> +
>>> + return true;
>>> +
>>> +FAIL:
>>> + /*FIXME: Fall back and compile from source here. */
>>> + return false;
>>> +}
>>> +
>>> +static void
>>> +write_program_data(struct brw_context *brw, struct gl_program *prog,
>>> + void *key, struct brw_stage_prog_data *prog_data,
>>> + size_t program_size, size_t prog_data_size,
>>> + uint32_t prog_offset, struct disk_cache *cache,
>>> + gl_shader_stage stage)
>>> +{
>>> + unsigned char sha1[20];
>>> + char buf[41];
>>> +
>>> + struct blob binary;
>>> + blob_init(&binary);
>>> +
>>> + gen_shader_sha1(brw, prog, stage, key, sha1);
>>> +
>>> + /* Write program to blob. */
>>> + blob_write_uint32(&binary, program_size);
>>> +
>>> + size_t blob_offset = blob_reserve_bytes(&binary, program_size);
>>> + uint8_t *blob_cursor = binary.data + blob_offset;
>>> +
>>> + /* Copy program binary */
>>> + if (brw->screen->devinfo.has_llc) {
>>> + memcpy(blob_cursor, brw->cache.map + prog_offset, program_size);
>>>
>>
>> blob_overwrite_bytes
>>
>>
>>> + } else {
>>> + void *map = brw_bo_map(brw, brw->cache.bo, MAP_READ);
>>> + if (unlikely(!map)) {
>>> + _mesa_error_no_memory(__func__);
>>> + return;
>>> + }
>>> + memcpy(blob_cursor, map + prog_offset, program_size);
>>>
>>
>> blob_overwrite_bytes
>>
>>
>>> + brw_bo_unmap(brw->cache.bo);
>>> + }
>>> +
>>> + /* Write program_data to blob. */
>>> + blob_write_uint32(&binary, prog_data_size);
>>> + blob_write_bytes(&binary, prog_data, prog_data_size);
>>> +
>>> + blob_write_uint32(&binary, prog_data->nr_params);
>>> + blob_write_uint32(&binary, prog->info.num_images);
>>> +
>>> + for (unsigned i = 0; i < prog_data->nr_params; i++) {
>>> + blob_write_uint32(&binary, prog_data->param[i]);
>>>
>>
>> If we're concerned about the efficiency of this code,
>> blob_read/write_bytes might be better here. I don't know that we care that
>> much though.
>>
>>
>>> + }
>>> +
>>> + blob_write_uint32(&binary, prog_data->nr_pull_params);
>>> + for (unsigned i = 0; i < prog_data->nr_pull_params; i++) {
>>> + blob_write_uint32(&binary, prog_data->pull_param[i]);
>>> + }
>>> +
>>> + _mesa_sha1_format(buf, sha1);
>>> + if (brw->ctx._Shader->Flags & GLSL_CACHE_INFO) {
>>> + fprintf(stderr, "putting binary in cache: %s\n", buf);
>>> + }
>>> +
>>> + disk_cache_put(cache, sha1, binary.data, binary.size, NULL);
>>> +
>>> + prog->program_written_to_cache = true;
>>> + blob_finish(&binary);
>>> +}
>>> +
>>> +void
>>> +brw_disk_cache_write_program(struct brw_context *brw)
>>> +{
>>> + struct disk_cache *cache = brw->ctx.Cache;
>>> + if (cache == NULL)
>>> + return;
>>> +
>>> + struct gl_program *prog =
>>> + brw->ctx._Shader->CurrentProgram[MESA_SHADER_VERTEX];
>>> + if (prog && !prog->program_written_to_cache) {
>>> + struct brw_vs_prog_key vs_key;
>>> + brw_vs_populate_key(brw, &vs_key);
>>> + vs_key.program_string_id = 0;
>>> +
>>> + write_program_data(brw, prog, &vs_key, brw->vs.base.prog_data,
>>> + brw->vs.base.prog_data->program_size,
>>>
>>
> We don't need to pass this in explicitly
>
> + sizeof(struct brw_vs_prog_data),
>>> + brw->vs.base.prog_offset, cache,
>>> + MESA_SHADER_VERTEX);
>>> + }
>>> +
>>> + prog = brw->ctx._Shader->CurrentProgram[MESA_SHADER_FRAGMENT];
>>> + if (prog && !prog->program_written_to_cache) {
>>> + struct brw_wm_prog_key wm_key;
>>> + brw_wm_populate_key(brw, &wm_key);
>>> + wm_key.program_string_id = 0;
>>> +
>>> + write_program_data(brw, prog, &wm_key, brw->wm.base.prog_data,
>>> + brw->wm.base.prog_data->program_size,
>>>
>>
> Or here
>
>
>> + sizeof(struct brw_wm_prog_data),
>>> + brw->wm.base.prog_offset, cache,
>>> + MESA_SHADER_FRAGMENT);
>>> + }
>>> +}
>>> diff --git a/src/mesa/drivers/dri/i965/brw_state.h
>>> b/src/mesa/drivers/dri/i965/brw_state.h
>>> index 8db354cf23..6f2e0501b4 100644
>>> --- a/src/mesa/drivers/dri/i965/brw_state.h
>>> +++ b/src/mesa/drivers/dri/i965/brw_state.h
>>> @@ -131,6 +131,11 @@ void brw_upload_state_base_address(struct
>>> brw_context *brw);
>>> void gen8_write_pma_stall_bits(struct brw_context *brw,
>>> uint32_t pma_stall_bits);
>>>
>>> +/* brw_disk_cache.c */
>>> +bool brw_disk_cache_upload_program(struct brw_context *brw,
>>> + gl_shader_stage stage);
>>> +void brw_disk_cache_write_program(struct brw_context *brw);
>>> +
>>> /**********************************************************
>>> *************
>>> * brw_state.c
>>> */
>>> diff --git a/src/mesa/drivers/dri/i965/meson.build
>>> b/src/mesa/drivers/dri/i965/meson.build
>>> index 144a254bd6..09e1179adc 100644
>>> --- a/src/mesa/drivers/dri/i965/meson.build
>>> +++ b/src/mesa/drivers/dri/i965/meson.build
>>> @@ -34,6 +34,7 @@ files_i965 = files(
>>> 'brw_cs.h',
>>> 'brw_curbe.c',
>>> 'brw_defines.h',
>>> + 'brw_disk_cache.c',
>>> 'brw_draw.c',
>>> 'brw_draw.h',
>>> 'brw_draw_upload.c',
>>> --
>>> 2.15.0.rc0
>>>
>>> _______________________________________________
>>> mesa-dev mailing list
>>> mesa-dev at lists.freedesktop.org
>>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>>>
>>
>>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20171020/4a4b68d1/attachment-0001.html>
More information about the mesa-dev
mailing list