[Mesa-dev] [PATCH 25/25] radeonsi: implement binary shaders & shader cache in memory
Nicolai Hähnle
nhaehnle at gmail.com
Fri Feb 19 01:47:31 UTC 2016
Reviewed-by: Nicolai Hähnle <nicolai.haehnle at amd.com>
On 18.02.2016 16:10, Marek Olšák wrote:
> A new version of the patch is attached. Please review.
>
> Marek
>
> On Tue, Feb 16, 2016 at 6:02 PM, Nicolai Hähnle <nhaehnle at gmail.com> wrote:
>> On 15.02.2016 18:59, Marek Olšák wrote:
>>>
>>> From: Marek Olšák <marek.olsak at amd.com>
>>>
>>> ---
>>> src/gallium/drivers/radeonsi/si_pipe.c | 5 +-
>>> src/gallium/drivers/radeonsi/si_pipe.h | 16 ++
>>> src/gallium/drivers/radeonsi/si_shader.h | 4 +-
>>> src/gallium/drivers/radeonsi/si_state.h | 2 +
>>> src/gallium/drivers/radeonsi/si_state_shaders.c | 234
>>> +++++++++++++++++++++++-
>>> 5 files changed, 254 insertions(+), 7 deletions(-)
>>>
>>> diff --git a/src/gallium/drivers/radeonsi/si_pipe.c
>>> b/src/gallium/drivers/radeonsi/si_pipe.c
>>> index 75d4775..a576237 100644
>>> --- a/src/gallium/drivers/radeonsi/si_pipe.c
>>> +++ b/src/gallium/drivers/radeonsi/si_pipe.c
>>> @@ -563,7 +563,7 @@ static void si_destroy_screen(struct pipe_screen*
>>> pscreen)
>>> }
>>> }
>>> pipe_mutex_destroy(sscreen->shader_parts_mutex);
>>> -
>>> + si_destroy_shader_cache(sscreen);
>>> r600_destroy_common_screen(&sscreen->b);
>>> }
>>>
>>> @@ -611,7 +611,8 @@ struct pipe_screen *radeonsi_screen_create(struct
>>> radeon_winsys *ws)
>>> sscreen->b.b.resource_create = r600_resource_create_common;
>>>
>>> if (!r600_common_screen_init(&sscreen->b, ws) ||
>>> - !si_init_gs_info(sscreen)) {
>>> + !si_init_gs_info(sscreen) ||
>>> + !si_init_shader_cache(sscreen)) {
>>> FREE(sscreen);
>>> return NULL;
>>> }
>>> diff --git a/src/gallium/drivers/radeonsi/si_pipe.h
>>> b/src/gallium/drivers/radeonsi/si_pipe.h
>>> index 1ac7bc4..ef860a5 100644
>>> --- a/src/gallium/drivers/radeonsi/si_pipe.h
>>> +++ b/src/gallium/drivers/radeonsi/si_pipe.h
>>> @@ -80,6 +80,7 @@
>>> #define SI_MAX_BORDER_COLORS 4096
>>>
>>> struct si_compute;
>>> +struct hash_table;
>>>
>>> struct si_screen {
>>> struct r600_common_screen b;
>>> @@ -94,6 +95,21 @@ struct si_screen {
>>> struct si_shader_part *tcs_epilogs;
>>> struct si_shader_part *ps_prologs;
>>> struct si_shader_part *ps_epilogs;
>>> +
>>> + /* Shader cache in memory.
>>> + *
>>> + * Design & limitations:
>>> + * - The shader cache is per screen (= per process), never saved
>>> to
>>> + * disk, and skips redundant shader compilations from TGSI to
>>> bytecode.
>>> + * - It can only be used with one-variant-per-shader support, in
>>> which
>>> + * case only the main (typically middle) part of shaders is
>>> cached.
>>> + * - Only VS, TCS, TES, PS are cached, out of which only the hw VS
>>> + * variants of VS and TES are cached, so LS and ES aren't.
>>> + * - GS and CS aren't cached, but it's certainly possible to cache
>>> + * those as well.
>>> + */
>>> + pipe_mutex shader_cache_mutex;
>>> + struct hash_table *shader_cache;
>>> };
>>>
>>> struct si_blend_color {
>>> diff --git a/src/gallium/drivers/radeonsi/si_shader.h
>>> b/src/gallium/drivers/radeonsi/si_shader.h
>>> index 48e048d..7e46871 100644
>>> --- a/src/gallium/drivers/radeonsi/si_shader.h
>>> +++ b/src/gallium/drivers/radeonsi/si_shader.h
>>> @@ -362,8 +362,10 @@ struct si_shader {
>>> struct r600_resource *bo;
>>> struct r600_resource *scratch_bo;
>>> union si_shader_key key;
>>> - struct radeon_shader_binary binary;
>>> bool is_binary_shared;
>>> +
>>> + /* The following data is all that's needed for binary shaders. */
>>> + struct radeon_shader_binary binary;
>>> struct si_shader_config config;
>>> struct si_shader_info info;
>>> };
>>> diff --git a/src/gallium/drivers/radeonsi/si_state.h
>>> b/src/gallium/drivers/radeonsi/si_state.h
>>> index f64c4d4..40792cb 100644
>>> --- a/src/gallium/drivers/radeonsi/si_state.h
>>> +++ b/src/gallium/drivers/radeonsi/si_state.h
>>> @@ -280,6 +280,8 @@ si_create_sampler_view_custom(struct pipe_context
>>> *ctx,
>>> /* si_state_shader.c */
>>> bool si_update_shaders(struct si_context *sctx);
>>> void si_init_shader_functions(struct si_context *sctx);
>>> +bool si_init_shader_cache(struct si_screen *sscreen);
>>> +void si_destroy_shader_cache(struct si_screen *sscreen);
>>>
>>> /* si_state_draw.c */
>>> void si_emit_cache_flush(struct si_context *sctx, struct r600_atom
>>> *atom);
>>> diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c
>>> b/src/gallium/drivers/radeonsi/si_state_shaders.c
>>> index c62cbb7..bc3e5be 100644
>>> --- a/src/gallium/drivers/radeonsi/si_state_shaders.c
>>> +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
>>> @@ -32,10 +32,217 @@
>>>
>>> #include "tgsi/tgsi_parse.h"
>>> #include "tgsi/tgsi_ureg.h"
>>> +#include "util/hash_table.h"
>>> +#include "util/u_hash.h"
>>> #include "util/u_memory.h"
>>> #include "util/u_prim.h"
>>> #include "util/u_simple_shaders.h"
>>>
>>> +/* SHADER_CACHE */
>>> +
>>> +/**
>>> + * Return the TGSI binary in a buffer. The first 4 bytes contain its size
>>> as
>>> + * integer.
>>> + */
>>> +static void *si_get_tgsi_binary(struct si_shader_selector *sel)
>>> +{
>>> + unsigned tgsi_size = tgsi_num_tokens(sel->tokens) *
>>> + sizeof(struct tgsi_token);
>>> + unsigned size = 4 + tgsi_size + sizeof(sel->so);
>>> + char *result = (char*)MALLOC(size);
>>> +
>>> + if (!result)
>>> + return NULL;
>>> +
>>> + *((uint32_t*)result) = size;
>>> + memcpy(result + 4, sel->tokens, tgsi_size);
>>> + memcpy(result + 4 + tgsi_size, &sel->so, sizeof(sel->so));
>>> + return result;
>>> +}
>>> +
>>> +/** Copy "data" to "ptr" and return the next dword following copied data.
>>> */
>>> +static uint32_t *write_data(uint32_t *ptr, const void *data, unsigned
>>> size)
>>> +{
>>> + memcpy(ptr, data, size);
>>> + ptr += DIV_ROUND_UP(size, 4);
>>> + return ptr;
>>> +}
>>> +
>>> +/** Read data from "ptr". Return the next dword following the data. */
>>> +static uint32_t *read_data(uint32_t *ptr, void *data, unsigned size)
>>> +{
>>> + memcpy(data, ptr, size);
>>> + ptr += DIV_ROUND_UP(size, 4);
>>> + return ptr;
>>> +}
>>> +
>>> +/**
>>> + * Write the size as uint followed by the data. Return the next dword
>>> + * following the copied data.
>>> + */
>>> +static uint32_t *write_chunk(uint32_t *ptr, const void *data, unsigned
>>> size)
>>> +{
>>> + *ptr++ = size;
>>> + return write_data(ptr, data, size);
>>> +}
>>> +
>>> +/**
>>> + * Read the size as uint followed by the data. Return both via
>>> parameters.
>>> + * Return the next dword following the data.
>>> + */
>>> +static uint32_t *read_chunk(uint32_t *ptr, void **data, unsigned *size)
>>> +{
>>> + *size = *ptr++;
>>> + assert(*data == NULL);
>>> + *data = malloc(*size);
>>> + return read_data(ptr, *data, *size);
>>> +}
>>> +
>>> +/**
>>> + * Return the shader binary in a buffer. The first 4 bytes contain its
>>> size
>>> + * as integer.
>>> + */
>>> +static void *si_get_shader_binary(struct si_shader *shader)
>>> +{
>>> + /* There is always a size of data followed by the data itself. */
>>> + unsigned relocs_size = shader->binary.reloc_count *
>>> + sizeof(shader->binary.relocs[0]);
>>> + unsigned disasm_size = strlen(shader->binary.disasm_string) + 1;
>>> + unsigned size =
>>> + 4 + /* total size */
>>> + 4 + /* CRC32 of the data below */
>>> + align(sizeof(shader->config), 4) +
>>> + align(sizeof(shader->info), 4) +
>>> + 4 + align(shader->binary.code_size, 4) +
>>> + 4 + align(shader->binary.rodata_size, 4) +
>>> + 4 + align(relocs_size, 4) +
>>> + 4 + align(disasm_size, 4);
>>> + void *buffer = CALLOC(1, size);
>>> + uint32_t *ptr = (uint32_t*)buffer;
>>> +
>>> + if (!buffer)
>>> + return NULL;
>>> +
>>> + *ptr++ = size;
>>> + ptr++; /* CRC32 is calculated at the end. */
>>> +
>>> + ptr = write_data(ptr, &shader->config, sizeof(shader->config));
>>> + ptr = write_data(ptr, &shader->info, sizeof(shader->info));
>>> + ptr = write_chunk(ptr, shader->binary.code,
>>> shader->binary.code_size);
>>> + ptr = write_chunk(ptr, shader->binary.rodata,
>>> shader->binary.rodata_size);
>>> + ptr = write_chunk(ptr, shader->binary.relocs, relocs_size);
>>> + ptr = write_chunk(ptr, shader->binary.disasm_string, disasm_size);
>>
>>
>> Suggestion: add assert((char *)ptr - (char *)buffer == size); here to
>> verify that the data written exactly matches the computed buffer size.
>>
>>
>>> +
>>> + /* Compute CRC32. */
>>> + ptr = (uint32_t*)buffer;
>>> + ptr++;
>>> + *ptr = util_hash_crc32(ptr + 1, size - 8);
>>> +
>>> + return buffer;
>>> +}
>>> +
>>> +static bool si_load_shader_binary(struct si_shader *shader, void *binary)
>>> +{
>>> + uint32_t *ptr = (uint32_t*)binary;
>>> + uint32_t size = *ptr++;
>>> + uint32_t crc32 = *ptr++;
>>> + unsigned chunk_size;
>>> +
>>> + if (util_hash_crc32(ptr, size - 8) != crc32) {
>>> + fprintf(stderr, "radeonsi: binary shader has invalid
>>> CRC32\n");
>>> + return false;
>>> + }
>>> +
>>> + ptr = read_data(ptr, &shader->config, sizeof(shader->config));
>>> + ptr = read_data(ptr, &shader->info, sizeof(shader->info));
>>> + ptr = read_chunk(ptr, (void**)&shader->binary.code,
>>> + &shader->binary.code_size);
>>> + ptr = read_chunk(ptr, (void**)&shader->binary.rodata,
>>> + &shader->binary.rodata_size);
>>> + ptr = read_chunk(ptr, (void**)&shader->binary.relocs,
>>> &chunk_size);
>>> + shader->binary.reloc_count = chunk_size /
>>> sizeof(shader->binary.relocs[0]);
>>> + ptr = read_chunk(ptr, (void**)&shader->binary.disasm_string,
>>> &chunk_size);
>>> +
>>> + return true;
>>> +}
>>> +
>>> +/**
>>> + * Insert a shader into the cache. It's assumed the shader is not in the
>>> cache.
>>> + * Use si_shader_cache_load_shader before calling this.
>>> + *
>>> + * Returns true if the tgsi_binary should be deleted after this.
>>> + */
>>> +static bool si_shader_cache_insert_shader(struct si_screen *sscreen,
>>> + void *tgsi_binary,
>>> + struct si_shader *shader)
>>> +{
>>> + struct hash_entry *entry;
>>> + void *hw_binary = si_get_shader_binary(shader);
>>> +
>>> + if (!hw_binary)
>>> + return true;
>>> +
>>> + entry = _mesa_hash_table_insert(sscreen->shader_cache,
>>> tgsi_binary,
>>> + hw_binary);
>>> + return entry->key != tgsi_binary;
>>
>>
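>> _mesa_hash_table_insert can return NULL, indicating that it didn't insert
>> anything, so entry must be checked for NULL before entry->key is
>> dereferenced.
>>
>> If the insert is successful, entry->key == tgsi_binary will always hold, so
>> comparing entry->key against tgsi_binary is unnecessary; the NULL check
>> alone is enough.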
>>
>> Nicolai
>>
>>
>>> +}
>>> +
>>> +static bool si_shader_cache_load_shader(struct si_screen *sscreen,
>>> + void *tgsi_binary,
>>> + struct si_shader *shader)
>>> +{
>>> + struct hash_entry *entry =
>>> + _mesa_hash_table_search(sscreen->shader_cache,
>>> tgsi_binary);
>>> + if (!entry)
>>> + return false;
>>> +
>>> + return si_load_shader_binary(shader, entry->data);
>>> +}
>>> +
>>> +static uint32_t si_shader_cache_key_hash(const void *key)
>>> +{
>>> + /* The first dword is the key size. */
>>> + return util_hash_crc32(key, *(uint32_t*)key);
>>> +}
>>> +
>>> +static bool si_shader_cache_key_equals(const void *a, const void *b)
>>> +{
>>> + uint32_t *keya = (uint32_t*)a;
>>> + uint32_t *keyb = (uint32_t*)b;
>>> +
>>> + /* The first dword is the key size. */
>>> + if (*keya != *keyb)
>>> + return false;
>>> +
>>> + return memcmp(keya, keyb, *keya) == 0;
>>> +}
>>> +
>>> +static void si_destroy_shader_cache_entry(struct hash_entry *entry)
>>> +{
>>> + FREE((void*)entry->key);
>>> + FREE(entry->data);
>>> +}
>>> +
>>> +bool si_init_shader_cache(struct si_screen *sscreen)
>>> +{
>>> + pipe_mutex_init(sscreen->shader_cache_mutex);
>>> + sscreen->shader_cache =
>>> + _mesa_hash_table_create(NULL,
>>> + si_shader_cache_key_hash,
>>> + si_shader_cache_key_equals);
>>> + return sscreen->shader_cache != NULL;
>>> +}
>>> +
>>> +void si_destroy_shader_cache(struct si_screen *sscreen)
>>> +{
>>> + if (sscreen->shader_cache)
>>> + _mesa_hash_table_destroy(sscreen->shader_cache,
>>> + si_destroy_shader_cache_entry);
>>> + pipe_mutex_destroy(sscreen->shader_cache_mutex);
>>> +}
>>> +
>>> +/* SHADER STATES */
>>> +
>>> static void si_set_tesseval_regs(struct si_shader *shader,
>>> struct si_pm4_state *pm4)
>>> {
>>> @@ -936,17 +1143,36 @@ static void *si_create_shader_selector(struct
>>> pipe_context *ctx,
>>> if (sel->type != PIPE_SHADER_GEOMETRY &&
>>> !sscreen->use_monolithic_shaders) {
>>> struct si_shader *shader = CALLOC_STRUCT(si_shader);
>>> + void *tgsi_binary;
>>>
>>> if (!shader)
>>> goto error;
>>>
>>> shader->selector = sel;
>>>
>>> - if (si_compile_tgsi_shader(sscreen, sctx->tm, shader,
>>> false,
>>> - &sctx->b.debug) != 0) {
>>> - FREE(shader);
>>> - goto error;
>>> + tgsi_binary = si_get_tgsi_binary(sel);
>>> +
>>> + /* Try to load the shader from the shader cache. */
>>> + pipe_mutex_lock(sscreen->shader_cache_mutex);
>>> +
>>> + if (tgsi_binary &&
>>> + si_shader_cache_load_shader(sscreen, tgsi_binary,
>>> shader)) {
>>> + FREE(tgsi_binary);
>>> + } else {
>>> + /* Compile the shader if it hasn't been loaded
>>> from the cache. */
>>> + if (si_compile_tgsi_shader(sscreen, sctx->tm,
>>> shader, false,
>>> + &sctx->b.debug) != 0) {
>>> + FREE(shader);
>>> + FREE(tgsi_binary);
>>> +
>>> pipe_mutex_unlock(sscreen->shader_cache_mutex);
>>> + goto error;
>>> + }
>>> +
>>> + if (si_shader_cache_insert_shader(sscreen,
>>> tgsi_binary, shader))
>>> + FREE(tgsi_binary);
>>> }
>>> + pipe_mutex_unlock(sscreen->shader_cache_mutex);
>>> +
>>> sel->main_shader_part = shader;
>>> }
>>>
>>>
>>
More information about the mesa-dev mailing list