[Mesa-dev] [PATCH 25/25] radeonsi: implement binary shaders & shader cache in memory

Nicolai Hähnle nhaehnle at gmail.com
Tue Feb 16 17:02:00 UTC 2016


On 15.02.2016 18:59, Marek Olšák wrote:
> From: Marek Olšák <marek.olsak at amd.com>
>
> ---
>   src/gallium/drivers/radeonsi/si_pipe.c          |   5 +-
>   src/gallium/drivers/radeonsi/si_pipe.h          |  16 ++
>   src/gallium/drivers/radeonsi/si_shader.h        |   4 +-
>   src/gallium/drivers/radeonsi/si_state.h         |   2 +
>   src/gallium/drivers/radeonsi/si_state_shaders.c | 234 +++++++++++++++++++++++-
>   5 files changed, 254 insertions(+), 7 deletions(-)
>
> diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
> index 75d4775..a576237 100644
> --- a/src/gallium/drivers/radeonsi/si_pipe.c
> +++ b/src/gallium/drivers/radeonsi/si_pipe.c
> @@ -563,7 +563,7 @@ static void si_destroy_screen(struct pipe_screen* pscreen)
>   		}
>   	}
>   	pipe_mutex_destroy(sscreen->shader_parts_mutex);
> -
> +	si_destroy_shader_cache(sscreen);
>   	r600_destroy_common_screen(&sscreen->b);
>   }
>
> @@ -611,7 +611,8 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws)
>   	sscreen->b.b.resource_create = r600_resource_create_common;
>
>   	if (!r600_common_screen_init(&sscreen->b, ws) ||
> -	    !si_init_gs_info(sscreen)) {
> +	    !si_init_gs_info(sscreen) ||
> +	    !si_init_shader_cache(sscreen)) {
>   		FREE(sscreen);
>   		return NULL;
>   	}
> diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
> index 1ac7bc4..ef860a5 100644
> --- a/src/gallium/drivers/radeonsi/si_pipe.h
> +++ b/src/gallium/drivers/radeonsi/si_pipe.h
> @@ -80,6 +80,7 @@
>   #define SI_MAX_BORDER_COLORS	4096
>
>   struct si_compute;
> +struct hash_table;
>
>   struct si_screen {
>   	struct r600_common_screen	b;
> @@ -94,6 +95,21 @@ struct si_screen {
>   	struct si_shader_part		*tcs_epilogs;
>   	struct si_shader_part		*ps_prologs;
>   	struct si_shader_part		*ps_epilogs;
> +
> +	/* Shader cache in memory.
> +	 *
> +	 * Design & limitations:
> +	 * - The shader cache is per screen (= per process), never saved to
> +	 *   disk, and skips redundant shader compilations from TGSI to bytecode.
> +	 * - It can only be used with one-variant-per-shader support, in which
> +	 *   case only the main (typically middle) part of shaders is cached.
> +	 * - Only VS, TCS, TES, PS are cached, out of which only the hw VS
> +	 *   variants of VS and TES are cached, so LS and ES aren't.
> +	 * - GS and CS aren't cached, but it's certainly possible to cache
> +	 *   those as well.
> +	 */
> +	pipe_mutex			shader_cache_mutex;
> +	struct hash_table		*shader_cache;
>   };
>
>   struct si_blend_color {
> diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
> index 48e048d..7e46871 100644
> --- a/src/gallium/drivers/radeonsi/si_shader.h
> +++ b/src/gallium/drivers/radeonsi/si_shader.h
> @@ -362,8 +362,10 @@ struct si_shader {
>   	struct r600_resource		*bo;
>   	struct r600_resource		*scratch_bo;
>   	union si_shader_key		key;
> -	struct radeon_shader_binary	binary;
>   	bool				is_binary_shared;
> +
> +	/* The following data is all that's needed for binary shaders. */
> +	struct radeon_shader_binary	binary;
>   	struct si_shader_config		config;
>   	struct si_shader_info		info;
>   };
> diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
> index f64c4d4..40792cb 100644
> --- a/src/gallium/drivers/radeonsi/si_state.h
> +++ b/src/gallium/drivers/radeonsi/si_state.h
> @@ -280,6 +280,8 @@ si_create_sampler_view_custom(struct pipe_context *ctx,
>   /* si_state_shader.c */
>   bool si_update_shaders(struct si_context *sctx);
>   void si_init_shader_functions(struct si_context *sctx);
> +bool si_init_shader_cache(struct si_screen *sscreen);
> +void si_destroy_shader_cache(struct si_screen *sscreen);
>
>   /* si_state_draw.c */
>   void si_emit_cache_flush(struct si_context *sctx, struct r600_atom *atom);
> diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
> index c62cbb7..bc3e5be 100644
> --- a/src/gallium/drivers/radeonsi/si_state_shaders.c
> +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
> @@ -32,10 +32,217 @@
>
>   #include "tgsi/tgsi_parse.h"
>   #include "tgsi/tgsi_ureg.h"
> +#include "util/hash_table.h"
> +#include "util/u_hash.h"
>   #include "util/u_memory.h"
>   #include "util/u_prim.h"
>   #include "util/u_simple_shaders.h"
>
> +/* SHADER_CACHE */
> +
> +/**
> + * Return the TGSI binary in a buffer. The first 4 bytes contain its size as
> + * integer.
> + */
> +static void *si_get_tgsi_binary(struct si_shader_selector *sel)
> +{
> +	unsigned tgsi_size = tgsi_num_tokens(sel->tokens) *
> +			     sizeof(struct tgsi_token);
> +	unsigned size = 4 + tgsi_size + sizeof(sel->so);
> +	char *result = (char*)MALLOC(size);
> +
> +	if (!result)
> +		return NULL;
> +
> +	*((uint32_t*)result) = size;
> +	memcpy(result + 4, sel->tokens, tgsi_size);
> +	memcpy(result + 4 + tgsi_size, &sel->so, sizeof(sel->so));
> +	return result;
> +}
> +
> +/** Copy "data" to "ptr" and return the next dword following copied data. */
> +static uint32_t *write_data(uint32_t *ptr, const void *data, unsigned size)
> +{
> +	memcpy(ptr, data, size);
> +	ptr += DIV_ROUND_UP(size, 4);
> +	return ptr;
> +}
> +
> +/** Read data from "ptr". Return the next dword following the data. */
> +static uint32_t *read_data(uint32_t *ptr, void *data, unsigned size)
> +{
> +	memcpy(data, ptr, size);
> +	ptr += DIV_ROUND_UP(size, 4);
> +	return ptr;
> +}
> +
> +/**
> + * Write the size as uint followed by the data. Return the next dword
> + * following the copied data.
> + */
> +static uint32_t *write_chunk(uint32_t *ptr, const void *data, unsigned size)
> +{
> +	*ptr++ = size;
> +	return write_data(ptr, data, size);
> +}
> +
> +/**
> + * Read the size as uint followed by the data. Return both via parameters.
> + * Return the next dword following the data.
> + */
> +static uint32_t *read_chunk(uint32_t *ptr, void **data, unsigned *size)
> +{
> +	*size = *ptr++;
> +	assert(*data == NULL);
> +	*data = malloc(*size);
> +	return read_data(ptr, *data, *size);
> +}
> +
> +/**
> + * Return the shader binary in a buffer. The first 4 bytes contain its size
> + * as integer.
> + */
> +static void *si_get_shader_binary(struct si_shader *shader)
> +{
> +	/* There is always a size of data followed by the data itself. */
> +	unsigned relocs_size = shader->binary.reloc_count *
> +			       sizeof(shader->binary.relocs[0]);
> +	unsigned disasm_size = strlen(shader->binary.disasm_string) + 1;
> +	unsigned size =
> +		4 + /* total size */
> +		4 + /* CRC32 of the data below */
> +		align(sizeof(shader->config), 4) +
> +		align(sizeof(shader->info), 4) +
> +		4 + align(shader->binary.code_size, 4) +
> +		4 + align(shader->binary.rodata_size, 4) +
> +		4 + align(relocs_size, 4) +
> +		4 + align(disasm_size, 4);
> +	void *buffer = CALLOC(1, size);
> +	uint32_t *ptr = (uint32_t*)buffer;
> +
> +	if (!buffer)
> +		return NULL;
> +
> +	*ptr++ = size;
> +	ptr++; /* CRC32 is calculated at the end. */
> +
> +	ptr = write_data(ptr, &shader->config, sizeof(shader->config));
> +	ptr = write_data(ptr, &shader->info, sizeof(shader->info));
> +	ptr = write_chunk(ptr, shader->binary.code, shader->binary.code_size);
> +	ptr = write_chunk(ptr, shader->binary.rodata, shader->binary.rodata_size);
> +	ptr = write_chunk(ptr, shader->binary.relocs, relocs_size);
> +	ptr = write_chunk(ptr, shader->binary.disasm_string, disasm_size);

Suggestion: assert((char *)ptr - (char *)buffer == size);

> +
> +	/* Compute CRC32. */
> +	ptr = (uint32_t*)buffer;
> +	ptr++;
> +	*ptr = util_hash_crc32(ptr + 1, size - 8);
> +
> +	return buffer;
> +}
> +
> +static bool si_load_shader_binary(struct si_shader *shader, void *binary)
> +{
> +	uint32_t *ptr = (uint32_t*)binary;
> +	uint32_t size = *ptr++;
> +	uint32_t crc32 = *ptr++;
> +	unsigned chunk_size;
> +
> +	if (util_hash_crc32(ptr, size - 8) != crc32) {
> +		fprintf(stderr, "radeonsi: binary shader has invalid CRC32\n");
> +		return false;
> +	}
> +
> +	ptr = read_data(ptr, &shader->config, sizeof(shader->config));
> +	ptr = read_data(ptr, &shader->info, sizeof(shader->info));
> +	ptr = read_chunk(ptr, (void**)&shader->binary.code,
> +			 &shader->binary.code_size);
> +	ptr = read_chunk(ptr, (void**)&shader->binary.rodata,
> +			 &shader->binary.rodata_size);
> +	ptr = read_chunk(ptr, (void**)&shader->binary.relocs, &chunk_size);
> +	shader->binary.reloc_count = chunk_size / sizeof(shader->binary.relocs[0]);
> +	ptr = read_chunk(ptr, (void**)&shader->binary.disasm_string, &chunk_size);
> +
> +	return true;
> +}
> +
> +/**
> + * Insert a shader into the cache. It's assumed the shader is not in the cache.
> + * Use si_shader_cache_load_shader before calling this.
> + *
> + * Returns true if the tgsi_binary should be deleted after this.
> + */
> +static bool si_shader_cache_insert_shader(struct si_screen *sscreen,
> +					  void *tgsi_binary,
> +					  struct si_shader *shader)
> +{
> +	struct hash_entry *entry;
> +	void *hw_binary = si_get_shader_binary(shader);
> +
> +	if (!hw_binary)
> +		return true;
> +
> +	entry = _mesa_hash_table_insert(sscreen->shader_cache, tgsi_binary,
> +					hw_binary);
> +	return entry->key != tgsi_binary;

_mesa_hash_table_insert can return NULL, indicating that it didn't 
insert anything.

If it is successful, entry->key == tgsi_binary will always hold, so the 
second check is unnecessary.

Nicolai

> +}
> +
> +static bool si_shader_cache_load_shader(struct si_screen *sscreen,
> +					void *tgsi_binary,
> +				        struct si_shader *shader)
> +{
> +	struct hash_entry *entry =
> +		_mesa_hash_table_search(sscreen->shader_cache, tgsi_binary);
> +	if (!entry)
> +		return false;
> +
> +	return si_load_shader_binary(shader, entry->data);
> +}
> +
> +static uint32_t si_shader_cache_key_hash(const void *key)
> +{
> +	/* The first dword is the key size. */
> +	return util_hash_crc32(key, *(uint32_t*)key);
> +}
> +
> +static bool si_shader_cache_key_equals(const void *a, const void *b)
> +{
> +	uint32_t *keya = (uint32_t*)a;
> +	uint32_t *keyb = (uint32_t*)b;
> +
> +	/* The first dword is the key size. */
> +	if (*keya != *keyb)
> +		return false;
> +
> +	return memcmp(keya, keyb, *keya) == 0;
> +}
> +
> +static void si_destroy_shader_cache_entry(struct hash_entry *entry)
> +{
> +	FREE((void*)entry->key);
> +	FREE(entry->data);
> +}
> +
> +bool si_init_shader_cache(struct si_screen *sscreen)
> +{
> +	pipe_mutex_init(sscreen->shader_cache_mutex);
> +	sscreen->shader_cache =
> +		_mesa_hash_table_create(NULL,
> +					si_shader_cache_key_hash,
> +					si_shader_cache_key_equals);
> +	return sscreen->shader_cache != NULL;
> +}
> +
> +void si_destroy_shader_cache(struct si_screen *sscreen)
> +{
> +	if (sscreen->shader_cache)
> +		_mesa_hash_table_destroy(sscreen->shader_cache,
> +					 si_destroy_shader_cache_entry);
> +	pipe_mutex_destroy(sscreen->shader_cache_mutex);
> +}
> +
> +/* SHADER STATES */
> +
>   static void si_set_tesseval_regs(struct si_shader *shader,
>   				 struct si_pm4_state *pm4)
>   {
> @@ -936,17 +1143,36 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
>   	if (sel->type != PIPE_SHADER_GEOMETRY &&
>   	    !sscreen->use_monolithic_shaders) {
>   		struct si_shader *shader = CALLOC_STRUCT(si_shader);
> +		void *tgsi_binary;
>
>   		if (!shader)
>   			goto error;
>
>   		shader->selector = sel;
>
> -		if (si_compile_tgsi_shader(sscreen, sctx->tm, shader, false,
> -					   &sctx->b.debug) != 0) {
> -			FREE(shader);
> -			goto error;
> +		tgsi_binary = si_get_tgsi_binary(sel);
> +
> +		/* Try to load the shader from the shader cache. */
> +		pipe_mutex_lock(sscreen->shader_cache_mutex);
> +
> +		if (tgsi_binary &&
> +		    si_shader_cache_load_shader(sscreen, tgsi_binary, shader)) {
> +			FREE(tgsi_binary);
> +		} else {
> +			/* Compile the shader if it hasn't been loaded from the cache. */
> +			if (si_compile_tgsi_shader(sscreen, sctx->tm, shader, false,
> +						   &sctx->b.debug) != 0) {
> +				FREE(shader);
> +				FREE(tgsi_binary);
> +				pipe_mutex_unlock(sscreen->shader_cache_mutex);
> +				goto error;
> +			}
> +
> +			if (si_shader_cache_insert_shader(sscreen, tgsi_binary, shader))
> +				FREE(tgsi_binary);
>   		}
> +		pipe_mutex_unlock(sscreen->shader_cache_mutex);
> +
>   		sel->main_shader_part = shader;
>   	}
>
>


More information about the mesa-dev mailing list