<div dir="ltr"><div class="gmail_quote"><div dir="ltr" class="gmail_attr">On Wed, May 8, 2019 at 1:52 AM Marek Olšák &lt;<a href="mailto:maraeo@gmail.com">maraeo@gmail.com</a>&gt; wrote:<br></div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex"><div dir="ltr"><div class="gmail_quote"><div dir="ltr" class="gmail_attr">On Fri, May 3, 2019 at 7:19 AM Nicolai Hähnle &lt;<a href="mailto:nhaehnle@gmail.com" target="_blank">nhaehnle@gmail.com</a>&gt; wrote:<br></div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">From: Nicolai Hähnle &lt;<a href="mailto:nicolai.haehnle@amd.com" target="_blank">nicolai.haehnle@amd.com</a>&gt;<br>
<br>
---<br>
src/amd/common/ac_binary.c | 2 +<br>
src/gallium/drivers/radeonsi/si_compute.c | 14 +--<br>
src/gallium/drivers/radeonsi/si_shader.c | 112 +++-------------------<br>
src/gallium/drivers/radeonsi/si_shader.h | 25 +----<br>
4 files changed, 27 insertions(+), 126 deletions(-)<br>
<br>
diff --git a/src/amd/common/ac_binary.c b/src/amd/common/ac_binary.c<br>
index 44251886b5f..d0ca55e0e0d 100644<br>
--- a/src/amd/common/ac_binary.c<br>
+++ b/src/amd/common/ac_binary.c<br>
@@ -218,26 +218,28 @@ void ac_parse_shader_binary_config(const char *data, size_t nbytes,<br>
unsigned value = util_le32_to_cpu(*(uint32_t*)(data + i + 4));<br>
switch (reg) {<br>
case R_00B028_SPI_SHADER_PGM_RSRC1_PS:<br>
case R_00B128_SPI_SHADER_PGM_RSRC1_VS:<br>
case R_00B228_SPI_SHADER_PGM_RSRC1_GS:<br>
case R_00B848_COMPUTE_PGM_RSRC1:<br>
case R_00B428_SPI_SHADER_PGM_RSRC1_HS:<br>
conf->num_sgprs = MAX2(conf->num_sgprs, (G_00B028_SGPRS(value) + 1) * 8);<br>
conf->num_vgprs = MAX2(conf->num_vgprs, (G_00B028_VGPRS(value) + 1) * 4);<br>
conf->float_mode = G_00B028_FLOAT_MODE(value);<br>
+ conf->rsrc1 = value;<br>
break;<br>
case R_00B02C_SPI_SHADER_PGM_RSRC2_PS:<br>
conf->lds_size = MAX2(conf->lds_size, G_00B02C_EXTRA_LDS_SIZE(value));<br>
break;<br>
case R_00B84C_COMPUTE_PGM_RSRC2:<br>
conf->lds_size = MAX2(conf->lds_size, G_00B84C_LDS_SIZE(value));<br>
+ conf->rsrc2 = value;<br>
break;<br>
case R_0286CC_SPI_PS_INPUT_ENA:<br>
conf->spi_ps_input_ena = value;<br>
break;<br>
case R_0286D0_SPI_PS_INPUT_ADDR:<br>
conf->spi_ps_input_addr = value;<br>
break;<br>
case R_0286E8_SPI_TMPRING_SIZE:<br>
case R_00B860_COMPUTE_TMPRING_SIZE:<br>
/* WAVESIZE is in units of 256 dwords. */<br>
diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c<br>
index 541d7e6f118..02d7bac406a 100644<br>
--- a/src/gallium/drivers/radeonsi/si_compute.c<br>
+++ b/src/gallium/drivers/radeonsi/si_compute.c<br>
@@ -59,21 +59,21 @@ static const amd_kernel_code_t *si_compute_get_code_object(<br>
uint64_t symbol_offset)<br>
{<br>
if (!program->use_code_object_v2) {<br>
return NULL;<br>
}<br>
return (const amd_kernel_code_t*)<br>
(program->shader.binary.code + symbol_offset);<br>
}<br>
<br>
static void code_object_to_config(const amd_kernel_code_t *code_object,<br>
- struct si_shader_config *out_config) {<br>
+ struct ac_shader_config *out_config) {<br>
<br>
uint32_t rsrc1 = code_object->compute_pgm_resource_registers;<br>
uint32_t rsrc2 = code_object->compute_pgm_resource_registers >> 32;<br>
out_config->num_sgprs = code_object->wavefront_sgpr_count;<br>
out_config->num_vgprs = code_object->workitem_vgpr_count;<br>
out_config->float_mode = G_00B028_FLOAT_MODE(rsrc1);<br>
out_config->rsrc1 = rsrc1;<br>
out_config->lds_size = MAX2(out_config->lds_size, G_00B84C_LDS_SIZE(rsrc2));<br>
out_config->rsrc2 = rsrc2;<br>
out_config->scratch_bytes_per_wave =<br>
@@ -241,22 +241,22 @@ static void *si_create_compute_state(<br>
const amd_kernel_code_t *code_object =<br>
si_compute_get_code_object(program, 0);<br>
code_object_to_config(code_object, &program->shader.config);<br>
if (program->shader.binary.reloc_count != 0) {<br>
fprintf(stderr, "Error: %d unsupported relocations\n",<br>
program->shader.binary.reloc_count);<br>
FREE(program);<br>
return NULL;<br>
}<br>
} else {<br>
- si_shader_binary_read_config(&program->shader.binary,<br>
- &program->shader.config, 0);<br>
+ ac_shader_binary_read_config(&program->shader.binary,<br>
+ &program->shader.config, 0, false);<br>
}<br>
si_shader_dump(sctx->screen, &program->shader, &sctx->debug,<br>
PIPE_SHADER_COMPUTE, stderr, true);<br>
if (si_shader_binary_upload(sctx->screen, &program->shader) < 0) {<br>
fprintf(stderr, "LLVM failed to upload shader\n");<br>
FREE(program);<br>
return NULL;<br>
}<br>
}<br>
<br>
@@ -362,21 +362,21 @@ static void si_initialize_compute(struct si_context *sctx)<br>
bc_va >> 8);<br>
}<br>
}<br>
<br>
sctx->cs_shader_state.emitted_program = NULL;<br>
sctx->cs_shader_state.initialized = true;<br>
}<br>
<br>
static bool si_setup_compute_scratch_buffer(struct si_context *sctx,<br>
struct si_shader *shader,<br>
- struct si_shader_config *config)<br>
+ struct ac_shader_config *config)<br>
{<br>
uint64_t scratch_bo_size, scratch_needed;<br>
scratch_bo_size = 0;<br>
scratch_needed = config->scratch_bytes_per_wave * sctx->scratch_waves;<br>
if (sctx->compute_scratch_buffer)<br>
scratch_bo_size = sctx->compute_scratch_buffer->b.b.width0;<br>
<br>
if (scratch_bo_size < scratch_needed) {<br>
si_resource_reference(&sctx->compute_scratch_buffer, NULL);<br>
<br>
@@ -405,38 +405,38 @@ static bool si_setup_compute_scratch_buffer(struct si_context *sctx,<br>
return true;<br>
}<br>
<br>
static bool si_switch_compute_shader(struct si_context *sctx,<br>
struct si_compute *program,<br>
struct si_shader *shader,<br>
const amd_kernel_code_t *code_object,<br>
unsigned offset)<br>
{<br>
struct radeon_cmdbuf *cs = sctx->gfx_cs;<br>
- struct si_shader_config inline_config = {0};<br>
- struct si_shader_config *config;<br>
+ struct ac_shader_config inline_config = {0};<br>
+ struct ac_shader_config *config;<br>
uint64_t shader_va;<br>
<br>
if (sctx->cs_shader_state.emitted_program == program &&<br>
sctx->cs_shader_state.offset == offset)<br>
return true;<br>
<br>
if (program->ir_type != PIPE_SHADER_IR_NATIVE) {<br>
config = &shader->config;<br>
} else {<br>
unsigned lds_blocks;<br>
<br>
config = &inline_config;<br>
if (code_object) {<br>
code_object_to_config(code_object, config);<br>
} else {<br>
- si_shader_binary_read_config(&shader->binary, config, offset);<br>
+ ac_shader_binary_read_config(&shader->binary, config, offset, false);<br>
}<br>
<br>
lds_blocks = config->lds_size;<br>
/* XXX: We are over allocating LDS. For SI, the shader reports<br>
* LDS in blocks of 256 bytes, so if there are 4 bytes lds<br>
* allocated in the shader and 4 bytes allocated by the state<br>
* tracker, then we will set LDS_SIZE to 512 bytes rather than 256.<br>
*/<br>
if (sctx->chip_class <= SI) {<br>
lds_blocks += align(program->local_size, 256) >> 8;<br>
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c<br>
index f6d882cf583..da43447013d 100644<br>
--- a/src/gallium/drivers/radeonsi/si_shader.c<br>
+++ b/src/gallium/drivers/radeonsi/si_shader.c<br>
@@ -4962,104 +4962,20 @@ static void si_llvm_emit_polygon_stipple(struct si_shader_context *ctx,<br>
/* The stipple pattern is 32x32, each row has 32 bits. */<br>
offset = LLVMBuildMul(builder, address[1],<br>
LLVMConstInt(ctx->i32, 4, 0), "");<br>
row = buffer_load_const(ctx, desc, offset);<br>
row = ac_to_integer(&ctx->ac, row);<br>
bit = LLVMBuildLShr(builder, row, address[0], "");<br>
bit = LLVMBuildTrunc(builder, bit, ctx->i1, "");<br>
ac_build_kill_if_false(&ctx->ac, bit);<br>
}<br>
<br>
-void si_shader_binary_read_config(struct ac_shader_binary *binary,<br>
- struct si_shader_config *conf,<br>
- unsigned symbol_offset)<br>
-{<br>
- unsigned i;<br>
- const unsigned char *config =<br>
- ac_shader_binary_config_start(binary, symbol_offset);<br>
- bool really_needs_scratch = false;<br>
-<br>
- /* LLVM adds SGPR spills to the scratch size.<br>
- * Find out if we really need the scratch buffer.<br>
- */<br>
- for (i = 0; i < binary->reloc_count; i++) {<br>
- const struct ac_shader_reloc *reloc = &binary->relocs[i];<br>
-<br>
- if (!strcmp(scratch_rsrc_dword0_symbol, reloc->name) ||<br>
- !strcmp(scratch_rsrc_dword1_symbol, reloc->name)) {<br>
- really_needs_scratch = true;<br>
- break;<br>
- }<br>
- }<br>
-<br>
- /* XXX: We may be able to emit some of these values directly rather than<br>
- * extracting fields to be emitted later.<br>
- */<br>
-<br>
- for (i = 0; i < binary->config_size_per_symbol; i+= 8) {<br>
- unsigned reg = util_le32_to_cpu(*(uint32_t*)(config + i));<br>
- unsigned value = util_le32_to_cpu(*(uint32_t*)(config + i + 4));<br>
- switch (reg) {<br>
- case R_00B028_SPI_SHADER_PGM_RSRC1_PS:<br>
- case R_00B128_SPI_SHADER_PGM_RSRC1_VS:<br>
- case R_00B228_SPI_SHADER_PGM_RSRC1_GS:<br>
- case R_00B428_SPI_SHADER_PGM_RSRC1_HS:<br>
- case R_00B848_COMPUTE_PGM_RSRC1:<br>
- conf->num_sgprs = MAX2(conf->num_sgprs, (G_00B028_SGPRS(value) + 1) * 8);<br>
- conf->num_vgprs = MAX2(conf->num_vgprs, (G_00B028_VGPRS(value) + 1) * 4);<br>
- conf->float_mode = G_00B028_FLOAT_MODE(value);<br>
- conf->rsrc1 = value;<br>
- break;<br>
- case R_00B02C_SPI_SHADER_PGM_RSRC2_PS:<br>
- conf->lds_size = MAX2(conf->lds_size, G_00B02C_EXTRA_LDS_SIZE(value));<br>
- break;<br>
- case R_00B84C_COMPUTE_PGM_RSRC2:<br>
- conf->lds_size = MAX2(conf->lds_size, G_00B84C_LDS_SIZE(value));<br>
- conf->rsrc2 = value;<br>
- break;<br>
- case R_0286CC_SPI_PS_INPUT_ENA:<br>
- conf->spi_ps_input_ena = value;<br>
- break;<br>
- case R_0286D0_SPI_PS_INPUT_ADDR:<br>
- conf->spi_ps_input_addr = value;<br>
- break;<br>
- case R_0286E8_SPI_TMPRING_SIZE:<br>
- case R_00B860_COMPUTE_TMPRING_SIZE:<br>
- /* WAVESIZE is in units of 256 dwords. */<br>
- if (really_needs_scratch)<br>
- conf->scratch_bytes_per_wave =<br>
- G_00B860_WAVESIZE(value) * 256 * 4;<br>
- break;<br>
- case 0x4: /* SPILLED_SGPRS */<br>
- conf->spilled_sgprs = value;<br>
- break;<br>
- case 0x8: /* SPILLED_VGPRS */<br>
- conf->spilled_vgprs = value;<br>
- break;<br>
- default:<br>
- {<br>
- static bool printed;<br>
-<br>
- if (!printed) {<br>
- fprintf(stderr, "Warning: LLVM emitted unknown "<br>
- "config register: 0x%x\n", reg);<br>
- printed = true;<br>
- }<br>
- }<br>
- break;<br>
- }<br>
- }<br>
-<br>
- if (!conf->spi_ps_input_addr)<br>
- conf->spi_ps_input_addr = conf->spi_ps_input_ena;<br>
-}<br>
-<br>
void si_shader_apply_scratch_relocs(struct si_shader *shader,<br>
uint64_t scratch_va)<br>
{<br>
unsigned i;<br>
uint32_t scratch_rsrc_dword0 = scratch_va;<br>
uint32_t scratch_rsrc_dword1 =<br>
S_008F04_BASE_ADDRESS_HI(scratch_va >> 32);<br>
<br>
/* Enable scratch coalescing. */<br>
scratch_rsrc_dword1 |= S_008F04_SWIZZLE_ENABLE(1);<br>
@@ -5213,21 +5129,21 @@ static void si_shader_dump_disassembly(const struct ac_shader_binary *binary,<br>
fprintf(file, "@0x%x: %02x%02x%02x%02x\n", i,<br>
binary->code[i + 3], binary->code[i + 2],<br>
binary->code[i + 1], binary->code[i]);<br>
}<br>
}<br>
}<br>
<br>
static void si_calculate_max_simd_waves(struct si_shader *shader)<br>
{<br>
struct si_screen *sscreen = shader->selector->screen;<br>
- struct si_shader_config *conf = &shader->config;<br>
+ struct ac_shader_config *conf = &shader->config;<br>
unsigned num_inputs = shader->selector->info.num_inputs;<br>
unsigned lds_increment = sscreen->info.chip_class >= CIK ? 512 : 256;<br>
unsigned lds_per_wave = 0;<br>
unsigned max_simd_waves;<br>
<br>
max_simd_waves = ac_get_max_simd_waves(sscreen->info.family);<br>
<br>
/* Compute LDS usage for PS. */<br>
switch (shader->selector->type) {<br>
case PIPE_SHADER_FRAGMENT:<br>
@@ -5262,46 +5178,46 @@ static void si_calculate_max_simd_waves(struct si_shader *shader)<br>
}<br>
<br>
if (conf->num_vgprs)<br>
max_simd_waves = MIN2(max_simd_waves, 256 / conf->num_vgprs);<br>
<br>
/* LDS is 64KB per CU (4 SIMDs), which is 16KB per SIMD (usage above<br>
* 16KB makes some SIMDs unoccupied). */<br>
if (lds_per_wave)<br>
max_simd_waves = MIN2(max_simd_waves, 16384 / lds_per_wave);<br>
<br>
- conf->max_simd_waves = max_simd_waves;<br>
+ shader->max_simd_waves = max_simd_waves;<br>
}<br>
<br>
void si_shader_dump_stats_for_shader_db(const struct si_shader *shader,<br>
struct pipe_debug_callback *debug)<br>
{<br>
- const struct si_shader_config *conf = &shader->config;<br>
+ const struct ac_shader_config *conf = &shader->config;<br>
<br>
pipe_debug_message(debug, SHADER_INFO,<br>
"Shader Stats: SGPRS: %d VGPRS: %d Code Size: %d "<br>
"LDS: %d Scratch: %d Max Waves: %d Spilled SGPRs: %d "<br>
"Spilled VGPRs: %d PrivMem VGPRs: %d",<br>
conf->num_sgprs, conf->num_vgprs,<br>
si_get_shader_binary_size(shader),<br>
conf->lds_size, conf->scratch_bytes_per_wave,<br>
- conf->max_simd_waves, conf->spilled_sgprs,<br>
- conf->spilled_vgprs, conf->private_mem_vgprs);<br>
+ shader->max_simd_waves, conf->spilled_sgprs,<br>
+ conf->spilled_vgprs, shader->private_mem_vgprs);<br>
}<br>
<br>
static void si_shader_dump_stats(struct si_screen *sscreen,<br>
const struct si_shader *shader,<br>
unsigned processor,<br>
FILE *file,<br>
bool check_debug_option)<br>
{<br>
- const struct si_shader_config *conf = &shader->config;<br>
+ const struct ac_shader_config *conf = &shader->config;<br>
<br>
if (!check_debug_option ||<br>
si_can_dump_shader(sscreen, processor)) {<br>
if (processor == PIPE_SHADER_FRAGMENT) {<br>
fprintf(file, "*** SHADER CONFIG ***\n"<br>
"SPI_PS_INPUT_ADDR = 0x%04x\n"<br>
"SPI_PS_INPUT_ENA = 0x%04x\n",<br>
conf->spi_ps_input_addr, conf->spi_ps_input_ena);<br>
}<br>
<br>
@@ -5311,24 +5227,24 @@ static void si_shader_dump_stats(struct si_screen *sscreen,<br>
"Spilled SGPRs: %d\n"<br>
"Spilled VGPRs: %d\n"<br>
"Private memory VGPRs: %d\n"<br>
"Code Size: %d bytes\n"<br>
"LDS: %d blocks\n"<br>
"Scratch: %d bytes per wave\n"<br>
"Max Waves: %d\n"<br>
"********************\n\n\n",<br>
conf->num_sgprs, conf->num_vgprs,<br>
conf->spilled_sgprs, conf->spilled_vgprs,<br>
- conf->private_mem_vgprs,<br>
+ shader->private_mem_vgprs,<br>
si_get_shader_binary_size(shader),<br>
conf->lds_size, conf->scratch_bytes_per_wave,<br>
- conf->max_simd_waves);<br>
+ shader->max_simd_waves);<br>
}<br>
}<br>
<br>
const char *si_get_shader_name(const struct si_shader *shader, unsigned processor)<br>
{<br>
switch (processor) {<br>
case PIPE_SHADER_VERTEX:<br>
if (shader->key.as_es)<br>
return "Vertex Shader as ES";<br>
else if (shader->key.as_ls)<br>
@@ -5399,21 +5315,21 @@ void si_shader_dump(struct si_screen *sscreen, const struct si_shader *shader,<br>
debug, "epilog", file);<br>
fprintf(file, "\n");<br>
}<br>
<br>
si_shader_dump_stats(sscreen, shader, processor, file,<br>
check_debug_option);<br>
}<br>
<br>
static int si_compile_llvm(struct si_screen *sscreen,<br>
struct ac_shader_binary *binary,<br>
- struct si_shader_config *conf,<br>
+ struct ac_shader_config *conf,<br>
struct ac_llvm_compiler *compiler,<br>
LLVMModuleRef mod,<br>
struct pipe_debug_callback *debug,<br>
unsigned processor,<br>
const char *name,<br>
bool less_optimized)<br>
{<br>
int r = 0;<br>
unsigned count = p_atomic_inc_return(&sscreen->num_compilations);<br>
<br>
@@ -5433,21 +5349,21 @@ static int si_compile_llvm(struct si_screen *sscreen,<br>
LLVMDisposeMessage(ir);<br>
}<br>
<br>
if (!si_replace_shader(count, binary)) {<br>
r = si_llvm_compile(mod, binary, compiler, debug,<br>
less_optimized);<br>
if (r)<br>
return r;<br>
}<br>
<br>
- si_shader_binary_read_config(binary, conf, 0);<br>
+ ac_shader_binary_read_config(binary, conf, 0, false);<br>
<br>
/* Enable 64-bit and 16-bit denormals, because there is no performance<br>
* cost.<br>
*<br>
* If denormals are enabled, all floating-point output modifiers are<br>
* ignored.<br>
*<br>
* Don't enable denormals for 32-bit floats, because:<br>
* - Floating-point output modifiers would be ignored by the hw.<br>
* - Some opcodes don't support denormals, such as v_mad_f32. We would<br>
@@ -6799,21 +6715,21 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,<br>
need_prolog ? 1 : 0, 0);<br>
}<br>
<br>
si_llvm_optimize_module(&ctx);<br>
<br>
/* Post-optimization transformations and analysis. */<br>
si_optimize_vs_outputs(&ctx);<br>
<br>
if ((debug && debug->debug_message) ||<br>
si_can_dump_shader(sscreen, ctx.type)) {<br>
- ctx.shader->config.private_mem_vgprs =<br>
+ ctx.shader->private_mem_vgprs =<br>
ac_count_scratch_private_memory(ctx.main_fn);<br>
}<br>
<br>
/* Make sure the input is a pointer and not integer followed by inttoptr. */<br>
assert(LLVMGetTypeKind(LLVMTypeOf(LLVMGetParam(ctx.main_fn, 0))) ==<br>
LLVMPointerTypeKind);<br>
<br>
/* Compile to bytecode. */<br>
r = si_compile_llvm(sscreen, &shader->binary, &shader->config, compiler,<br>
ctx.ac.module, debug, ctx.type,<br>
@@ -7954,23 +7870,23 @@ int si_shader_create(struct si_screen *sscreen, struct ac_llvm_compiler *compile<br>
shader->config.num_sgprs = MAX2(shader->config.num_sgprs,<br>
shader->previous_stage->config.num_sgprs);<br>
shader->config.num_vgprs = MAX2(shader->config.num_vgprs,<br>
shader->previous_stage->config.num_vgprs);<br>
shader->config.spilled_sgprs =<br>
MAX2(shader->config.spilled_sgprs,<br>
shader->previous_stage->config.spilled_sgprs);<br>
shader->config.spilled_vgprs =<br>
MAX2(shader->config.spilled_vgprs,<br>
shader->previous_stage->config.spilled_vgprs);<br>
- shader->config.private_mem_vgprs =<br>
- MAX2(shader->config.private_mem_vgprs,<br>
- shader->previous_stage->config.private_mem_vgprs);<br>
+ shader->private_mem_vgprs =<br>
+ MAX2(shader->private_mem_vgprs,<br>
+ shader->previous_stage->private_mem_vgprs);<br>
shader->config.scratch_bytes_per_wave =<br>
MAX2(shader->config.scratch_bytes_per_wave,<br>
shader->previous_stage->config.scratch_bytes_per_wave);<br>
shader->info.uses_instanceid |=<br>
shader->previous_stage->info.uses_instanceid;<br>
}<br>
if (shader->prolog2) {<br>
shader->config.num_sgprs = MAX2(shader->config.num_sgprs,<br>
shader->prolog2->config.num_sgprs);<br>
shader->config.num_vgprs = MAX2(shader->config.num_vgprs,<br>
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h<br>
index ecf7f8bbd7a..6c8f70dc94b 100644<br>
--- a/src/gallium/drivers/radeonsi/si_shader.h<br>
+++ b/src/gallium/drivers/radeonsi/si_shader.h<br>
@@ -552,36 +552,20 @@ struct si_shader_key {<br>
* but forces monolithic shaders to be used as soon as<br>
* possible, because it's in the "opt" group.<br>
*/<br>
unsigned prefer_mono:1;<br>
} opt;<br>
};<br>
<br>
/* Restore the pack alignment to default. */<br>
#pragma pack(pop)<br>
<br>
-struct si_shader_config {<br>
- unsigned num_sgprs;<br>
- unsigned num_vgprs;<br>
- unsigned spilled_sgprs;<br>
- unsigned spilled_vgprs;<br>
- unsigned private_mem_vgprs;<br>
- unsigned lds_size;<br>
- unsigned max_simd_waves;<br>
- unsigned spi_ps_input_ena;<br>
- unsigned spi_ps_input_addr;<br>
- unsigned float_mode;<br>
- unsigned scratch_bytes_per_wave;<br>
- unsigned rsrc1;<br>
- unsigned rsrc2;<br>
-};<br>
-<br>
/* GCN-specific shader info. */<br>
struct si_shader_info {<br>
ubyte vs_output_param_offset[SI_MAX_VS_OUTPUTS];<br>
ubyte num_input_sgprs;<br>
ubyte num_input_vgprs;<br>
signed char face_vgpr_index;<br>
signed char ancillary_vgpr_index;<br>
bool uses_instanceid;<br>
ubyte nr_pos_exports;<br>
ubyte nr_param_exports;<br>
@@ -605,22 +589,24 @@ struct si_shader {<br>
struct si_shader_key key;<br>
struct util_queue_fence ready;<br>
bool compilation_failed;<br>
bool is_monolithic;<br>
bool is_optimized;<br>
bool is_binary_shared;<br>
bool is_gs_copy_shader;<br>
<br>
/* The following data is all that's needed for binary shaders. */<br>
struct ac_shader_binary binary;<br>
- struct si_shader_config config;<br>
+ struct ac_shader_config config;<br>
struct si_shader_info info;<br>
+ unsigned private_mem_vgprs;<br>
+ unsigned max_simd_waves;<br></blockquote><div><br></div><div>The shader cache stores "config" but not these new members.<br></div></div></div></blockquote><div><br></div>My updated version of the patch moves these 2 variables to si_shader_info, which trivially resolves the concern.</div><div class="gmail_quote"><br></div><div class="gmail_quote">Marek<br></div></div>