<div dir="ltr"><div class="gmail_quote"><div dir="ltr" class="gmail_attr">On Wed, May 8, 2019 at 1:52 AM Marek Olšák <<a href="mailto:maraeo@gmail.com">maraeo@gmail.com</a>> wrote:<br></div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex"><div dir="ltr"><div class="gmail_quote"><div dir="ltr" class="gmail_attr">On Fri, May 3, 2019 at 7:19 AM Nicolai Hähnle <<a href="mailto:nhaehnle@gmail.com" target="_blank">nhaehnle@gmail.com</a>> wrote:<br></div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">From: Nicolai Hähnle <<a href="mailto:nicolai.haehnle@amd.com" target="_blank">nicolai.haehnle@amd.com</a>><br>
<br>
---<br>
 src/amd/common/ac_binary.c                |   2 +<br>
 src/gallium/drivers/radeonsi/si_compute.c |  14 +--<br>
 src/gallium/drivers/radeonsi/si_shader.c  | 112 +++-------------------<br>
 src/gallium/drivers/radeonsi/si_shader.h  |  25 +----<br>
 4 files changed, 27 insertions(+), 126 deletions(-)<br>
<br>
diff --git a/src/amd/common/ac_binary.c b/src/amd/common/ac_binary.c<br>
index 44251886b5f..d0ca55e0e0d 100644<br>
--- a/src/amd/common/ac_binary.c<br>
+++ b/src/amd/common/ac_binary.c<br>
@@ -218,26 +218,28 @@ void ac_parse_shader_binary_config(const char *data, size_t nbytes,<br>
                unsigned value = util_le32_to_cpu(*(uint32_t*)(data + i + 4));<br>
                switch (reg) {<br>
                case R_00B028_SPI_SHADER_PGM_RSRC1_PS:<br>
                case R_00B128_SPI_SHADER_PGM_RSRC1_VS:<br>
                case R_00B228_SPI_SHADER_PGM_RSRC1_GS:<br>
                case R_00B848_COMPUTE_PGM_RSRC1:<br>
                case R_00B428_SPI_SHADER_PGM_RSRC1_HS:<br>
                        conf->num_sgprs = MAX2(conf->num_sgprs, (G_00B028_SGPRS(value) + 1) * 8);<br>
                        conf->num_vgprs = MAX2(conf->num_vgprs, (G_00B028_VGPRS(value) + 1) * 4);<br>
                        conf->float_mode =  G_00B028_FLOAT_MODE(value);<br>
+                       conf->rsrc1 = value;<br>
                        break;<br>
                case R_00B02C_SPI_SHADER_PGM_RSRC2_PS:<br>
                        conf->lds_size = MAX2(conf->lds_size, G_00B02C_EXTRA_LDS_SIZE(value));<br>
                        break;<br>
                case R_00B84C_COMPUTE_PGM_RSRC2:<br>
                        conf->lds_size = MAX2(conf->lds_size, G_00B84C_LDS_SIZE(value));<br>
+                       conf->rsrc2 = value;<br>
                        break;<br>
                case R_0286CC_SPI_PS_INPUT_ENA:<br>
                        conf->spi_ps_input_ena = value;<br>
                        break;<br>
                case R_0286D0_SPI_PS_INPUT_ADDR:<br>
                        conf->spi_ps_input_addr = value;<br>
                        break;<br>
                case R_0286E8_SPI_TMPRING_SIZE:<br>
                case R_00B860_COMPUTE_TMPRING_SIZE:<br>
                        /* WAVESIZE is in units of 256 dwords. */<br>
diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c<br>
index 541d7e6f118..02d7bac406a 100644<br>
--- a/src/gallium/drivers/radeonsi/si_compute.c<br>
+++ b/src/gallium/drivers/radeonsi/si_compute.c<br>
@@ -59,21 +59,21 @@ static const amd_kernel_code_t *si_compute_get_code_object(<br>
        uint64_t symbol_offset)<br>
 {<br>
        if (!program->use_code_object_v2) {<br>
                return NULL;<br>
        }<br>
        return (const amd_kernel_code_t*)<br>
                (program->shader.binary.code + symbol_offset);<br>
 }<br>
<br>
 static void code_object_to_config(const amd_kernel_code_t *code_object,<br>
-                                 struct si_shader_config *out_config) {<br>
+                                 struct ac_shader_config *out_config) {<br>
<br>
        uint32_t rsrc1 = code_object->compute_pgm_resource_registers;<br>
        uint32_t rsrc2 = code_object->compute_pgm_resource_registers >> 32;<br>
        out_config->num_sgprs = code_object->wavefront_sgpr_count;<br>
        out_config->num_vgprs = code_object->workitem_vgpr_count;<br>
        out_config->float_mode = G_00B028_FLOAT_MODE(rsrc1);<br>
        out_config->rsrc1 = rsrc1;<br>
        out_config->lds_size = MAX2(out_config->lds_size, G_00B84C_LDS_SIZE(rsrc2));<br>
        out_config->rsrc2 = rsrc2;<br>
        out_config->scratch_bytes_per_wave =<br>
@@ -241,22 +241,22 @@ static void *si_create_compute_state(<br>
                        const amd_kernel_code_t *code_object =<br>
                                si_compute_get_code_object(program, 0);<br>
                        code_object_to_config(code_object, &program->shader.config);<br>
                        if (program->shader.binary.reloc_count != 0) {<br>
                                fprintf(stderr, "Error: %d unsupported relocations\n",<br>
                                        program->shader.binary.reloc_count);<br>
                                FREE(program);<br>
                                return NULL;<br>
                        }<br>
                } else {<br>
-                       si_shader_binary_read_config(&program->shader.binary,<br>
-                                    &program->shader.config, 0);<br>
+                       ac_shader_binary_read_config(&program->shader.binary,<br>
+                                    &program->shader.config, 0, false);<br>
                }<br>
                si_shader_dump(sctx->screen, &program->shader, &sctx->debug,<br>
                               PIPE_SHADER_COMPUTE, stderr, true);<br>
                if (si_shader_binary_upload(sctx->screen, &program->shader) < 0) {<br>
                        fprintf(stderr, "LLVM failed to upload shader\n");<br>
                        FREE(program);<br>
                        return NULL;<br>
                }<br>
        }<br>
<br>
@@ -362,21 +362,21 @@ static void si_initialize_compute(struct si_context *sctx)<br>
                                              bc_va >> 8);<br>
                }<br>
        }<br>
<br>
        sctx->cs_shader_state.emitted_program = NULL;<br>
        sctx->cs_shader_state.initialized = true;<br>
 }<br>
<br>
 static bool si_setup_compute_scratch_buffer(struct si_context *sctx,<br>
                                             struct si_shader *shader,<br>
-                                            struct si_shader_config *config)<br>
+                                            struct ac_shader_config *config)<br>
 {<br>
        uint64_t scratch_bo_size, scratch_needed;<br>
        scratch_bo_size = 0;<br>
        scratch_needed = config->scratch_bytes_per_wave * sctx->scratch_waves;<br>
        if (sctx->compute_scratch_buffer)<br>
                scratch_bo_size = sctx->compute_scratch_buffer->b.b.width0;<br>
<br>
        if (scratch_bo_size < scratch_needed) {<br>
                si_resource_reference(&sctx->compute_scratch_buffer, NULL);<br>
<br>
@@ -405,38 +405,38 @@ static bool si_setup_compute_scratch_buffer(struct si_context *sctx,<br>
        return true;<br>
 }<br>
<br>
 static bool si_switch_compute_shader(struct si_context *sctx,<br>
                                      struct si_compute *program,<br>
                                     struct si_shader *shader,<br>
                                     const amd_kernel_code_t *code_object,<br>
                                     unsigned offset)<br>
 {<br>
        struct radeon_cmdbuf *cs = sctx->gfx_cs;<br>
-       struct si_shader_config inline_config = {0};<br>
-       struct si_shader_config *config;<br>
+       struct ac_shader_config inline_config = {0};<br>
+       struct ac_shader_config *config;<br>
        uint64_t shader_va;<br>
<br>
        if (sctx->cs_shader_state.emitted_program == program &&<br>
            sctx->cs_shader_state.offset == offset)<br>
                return true;<br>
<br>
        if (program->ir_type != PIPE_SHADER_IR_NATIVE) {<br>
                config = &shader->config;<br>
        } else {<br>
                unsigned lds_blocks;<br>
<br>
                config = &inline_config;<br>
                if (code_object) {<br>
                        code_object_to_config(code_object, config);<br>
                } else {<br>
-                       si_shader_binary_read_config(&shader->binary, config, offset);<br>
+                       ac_shader_binary_read_config(&shader->binary, config, offset, false);<br>
                }<br>
<br>
                lds_blocks = config->lds_size;<br>
                /* XXX: We are over allocating LDS.  For SI, the shader reports<br>
                * LDS in blocks of 256 bytes, so if there are 4 bytes lds<br>
                * allocated in the shader and 4 bytes allocated by the state<br>
                * tracker, then we will set LDS_SIZE to 512 bytes rather than 256.<br>
                */<br>
                if (sctx->chip_class <= SI) {<br>
                        lds_blocks += align(program->local_size, 256) >> 8;<br>
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c<br>
index f6d882cf583..da43447013d 100644<br>
--- a/src/gallium/drivers/radeonsi/si_shader.c<br>
+++ b/src/gallium/drivers/radeonsi/si_shader.c<br>
@@ -4962,104 +4962,20 @@ static void si_llvm_emit_polygon_stipple(struct si_shader_context *ctx,<br>
        /* The stipple pattern is 32x32, each row has 32 bits. */<br>
        offset = LLVMBuildMul(builder, address[1],<br>
                              LLVMConstInt(ctx->i32, 4, 0), "");<br>
        row = buffer_load_const(ctx, desc, offset);<br>
        row = ac_to_integer(&ctx->ac, row);<br>
        bit = LLVMBuildLShr(builder, row, address[0], "");<br>
        bit = LLVMBuildTrunc(builder, bit, ctx->i1, "");<br>
        ac_build_kill_if_false(&ctx->ac, bit);<br>
 }<br>
<br>
-void si_shader_binary_read_config(struct ac_shader_binary *binary,<br>
-                                 struct si_shader_config *conf,<br>
-                                 unsigned symbol_offset)<br>
-{<br>
-       unsigned i;<br>
-       const unsigned char *config =<br>
-               ac_shader_binary_config_start(binary, symbol_offset);<br>
-       bool really_needs_scratch = false;<br>
-<br>
-       /* LLVM adds SGPR spills to the scratch size.<br>
-        * Find out if we really need the scratch buffer.<br>
-        */<br>
-       for (i = 0; i < binary->reloc_count; i++) {<br>
-               const struct ac_shader_reloc *reloc = &binary->relocs[i];<br>
-<br>
-               if (!strcmp(scratch_rsrc_dword0_symbol, reloc->name) ||<br>
-                   !strcmp(scratch_rsrc_dword1_symbol, reloc->name)) {<br>
-                       really_needs_scratch = true;<br>
-                       break;<br>
-               }<br>
-       }<br>
-<br>
-       /* XXX: We may be able to emit some of these values directly rather than<br>
-        * extracting fields to be emitted later.<br>
-        */<br>
-<br>
-       for (i = 0; i < binary->config_size_per_symbol; i+= 8) {<br>
-               unsigned reg = util_le32_to_cpu(*(uint32_t*)(config + i));<br>
-               unsigned value = util_le32_to_cpu(*(uint32_t*)(config + i + 4));<br>
-               switch (reg) {<br>
-               case R_00B028_SPI_SHADER_PGM_RSRC1_PS:<br>
-               case R_00B128_SPI_SHADER_PGM_RSRC1_VS:<br>
-               case R_00B228_SPI_SHADER_PGM_RSRC1_GS:<br>
-               case R_00B428_SPI_SHADER_PGM_RSRC1_HS:<br>
-               case R_00B848_COMPUTE_PGM_RSRC1:<br>
-                       conf->num_sgprs = MAX2(conf->num_sgprs, (G_00B028_SGPRS(value) + 1) * 8);<br>
-                       conf->num_vgprs = MAX2(conf->num_vgprs, (G_00B028_VGPRS(value) + 1) * 4);<br>
-                       conf->float_mode =  G_00B028_FLOAT_MODE(value);<br>
-                       conf->rsrc1 = value;<br>
-                       break;<br>
-               case R_00B02C_SPI_SHADER_PGM_RSRC2_PS:<br>
-                       conf->lds_size = MAX2(conf->lds_size, G_00B02C_EXTRA_LDS_SIZE(value));<br>
-                       break;<br>
-               case R_00B84C_COMPUTE_PGM_RSRC2:<br>
-                       conf->lds_size = MAX2(conf->lds_size, G_00B84C_LDS_SIZE(value));<br>
-                       conf->rsrc2 = value;<br>
-                       break;<br>
-               case R_0286CC_SPI_PS_INPUT_ENA:<br>
-                       conf->spi_ps_input_ena = value;<br>
-                       break;<br>
-               case R_0286D0_SPI_PS_INPUT_ADDR:<br>
-                       conf->spi_ps_input_addr = value;<br>
-                       break;<br>
-               case R_0286E8_SPI_TMPRING_SIZE:<br>
-               case R_00B860_COMPUTE_TMPRING_SIZE:<br>
-                       /* WAVESIZE is in units of 256 dwords. */<br>
-                       if (really_needs_scratch)<br>
-                               conf->scratch_bytes_per_wave =<br>
-                                       G_00B860_WAVESIZE(value) * 256 * 4;<br>
-                       break;<br>
-               case 0x4: /* SPILLED_SGPRS */<br>
-                       conf->spilled_sgprs = value;<br>
-                       break;<br>
-               case 0x8: /* SPILLED_VGPRS */<br>
-                       conf->spilled_vgprs = value;<br>
-                       break;<br>
-               default:<br>
-                       {<br>
-                               static bool printed;<br>
-<br>
-                               if (!printed) {<br>
-                                       fprintf(stderr, "Warning: LLVM emitted unknown "<br>
-                                               "config register: 0x%x\n", reg);<br>
-                                       printed = true;<br>
-                               }<br>
-                       }<br>
-                       break;<br>
-               }<br>
-       }<br>
-<br>
-       if (!conf->spi_ps_input_addr)<br>
-               conf->spi_ps_input_addr = conf->spi_ps_input_ena;<br>
-}<br>
-<br>
 void si_shader_apply_scratch_relocs(struct si_shader *shader,<br>
                                    uint64_t scratch_va)<br>
 {<br>
        unsigned i;<br>
        uint32_t scratch_rsrc_dword0 = scratch_va;<br>
        uint32_t scratch_rsrc_dword1 =<br>
                S_008F04_BASE_ADDRESS_HI(scratch_va >> 32);<br>
<br>
        /* Enable scratch coalescing. */<br>
        scratch_rsrc_dword1 |= S_008F04_SWIZZLE_ENABLE(1);<br>
@@ -5213,21 +5129,21 @@ static void si_shader_dump_disassembly(const struct ac_shader_binary *binary,<br>
                        fprintf(file, "@0x%x: %02x%02x%02x%02x\n", i,<br>
                                binary->code[i + 3], binary->code[i + 2],<br>
                                binary->code[i + 1], binary->code[i]);<br>
                }<br>
        }<br>
 }<br>
<br>
 static void si_calculate_max_simd_waves(struct si_shader *shader)<br>
 {<br>
        struct si_screen *sscreen = shader->selector->screen;<br>
-       struct si_shader_config *conf = &shader->config;<br>
+       struct ac_shader_config *conf = &shader->config;<br>
        unsigned num_inputs = shader->selector->info.num_inputs;<br>
        unsigned lds_increment = sscreen->info.chip_class >= CIK ? 512 : 256;<br>
        unsigned lds_per_wave = 0;<br>
        unsigned max_simd_waves;<br>
<br>
        max_simd_waves = ac_get_max_simd_waves(sscreen->info.family);<br>
<br>
        /* Compute LDS usage for PS. */<br>
        switch (shader->selector->type) {<br>
        case PIPE_SHADER_FRAGMENT:<br>
@@ -5262,46 +5178,46 @@ static void si_calculate_max_simd_waves(struct si_shader *shader)<br>
        }<br>
<br>
        if (conf->num_vgprs)<br>
                max_simd_waves = MIN2(max_simd_waves, 256 / conf->num_vgprs);<br>
<br>
        /* LDS is 64KB per CU (4 SIMDs), which is 16KB per SIMD (usage above<br>
         * 16KB makes some SIMDs unoccupied). */<br>
        if (lds_per_wave)<br>
                max_simd_waves = MIN2(max_simd_waves, 16384 / lds_per_wave);<br>
<br>
-       conf->max_simd_waves = max_simd_waves;<br>
+       shader->max_simd_waves = max_simd_waves;<br>
 }<br>
<br>
 void si_shader_dump_stats_for_shader_db(const struct si_shader *shader,<br>
                                        struct pipe_debug_callback *debug)<br>
 {<br>
-       const struct si_shader_config *conf = &shader->config;<br>
+       const struct ac_shader_config *conf = &shader->config;<br>
<br>
        pipe_debug_message(debug, SHADER_INFO,<br>
                           "Shader Stats: SGPRS: %d VGPRS: %d Code Size: %d "<br>
                           "LDS: %d Scratch: %d Max Waves: %d Spilled SGPRs: %d "<br>
                           "Spilled VGPRs: %d PrivMem VGPRs: %d",<br>
                           conf->num_sgprs, conf->num_vgprs,<br>
                           si_get_shader_binary_size(shader),<br>
                           conf->lds_size, conf->scratch_bytes_per_wave,<br>
-                          conf->max_simd_waves, conf->spilled_sgprs,<br>
-                          conf->spilled_vgprs, conf->private_mem_vgprs);<br>
+                          shader->max_simd_waves, conf->spilled_sgprs,<br>
+                          conf->spilled_vgprs, shader->private_mem_vgprs);<br>
 }<br>
<br>
 static void si_shader_dump_stats(struct si_screen *sscreen,<br>
                                 const struct si_shader *shader,<br>
                                 unsigned processor,<br>
                                 FILE *file,<br>
                                 bool check_debug_option)<br>
 {<br>
-       const struct si_shader_config *conf = &shader->config;<br>
+       const struct ac_shader_config *conf = &shader->config;<br>
<br>
        if (!check_debug_option ||<br>
            si_can_dump_shader(sscreen, processor)) {<br>
                if (processor == PIPE_SHADER_FRAGMENT) {<br>
                        fprintf(file, "*** SHADER CONFIG ***\n"<br>
                                "SPI_PS_INPUT_ADDR = 0x%04x\n"<br>
                                "SPI_PS_INPUT_ENA  = 0x%04x\n",<br>
                                conf->spi_ps_input_addr, conf->spi_ps_input_ena);<br>
                }<br>
<br>
@@ -5311,24 +5227,24 @@ static void si_shader_dump_stats(struct si_screen *sscreen,<br>
                        "Spilled SGPRs: %d\n"<br>
                        "Spilled VGPRs: %d\n"<br>
                        "Private memory VGPRs: %d\n"<br>
                        "Code Size: %d bytes\n"<br>
                        "LDS: %d blocks\n"<br>
                        "Scratch: %d bytes per wave\n"<br>
                        "Max Waves: %d\n"<br>
                        "********************\n\n\n",<br>
                        conf->num_sgprs, conf->num_vgprs,<br>
                        conf->spilled_sgprs, conf->spilled_vgprs,<br>
-                       conf->private_mem_vgprs,<br>
+                       shader->private_mem_vgprs,<br>
                        si_get_shader_binary_size(shader),<br>
                        conf->lds_size, conf->scratch_bytes_per_wave,<br>
-                       conf->max_simd_waves);<br>
+                       shader->max_simd_waves);<br>
        }<br>
 }<br>
<br>
 const char *si_get_shader_name(const struct si_shader *shader, unsigned processor)<br>
 {<br>
        switch (processor) {<br>
        case PIPE_SHADER_VERTEX:<br>
                if (shader->key.as_es)<br>
                        return "Vertex Shader as ES";<br>
                else if (shader->key.as_ls)<br>
@@ -5399,21 +5315,21 @@ void si_shader_dump(struct si_screen *sscreen, const struct si_shader *shader,<br>
                                                   debug, "epilog", file);<br>
                fprintf(file, "\n");<br>
        }<br>
<br>
        si_shader_dump_stats(sscreen, shader, processor, file,<br>
                             check_debug_option);<br>
 }<br>
<br>
 static int si_compile_llvm(struct si_screen *sscreen,<br>
                           struct ac_shader_binary *binary,<br>
-                          struct si_shader_config *conf,<br>
+                          struct ac_shader_config *conf,<br>
                           struct ac_llvm_compiler *compiler,<br>
                           LLVMModuleRef mod,<br>
                           struct pipe_debug_callback *debug,<br>
                           unsigned processor,<br>
                           const char *name,<br>
                           bool less_optimized)<br>
 {<br>
        int r = 0;<br>
        unsigned count = p_atomic_inc_return(&sscreen->num_compilations);<br>
<br>
@@ -5433,21 +5349,21 @@ static int si_compile_llvm(struct si_screen *sscreen,<br>
                LLVMDisposeMessage(ir);<br>
        }<br>
<br>
        if (!si_replace_shader(count, binary)) {<br>
                r = si_llvm_compile(mod, binary, compiler, debug,<br>
                                    less_optimized);<br>
                if (r)<br>
                        return r;<br>
        }<br>
<br>
-       si_shader_binary_read_config(binary, conf, 0);<br>
+       ac_shader_binary_read_config(binary, conf, 0, false);<br>
<br>
        /* Enable 64-bit and 16-bit denormals, because there is no performance<br>
         * cost.<br>
         *<br>
         * If denormals are enabled, all floating-point output modifiers are<br>
         * ignored.<br>
         *<br>
         * Don't enable denormals for 32-bit floats, because:<br>
         * - Floating-point output modifiers would be ignored by the hw.<br>
         * - Some opcodes don't support denormals, such as v_mad_f32. We would<br>
@@ -6799,21 +6715,21 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,<br>
                                          need_prolog ? 1 : 0, 0);<br>
        }<br>
<br>
        si_llvm_optimize_module(&ctx);<br>
<br>
        /* Post-optimization transformations and analysis. */<br>
        si_optimize_vs_outputs(&ctx);<br>
<br>
        if ((debug && debug->debug_message) ||<br>
            si_can_dump_shader(sscreen, ctx.type)) {<br>
-               ctx.shader->config.private_mem_vgprs =<br>
+               ctx.shader->private_mem_vgprs =<br>
                        ac_count_scratch_private_memory(ctx.main_fn);<br>
        }<br>
<br>
        /* Make sure the input is a pointer and not integer followed by inttoptr. */<br>
        assert(LLVMGetTypeKind(LLVMTypeOf(LLVMGetParam(ctx.main_fn, 0))) ==<br>
               LLVMPointerTypeKind);<br>
<br>
        /* Compile to bytecode. */<br>
        r = si_compile_llvm(sscreen, &shader->binary, &shader->config, compiler,<br>
                            ctx.ac.module, debug, ctx.type,<br>
@@ -7954,23 +7870,23 @@ int si_shader_create(struct si_screen *sscreen, struct ac_llvm_compiler *compile<br>
                        shader->config.num_sgprs = MAX2(shader->config.num_sgprs,<br>
                                                        shader->previous_stage->config.num_sgprs);<br>
                        shader->config.num_vgprs = MAX2(shader->config.num_vgprs,<br>
                                                        shader->previous_stage->config.num_vgprs);<br>
                        shader->config.spilled_sgprs =<br>
                                MAX2(shader->config.spilled_sgprs,<br>
                                     shader->previous_stage->config.spilled_sgprs);<br>
                        shader->config.spilled_vgprs =<br>
                                MAX2(shader->config.spilled_vgprs,<br>
                                     shader->previous_stage->config.spilled_vgprs);<br>
-                       shader->config.private_mem_vgprs =<br>
-                               MAX2(shader->config.private_mem_vgprs,<br>
-                                    shader->previous_stage->config.private_mem_vgprs);<br>
+                       shader->private_mem_vgprs =<br>
+                               MAX2(shader->private_mem_vgprs,<br>
+                                    shader->previous_stage->private_mem_vgprs);<br>
                        shader->config.scratch_bytes_per_wave =<br>
                                MAX2(shader->config.scratch_bytes_per_wave,<br>
                                     shader->previous_stage->config.scratch_bytes_per_wave);<br>
                        shader->info.uses_instanceid |=<br>
                                shader->previous_stage->info.uses_instanceid;<br>
                }<br>
                if (shader->prolog2) {<br>
                        shader->config.num_sgprs = MAX2(shader->config.num_sgprs,<br>
                                                        shader->prolog2->config.num_sgprs);<br>
                        shader->config.num_vgprs = MAX2(shader->config.num_vgprs,<br>
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h<br>
index ecf7f8bbd7a..6c8f70dc94b 100644<br>
--- a/src/gallium/drivers/radeonsi/si_shader.h<br>
+++ b/src/gallium/drivers/radeonsi/si_shader.h<br>
@@ -552,36 +552,20 @@ struct si_shader_key {<br>
                 * but forces monolithic shaders to be used as soon as<br>
                 * possible, because it's in the "opt" group.<br>
                 */<br>
                unsigned        prefer_mono:1;<br>
        } opt;<br>
 };<br>
<br>
 /* Restore the pack alignment to default. */<br>
 #pragma pack(pop)<br>
<br>
-struct si_shader_config {<br>
-       unsigned                        num_sgprs;<br>
-       unsigned                        num_vgprs;<br>
-       unsigned                        spilled_sgprs;<br>
-       unsigned                        spilled_vgprs;<br>
-       unsigned                        private_mem_vgprs;<br>
-       unsigned                        lds_size;<br>
-       unsigned                        max_simd_waves;<br>
-       unsigned                        spi_ps_input_ena;<br>
-       unsigned                        spi_ps_input_addr;<br>
-       unsigned                        float_mode;<br>
-       unsigned                        scratch_bytes_per_wave;<br>
-       unsigned                        rsrc1;<br>
-       unsigned                        rsrc2;<br>
-};<br>
-<br>
 /* GCN-specific shader info. */<br>
 struct si_shader_info {<br>
        ubyte                   vs_output_param_offset[SI_MAX_VS_OUTPUTS];<br>
        ubyte                   num_input_sgprs;<br>
        ubyte                   num_input_vgprs;<br>
        signed char             face_vgpr_index;<br>
        signed char             ancillary_vgpr_index;<br>
        bool                    uses_instanceid;<br>
        ubyte                   nr_pos_exports;<br>
        ubyte                   nr_param_exports;<br>
@@ -605,22 +589,24 @@ struct si_shader {<br>
        struct si_shader_key            key;<br>
        struct util_queue_fence         ready;<br>
        bool                            compilation_failed;<br>
        bool                            is_monolithic;<br>
        bool                            is_optimized;<br>
        bool                            is_binary_shared;<br>
        bool                            is_gs_copy_shader;<br>
<br>
        /* The following data is all that's needed for binary shaders. */<br>
        struct ac_shader_binary binary;<br>
-       struct si_shader_config         config;<br>
+       struct ac_shader_config         config;<br>
        struct si_shader_info           info;<br>
+       unsigned                        private_mem_vgprs;<br>
+       unsigned                        max_simd_waves;<br></blockquote><div><br></div><div>The shader cache stores "config" but not these new members.<br></div></div></div></blockquote><div><br></div>My updated version of the patch moves these two variables (private_mem_vgprs and max_simd_waves) to struct si_shader_info, which trivially resolves the concern.</div><div class="gmail_quote"><br></div><div class="gmail_quote">Marek<br></div></div>