[Mesa-dev] [PATCH 04/10] radeonsi: use ac_shader_config

Wed May 8 05:52:12 UTC 2019

On Fri, May 3, 2019 at 7:19 AM Nicolai Hähnle <nhaehnle at gmail.com> wrote:

> From: Nicolai Hähnle <nicolai.haehnle at amd.com>
>
> ---
>  src/amd/common/ac_binary.c                |   2 +
>  src/gallium/drivers/radeonsi/si_compute.c |  14 +--
>  src/gallium/drivers/radeonsi/si_shader.c  | 112 +++-------------------
>  src/gallium/drivers/radeonsi/si_shader.h  |  25 +----
>  4 files changed, 27 insertions(+), 126 deletions(-)
>
> diff --git a/src/amd/common/ac_binary.c b/src/amd/common/ac_binary.c
> index 44251886b5f..d0ca55e0e0d 100644
> --- a/src/amd/common/ac_binary.c
> +++ b/src/amd/common/ac_binary.c
> @@ -218,26 +218,28 @@ void ac_parse_shader_binary_config(const char *data,
> size_t nbytes,
>                 unsigned value = util_le32_to_cpu(*(uint32_t*)(data + i +
> 4));
>                 switch (reg) {
>                 case R_00B028_SPI_SHADER_PGM_RSRC1_PS:
>                 case R_00B128_SPI_SHADER_PGM_RSRC1_VS:
>                 case R_00B228_SPI_SHADER_PGM_RSRC1_GS:
>                 case R_00B848_COMPUTE_PGM_RSRC1:
>                 case R_00B428_SPI_SHADER_PGM_RSRC1_HS:
>                         conf->num_sgprs = MAX2(conf->num_sgprs,
> (G_00B028_SGPRS(value) + 1) * 8);
>                         conf->num_vgprs = MAX2(conf->num_vgprs,
> (G_00B028_VGPRS(value) + 1) * 4);
>                         conf->float_mode =  G_00B028_FLOAT_MODE(value);
> +                       conf->rsrc1 = value;
>                         break;
>                 case R_00B02C_SPI_SHADER_PGM_RSRC2_PS:
>                         conf->lds_size = MAX2(conf->lds_size,
> G_00B02C_EXTRA_LDS_SIZE(value));
>                         break;
>                 case R_00B84C_COMPUTE_PGM_RSRC2:
>                         conf->lds_size = MAX2(conf->lds_size,
> G_00B84C_LDS_SIZE(value));
> +                       conf->rsrc2 = value;
>                         break;
>                 case R_0286CC_SPI_PS_INPUT_ENA:
>                         conf->spi_ps_input_ena = value;
>                         break;
>                 case R_0286D0_SPI_PS_INPUT_ADDR:
>                         conf->spi_ps_input_addr = value;
>                         break;
>                 case R_0286E8_SPI_TMPRING_SIZE:
>                 case R_00B860_COMPUTE_TMPRING_SIZE:
>                         /* WAVESIZE is in units of 256 dwords. */
> diff --git a/src/gallium/drivers/radeonsi/si_compute.c
> b/src/gallium/drivers/radeonsi/si_compute.c
> index 541d7e6f118..02d7bac406a 100644
> --- a/src/gallium/drivers/radeonsi/si_compute.c
> +++ b/src/gallium/drivers/radeonsi/si_compute.c
> @@ -59,21 +59,21 @@ static const amd_kernel_code_t
> *si_compute_get_code_object(
>         uint64_t symbol_offset)
>  {
>         if (!program->use_code_object_v2) {
>                 return NULL;
>         }
>         return (const amd_kernel_code_t*)
>                 (program->shader.binary.code + symbol_offset);
>  }
>
>  static void code_object_to_config(const amd_kernel_code_t *code_object,
> -                                 struct si_shader_config *out_config) {
> +                                 struct ac_shader_config *out_config) {
>
>         uint32_t rsrc1 = code_object->compute_pgm_resource_registers;
>         uint32_t rsrc2 = code_object->compute_pgm_resource_registers >> 32;
>         out_config->num_sgprs = code_object->wavefront_sgpr_count;
>         out_config->num_vgprs = code_object->workitem_vgpr_count;
>         out_config->float_mode = G_00B028_FLOAT_MODE(rsrc1);
>         out_config->rsrc1 = rsrc1;
>         out_config->lds_size = MAX2(out_config->lds_size,
> G_00B84C_LDS_SIZE(rsrc2));
>         out_config->rsrc2 = rsrc2;
>         out_config->scratch_bytes_per_wave =
> @@ -241,22 +241,22 @@ static void *si_create_compute_state(
>                         const amd_kernel_code_t *code_object =
>                                 si_compute_get_code_object(program, 0);
>                         code_object_to_config(code_object,
> &program->shader.config);
>                         if (program->shader.binary.reloc_count != 0) {
>                                 fprintf(stderr, "Error: %d unsupported
> relocations\n",
>
> program->shader.binary.reloc_count);
>                                 FREE(program);
>                                 return NULL;
>                         }
>                 } else {
> -
>  si_shader_binary_read_config(&program->shader.binary,
> -                                    &program->shader.config, 0);
> +
>  ac_shader_binary_read_config(&program->shader.binary,
> +                                    &program->shader.config, 0, false);
>                 }
>                 si_shader_dump(sctx->screen, &program->shader,
> &sctx->debug,
>                                PIPE_SHADER_COMPUTE, stderr, true);
>                 if (si_shader_binary_upload(sctx->screen,
> &program->shader) < 0) {
>                         fprintf(stderr, "LLVM failed to upload shader\n");
>                         FREE(program);
>                         return NULL;
>                 }
>         }
>
> @@ -362,21 +362,21 @@ static void si_initialize_compute(struct si_context
> *sctx)
>                                               bc_va >> 8);
>                 }
>         }
>
>         sctx->cs_shader_state.emitted_program = NULL;
>         sctx->cs_shader_state.initialized = true;
>  }
>
>  static bool si_setup_compute_scratch_buffer(struct si_context *sctx,
>                                              struct si_shader *shader,
> -                                            struct si_shader_config
> *config)
> +                                            struct ac_shader_config
> *config)
>  {
>         uint64_t scratch_bo_size, scratch_needed;
>         scratch_bo_size = 0;
>         scratch_needed = config->scratch_bytes_per_wave *
> sctx->scratch_waves;
>         if (sctx->compute_scratch_buffer)
>                 scratch_bo_size = sctx->compute_scratch_buffer->b.b.width0;
>
>         if (scratch_bo_size < scratch_needed) {
>                 si_resource_reference(&sctx->compute_scratch_buffer, NULL);
>
> @@ -405,38 +405,38 @@ static bool si_setup_compute_scratch_buffer(struct
> si_context *sctx,
>         return true;
>  }
>
>  static bool si_switch_compute_shader(struct si_context *sctx,
>                                       struct si_compute *program,
>                                      struct si_shader *shader,
>                                      const amd_kernel_code_t *code_object,
>                                      unsigned offset)
>  {
>         struct radeon_cmdbuf *cs = sctx->gfx_cs;
> -       struct si_shader_config inline_config = {0};
> -       struct si_shader_config *config;
> +       struct ac_shader_config inline_config = {0};
> +       struct ac_shader_config *config;
>         uint64_t shader_va;
>
>         if (sctx->cs_shader_state.emitted_program == program &&
>             sctx->cs_shader_state.offset == offset)
>                 return true;
>
>         if (program->ir_type != PIPE_SHADER_IR_NATIVE) {
>                 config = &shader->config;
>         } else {
>                 unsigned lds_blocks;
>
>                 config = &inline_config;
>                 if (code_object) {
>                         code_object_to_config(code_object, config);
>                 } else {
> -                       si_shader_binary_read_config(&shader->binary,
> config, offset);
> +                       ac_shader_binary_read_config(&shader->binary,
> config, offset, false);
>                 }
>
>                 lds_blocks = config->lds_size;
>                 /* XXX: We are over allocating LDS.  For SI, the shader
> reports
>                 * LDS in blocks of 256 bytes, so if there are 4 bytes lds
>                 * allocated in the shader and 4 bytes allocated by the
> state
>                 * tracker, then we will set LDS_SIZE to 512 bytes rather
> than 256.
>                 */
>                 if (sctx->chip_class <= SI) {
>                         lds_blocks += align(program->local_size, 256) >> 8;
> diff --git a/src/gallium/drivers/radeonsi/si_shader.c
> b/src/gallium/drivers/radeonsi/si_shader.c
> index f6d882cf583..da43447013d 100644
> --- a/src/gallium/drivers/radeonsi/si_shader.c
> +++ b/src/gallium/drivers/radeonsi/si_shader.c
> @@ -4962,104 +4962,20 @@ static void si_llvm_emit_polygon_stipple(struct
> si_shader_context *ctx,
>         /* The stipple pattern is 32x32, each row has 32 bits. */
>         offset = LLVMBuildMul(builder, address[1],
>                               LLVMConstInt(ctx->i32, 4, 0), "");
>         row = buffer_load_const(ctx, desc, offset);
>         row = ac_to_integer(&ctx->ac, row);
>         bit = LLVMBuildLShr(builder, row, address[0], "");
>         bit = LLVMBuildTrunc(builder, bit, ctx->i1, "");
>         ac_build_kill_if_false(&ctx->ac, bit);
>  }
>
> -void si_shader_binary_read_config(struct ac_shader_binary *binary,
> -                                 struct si_shader_config *conf,
> -                                 unsigned symbol_offset)
> -{
> -       unsigned i;
> -       const unsigned char *config =
> -               ac_shader_binary_config_start(binary, symbol_offset);
> -       bool really_needs_scratch = false;
> -
> -       /* LLVM adds SGPR spills to the scratch size.
> -        * Find out if we really need the scratch buffer.
> -        */
> -       for (i = 0; i < binary->reloc_count; i++) {
> -               const struct ac_shader_reloc *reloc = &binary->relocs[i];
> -
> -               if (!strcmp(scratch_rsrc_dword0_symbol, reloc->name) ||
> -                   !strcmp(scratch_rsrc_dword1_symbol, reloc->name)) {
> -                       really_needs_scratch = true;
> -                       break;
> -               }
> -       }
> -
> -       /* XXX: We may be able to emit some of these values directly
> rather than
> -        * extracting fields to be emitted later.
> -        */
> -
> -       for (i = 0; i < binary->config_size_per_symbol; i+= 8) {
> -               unsigned reg = util_le32_to_cpu(*(uint32_t*)(config + i));
> -               unsigned value = util_le32_to_cpu(*(uint32_t*)(config + i
> + 4));
> -               switch (reg) {
> -               case R_00B028_SPI_SHADER_PGM_RSRC1_PS:
> -               case R_00B128_SPI_SHADER_PGM_RSRC1_VS:
> -               case R_00B228_SPI_SHADER_PGM_RSRC1_GS:
> -               case R_00B428_SPI_SHADER_PGM_RSRC1_HS:
> -               case R_00B848_COMPUTE_PGM_RSRC1:
> -                       conf->num_sgprs = MAX2(conf->num_sgprs,
> (G_00B028_SGPRS(value) + 1) * 8);
> -                       conf->num_vgprs = MAX2(conf->num_vgprs,
> (G_00B028_VGPRS(value) + 1) * 4);
> -                       conf->float_mode =  G_00B028_FLOAT_MODE(value);
> -                       conf->rsrc1 = value;
> -                       break;
> -               case R_00B02C_SPI_SHADER_PGM_RSRC2_PS:
> -                       conf->lds_size = MAX2(conf->lds_size,
> G_00B02C_EXTRA_LDS_SIZE(value));
> -                       break;
> -               case R_00B84C_COMPUTE_PGM_RSRC2:
> -                       conf->lds_size = MAX2(conf->lds_size,
> G_00B84C_LDS_SIZE(value));
> -                       conf->rsrc2 = value;
> -                       break;
> -               case R_0286CC_SPI_PS_INPUT_ENA:
> -                       conf->spi_ps_input_ena = value;
> -                       break;
> -               case R_0286D0_SPI_PS_INPUT_ADDR:
> -                       conf->spi_ps_input_addr = value;
> -                       break;
> -               case R_0286E8_SPI_TMPRING_SIZE:
> -               case R_00B860_COMPUTE_TMPRING_SIZE:
> -                       /* WAVESIZE is in units of 256 dwords. */
> -                       if (really_needs_scratch)
> -                               conf->scratch_bytes_per_wave =
> -                                       G_00B860_WAVESIZE(value) * 256 * 4;
> -                       break;
> -               case 0x4: /* SPILLED_SGPRS */
> -                       conf->spilled_sgprs = value;
> -                       break;
> -               case 0x8: /* SPILLED_VGPRS */
> -                       conf->spilled_vgprs = value;
> -                       break;
> -               default:
> -                       {
> -                               static bool printed;
> -
> -                               if (!printed) {
> -                                       fprintf(stderr, "Warning: LLVM
> emitted unknown "
> -                                               "config register: 0x%x\n",
> reg);
> -                                       printed = true;
> -                               }
> -                       }
> -                       break;
> -               }
> -       }
> -
> -       if (!conf->spi_ps_input_addr)
> -               conf->spi_ps_input_addr = conf->spi_ps_input_ena;
> -}
> -
>  void si_shader_apply_scratch_relocs(struct si_shader *shader,
>                                     uint64_t scratch_va)
>  {
>         unsigned i;
>         uint32_t scratch_rsrc_dword0 = scratch_va;
>         uint32_t scratch_rsrc_dword1 =
>                 S_008F04_BASE_ADDRESS_HI(scratch_va >> 32);
>
>         /* Enable scratch coalescing. */
>         scratch_rsrc_dword1 |= S_008F04_SWIZZLE_ENABLE(1);
> @@ -5213,21 +5129,21 @@ static void si_shader_dump_disassembly(const
> struct ac_shader_binary *binary,
>                         fprintf(file, "@0x%x: %02x%02x%02x%02x\n", i,
>                                 binary->code[i + 3], binary->code[i + 2],
>                                 binary->code[i + 1], binary->code[i]);
>                 }
>         }
>  }
>
>  static void si_calculate_max_simd_waves(struct si_shader *shader)
>  {
>         struct si_screen *sscreen = shader->selector->screen;
> -       struct si_shader_config *conf = &shader->config;
> +       struct ac_shader_config *conf = &shader->config;
>         unsigned num_inputs = shader->selector->info.num_inputs;
>         unsigned lds_increment = sscreen->info.chip_class >= CIK ? 512 :
> 256;
>         unsigned lds_per_wave = 0;
>         unsigned max_simd_waves;
>
>         max_simd_waves = ac_get_max_simd_waves(sscreen->info.family);
>
>         /* Compute LDS usage for PS. */
>         switch (shader->selector->type) {
>         case PIPE_SHADER_FRAGMENT:
> @@ -5262,46 +5178,46 @@ static void si_calculate_max_simd_waves(struct
> si_shader *shader)
>         }
>
>         if (conf->num_vgprs)
>                 max_simd_waves = MIN2(max_simd_waves, 256 /
> conf->num_vgprs);
>
>         /* LDS is 64KB per CU (4 SIMDs), which is 16KB per SIMD (usage
> above
>          * 16KB makes some SIMDs unoccupied). */
>         if (lds_per_wave)
>                 max_simd_waves = MIN2(max_simd_waves, 16384 /
> lds_per_wave);
>
> -       conf->max_simd_waves = max_simd_waves;
> +       shader->max_simd_waves = max_simd_waves;
>  }
>
>  void si_shader_dump_stats_for_shader_db(const struct si_shader *shader,
>                                         struct pipe_debug_callback *debug)
>  {
> -       const struct si_shader_config *conf = &shader->config;
> +       const struct ac_shader_config *conf = &shader->config;
>
>         pipe_debug_message(debug, SHADER_INFO,
>                            "Shader Stats: SGPRS: %d VGPRS: %d Code Size:
> %d "
>                            "LDS: %d Scratch: %d Max Waves: %d Spilled
> SGPRs: %d "
>                            "Spilled VGPRs: %d PrivMem VGPRs: %d",
>                            conf->num_sgprs, conf->num_vgprs,
>                            si_get_shader_binary_size(shader),
>                            conf->lds_size, conf->scratch_bytes_per_wave,
> -                          conf->max_simd_waves, conf->spilled_sgprs,
> -                          conf->spilled_vgprs, conf->private_mem_vgprs);
> +                          shader->max_simd_waves, conf->spilled_sgprs,
> +                          conf->spilled_vgprs, shader->private_mem_vgprs);
>  }
>
>  static void si_shader_dump_stats(struct si_screen *sscreen,
>                                  const struct si_shader *shader,
>                                  unsigned processor,
>                                  FILE *file,
>                                  bool check_debug_option)
>  {
> -       const struct si_shader_config *conf = &shader->config;
> +       const struct ac_shader_config *conf = &shader->config;
>
>         if (!check_debug_option ||
>             si_can_dump_shader(sscreen, processor)) {
>                 if (processor == PIPE_SHADER_FRAGMENT) {
>                         fprintf(file, "*** SHADER CONFIG ***\n"
>                                 "SPI_PS_INPUT_ADDR = 0x%04x\n"
>                                 "SPI_PS_INPUT_ENA  = 0x%04x\n",
>                                 conf->spi_ps_input_addr,
> conf->spi_ps_input_ena);
>                 }
>
> @@ -5311,24 +5227,24 @@ static void si_shader_dump_stats(struct si_screen
> *sscreen,
>                         "Spilled SGPRs: %d\n"
>                         "Spilled VGPRs: %d\n"
>                         "Private memory VGPRs: %d\n"
>                         "Code Size: %d bytes\n"
>                         "LDS: %d blocks\n"
>                         "Scratch: %d bytes per wave\n"
>                         "Max Waves: %d\n"
>                         "********************\n\n\n",
>                         conf->num_sgprs, conf->num_vgprs,
>                         conf->spilled_sgprs, conf->spilled_vgprs,
> -                       conf->private_mem_vgprs,
> +                       shader->private_mem_vgprs,
>                         si_get_shader_binary_size(shader),
>                         conf->lds_size, conf->scratch_bytes_per_wave,
> -                       conf->max_simd_waves);
> +                       shader->max_simd_waves);
>         }
>  }
>
>  const char *si_get_shader_name(const struct si_shader *shader, unsigned
> processor)
>  {
>         switch (processor) {
>         case PIPE_SHADER_VERTEX:
>                 if (shader->key.as_es)
>                         return "Vertex Shader as ES";
>                 else if (shader->key.as_ls)
> @@ -5399,21 +5315,21 @@ void si_shader_dump(struct si_screen *sscreen,
> const struct si_shader *shader,
>                                                    debug, "epilog", file);
>                 fprintf(file, "\n");
>         }
>
>         si_shader_dump_stats(sscreen, shader, processor, file,
>                              check_debug_option);
>  }
>
>  static int si_compile_llvm(struct si_screen *sscreen,
>                            struct ac_shader_binary *binary,
> -                          struct si_shader_config *conf,
> +                          struct ac_shader_config *conf,
>                            struct ac_llvm_compiler *compiler,
>                            LLVMModuleRef mod,
>                            struct pipe_debug_callback *debug,
>                            unsigned processor,
>                            const char *name,
>                            bool less_optimized)
>  {
>         int r = 0;
>         unsigned count = p_atomic_inc_return(&sscreen->num_compilations);
>
> @@ -5433,21 +5349,21 @@ static int si_compile_llvm(struct si_screen
> *sscreen,
>                 LLVMDisposeMessage(ir);
>         }
>
>         if (!si_replace_shader(count, binary)) {
>                 r = si_llvm_compile(mod, binary, compiler, debug,
>                                     less_optimized);
>                 if (r)
>                         return r;
>         }
>
> -       si_shader_binary_read_config(binary, conf, 0);
> +       ac_shader_binary_read_config(binary, conf, 0, false);
>
>         /* Enable 64-bit and 16-bit denormals, because there is no
> performance
>          * cost.
>          *
>          * If denormals are enabled, all floating-point output modifiers
> are
>          * ignored.
>          *
>          * Don't enable denormals for 32-bit floats, because:
>          * - Floating-point output modifiers would be ignored by the hw.
>          * - Some opcodes don't support denormals, such as v_mad_f32. We
> would
> @@ -6799,21 +6715,21 @@ int si_compile_tgsi_shader(struct si_screen
> *sscreen,
>                                           need_prolog ? 1 : 0, 0);
>         }
>
>         si_llvm_optimize_module(&ctx);
>
>         /* Post-optimization transformations and analysis. */
>         si_optimize_vs_outputs(&ctx);
>
>         if ((debug && debug->debug_message) ||
>             si_can_dump_shader(sscreen, ctx.type)) {
> -               ctx.shader->config.private_mem_vgprs =
> +               ctx.shader->private_mem_vgprs =
>                         ac_count_scratch_private_memory(ctx.main_fn);
>         }
>
>         /* Make sure the input is a pointer and not integer followed by
> inttoptr. */
>         assert(LLVMGetTypeKind(LLVMTypeOf(LLVMGetParam(ctx.main_fn, 0))) ==
>                LLVMPointerTypeKind);
>
>         /* Compile to bytecode. */
>         r = si_compile_llvm(sscreen, &shader->binary, &shader->config,
> compiler,
>                             ctx.ac.module, debug, ctx.type,
> @@ -7954,23 +7870,23 @@ int si_shader_create(struct si_screen *sscreen,
> struct ac_llvm_compiler *compile
>                         shader->config.num_sgprs =
> MAX2(shader->config.num_sgprs,
>
> shader->previous_stage->config.num_sgprs);
>                         shader->config.num_vgprs =
> MAX2(shader->config.num_vgprs,
>
> shader->previous_stage->config.num_vgprs);
>                         shader->config.spilled_sgprs =
>                                 MAX2(shader->config.spilled_sgprs,
>
>  shader->previous_stage->config.spilled_sgprs);
>                         shader->config.spilled_vgprs =
>                                 MAX2(shader->config.spilled_vgprs,
>
>  shader->previous_stage->config.spilled_vgprs);
> -                       shader->config.private_mem_vgprs =
> -                               MAX2(shader->config.private_mem_vgprs,
> -
> shader->previous_stage->config.private_mem_vgprs);
> +                       shader->private_mem_vgprs =
> +                               MAX2(shader->private_mem_vgprs,
> +
> shader->previous_stage->private_mem_vgprs);
>                         shader->config.scratch_bytes_per_wave =
>                                 MAX2(shader->config.scratch_bytes_per_wave,
>
>  shader->previous_stage->config.scratch_bytes_per_wave);
>                         shader->info.uses_instanceid |=
>
> shader->previous_stage->info.uses_instanceid;
>                 }
>                 if (shader->prolog2) {
>                         shader->config.num_sgprs =
> MAX2(shader->config.num_sgprs,
>
> shader->prolog2->config.num_sgprs);
>                         shader->config.num_vgprs =
> MAX2(shader->config.num_vgprs,
> diff --git a/src/gallium/drivers/radeonsi/si_shader.h
> b/src/gallium/drivers/radeonsi/si_shader.h
> index ecf7f8bbd7a..6c8f70dc94b 100644
> --- a/src/gallium/drivers/radeonsi/si_shader.h
> +++ b/src/gallium/drivers/radeonsi/si_shader.h
> @@ -552,36 +552,20 @@ struct si_shader_key {
>                  * but forces monolithic shaders to be used as soon as
>                  * possible, because it's in the "opt" group.
>                  */
>                 unsigned        prefer_mono:1;
>         } opt;
>  };
>
>  /* Restore the pack alignment to default. */
>  #pragma pack(pop)
>
> -struct si_shader_config {
> -       unsigned                        num_sgprs;
> -       unsigned                        num_vgprs;
> -       unsigned                        spilled_sgprs;
> -       unsigned                        spilled_vgprs;
> -       unsigned                        private_mem_vgprs;
> -       unsigned                        lds_size;
> -       unsigned                        max_simd_waves;
> -       unsigned                        spi_ps_input_ena;
> -       unsigned                        spi_ps_input_addr;
> -       unsigned                        float_mode;
> -       unsigned                        scratch_bytes_per_wave;
> -       unsigned                        rsrc1;
> -       unsigned                        rsrc2;
> -};
> -
>  /* GCN-specific shader info. */
>  struct si_shader_info {
>         ubyte                   vs_output_param_offset[SI_MAX_VS_OUTPUTS];
>         ubyte                   num_input_sgprs;
>         ubyte                   num_input_vgprs;
>         signed char             face_vgpr_index;
>         signed char             ancillary_vgpr_index;
>         bool                    uses_instanceid;
>         ubyte                   nr_pos_exports;
>         ubyte                   nr_param_exports;
> @@ -605,22 +589,24 @@ struct si_shader {
>         struct si_shader_key            key;
>         struct util_queue_fence         ready;
>         bool                            compilation_failed;
>         bool                            is_monolithic;
>         bool                            is_optimized;
>         bool                            is_binary_shared;
>         bool                            is_gs_copy_shader;
>
>         /* The following data is all that's needed for binary shaders. */
>         struct ac_shader_binary binary;
> -       struct si_shader_config         config;
> +       struct ac_shader_config         config;
>         struct si_shader_info           info;
> +       unsigned                        private_mem_vgprs;
> +       unsigned                        max_simd_waves;
>

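The shader cache stores "config" but not these new members (private_mem_vgprs and max_simd_waves), so they will not be restored when a shader is loaded from the cache.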

Marek
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20190508/2ca94e28/attachment-0001.html>