[Mesa-dev] [PATCH 04/10] radeonsi: use ac_shader_config

Wed Jun 12 22:50:49 UTC 2019

On Wed, May 8, 2019 at 1:52 AM Marek Olšák <maraeo at gmail.com> wrote:

> On Fri, May 3, 2019 at 7:19 AM Nicolai Hähnle <nhaehnle at gmail.com> wrote:
>
>> From: Nicolai Hähnle <nicolai.haehnle at amd.com>
>>
>> ---
>>  src/amd/common/ac_binary.c                |   2 +
>>  src/gallium/drivers/radeonsi/si_compute.c |  14 +--
>>  src/gallium/drivers/radeonsi/si_shader.c  | 112 +++-------------------
>>  src/gallium/drivers/radeonsi/si_shader.h  |  25 +----
>>  4 files changed, 27 insertions(+), 126 deletions(-)
>>
>> diff --git a/src/amd/common/ac_binary.c b/src/amd/common/ac_binary.c
>> index 44251886b5f..d0ca55e0e0d 100644
>> --- a/src/amd/common/ac_binary.c
>> +++ b/src/amd/common/ac_binary.c
>> @@ -218,26 +218,28 @@ void ac_parse_shader_binary_config(const char
>> *data, size_t nbytes,
>>                 unsigned value = util_le32_to_cpu(*(uint32_t*)(data + i +
>> 4));
>>                 switch (reg) {
>>                 case R_00B028_SPI_SHADER_PGM_RSRC1_PS:
>>                 case R_00B128_SPI_SHADER_PGM_RSRC1_VS:
>>                 case R_00B228_SPI_SHADER_PGM_RSRC1_GS:
>>                 case R_00B848_COMPUTE_PGM_RSRC1:
>>                 case R_00B428_SPI_SHADER_PGM_RSRC1_HS:
>>                         conf->num_sgprs = MAX2(conf->num_sgprs,
>> (G_00B028_SGPRS(value) + 1) * 8);
>>                         conf->num_vgprs = MAX2(conf->num_vgprs,
>> (G_00B028_VGPRS(value) + 1) * 4);
>>                         conf->float_mode =  G_00B028_FLOAT_MODE(value);
>> +                       conf->rsrc1 = value;
>>                         break;
>>                 case R_00B02C_SPI_SHADER_PGM_RSRC2_PS:
>>                         conf->lds_size = MAX2(conf->lds_size,
>> G_00B02C_EXTRA_LDS_SIZE(value));
>>                         break;
>>                 case R_00B84C_COMPUTE_PGM_RSRC2:
>>                         conf->lds_size = MAX2(conf->lds_size,
>> G_00B84C_LDS_SIZE(value));
>> +                       conf->rsrc2 = value;
>>                         break;
>>                 case R_0286CC_SPI_PS_INPUT_ENA:
>>                         conf->spi_ps_input_ena = value;
>>                         break;
>>                 case R_0286D0_SPI_PS_INPUT_ADDR:
>>                         conf->spi_ps_input_addr = value;
>>                         break;
>>                 case R_0286E8_SPI_TMPRING_SIZE:
>>                 case R_00B860_COMPUTE_TMPRING_SIZE:
>>                         /* WAVESIZE is in units of 256 dwords. */
>> diff --git a/src/gallium/drivers/radeonsi/si_compute.c
>> b/src/gallium/drivers/radeonsi/si_compute.c
>> index 541d7e6f118..02d7bac406a 100644
>> --- a/src/gallium/drivers/radeonsi/si_compute.c
>> +++ b/src/gallium/drivers/radeonsi/si_compute.c
>> @@ -59,21 +59,21 @@ static const amd_kernel_code_t
>> *si_compute_get_code_object(
>>         uint64_t symbol_offset)
>>  {
>>         if (!program->use_code_object_v2) {
>>                 return NULL;
>>         }
>>         return (const amd_kernel_code_t*)
>>                 (program->shader.binary.code + symbol_offset);
>>  }
>>
>>  static void code_object_to_config(const amd_kernel_code_t *code_object,
>> -                                 struct si_shader_config *out_config) {
>> +                                 struct ac_shader_config *out_config) {
>>
>>         uint32_t rsrc1 = code_object->compute_pgm_resource_registers;
>>         uint32_t rsrc2 = code_object->compute_pgm_resource_registers >>
>> 32;
>>         out_config->num_sgprs = code_object->wavefront_sgpr_count;
>>         out_config->num_vgprs = code_object->workitem_vgpr_count;
>>         out_config->float_mode = G_00B028_FLOAT_MODE(rsrc1);
>>         out_config->rsrc1 = rsrc1;
>>         out_config->lds_size = MAX2(out_config->lds_size,
>> G_00B84C_LDS_SIZE(rsrc2));
>>         out_config->rsrc2 = rsrc2;
>>         out_config->scratch_bytes_per_wave =
>> @@ -241,22 +241,22 @@ static void *si_create_compute_state(
>>                         const amd_kernel_code_t *code_object =
>>                                 si_compute_get_code_object(program, 0);
>>                         code_object_to_config(code_object,
>> &program->shader.config);
>>                         if (program->shader.binary.reloc_count != 0) {
>>                                 fprintf(stderr, "Error: %d unsupported
>> relocations\n",
>>
>> program->shader.binary.reloc_count);
>>                                 FREE(program);
>>                                 return NULL;
>>                         }
>>                 } else {
>> -
>>  si_shader_binary_read_config(&program->shader.binary,
>> -                                    &program->shader.config, 0);
>> +
>>  ac_shader_binary_read_config(&program->shader.binary,
>> +                                    &program->shader.config, 0, false);
>>                 }
>>                 si_shader_dump(sctx->screen, &program->shader,
>> &sctx->debug,
>>                                PIPE_SHADER_COMPUTE, stderr, true);
>>                 if (si_shader_binary_upload(sctx->screen,
>> &program->shader) < 0) {
>>                         fprintf(stderr, "LLVM failed to upload shader\n");
>>                         FREE(program);
>>                         return NULL;
>>                 }
>>         }
>>
>> @@ -362,21 +362,21 @@ static void si_initialize_compute(struct si_context
>> *sctx)
>>                                               bc_va >> 8);
>>                 }
>>         }
>>
>>         sctx->cs_shader_state.emitted_program = NULL;
>>         sctx->cs_shader_state.initialized = true;
>>  }
>>
>>  static bool si_setup_compute_scratch_buffer(struct si_context *sctx,
>>                                              struct si_shader *shader,
>> -                                            struct si_shader_config
>> *config)
>> +                                            struct ac_shader_config
>> *config)
>>  {
>>         uint64_t scratch_bo_size, scratch_needed;
>>         scratch_bo_size = 0;
>>         scratch_needed = config->scratch_bytes_per_wave *
>> sctx->scratch_waves;
>>         if (sctx->compute_scratch_buffer)
>>                 scratch_bo_size =
>> sctx->compute_scratch_buffer->b.b.width0;
>>
>>         if (scratch_bo_size < scratch_needed) {
>>                 si_resource_reference(&sctx->compute_scratch_buffer,
>> NULL);
>>
>> @@ -405,38 +405,38 @@ static bool si_setup_compute_scratch_buffer(struct
>> si_context *sctx,
>>         return true;
>>  }
>>
>>  static bool si_switch_compute_shader(struct si_context *sctx,
>>                                       struct si_compute *program,
>>                                      struct si_shader *shader,
>>                                      const amd_kernel_code_t *code_object,
>>                                      unsigned offset)
>>  {
>>         struct radeon_cmdbuf *cs = sctx->gfx_cs;
>> -       struct si_shader_config inline_config = {0};
>> -       struct si_shader_config *config;
>> +       struct ac_shader_config inline_config = {0};
>> +       struct ac_shader_config *config;
>>         uint64_t shader_va;
>>
>>         if (sctx->cs_shader_state.emitted_program == program &&
>>             sctx->cs_shader_state.offset == offset)
>>                 return true;
>>
>>         if (program->ir_type != PIPE_SHADER_IR_NATIVE) {
>>                 config = &shader->config;
>>         } else {
>>                 unsigned lds_blocks;
>>
>>                 config = &inline_config;
>>                 if (code_object) {
>>                         code_object_to_config(code_object, config);
>>                 } else {
>> -                       si_shader_binary_read_config(&shader->binary,
>> config, offset);
>> +                       ac_shader_binary_read_config(&shader->binary,
>> config, offset, false);
>>                 }
>>
>>                 lds_blocks = config->lds_size;
>>                 /* XXX: We are over allocating LDS.  For SI, the shader
>> reports
>>                 * LDS in blocks of 256 bytes, so if there are 4 bytes lds
>>                 * allocated in the shader and 4 bytes allocated by the
>> state
>>                 * tracker, then we will set LDS_SIZE to 512 bytes rather
>> than 256.
>>                 */
>>                 if (sctx->chip_class <= SI) {
>>                         lds_blocks += align(program->local_size, 256) >>
>> 8;
>> diff --git a/src/gallium/drivers/radeonsi/si_shader.c
>> b/src/gallium/drivers/radeonsi/si_shader.c
>> index f6d882cf583..da43447013d 100644
>> --- a/src/gallium/drivers/radeonsi/si_shader.c
>> +++ b/src/gallium/drivers/radeonsi/si_shader.c
>> @@ -4962,104 +4962,20 @@ static void si_llvm_emit_polygon_stipple(struct
>> si_shader_context *ctx,
>>         /* The stipple pattern is 32x32, each row has 32 bits. */
>>         offset = LLVMBuildMul(builder, address[1],
>>                               LLVMConstInt(ctx->i32, 4, 0), "");
>>         row = buffer_load_const(ctx, desc, offset);
>>         row = ac_to_integer(&ctx->ac, row);
>>         bit = LLVMBuildLShr(builder, row, address[0], "");
>>         bit = LLVMBuildTrunc(builder, bit, ctx->i1, "");
>>         ac_build_kill_if_false(&ctx->ac, bit);
>>  }
>>
>> -void si_shader_binary_read_config(struct ac_shader_binary *binary,
>> -                                 struct si_shader_config *conf,
>> -                                 unsigned symbol_offset)
>> -{
>> -       unsigned i;
>> -       const unsigned char *config =
>> -               ac_shader_binary_config_start(binary, symbol_offset);
>> -       bool really_needs_scratch = false;
>> -
>> -       /* LLVM adds SGPR spills to the scratch size.
>> -        * Find out if we really need the scratch buffer.
>> -        */
>> -       for (i = 0; i < binary->reloc_count; i++) {
>> -               const struct ac_shader_reloc *reloc = &binary->relocs[i];
>> -
>> -               if (!strcmp(scratch_rsrc_dword0_symbol, reloc->name) ||
>> -                   !strcmp(scratch_rsrc_dword1_symbol, reloc->name)) {
>> -                       really_needs_scratch = true;
>> -                       break;
>> -               }
>> -       }
>> -
>> -       /* XXX: We may be able to emit some of these values directly
>> rather than
>> -        * extracting fields to be emitted later.
>> -        */
>> -
>> -       for (i = 0; i < binary->config_size_per_symbol; i+= 8) {
>> -               unsigned reg = util_le32_to_cpu(*(uint32_t*)(config + i));
>> -               unsigned value = util_le32_to_cpu(*(uint32_t*)(config + i
>> + 4));
>> -               switch (reg) {
>> -               case R_00B028_SPI_SHADER_PGM_RSRC1_PS:
>> -               case R_00B128_SPI_SHADER_PGM_RSRC1_VS:
>> -               case R_00B228_SPI_SHADER_PGM_RSRC1_GS:
>> -               case R_00B428_SPI_SHADER_PGM_RSRC1_HS:
>> -               case R_00B848_COMPUTE_PGM_RSRC1:
>> -                       conf->num_sgprs = MAX2(conf->num_sgprs,
>> (G_00B028_SGPRS(value) + 1) * 8);
>> -                       conf->num_vgprs = MAX2(conf->num_vgprs,
>> (G_00B028_VGPRS(value) + 1) * 4);
>> -                       conf->float_mode =  G_00B028_FLOAT_MODE(value);
>> -                       conf->rsrc1 = value;
>> -                       break;
>> -               case R_00B02C_SPI_SHADER_PGM_RSRC2_PS:
>> -                       conf->lds_size = MAX2(conf->lds_size,
>> G_00B02C_EXTRA_LDS_SIZE(value));
>> -                       break;
>> -               case R_00B84C_COMPUTE_PGM_RSRC2:
>> -                       conf->lds_size = MAX2(conf->lds_size,
>> G_00B84C_LDS_SIZE(value));
>> -                       conf->rsrc2 = value;
>> -                       break;
>> -               case R_0286CC_SPI_PS_INPUT_ENA:
>> -                       conf->spi_ps_input_ena = value;
>> -                       break;
>> -               case R_0286D0_SPI_PS_INPUT_ADDR:
>> -                       conf->spi_ps_input_addr = value;
>> -                       break;
>> -               case R_0286E8_SPI_TMPRING_SIZE:
>> -               case R_00B860_COMPUTE_TMPRING_SIZE:
>> -                       /* WAVESIZE is in units of 256 dwords. */
>> -                       if (really_needs_scratch)
>> -                               conf->scratch_bytes_per_wave =
>> -                                       G_00B860_WAVESIZE(value) * 256 *
>> 4;
>> -                       break;
>> -               case 0x4: /* SPILLED_SGPRS */
>> -                       conf->spilled_sgprs = value;
>> -                       break;
>> -               case 0x8: /* SPILLED_VGPRS */
>> -                       conf->spilled_vgprs = value;
>> -                       break;
>> -               default:
>> -                       {
>> -                               static bool printed;
>> -
>> -                               if (!printed) {
>> -                                       fprintf(stderr, "Warning: LLVM
>> emitted unknown "
>> -                                               "config register:
>> 0x%x\n", reg);
>> -                                       printed = true;
>> -                               }
>> -                       }
>> -                       break;
>> -               }
>> -       }
>> -
>> -       if (!conf->spi_ps_input_addr)
>> -               conf->spi_ps_input_addr = conf->spi_ps_input_ena;
>> -}
>> -
>>  void si_shader_apply_scratch_relocs(struct si_shader *shader,
>>                                     uint64_t scratch_va)
>>  {
>>         unsigned i;
>>         uint32_t scratch_rsrc_dword0 = scratch_va;
>>         uint32_t scratch_rsrc_dword1 =
>>                 S_008F04_BASE_ADDRESS_HI(scratch_va >> 32);
>>
>>         /* Enable scratch coalescing. */
>>         scratch_rsrc_dword1 |= S_008F04_SWIZZLE_ENABLE(1);
>> @@ -5213,21 +5129,21 @@ static void si_shader_dump_disassembly(const
>> struct ac_shader_binary *binary,
>>                         fprintf(file, "@0x%x: %02x%02x%02x%02x\n", i,
>>                                 binary->code[i + 3], binary->code[i + 2],
>>                                 binary->code[i + 1], binary->code[i]);
>>                 }
>>         }
>>  }
>>
>>  static void si_calculate_max_simd_waves(struct si_shader *shader)
>>  {
>>         struct si_screen *sscreen = shader->selector->screen;
>> -       struct si_shader_config *conf = &shader->config;
>> +       struct ac_shader_config *conf = &shader->config;
>>         unsigned num_inputs = shader->selector->info.num_inputs;
>>         unsigned lds_increment = sscreen->info.chip_class >= CIK ? 512 :
>> 256;
>>         unsigned lds_per_wave = 0;
>>         unsigned max_simd_waves;
>>
>>         max_simd_waves = ac_get_max_simd_waves(sscreen->info.family);
>>
>>         /* Compute LDS usage for PS. */
>>         switch (shader->selector->type) {
>>         case PIPE_SHADER_FRAGMENT:
>> @@ -5262,46 +5178,46 @@ static void si_calculate_max_simd_waves(struct
>> si_shader *shader)
>>         }
>>
>>         if (conf->num_vgprs)
>>                 max_simd_waves = MIN2(max_simd_waves, 256 /
>> conf->num_vgprs);
>>
>>         /* LDS is 64KB per CU (4 SIMDs), which is 16KB per SIMD (usage
>> above
>>          * 16KB makes some SIMDs unoccupied). */
>>         if (lds_per_wave)
>>                 max_simd_waves = MIN2(max_simd_waves, 16384 /
>> lds_per_wave);
>>
>> -       conf->max_simd_waves = max_simd_waves;
>> +       shader->max_simd_waves = max_simd_waves;
>>  }
>>
>>  void si_shader_dump_stats_for_shader_db(const struct si_shader *shader,
>>                                         struct pipe_debug_callback *debug)
>>  {
>> -       const struct si_shader_config *conf = &shader->config;
>> +       const struct ac_shader_config *conf = &shader->config;
>>
>>         pipe_debug_message(debug, SHADER_INFO,
>>                            "Shader Stats: SGPRS: %d VGPRS: %d Code Size:
>> %d "
>>                            "LDS: %d Scratch: %d Max Waves: %d Spilled
>> SGPRs: %d "
>>                            "Spilled VGPRs: %d PrivMem VGPRs: %d",
>>                            conf->num_sgprs, conf->num_vgprs,
>>                            si_get_shader_binary_size(shader),
>>                            conf->lds_size, conf->scratch_bytes_per_wave,
>> -                          conf->max_simd_waves, conf->spilled_sgprs,
>> -                          conf->spilled_vgprs, conf->private_mem_vgprs);
>> +                          shader->max_simd_waves, conf->spilled_sgprs,
>> +                          conf->spilled_vgprs,
>> shader->private_mem_vgprs);
>>  }
>>
>>  static void si_shader_dump_stats(struct si_screen *sscreen,
>>                                  const struct si_shader *shader,
>>                                  unsigned processor,
>>                                  FILE *file,
>>                                  bool check_debug_option)
>>  {
>> -       const struct si_shader_config *conf = &shader->config;
>> +       const struct ac_shader_config *conf = &shader->config;
>>
>>         if (!check_debug_option ||
>>             si_can_dump_shader(sscreen, processor)) {
>>                 if (processor == PIPE_SHADER_FRAGMENT) {
>>                         fprintf(file, "*** SHADER CONFIG ***\n"
>>                                 "SPI_PS_INPUT_ADDR = 0x%04x\n"
>>                                 "SPI_PS_INPUT_ENA  = 0x%04x\n",
>>                                 conf->spi_ps_input_addr,
>> conf->spi_ps_input_ena);
>>                 }
>>
>> @@ -5311,24 +5227,24 @@ static void si_shader_dump_stats(struct si_screen
>> *sscreen,
>>                         "Spilled SGPRs: %d\n"
>>                         "Spilled VGPRs: %d\n"
>>                         "Private memory VGPRs: %d\n"
>>                         "Code Size: %d bytes\n"
>>                         "LDS: %d blocks\n"
>>                         "Scratch: %d bytes per wave\n"
>>                         "Max Waves: %d\n"
>>                         "********************\n\n\n",
>>                         conf->num_sgprs, conf->num_vgprs,
>>                         conf->spilled_sgprs, conf->spilled_vgprs,
>> -                       conf->private_mem_vgprs,
>> +                       shader->private_mem_vgprs,
>>                         si_get_shader_binary_size(shader),
>>                         conf->lds_size, conf->scratch_bytes_per_wave,
>> -                       conf->max_simd_waves);
>> +                       shader->max_simd_waves);
>>         }
>>  }
>>
>>  const char *si_get_shader_name(const struct si_shader *shader, unsigned
>> processor)
>>  {
>>         switch (processor) {
>>         case PIPE_SHADER_VERTEX:
>>                 if (shader->key.as_es)
>>                         return "Vertex Shader as ES";
>>                 else if (shader->key.as_ls)
>> @@ -5399,21 +5315,21 @@ void si_shader_dump(struct si_screen *sscreen,
>> const struct si_shader *shader,
>>                                                    debug, "epilog", file);
>>                 fprintf(file, "\n");
>>         }
>>
>>         si_shader_dump_stats(sscreen, shader, processor, file,
>>                              check_debug_option);
>>  }
>>
>>  static int si_compile_llvm(struct si_screen *sscreen,
>>                            struct ac_shader_binary *binary,
>> -                          struct si_shader_config *conf,
>> +                          struct ac_shader_config *conf,
>>                            struct ac_llvm_compiler *compiler,
>>                            LLVMModuleRef mod,
>>                            struct pipe_debug_callback *debug,
>>                            unsigned processor,
>>                            const char *name,
>>                            bool less_optimized)
>>  {
>>         int r = 0;
>>         unsigned count = p_atomic_inc_return(&sscreen->num_compilations);
>>
>> @@ -5433,21 +5349,21 @@ static int si_compile_llvm(struct si_screen
>> *sscreen,
>>                 LLVMDisposeMessage(ir);
>>         }
>>
>>         if (!si_replace_shader(count, binary)) {
>>                 r = si_llvm_compile(mod, binary, compiler, debug,
>>                                     less_optimized);
>>                 if (r)
>>                         return r;
>>         }
>>
>> -       si_shader_binary_read_config(binary, conf, 0);
>> +       ac_shader_binary_read_config(binary, conf, 0, false);
>>
>>         /* Enable 64-bit and 16-bit denormals, because there is no
>> performance
>>          * cost.
>>          *
>>          * If denormals are enabled, all floating-point output modifiers
>> are
>>          * ignored.
>>          *
>>          * Don't enable denormals for 32-bit floats, because:
>>          * - Floating-point output modifiers would be ignored by the hw.
>>          * - Some opcodes don't support denormals, such as v_mad_f32. We
>> would
>> @@ -6799,21 +6715,21 @@ int si_compile_tgsi_shader(struct si_screen
>> *sscreen,
>>                                           need_prolog ? 1 : 0, 0);
>>         }
>>
>>         si_llvm_optimize_module(&ctx);
>>
>>         /* Post-optimization transformations and analysis. */
>>         si_optimize_vs_outputs(&ctx);
>>
>>         if ((debug && debug->debug_message) ||
>>             si_can_dump_shader(sscreen, ctx.type)) {
>> -               ctx.shader->config.private_mem_vgprs =
>> +               ctx.shader->private_mem_vgprs =
>>                         ac_count_scratch_private_memory(ctx.main_fn);
>>         }
>>
>>         /* Make sure the input is a pointer and not integer followed by
>> inttoptr. */
>>         assert(LLVMGetTypeKind(LLVMTypeOf(LLVMGetParam(ctx.main_fn, 0)))
>> ==
>>                LLVMPointerTypeKind);
>>
>>         /* Compile to bytecode. */
>>         r = si_compile_llvm(sscreen, &shader->binary, &shader->config,
>> compiler,
>>                             ctx.ac.module, debug, ctx.type,
>> @@ -7954,23 +7870,23 @@ int si_shader_create(struct si_screen *sscreen,
>> struct ac_llvm_compiler *compile
>>                         shader->config.num_sgprs =
>> MAX2(shader->config.num_sgprs,
>>
>> shader->previous_stage->config.num_sgprs);
>>                         shader->config.num_vgprs =
>> MAX2(shader->config.num_vgprs,
>>
>> shader->previous_stage->config.num_vgprs);
>>                         shader->config.spilled_sgprs =
>>                                 MAX2(shader->config.spilled_sgprs,
>>
>>  shader->previous_stage->config.spilled_sgprs);
>>                         shader->config.spilled_vgprs =
>>                                 MAX2(shader->config.spilled_vgprs,
>>
>>  shader->previous_stage->config.spilled_vgprs);
>> -                       shader->config.private_mem_vgprs =
>> -                               MAX2(shader->config.private_mem_vgprs,
>> -
>> shader->previous_stage->config.private_mem_vgprs);
>> +                       shader->private_mem_vgprs =
>> +                               MAX2(shader->private_mem_vgprs,
>> +
>> shader->previous_stage->private_mem_vgprs);
>>                         shader->config.scratch_bytes_per_wave =
>>
>> MAX2(shader->config.scratch_bytes_per_wave,
>>
>>  shader->previous_stage->config.scratch_bytes_per_wave);
>>                         shader->info.uses_instanceid |=
>>
>> shader->previous_stage->info.uses_instanceid;
>>                 }
>>                 if (shader->prolog2) {
>>                         shader->config.num_sgprs =
>> MAX2(shader->config.num_sgprs,
>>
>> shader->prolog2->config.num_sgprs);
>>                         shader->config.num_vgprs =
>> MAX2(shader->config.num_vgprs,
>> diff --git a/src/gallium/drivers/radeonsi/si_shader.h
>> b/src/gallium/drivers/radeonsi/si_shader.h
>> index ecf7f8bbd7a..6c8f70dc94b 100644
>> --- a/src/gallium/drivers/radeonsi/si_shader.h
>> +++ b/src/gallium/drivers/radeonsi/si_shader.h
>> @@ -552,36 +552,20 @@ struct si_shader_key {
>>                  * but forces monolithic shaders to be used as soon as
>>                  * possible, because it's in the "opt" group.
>>                  */
>>                 unsigned        prefer_mono:1;
>>         } opt;
>>  };
>>
>>  /* Restore the pack alignment to default. */
>>  #pragma pack(pop)
>>
>> -struct si_shader_config {
>> -       unsigned                        num_sgprs;
>> -       unsigned                        num_vgprs;
>> -       unsigned                        spilled_sgprs;
>> -       unsigned                        spilled_vgprs;
>> -       unsigned                        private_mem_vgprs;
>> -       unsigned                        lds_size;
>> -       unsigned                        max_simd_waves;
>> -       unsigned                        spi_ps_input_ena;
>> -       unsigned                        spi_ps_input_addr;
>> -       unsigned                        float_mode;
>> -       unsigned                        scratch_bytes_per_wave;
>> -       unsigned                        rsrc1;
>> -       unsigned                        rsrc2;
>> -};
>> -
>>  /* GCN-specific shader info. */
>>  struct si_shader_info {
>>         ubyte                   vs_output_param_offset[SI_MAX_VS_OUTPUTS];
>>         ubyte                   num_input_sgprs;
>>         ubyte                   num_input_vgprs;
>>         signed char             face_vgpr_index;
>>         signed char             ancillary_vgpr_index;
>>         bool                    uses_instanceid;
>>         ubyte                   nr_pos_exports;
>>         ubyte                   nr_param_exports;
>> @@ -605,22 +589,24 @@ struct si_shader {
>>         struct si_shader_key            key;
>>         struct util_queue_fence         ready;
>>         bool                            compilation_failed;
>>         bool                            is_monolithic;
>>         bool                            is_optimized;
>>         bool                            is_binary_shared;
>>         bool                            is_gs_copy_shader;
>>
>>         /* The following data is all that's needed for binary shaders. */
>>         struct ac_shader_binary binary;
>> -       struct si_shader_config         config;
>> +       struct ac_shader_config         config;
>>         struct si_shader_info           info;
>> +       unsigned                        private_mem_vgprs;
>> +       unsigned                        max_simd_waves;
>>
>
> The shader cache stores "config" but not these new members.
>

My updated version of the patch moves these two variables (private_mem_vgprs
and max_simd_waves) into struct si_shader_info, which trivially resolves the concern.

Marek
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20190612/99cf5fb0/attachment-0001.html>