[Mesa-dev] [PATCH 04/10] radeonsi: use ac_shader_config
Marek Olšák
maraeo at gmail.com
Wed Jun 12 22:50:49 UTC 2019
On Wed, May 8, 2019 at 1:52 AM Marek Olšák <maraeo at gmail.com> wrote:
> On Fri, May 3, 2019 at 7:19 AM Nicolai Hähnle <nhaehnle at gmail.com> wrote:
>
>> From: Nicolai Hähnle <nicolai.haehnle at amd.com>
>>
>> ---
>> src/amd/common/ac_binary.c | 2 +
>> src/gallium/drivers/radeonsi/si_compute.c | 14 +--
>> src/gallium/drivers/radeonsi/si_shader.c | 112 +++-------------------
>> src/gallium/drivers/radeonsi/si_shader.h | 25 +----
>> 4 files changed, 27 insertions(+), 126 deletions(-)
>>
>> diff --git a/src/amd/common/ac_binary.c b/src/amd/common/ac_binary.c
>> index 44251886b5f..d0ca55e0e0d 100644
>> --- a/src/amd/common/ac_binary.c
>> +++ b/src/amd/common/ac_binary.c
>> @@ -218,26 +218,28 @@ void ac_parse_shader_binary_config(const char
>> *data, size_t nbytes,
>> unsigned value = util_le32_to_cpu(*(uint32_t*)(data + i +
>> 4));
>> switch (reg) {
>> case R_00B028_SPI_SHADER_PGM_RSRC1_PS:
>> case R_00B128_SPI_SHADER_PGM_RSRC1_VS:
>> case R_00B228_SPI_SHADER_PGM_RSRC1_GS:
>> case R_00B848_COMPUTE_PGM_RSRC1:
>> case R_00B428_SPI_SHADER_PGM_RSRC1_HS:
>> conf->num_sgprs = MAX2(conf->num_sgprs,
>> (G_00B028_SGPRS(value) + 1) * 8);
>> conf->num_vgprs = MAX2(conf->num_vgprs,
>> (G_00B028_VGPRS(value) + 1) * 4);
>> conf->float_mode = G_00B028_FLOAT_MODE(value);
>> + conf->rsrc1 = value;
>> break;
>> case R_00B02C_SPI_SHADER_PGM_RSRC2_PS:
>> conf->lds_size = MAX2(conf->lds_size,
>> G_00B02C_EXTRA_LDS_SIZE(value));
>> break;
>> case R_00B84C_COMPUTE_PGM_RSRC2:
>> conf->lds_size = MAX2(conf->lds_size,
>> G_00B84C_LDS_SIZE(value));
>> + conf->rsrc2 = value;
>> break;
>> case R_0286CC_SPI_PS_INPUT_ENA:
>> conf->spi_ps_input_ena = value;
>> break;
>> case R_0286D0_SPI_PS_INPUT_ADDR:
>> conf->spi_ps_input_addr = value;
>> break;
>> case R_0286E8_SPI_TMPRING_SIZE:
>> case R_00B860_COMPUTE_TMPRING_SIZE:
>> /* WAVESIZE is in units of 256 dwords. */
>> diff --git a/src/gallium/drivers/radeonsi/si_compute.c
>> b/src/gallium/drivers/radeonsi/si_compute.c
>> index 541d7e6f118..02d7bac406a 100644
>> --- a/src/gallium/drivers/radeonsi/si_compute.c
>> +++ b/src/gallium/drivers/radeonsi/si_compute.c
>> @@ -59,21 +59,21 @@ static const amd_kernel_code_t
>> *si_compute_get_code_object(
>> uint64_t symbol_offset)
>> {
>> if (!program->use_code_object_v2) {
>> return NULL;
>> }
>> return (const amd_kernel_code_t*)
>> (program->shader.binary.code + symbol_offset);
>> }
>>
>> static void code_object_to_config(const amd_kernel_code_t *code_object,
>> - struct si_shader_config *out_config) {
>> + struct ac_shader_config *out_config) {
>>
>> uint32_t rsrc1 = code_object->compute_pgm_resource_registers;
>> uint32_t rsrc2 = code_object->compute_pgm_resource_registers >>
>> 32;
>> out_config->num_sgprs = code_object->wavefront_sgpr_count;
>> out_config->num_vgprs = code_object->workitem_vgpr_count;
>> out_config->float_mode = G_00B028_FLOAT_MODE(rsrc1);
>> out_config->rsrc1 = rsrc1;
>> out_config->lds_size = MAX2(out_config->lds_size,
>> G_00B84C_LDS_SIZE(rsrc2));
>> out_config->rsrc2 = rsrc2;
>> out_config->scratch_bytes_per_wave =
>> @@ -241,22 +241,22 @@ static void *si_create_compute_state(
>> const amd_kernel_code_t *code_object =
>> si_compute_get_code_object(program, 0);
>> code_object_to_config(code_object,
>> &program->shader.config);
>> if (program->shader.binary.reloc_count != 0) {
>> fprintf(stderr, "Error: %d unsupported
>> relocations\n",
>>
>> program->shader.binary.reloc_count);
>> FREE(program);
>> return NULL;
>> }
>> } else {
>> -
>> si_shader_binary_read_config(&program->shader.binary,
>> - &program->shader.config, 0);
>> +
>> ac_shader_binary_read_config(&program->shader.binary,
>> + &program->shader.config, 0, false);
>> }
>> si_shader_dump(sctx->screen, &program->shader,
>> &sctx->debug,
>> PIPE_SHADER_COMPUTE, stderr, true);
>> if (si_shader_binary_upload(sctx->screen,
>> &program->shader) < 0) {
>> fprintf(stderr, "LLVM failed to upload shader\n");
>> FREE(program);
>> return NULL;
>> }
>> }
>>
>> @@ -362,21 +362,21 @@ static void si_initialize_compute(struct si_context
>> *sctx)
>> bc_va >> 8);
>> }
>> }
>>
>> sctx->cs_shader_state.emitted_program = NULL;
>> sctx->cs_shader_state.initialized = true;
>> }
>>
>> static bool si_setup_compute_scratch_buffer(struct si_context *sctx,
>> struct si_shader *shader,
>> - struct si_shader_config
>> *config)
>> + struct ac_shader_config
>> *config)
>> {
>> uint64_t scratch_bo_size, scratch_needed;
>> scratch_bo_size = 0;
>> scratch_needed = config->scratch_bytes_per_wave *
>> sctx->scratch_waves;
>> if (sctx->compute_scratch_buffer)
>> scratch_bo_size =
>> sctx->compute_scratch_buffer->b.b.width0;
>>
>> if (scratch_bo_size < scratch_needed) {
>> si_resource_reference(&sctx->compute_scratch_buffer,
>> NULL);
>>
>> @@ -405,38 +405,38 @@ static bool si_setup_compute_scratch_buffer(struct
>> si_context *sctx,
>> return true;
>> }
>>
>> static bool si_switch_compute_shader(struct si_context *sctx,
>> struct si_compute *program,
>> struct si_shader *shader,
>> const amd_kernel_code_t *code_object,
>> unsigned offset)
>> {
>> struct radeon_cmdbuf *cs = sctx->gfx_cs;
>> - struct si_shader_config inline_config = {0};
>> - struct si_shader_config *config;
>> + struct ac_shader_config inline_config = {0};
>> + struct ac_shader_config *config;
>> uint64_t shader_va;
>>
>> if (sctx->cs_shader_state.emitted_program == program &&
>> sctx->cs_shader_state.offset == offset)
>> return true;
>>
>> if (program->ir_type != PIPE_SHADER_IR_NATIVE) {
>> config = &shader->config;
>> } else {
>> unsigned lds_blocks;
>>
>> config = &inline_config;
>> if (code_object) {
>> code_object_to_config(code_object, config);
>> } else {
>> - si_shader_binary_read_config(&shader->binary,
>> config, offset);
>> + ac_shader_binary_read_config(&shader->binary,
>> config, offset, false);
>> }
>>
>> lds_blocks = config->lds_size;
>> /* XXX: We are over allocating LDS. For SI, the shader
>> reports
>> * LDS in blocks of 256 bytes, so if there are 4 bytes lds
>> * allocated in the shader and 4 bytes allocated by the
>> state
>> * tracker, then we will set LDS_SIZE to 512 bytes rather
>> than 256.
>> */
>> if (sctx->chip_class <= SI) {
>> lds_blocks += align(program->local_size, 256) >>
>> 8;
>> diff --git a/src/gallium/drivers/radeonsi/si_shader.c
>> b/src/gallium/drivers/radeonsi/si_shader.c
>> index f6d882cf583..da43447013d 100644
>> --- a/src/gallium/drivers/radeonsi/si_shader.c
>> +++ b/src/gallium/drivers/radeonsi/si_shader.c
>> @@ -4962,104 +4962,20 @@ static void si_llvm_emit_polygon_stipple(struct
>> si_shader_context *ctx,
>> /* The stipple pattern is 32x32, each row has 32 bits. */
>> offset = LLVMBuildMul(builder, address[1],
>> LLVMConstInt(ctx->i32, 4, 0), "");
>> row = buffer_load_const(ctx, desc, offset);
>> row = ac_to_integer(&ctx->ac, row);
>> bit = LLVMBuildLShr(builder, row, address[0], "");
>> bit = LLVMBuildTrunc(builder, bit, ctx->i1, "");
>> ac_build_kill_if_false(&ctx->ac, bit);
>> }
>>
>> -void si_shader_binary_read_config(struct ac_shader_binary *binary,
>> - struct si_shader_config *conf,
>> - unsigned symbol_offset)
>> -{
>> - unsigned i;
>> - const unsigned char *config =
>> - ac_shader_binary_config_start(binary, symbol_offset);
>> - bool really_needs_scratch = false;
>> -
>> - /* LLVM adds SGPR spills to the scratch size.
>> - * Find out if we really need the scratch buffer.
>> - */
>> - for (i = 0; i < binary->reloc_count; i++) {
>> - const struct ac_shader_reloc *reloc = &binary->relocs[i];
>> -
>> - if (!strcmp(scratch_rsrc_dword0_symbol, reloc->name) ||
>> - !strcmp(scratch_rsrc_dword1_symbol, reloc->name)) {
>> - really_needs_scratch = true;
>> - break;
>> - }
>> - }
>> -
>> - /* XXX: We may be able to emit some of these values directly
>> rather than
>> - * extracting fields to be emitted later.
>> - */
>> -
>> - for (i = 0; i < binary->config_size_per_symbol; i+= 8) {
>> - unsigned reg = util_le32_to_cpu(*(uint32_t*)(config + i));
>> - unsigned value = util_le32_to_cpu(*(uint32_t*)(config + i
>> + 4));
>> - switch (reg) {
>> - case R_00B028_SPI_SHADER_PGM_RSRC1_PS:
>> - case R_00B128_SPI_SHADER_PGM_RSRC1_VS:
>> - case R_00B228_SPI_SHADER_PGM_RSRC1_GS:
>> - case R_00B428_SPI_SHADER_PGM_RSRC1_HS:
>> - case R_00B848_COMPUTE_PGM_RSRC1:
>> - conf->num_sgprs = MAX2(conf->num_sgprs,
>> (G_00B028_SGPRS(value) + 1) * 8);
>> - conf->num_vgprs = MAX2(conf->num_vgprs,
>> (G_00B028_VGPRS(value) + 1) * 4);
>> - conf->float_mode = G_00B028_FLOAT_MODE(value);
>> - conf->rsrc1 = value;
>> - break;
>> - case R_00B02C_SPI_SHADER_PGM_RSRC2_PS:
>> - conf->lds_size = MAX2(conf->lds_size,
>> G_00B02C_EXTRA_LDS_SIZE(value));
>> - break;
>> - case R_00B84C_COMPUTE_PGM_RSRC2:
>> - conf->lds_size = MAX2(conf->lds_size,
>> G_00B84C_LDS_SIZE(value));
>> - conf->rsrc2 = value;
>> - break;
>> - case R_0286CC_SPI_PS_INPUT_ENA:
>> - conf->spi_ps_input_ena = value;
>> - break;
>> - case R_0286D0_SPI_PS_INPUT_ADDR:
>> - conf->spi_ps_input_addr = value;
>> - break;
>> - case R_0286E8_SPI_TMPRING_SIZE:
>> - case R_00B860_COMPUTE_TMPRING_SIZE:
>> - /* WAVESIZE is in units of 256 dwords. */
>> - if (really_needs_scratch)
>> - conf->scratch_bytes_per_wave =
>> - G_00B860_WAVESIZE(value) * 256 *
>> 4;
>> - break;
>> - case 0x4: /* SPILLED_SGPRS */
>> - conf->spilled_sgprs = value;
>> - break;
>> - case 0x8: /* SPILLED_VGPRS */
>> - conf->spilled_vgprs = value;
>> - break;
>> - default:
>> - {
>> - static bool printed;
>> -
>> - if (!printed) {
>> - fprintf(stderr, "Warning: LLVM
>> emitted unknown "
>> - "config register:
>> 0x%x\n", reg);
>> - printed = true;
>> - }
>> - }
>> - break;
>> - }
>> - }
>> -
>> - if (!conf->spi_ps_input_addr)
>> - conf->spi_ps_input_addr = conf->spi_ps_input_ena;
>> -}
>> -
>> void si_shader_apply_scratch_relocs(struct si_shader *shader,
>> uint64_t scratch_va)
>> {
>> unsigned i;
>> uint32_t scratch_rsrc_dword0 = scratch_va;
>> uint32_t scratch_rsrc_dword1 =
>> S_008F04_BASE_ADDRESS_HI(scratch_va >> 32);
>>
>> /* Enable scratch coalescing. */
>> scratch_rsrc_dword1 |= S_008F04_SWIZZLE_ENABLE(1);
>> @@ -5213,21 +5129,21 @@ static void si_shader_dump_disassembly(const
>> struct ac_shader_binary *binary,
>> fprintf(file, "@0x%x: %02x%02x%02x%02x\n", i,
>> binary->code[i + 3], binary->code[i + 2],
>> binary->code[i + 1], binary->code[i]);
>> }
>> }
>> }
>>
>> static void si_calculate_max_simd_waves(struct si_shader *shader)
>> {
>> struct si_screen *sscreen = shader->selector->screen;
>> - struct si_shader_config *conf = &shader->config;
>> + struct ac_shader_config *conf = &shader->config;
>> unsigned num_inputs = shader->selector->info.num_inputs;
>> unsigned lds_increment = sscreen->info.chip_class >= CIK ? 512 :
>> 256;
>> unsigned lds_per_wave = 0;
>> unsigned max_simd_waves;
>>
>> max_simd_waves = ac_get_max_simd_waves(sscreen->info.family);
>>
>> /* Compute LDS usage for PS. */
>> switch (shader->selector->type) {
>> case PIPE_SHADER_FRAGMENT:
>> @@ -5262,46 +5178,46 @@ static void si_calculate_max_simd_waves(struct
>> si_shader *shader)
>> }
>>
>> if (conf->num_vgprs)
>> max_simd_waves = MIN2(max_simd_waves, 256 /
>> conf->num_vgprs);
>>
>> /* LDS is 64KB per CU (4 SIMDs), which is 16KB per SIMD (usage
>> above
>> * 16KB makes some SIMDs unoccupied). */
>> if (lds_per_wave)
>> max_simd_waves = MIN2(max_simd_waves, 16384 /
>> lds_per_wave);
>>
>> - conf->max_simd_waves = max_simd_waves;
>> + shader->max_simd_waves = max_simd_waves;
>> }
>>
>> void si_shader_dump_stats_for_shader_db(const struct si_shader *shader,
>> struct pipe_debug_callback *debug)
>> {
>> - const struct si_shader_config *conf = &shader->config;
>> + const struct ac_shader_config *conf = &shader->config;
>>
>> pipe_debug_message(debug, SHADER_INFO,
>> "Shader Stats: SGPRS: %d VGPRS: %d Code Size:
>> %d "
>> "LDS: %d Scratch: %d Max Waves: %d Spilled
>> SGPRs: %d "
>> "Spilled VGPRs: %d PrivMem VGPRs: %d",
>> conf->num_sgprs, conf->num_vgprs,
>> si_get_shader_binary_size(shader),
>> conf->lds_size, conf->scratch_bytes_per_wave,
>> - conf->max_simd_waves, conf->spilled_sgprs,
>> - conf->spilled_vgprs, conf->private_mem_vgprs);
>> + shader->max_simd_waves, conf->spilled_sgprs,
>> + conf->spilled_vgprs,
>> shader->private_mem_vgprs);
>> }
>>
>> static void si_shader_dump_stats(struct si_screen *sscreen,
>> const struct si_shader *shader,
>> unsigned processor,
>> FILE *file,
>> bool check_debug_option)
>> {
>> - const struct si_shader_config *conf = &shader->config;
>> + const struct ac_shader_config *conf = &shader->config;
>>
>> if (!check_debug_option ||
>> si_can_dump_shader(sscreen, processor)) {
>> if (processor == PIPE_SHADER_FRAGMENT) {
>> fprintf(file, "*** SHADER CONFIG ***\n"
>> "SPI_PS_INPUT_ADDR = 0x%04x\n"
>> "SPI_PS_INPUT_ENA = 0x%04x\n",
>> conf->spi_ps_input_addr,
>> conf->spi_ps_input_ena);
>> }
>>
>> @@ -5311,24 +5227,24 @@ static void si_shader_dump_stats(struct si_screen
>> *sscreen,
>> "Spilled SGPRs: %d\n"
>> "Spilled VGPRs: %d\n"
>> "Private memory VGPRs: %d\n"
>> "Code Size: %d bytes\n"
>> "LDS: %d blocks\n"
>> "Scratch: %d bytes per wave\n"
>> "Max Waves: %d\n"
>> "********************\n\n\n",
>> conf->num_sgprs, conf->num_vgprs,
>> conf->spilled_sgprs, conf->spilled_vgprs,
>> - conf->private_mem_vgprs,
>> + shader->private_mem_vgprs,
>> si_get_shader_binary_size(shader),
>> conf->lds_size, conf->scratch_bytes_per_wave,
>> - conf->max_simd_waves);
>> + shader->max_simd_waves);
>> }
>> }
>>
>> const char *si_get_shader_name(const struct si_shader *shader, unsigned
>> processor)
>> {
>> switch (processor) {
>> case PIPE_SHADER_VERTEX:
>> if (shader->key.as_es)
>> return "Vertex Shader as ES";
>> else if (shader->key.as_ls)
>> @@ -5399,21 +5315,21 @@ void si_shader_dump(struct si_screen *sscreen,
>> const struct si_shader *shader,
>> debug, "epilog", file);
>> fprintf(file, "\n");
>> }
>>
>> si_shader_dump_stats(sscreen, shader, processor, file,
>> check_debug_option);
>> }
>>
>> static int si_compile_llvm(struct si_screen *sscreen,
>> struct ac_shader_binary *binary,
>> - struct si_shader_config *conf,
>> + struct ac_shader_config *conf,
>> struct ac_llvm_compiler *compiler,
>> LLVMModuleRef mod,
>> struct pipe_debug_callback *debug,
>> unsigned processor,
>> const char *name,
>> bool less_optimized)
>> {
>> int r = 0;
>> unsigned count = p_atomic_inc_return(&sscreen->num_compilations);
>>
>> @@ -5433,21 +5349,21 @@ static int si_compile_llvm(struct si_screen
>> *sscreen,
>> LLVMDisposeMessage(ir);
>> }
>>
>> if (!si_replace_shader(count, binary)) {
>> r = si_llvm_compile(mod, binary, compiler, debug,
>> less_optimized);
>> if (r)
>> return r;
>> }
>>
>> - si_shader_binary_read_config(binary, conf, 0);
>> + ac_shader_binary_read_config(binary, conf, 0, false);
>>
>> /* Enable 64-bit and 16-bit denormals, because there is no
>> performance
>> * cost.
>> *
>> * If denormals are enabled, all floating-point output modifiers
>> are
>> * ignored.
>> *
>> * Don't enable denormals for 32-bit floats, because:
>> * - Floating-point output modifiers would be ignored by the hw.
>> * - Some opcodes don't support denormals, such as v_mad_f32. We
>> would
>> @@ -6799,21 +6715,21 @@ int si_compile_tgsi_shader(struct si_screen
>> *sscreen,
>> need_prolog ? 1 : 0, 0);
>> }
>>
>> si_llvm_optimize_module(&ctx);
>>
>> /* Post-optimization transformations and analysis. */
>> si_optimize_vs_outputs(&ctx);
>>
>> if ((debug && debug->debug_message) ||
>> si_can_dump_shader(sscreen, ctx.type)) {
>> - ctx.shader->config.private_mem_vgprs =
>> + ctx.shader->private_mem_vgprs =
>> ac_count_scratch_private_memory(ctx.main_fn);
>> }
>>
>> /* Make sure the input is a pointer and not integer followed by
>> inttoptr. */
>> assert(LLVMGetTypeKind(LLVMTypeOf(LLVMGetParam(ctx.main_fn, 0)))
>> ==
>> LLVMPointerTypeKind);
>>
>> /* Compile to bytecode. */
>> r = si_compile_llvm(sscreen, &shader->binary, &shader->config,
>> compiler,
>> ctx.ac.module, debug, ctx.type,
>> @@ -7954,23 +7870,23 @@ int si_shader_create(struct si_screen *sscreen,
>> struct ac_llvm_compiler *compile
>> shader->config.num_sgprs =
>> MAX2(shader->config.num_sgprs,
>>
>> shader->previous_stage->config.num_sgprs);
>> shader->config.num_vgprs =
>> MAX2(shader->config.num_vgprs,
>>
>> shader->previous_stage->config.num_vgprs);
>> shader->config.spilled_sgprs =
>> MAX2(shader->config.spilled_sgprs,
>>
>> shader->previous_stage->config.spilled_sgprs);
>> shader->config.spilled_vgprs =
>> MAX2(shader->config.spilled_vgprs,
>>
>> shader->previous_stage->config.spilled_vgprs);
>> - shader->config.private_mem_vgprs =
>> - MAX2(shader->config.private_mem_vgprs,
>> -
>> shader->previous_stage->config.private_mem_vgprs);
>> + shader->private_mem_vgprs =
>> + MAX2(shader->private_mem_vgprs,
>> +
>> shader->previous_stage->private_mem_vgprs);
>> shader->config.scratch_bytes_per_wave =
>>
>> MAX2(shader->config.scratch_bytes_per_wave,
>>
>> shader->previous_stage->config.scratch_bytes_per_wave);
>> shader->info.uses_instanceid |=
>>
>> shader->previous_stage->info.uses_instanceid;
>> }
>> if (shader->prolog2) {
>> shader->config.num_sgprs =
>> MAX2(shader->config.num_sgprs,
>>
>> shader->prolog2->config.num_sgprs);
>> shader->config.num_vgprs =
>> MAX2(shader->config.num_vgprs,
>> diff --git a/src/gallium/drivers/radeonsi/si_shader.h
>> b/src/gallium/drivers/radeonsi/si_shader.h
>> index ecf7f8bbd7a..6c8f70dc94b 100644
>> --- a/src/gallium/drivers/radeonsi/si_shader.h
>> +++ b/src/gallium/drivers/radeonsi/si_shader.h
>> @@ -552,36 +552,20 @@ struct si_shader_key {
>> * but forces monolithic shaders to be used as soon as
>> * possible, because it's in the "opt" group.
>> */
>> unsigned prefer_mono:1;
>> } opt;
>> };
>>
>> /* Restore the pack alignment to default. */
>> #pragma pack(pop)
>>
>> -struct si_shader_config {
>> - unsigned num_sgprs;
>> - unsigned num_vgprs;
>> - unsigned spilled_sgprs;
>> - unsigned spilled_vgprs;
>> - unsigned private_mem_vgprs;
>> - unsigned lds_size;
>> - unsigned max_simd_waves;
>> - unsigned spi_ps_input_ena;
>> - unsigned spi_ps_input_addr;
>> - unsigned float_mode;
>> - unsigned scratch_bytes_per_wave;
>> - unsigned rsrc1;
>> - unsigned rsrc2;
>> -};
>> -
>> /* GCN-specific shader info. */
>> struct si_shader_info {
>> ubyte vs_output_param_offset[SI_MAX_VS_OUTPUTS];
>> ubyte num_input_sgprs;
>> ubyte num_input_vgprs;
>> signed char face_vgpr_index;
>> signed char ancillary_vgpr_index;
>> bool uses_instanceid;
>> ubyte nr_pos_exports;
>> ubyte nr_param_exports;
>> @@ -605,22 +589,24 @@ struct si_shader {
>> struct si_shader_key key;
>> struct util_queue_fence ready;
>> bool compilation_failed;
>> bool is_monolithic;
>> bool is_optimized;
>> bool is_binary_shared;
>> bool is_gs_copy_shader;
>>
>> /* The following data is all that's needed for binary shaders. */
>> struct ac_shader_binary binary;
>> - struct si_shader_config config;
>> + struct ac_shader_config config;
>> struct si_shader_info info;
>> + unsigned private_mem_vgprs;
>> + unsigned max_simd_waves;
>>
>
> The shader cache stores "config" but not these new members.
>
My updated version of the patch moves these two variables (private_mem_vgprs
and max_simd_waves) to si_shader_info, which trivially resolves the concern.
Marek
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20190612/99cf5fb0/attachment-0001.html>
More information about the mesa-dev mailing list