[Mesa-dev] [PATCH 16/61] radeonsi/gfx9: add si_shader::previous_stage for merged shaders
Marek Olšák
maraeo at gmail.com
Mon Apr 24 08:45:13 UTC 2017
From: Marek Olšák <marek.olsak at amd.com>
---
src/gallium/drivers/radeonsi/si_debug.c | 4 ++++
src/gallium/drivers/radeonsi/si_shader.c | 35 +++++++++++++++++++++++++++++++-
src/gallium/drivers/radeonsi/si_shader.h | 1 +
3 files changed, 39 insertions(+), 1 deletion(-)
diff --git a/src/gallium/drivers/radeonsi/si_debug.c b/src/gallium/drivers/radeonsi/si_debug.c
index 9d0c0c5..038c8b4 100644
--- a/src/gallium/drivers/radeonsi/si_debug.c
+++ b/src/gallium/drivers/radeonsi/si_debug.c
@@ -632,20 +632,24 @@ static void si_print_annotated_shader(struct si_shader *shader,
* Buffer size / 4 is the upper bound of the instruction count.
*/
unsigned num_inst = 0;
struct si_shader_inst *instructions =
calloc(shader->bo->b.b.width0 / 4, sizeof(struct si_shader_inst));
if (shader->prolog) {
si_add_split_disasm(shader->prolog->binary.disasm_string,
start_addr, &num_inst, instructions);
}
+ if (shader->previous_stage) {
+ si_add_split_disasm(shader->previous_stage->binary.disasm_string,
+ start_addr, &num_inst, instructions);
+ }
si_add_split_disasm(shader->binary.disasm_string,
start_addr, &num_inst, instructions);
if (shader->epilog) {
si_add_split_disasm(shader->epilog->binary.disasm_string,
start_addr, &num_inst, instructions);
}
fprintf(f, COLOR_YELLOW "%s - annotated disassembly:" COLOR_RESET "\n",
si_get_shader_name(shader, shader->selector->type));
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index f6cd313..ffec302 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -6141,38 +6141,43 @@ void si_shader_apply_scratch_relocs(struct si_context *sctx,
}
}
}
static unsigned si_get_shader_binary_size(struct si_shader *shader)
{
unsigned size = shader->binary.code_size;
if (shader->prolog)
size += shader->prolog->binary.code_size;
+ if (shader->previous_stage)
+ size += shader->previous_stage->binary.code_size;
if (shader->epilog)
size += shader->epilog->binary.code_size;
return size;
}
int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader *shader)
{
const struct ac_shader_binary *prolog =
shader->prolog ? &shader->prolog->binary : NULL;
+ const struct ac_shader_binary *previous_stage =
+ shader->previous_stage ? &shader->previous_stage->binary : NULL;
const struct ac_shader_binary *epilog =
shader->epilog ? &shader->epilog->binary : NULL;
const struct ac_shader_binary *mainb = &shader->binary;
unsigned bo_size = si_get_shader_binary_size(shader) +
(!epilog ? mainb->rodata_size : 0);
unsigned char *ptr;
assert(!prolog || !prolog->rodata_size);
- assert((!prolog && !epilog) || !mainb->rodata_size);
+ assert(!previous_stage || !previous_stage->rodata_size);
+ assert((!prolog && !previous_stage && !epilog) || !mainb->rodata_size);
assert(!epilog || !epilog->rodata_size);
/* GFX9 can fetch at most 128 bytes past the end of the shader.
* Prevent VM faults.
*/
if (sscreen->b.chip_class >= GFX9)
bo_size += 128;
r600_resource_reference(&shader->bo, NULL);
shader->bo = (struct r600_resource*)
@@ -6183,20 +6188,25 @@ int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader *shader)
return -ENOMEM;
/* Upload. */
ptr = sscreen->b.ws->buffer_map(shader->bo->buf, NULL,
PIPE_TRANSFER_READ_WRITE);
if (prolog) {
util_memcpy_cpu_to_le32(ptr, prolog->code, prolog->code_size);
ptr += prolog->code_size;
}
+ if (previous_stage) {
+ util_memcpy_cpu_to_le32(ptr, previous_stage->code,
+ previous_stage->code_size);
+ ptr += previous_stage->code_size;
+ }
util_memcpy_cpu_to_le32(ptr, mainb->code, mainb->code_size);
ptr += mainb->code_size;
if (epilog)
util_memcpy_cpu_to_le32(ptr, epilog->code, epilog->code_size);
else if (mainb->rodata_size > 0)
util_memcpy_cpu_to_le32(ptr, mainb->rodata, mainb->rodata_size);
sscreen->b.ws->buffer_unmap(shader->bo->buf);
@@ -6390,20 +6400,23 @@ void si_shader_dump(struct si_screen *sscreen, struct si_shader *shader,
}
if (!check_debug_option ||
(r600_can_dump_shader(&sscreen->b, processor) &&
!(sscreen->b.debug_flags & DBG_NO_ASM))) {
fprintf(file, "\n%s:\n", si_get_shader_name(shader, processor));
if (shader->prolog)
si_shader_dump_disassembly(&shader->prolog->binary,
debug, "prolog", file);
+ if (shader->previous_stage)
+ si_shader_dump_disassembly(&shader->previous_stage->binary,
+ debug, "previous stage", file);
si_shader_dump_disassembly(&shader->binary, debug, "main", file);
if (shader->epilog)
si_shader_dump_disassembly(&shader->epilog->binary,
debug, "epilog", file);
fprintf(file, "\n");
}
si_shader_dump_stats(sscreen, shader, debug, processor, file,
@@ -8702,20 +8715,40 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
break;
}
/* Update SGPR and VGPR counts. */
if (shader->prolog) {
shader->config.num_sgprs = MAX2(shader->config.num_sgprs,
shader->prolog->config.num_sgprs);
shader->config.num_vgprs = MAX2(shader->config.num_vgprs,
shader->prolog->config.num_vgprs);
}
+ if (shader->previous_stage) {
+ shader->config.num_sgprs = MAX2(shader->config.num_sgprs,
+ shader->previous_stage->config.num_sgprs);
+ shader->config.num_vgprs = MAX2(shader->config.num_vgprs,
+ shader->previous_stage->config.num_vgprs);
+ shader->config.spilled_sgprs =
+ MAX2(shader->config.spilled_sgprs,
+ shader->previous_stage->config.spilled_sgprs);
+ shader->config.spilled_vgprs =
+ MAX2(shader->config.spilled_vgprs,
+ shader->previous_stage->config.spilled_vgprs);
+ shader->config.private_mem_vgprs =
+ MAX2(shader->config.private_mem_vgprs,
+ shader->previous_stage->config.private_mem_vgprs);
+ shader->config.scratch_bytes_per_wave =
+ MAX2(shader->config.scratch_bytes_per_wave,
+ shader->previous_stage->config.scratch_bytes_per_wave);
+ shader->info.uses_instanceid |=
+ shader->previous_stage->info.uses_instanceid;
+ }
if (shader->epilog) {
shader->config.num_sgprs = MAX2(shader->config.num_sgprs,
shader->epilog->config.num_sgprs);
shader->config.num_vgprs = MAX2(shader->config.num_vgprs,
shader->epilog->config.num_vgprs);
}
}
si_fix_resource_usage(sscreen, shader);
si_shader_dump(sscreen, shader, debug, sel->info.processor,
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index 582c427..92293c4 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -510,20 +510,21 @@ struct si_shader_info {
ubyte nr_param_exports;
};
struct si_shader {
struct si_compiler_ctx_state compiler_ctx_state;
struct si_shader_selector *selector;
struct si_shader *next_variant;
struct si_shader_part *prolog;
+ struct si_shader *previous_stage; /* for GFX9 */
struct si_shader_part *epilog;
struct si_pm4_state *pm4;
struct r600_resource *bo;
struct r600_resource *scratch_bo;
struct si_shader_key key;
struct util_queue_fence optimized_ready;
bool compilation_failed;
bool is_monolithic;
bool is_optimized;
--
2.7.4
More information about the mesa-dev
mailing list