[Mesa-dev] [PATCH 31/61] radeonsi: add si_shader::prolog2

Marek Olšák maraeo at gmail.com
Mon Apr 24 08:45:28 UTC 2017


From: Marek Olšák <marek.olsak at amd.com>

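Add si_shader::prolog2, a second prolog part for a GS prolog in merged ES-GS.
It is placed after previous_stage and before the main shader part when
computing the binary size, uploading the binary, and dumping disassembly.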
---
 src/gallium/drivers/radeonsi/si_debug.c  |  4 ++++
 src/gallium/drivers/radeonsi/si_shader.c | 21 ++++++++++++++++++++-
 src/gallium/drivers/radeonsi/si_shader.h |  1 +
 3 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeonsi/si_debug.c b/src/gallium/drivers/radeonsi/si_debug.c
index 038c8b4..9634901 100644
--- a/src/gallium/drivers/radeonsi/si_debug.c
+++ b/src/gallium/drivers/radeonsi/si_debug.c
@@ -636,20 +636,24 @@ static void si_print_annotated_shader(struct si_shader *shader,
 		calloc(shader->bo->b.b.width0 / 4, sizeof(struct si_shader_inst));
 
 	if (shader->prolog) {
 		si_add_split_disasm(shader->prolog->binary.disasm_string,
 				    start_addr, &num_inst, instructions);
 	}
 	if (shader->previous_stage) {
 		si_add_split_disasm(shader->previous_stage->binary.disasm_string,
 				    start_addr, &num_inst, instructions);
 	}
+	if (shader->prolog2) {
+		si_add_split_disasm(shader->prolog2->binary.disasm_string,
+				    start_addr, &num_inst, instructions);
+	}
 	si_add_split_disasm(shader->binary.disasm_string,
 			    start_addr, &num_inst, instructions);
 	if (shader->epilog) {
 		si_add_split_disasm(shader->epilog->binary.disasm_string,
 				    start_addr, &num_inst, instructions);
 	}
 
 	fprintf(f, COLOR_YELLOW "%s - annotated disassembly:" COLOR_RESET "\n",
 		si_get_shader_name(shader, shader->selector->type));
 
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 9e51622..27cd0f2 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -6341,41 +6341,47 @@ void si_shader_apply_scratch_relocs(struct si_context *sctx,
 }
 
 static unsigned si_get_shader_binary_size(struct si_shader *shader)
 {
 	unsigned size = shader->binary.code_size;
 
 	if (shader->prolog)
 		size += shader->prolog->binary.code_size;
 	if (shader->previous_stage)
 		size += shader->previous_stage->binary.code_size;
+	if (shader->prolog2)
+		size += shader->prolog2->binary.code_size;
 	if (shader->epilog)
 		size += shader->epilog->binary.code_size;
 	return size;
 }
 
 int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader *shader)
 {
 	const struct ac_shader_binary *prolog =
 		shader->prolog ? &shader->prolog->binary : NULL;
 	const struct ac_shader_binary *previous_stage =
 		shader->previous_stage ? &shader->previous_stage->binary : NULL;
+	const struct ac_shader_binary *prolog2 =
+		shader->prolog2 ? &shader->prolog2->binary : NULL;
 	const struct ac_shader_binary *epilog =
 		shader->epilog ? &shader->epilog->binary : NULL;
 	const struct ac_shader_binary *mainb = &shader->binary;
 	unsigned bo_size = si_get_shader_binary_size(shader) +
 			   (!epilog ? mainb->rodata_size : 0);
 	unsigned char *ptr;
 
 	assert(!prolog || !prolog->rodata_size);
 	assert(!previous_stage || !previous_stage->rodata_size);
-	assert((!prolog && !previous_stage && !epilog) || !mainb->rodata_size);
+	assert(!prolog2 || !prolog2->rodata_size);
+	assert((!prolog && !previous_stage && !prolog2 && !epilog) ||
+	       !mainb->rodata_size);
 	assert(!epilog || !epilog->rodata_size);
 
 	/* GFX9 can fetch at most 128 bytes past the end of the shader.
 	 * Prevent VM faults.
 	 */
 	if (sscreen->b.chip_class >= GFX9)
 		bo_size += 128;
 
 	r600_resource_reference(&shader->bo, NULL);
 	shader->bo = (struct r600_resource*)
@@ -6391,20 +6397,24 @@ int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader *shader)
 
 	if (prolog) {
 		util_memcpy_cpu_to_le32(ptr, prolog->code, prolog->code_size);
 		ptr += prolog->code_size;
 	}
 	if (previous_stage) {
 		util_memcpy_cpu_to_le32(ptr, previous_stage->code,
 					previous_stage->code_size);
 		ptr += previous_stage->code_size;
 	}
+	if (prolog2) {
+		util_memcpy_cpu_to_le32(ptr, prolog2->code, prolog2->code_size);
+		ptr += prolog2->code_size;
+	}
 
 	util_memcpy_cpu_to_le32(ptr, mainb->code, mainb->code_size);
 	ptr += mainb->code_size;
 
 	if (epilog)
 		util_memcpy_cpu_to_le32(ptr, epilog->code, epilog->code_size);
 	else if (mainb->rodata_size > 0)
 		util_memcpy_cpu_to_le32(ptr, mainb->rodata, mainb->rodata_size);
 
 	sscreen->b.ws->buffer_unmap(shader->bo->buf);
@@ -6601,20 +6611,23 @@ void si_shader_dump(struct si_screen *sscreen, struct si_shader *shader,
 	    (r600_can_dump_shader(&sscreen->b, processor) &&
 	     !(sscreen->b.debug_flags & DBG_NO_ASM))) {
 		fprintf(file, "\n%s:\n", si_get_shader_name(shader, processor));
 
 		if (shader->prolog)
 			si_shader_dump_disassembly(&shader->prolog->binary,
 						   debug, "prolog", file);
 		if (shader->previous_stage)
 			si_shader_dump_disassembly(&shader->previous_stage->binary,
 						   debug, "previous stage", file);
+		if (shader->prolog2)
+			si_shader_dump_disassembly(&shader->prolog2->binary,
+						   debug, "prolog2", file);
 
 		si_shader_dump_disassembly(&shader->binary, debug, "main", file);
 
 		if (shader->epilog)
 			si_shader_dump_disassembly(&shader->epilog->binary,
 						   debug, "epilog", file);
 		fprintf(file, "\n");
 	}
 
 	si_shader_dump_stats(sscreen, shader, debug, processor, file,
@@ -9138,20 +9151,26 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
 				     shader->previous_stage->config.spilled_vgprs);
 			shader->config.private_mem_vgprs =
 				MAX2(shader->config.private_mem_vgprs,
 				     shader->previous_stage->config.private_mem_vgprs);
 			shader->config.scratch_bytes_per_wave =
 				MAX2(shader->config.scratch_bytes_per_wave,
 				     shader->previous_stage->config.scratch_bytes_per_wave);
 			shader->info.uses_instanceid |=
 				shader->previous_stage->info.uses_instanceid;
 		}
+		if (shader->prolog2) {
+			shader->config.num_sgprs = MAX2(shader->config.num_sgprs,
+							shader->prolog2->config.num_sgprs);
+			shader->config.num_vgprs = MAX2(shader->config.num_vgprs,
+							shader->prolog2->config.num_vgprs);
+		}
 		if (shader->epilog) {
 			shader->config.num_sgprs = MAX2(shader->config.num_sgprs,
 							shader->epilog->config.num_sgprs);
 			shader->config.num_vgprs = MAX2(shader->config.num_vgprs,
 							shader->epilog->config.num_vgprs);
 		}
 	}
 
 	si_fix_resource_usage(sscreen, shader);
 	si_shader_dump(sscreen, shader, debug, sel->info.processor,
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index 39eee86..76f7743 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -478,20 +478,21 @@ struct si_shader_info {
 };
 
 struct si_shader {
 	struct si_compiler_ctx_state	compiler_ctx_state;
 
 	struct si_shader_selector	*selector;
 	struct si_shader		*next_variant;
 
 	struct si_shader_part		*prolog;
 	struct si_shader		*previous_stage; /* for GFX9 */
+	struct si_shader_part		*prolog2;
 	struct si_shader_part		*epilog;
 
 	struct si_pm4_state		*pm4;
 	struct r600_resource		*bo;
 	struct r600_resource		*scratch_bo;
 	struct si_shader_key		key;
 	struct util_queue_fence		optimized_ready;
 	bool				compilation_failed;
 	bool				is_monolithic;
 	bool				is_optimized;
-- 
2.7.4



More information about the mesa-dev mailing list