Mesa (master): radeonsi/nir: add nir support for compiling compute shaders

Tue Feb 6 21:46:48 UTC 2018

Module: Mesa
Branch: master
Commit: e2ea9e11910a10ae10d6491d913ab1d57f97a0d4
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=e2ea9e11910a10ae10d6491d913ab1d57f97a0d4

Author: Timothy Arceri <tarceri at itsqueeze.com>
Date:   Fri Feb  2 10:09:47 2018 +1100

radeonsi/nir: add nir support for compiling compute shaders

Reviewed-by: Marek Olšák <marek.olsak at amd.com>

---

 src/gallium/drivers/radeonsi/si_compute.c | 44 ++++++++++++++++++++++---------
 src/gallium/drivers/radeonsi/si_compute.h |  5 +++-
 src/gallium/drivers/radeonsi/si_get.c     |  8 +++---
 3 files changed, 39 insertions(+), 18 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c
index 91d6810a91..41927988cf 100644
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -98,8 +98,19 @@ static void si_create_compute_state_async(void *job, int thread_index)
 	memset(&sel, 0, sizeof(sel));
 
 	sel.screen = program->screen;
-	tgsi_scan_shader(program->tokens, &sel.info);
-	sel.tokens = program->tokens;
+
+	if (program->ir_type == PIPE_SHADER_IR_TGSI) {
+		tgsi_scan_shader(program->ir.tgsi, &sel.info);
+		sel.tokens = program->ir.tgsi;
+	} else {
+		assert(program->ir_type == PIPE_SHADER_IR_NIR);
+		sel.nir = program->ir.nir;
+
+		si_nir_scan_shader(sel.nir, &sel.info);
+		si_lower_nir(&sel);
+	}
+
+
 	sel.type = PIPE_SHADER_COMPUTE;
 	sel.local_size = program->local_size;
 	si_get_active_slot_masks(&sel.info,
@@ -141,7 +152,9 @@ static void si_create_compute_state_async(void *job, int thread_index)
 			sel.info.properties[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH] == 0;
 	}
 
-	FREE(program->tokens);
+	if (program->ir_type == PIPE_SHADER_IR_TGSI)
+		FREE(program->ir.tgsi);
+
 	program->shader.selector = NULL;
 }
 
@@ -161,11 +174,16 @@ static void *si_create_compute_state(
 	program->input_size = cso->req_input_mem;
 	program->use_code_object_v2 = cso->ir_type == PIPE_SHADER_IR_NATIVE;
 
-	if (cso->ir_type == PIPE_SHADER_IR_TGSI) {
-		program->tokens = tgsi_dup_tokens(cso->prog);
-		if (!program->tokens) {
-			FREE(program);
-			return NULL;
+	if (cso->ir_type != PIPE_SHADER_IR_NATIVE) {
+		if (cso->ir_type == PIPE_SHADER_IR_TGSI) {
+			program->ir.tgsi = tgsi_dup_tokens(cso->prog);
+			if (!program->ir.tgsi) {
+				FREE(program);
+				return NULL;
+			}
+		} else {
+			assert(cso->ir_type == PIPE_SHADER_IR_NIR);
+			program->ir.nir = (struct nir_shader *) cso->prog;
 		}
 
 		program->compiler_ctx_state.debug = sctx->debug;
@@ -230,7 +248,7 @@ static void si_bind_compute_state(struct pipe_context *ctx, void *state)
 		return;
 
 	/* Wait because we need active slot usage masks. */
-	if (program->ir_type == PIPE_SHADER_IR_TGSI)
+	if (program->ir_type != PIPE_SHADER_IR_NATIVE)
 		util_queue_fence_wait(&program->ready);
 
 	si_set_active_descriptors(sctx,
@@ -379,7 +397,7 @@ static bool si_switch_compute_shader(struct si_context *sctx,
 	    sctx->cs_shader_state.offset == offset)
 		return true;
 
-	if (program->ir_type == PIPE_SHADER_IR_TGSI) {
+	if (program->ir_type != PIPE_SHADER_IR_NATIVE) {
 		config = &shader->config;
 	} else {
 		unsigned lds_blocks;
@@ -805,7 +823,7 @@ static void si_launch_grid(
 		sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
 				 SI_CONTEXT_CS_PARTIAL_FLUSH;
 
-	if (program->ir_type == PIPE_SHADER_IR_TGSI &&
+	if (program->ir_type != PIPE_SHADER_IR_NATIVE &&
 	    program->shader.compilation_failed)
 		return;
 
@@ -870,7 +888,7 @@ static void si_launch_grid(
 					  RADEON_PRIO_COMPUTE_GLOBAL);
 	}
 
-	if (program->ir_type == PIPE_SHADER_IR_TGSI)
+	if (program->ir_type != PIPE_SHADER_IR_NATIVE)
 		si_setup_tgsi_grid(sctx, info);
 
 	si_emit_dispatch_packets(sctx, info);
@@ -891,7 +909,7 @@ static void si_launch_grid(
 
 void si_destroy_compute(struct si_compute *program)
 {
-	if (program->ir_type == PIPE_SHADER_IR_TGSI) {
+	if (program->ir_type != PIPE_SHADER_IR_NATIVE) {
 		util_queue_drop_job(&program->screen->shader_compiler_queue,
 				    &program->ready);
 		util_queue_fence_destroy(&program->ready);
diff --git a/src/gallium/drivers/radeonsi/si_compute.h b/src/gallium/drivers/radeonsi/si_compute.h
index c19b701fc7..ec411588f6 100644
--- a/src/gallium/drivers/radeonsi/si_compute.h
+++ b/src/gallium/drivers/radeonsi/si_compute.h
@@ -33,7 +33,10 @@
 struct si_compute {
 	struct pipe_reference reference;
 	struct si_screen *screen;
-	struct tgsi_token *tokens;
+	union {
+		struct tgsi_token *tgsi;
+		struct nir_shader *nir;
+	} ir;
 	struct util_queue_fence ready;
 	struct si_compiler_ctx_state compiler_ctx_state;
 
diff --git a/src/gallium/drivers/radeonsi/si_get.c b/src/gallium/drivers/radeonsi/si_get.c
index 80023625b7..9a205c07f8 100644
--- a/src/gallium/drivers/radeonsi/si_get.c
+++ b/src/gallium/drivers/radeonsi/si_get.c
@@ -721,7 +721,7 @@ static boolean si_vid_is_format_supported(struct pipe_screen *screen,
 static unsigned get_max_threads_per_block(struct si_screen *screen,
 					  enum pipe_shader_ir ir_type)
 {
-	if (ir_type != PIPE_SHADER_IR_TGSI)
+	if (ir_type == PIPE_SHADER_IR_NATIVE)
 		return 256;
 
 	/* Only 16 waves per thread-group on gfx9. */
@@ -869,10 +869,10 @@ static int si_get_compute_param(struct pipe_screen *screen,
 	case PIPE_COMPUTE_CAP_MAX_VARIABLE_THREADS_PER_BLOCK:
 		if (ret) {
 			uint64_t *max_variable_threads_per_block = ret;
-			if (ir_type == PIPE_SHADER_IR_TGSI)
-				*max_variable_threads_per_block = SI_MAX_VARIABLE_THREADS_PER_BLOCK;
-			else
+			if (ir_type == PIPE_SHADER_IR_NATIVE)
 				*max_variable_threads_per_block = 0;
+			else
+				*max_variable_threads_per_block = SI_MAX_VARIABLE_THREADS_PER_BLOCK;
 		}
 		return sizeof(uint64_t);
 	}