Mesa (master): turnip: implement VK_EXT_shader_demote_to_helper_invocation

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Mon Apr 19 17:41:37 UTC 2021


Module: Mesa
Branch: master
Commit: 9dd9424a85f761961a176c21740ad9158fed2304
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=9dd9424a85f761961a176c21740ad9158fed2304

Author: Danylo Piliaiev <dpiliaiev at igalia.com>
Date:   Fri Mar  5 20:01:34 2021 +0200

turnip: implement VK_EXT_shader_demote_to_helper_invocation

The "demote" intrinsic has the semantics of D3D discard, which means
it doesn't change the control flow, allowing derivatives to work.

On A6xx there is no known way to check whether an invocation was demoted,
so we use nir_lower_is_helper_invocation.

Add a "logical" OPC_DEMOTE, which is later translated to "kill".
This separation is necessary so that "kill"-specific optimizations,
which are invalid for "demote", can still be run on real kills only.

Signed-off-by: Danylo Piliaiev <dpiliaiev at igalia.com>
Reviewed-by: Samuel Iglesias Gonsálvez <siglesias at igalia.com>
Reviewed-by: Eric Anholt <eric at anholt.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9460>

---

 src/freedreno/ir3/disasm-a3xx.c       |  1 +
 src/freedreno/ir3/instr-a3xx.h        |  3 +++
 src/freedreno/ir3/ir3.h               |  5 +++--
 src/freedreno/ir3/ir3_compiler_nir.c  | 15 ++++++++++++---
 src/freedreno/ir3/ir3_postsched.c     |  4 ++--
 src/freedreno/ir3/ir3_sched.c         |  8 ++++----
 src/freedreno/isa/encode.c            |  2 ++
 src/freedreno/vulkan/tu_device.c      |  6 ++++++
 src/freedreno/vulkan/tu_extensions.py |  1 +
 src/freedreno/vulkan/tu_shader.c      |  3 +++
 10 files changed, 37 insertions(+), 11 deletions(-)

diff --git a/src/freedreno/ir3/disasm-a3xx.c b/src/freedreno/ir3/disasm-a3xx.c
index 575a6d49ff8..3f2678376ee 100644
--- a/src/freedreno/ir3/disasm-a3xx.c
+++ b/src/freedreno/ir3/disasm-a3xx.c
@@ -164,6 +164,7 @@ static const struct opc_info {
 	OPC(0, OPC_CALL,         call),
 	OPC(0, OPC_RET,          ret),
 	OPC(0, OPC_KILL,         kill),
+	OPC(0, OPC_DEMOTE,       demote),
 	OPC(0, OPC_END,          end),
 	OPC(0, OPC_EMIT,         emit),
 	OPC(0, OPC_CUT,          cut),
diff --git a/src/freedreno/ir3/instr-a3xx.h b/src/freedreno/ir3/instr-a3xx.h
index b7f2f95ccda..e7f31fd3b82 100644
--- a/src/freedreno/ir3/instr-a3xx.h
+++ b/src/freedreno/ir3/instr-a3xx.h
@@ -90,6 +90,9 @@ typedef enum {
 	OPC_BALL            = _OPC(0, 45),
 	OPC_BRAX            = _OPC(0, 46),
 
+	/* Logical opcode to distinguish kill and demote */
+	OPC_DEMOTE          = _OPC(0, 47),
+
 	/* category 1: */
 	OPC_MOV             = _OPC(1, 0),
 	OPC_MOVP            = _OPC(1, 1),
diff --git a/src/freedreno/ir3/ir3.h b/src/freedreno/ir3/ir3.h
index 87c81449587..43056371f63 100644
--- a/src/freedreno/ir3/ir3.h
+++ b/src/freedreno/ir3/ir3.h
@@ -668,9 +668,9 @@ static inline bool is_flow(struct ir3_instruction *instr)
 	return (opc_cat(instr->opc) == 0);
 }
 
-static inline bool is_kill(struct ir3_instruction *instr)
+static inline bool is_kill_or_demote(struct ir3_instruction *instr)
 {
-	return instr->opc == OPC_KILL;
+	return instr->opc == OPC_KILL || instr->opc == OPC_DEMOTE;
 }
 
 static inline bool is_nop(struct ir3_instruction *instr)
@@ -1591,6 +1591,7 @@ ir3_##name(struct ir3_block *block,                                      \
 INSTR1(B)
 INSTR0(JUMP)
 INSTR1(KILL)
+INSTR1(DEMOTE)
 INSTR0(END)
 INSTR0(CHSH)
 INSTR0(CHMASK)
diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c
index 8065c302b46..698bb3477f9 100644
--- a/src/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/freedreno/ir3/ir3_compiler_nir.c
@@ -1950,10 +1950,13 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
 		}
 		break;
 	case nir_intrinsic_discard_if:
-	case nir_intrinsic_discard: {
+	case nir_intrinsic_discard:
+	case nir_intrinsic_demote:
+	case nir_intrinsic_demote_if: {
 		struct ir3_instruction *cond, *kill;
 
-		if (intr->intrinsic == nir_intrinsic_discard_if) {
+		if (intr->intrinsic == nir_intrinsic_discard_if ||
+			intr->intrinsic == nir_intrinsic_demote_if) {
 			/* conditional discard: */
 			src = ir3_get_src(ctx, &intr->src[0]);
 			cond = src[0];
@@ -1970,7 +1973,13 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
 		cond->regs[0]->num = regid(REG_P0, 0);
 		cond->regs[0]->flags &= ~IR3_REG_SSA;
 
-		kill = ir3_KILL(b, cond, 0);
+		if (intr->intrinsic == nir_intrinsic_demote ||
+			intr->intrinsic == nir_intrinsic_demote_if) {
+			kill = ir3_DEMOTE(b, cond, 0);
+		} else {
+			kill = ir3_KILL(b, cond, 0);
+		}
+
 		/* Side-effects should not be moved on a different side of the kill */
 		kill->barrier_class = IR3_BARRIER_IMAGE_W | IR3_BARRIER_BUFFER_W;
 		kill->barrier_conflict = IR3_BARRIER_IMAGE_W | IR3_BARRIER_BUFFER_W;
diff --git a/src/freedreno/ir3/ir3_postsched.c b/src/freedreno/ir3/ir3_postsched.c
index 614f1bf2062..76b428c67e3 100644
--- a/src/freedreno/ir3/ir3_postsched.c
+++ b/src/freedreno/ir3/ir3_postsched.c
@@ -197,7 +197,7 @@ choose_instr(struct ir3_postsched_ctx *ctx)
 		if (d > 0)
 			continue;
 
-		if (!is_kill(n->instr))
+		if (!is_kill_or_demote(n->instr))
 			continue;
 
 		if (!chosen || (chosen->max_delay < n->max_delay))
@@ -562,7 +562,7 @@ sched_dag_init(struct ir3_postsched_ctx *ctx)
 
 		if (is_input(instr)) {
 			util_dynarray_append(&inputs, struct ir3_instruction *, instr);
-		} else if (is_kill(instr)) {
+		} else if (is_kill_or_demote(instr)) {
 			util_dynarray_foreach(&inputs, struct ir3_instruction *, instrp) {
 				struct ir3_instruction *input = *instrp;
 				struct ir3_postsched_node *in = input->data;
diff --git a/src/freedreno/ir3/ir3_sched.c b/src/freedreno/ir3/ir3_sched.c
index 51b39dc1ee9..cec39bb9977 100644
--- a/src/freedreno/ir3/ir3_sched.c
+++ b/src/freedreno/ir3/ir3_sched.c
@@ -239,7 +239,7 @@ schedule(struct ir3_sched_ctx *ctx, struct ir3_instruction *instr)
 	list_addtail(&instr->node, &instr->block->instr_list);
 	ctx->scheduled = instr;
 
-	if (is_kill(instr)){
+	if (is_kill_or_demote(instr)){
 		assert(ctx->remaining_kills > 0);
 		ctx->remaining_kills--;
 	}
@@ -409,7 +409,7 @@ check_instr(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes,
 	 *
 	 * TODO we could handle this by false-deps now, probably.
 	 */
-	if (is_kill(instr)) {
+	if (is_kill_or_demote(instr)) {
 		struct ir3 *ir = instr->block->shader;
 
 		for (unsigned i = 0; i < ir->baryfs_count; i++) {
@@ -1003,7 +1003,7 @@ sched_node_add_deps(struct ir3_instruction *instr)
 	/* NOTE that all inputs must be scheduled before a kill, so
 	 * mark these to be prioritized as well:
 	 */
-	if (is_kill(instr) || is_input(instr)) {
+	if (is_kill_or_demote(instr) || is_input(instr)) {
 		mark_kill_path(instr);
 	}
 
@@ -1073,7 +1073,7 @@ sched_block(struct ir3_sched_ctx *ctx, struct ir3_block *block)
 	ctx->remaining_kills = 0;
 	ctx->remaining_tex = 0;
 	foreach_instr_safe (instr, &ctx->unscheduled_list) {
-		if (is_kill(instr))
+		if (is_kill_or_demote(instr))
 			ctx->remaining_kills++;
 		if (is_tex_or_prefetch(instr))
 			ctx->remaining_tex++;
diff --git a/src/freedreno/isa/encode.c b/src/freedreno/isa/encode.c
index d6922a3b9dd..1a29a53e78a 100644
--- a/src/freedreno/isa/encode.c
+++ b/src/freedreno/isa/encode.c
@@ -111,6 +111,8 @@ __instruction_case(struct encode_state *s, struct ir3_instruction *instr)
 		} else {
 			return OPC_MOV_GPR;
 		}
+	} else if (instr->opc == OPC_DEMOTE) {
+		return OPC_KILL;
 	} else if ((instr->block->shader->compiler->gpu_id > 600) &&
 			is_atomic(instr->opc) && (instr->flags & IR3_INSTR_G)) {
 		return instr->opc - OPC_ATOMIC_ADD + OPC_ATOMIC_B_ADD;
diff --git a/src/freedreno/vulkan/tu_device.c b/src/freedreno/vulkan/tu_device.c
index ffc31b696e0..d4a0556b8e3 100644
--- a/src/freedreno/vulkan/tu_device.c
+++ b/src/freedreno/vulkan/tu_device.c
@@ -622,6 +622,12 @@ tu_GetPhysicalDeviceFeatures2(VkPhysicalDevice physicalDevice,
          features->nullDescriptor = true;
          break;
       }
+      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DEMOTE_TO_HELPER_INVOCATION_FEATURES_EXT: {
+         VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT *features =
+            (VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT *)ext;
+         features->shaderDemoteToHelperInvocation = true;
+         break;
+      }
 
       default:
          break;
diff --git a/src/freedreno/vulkan/tu_extensions.py b/src/freedreno/vulkan/tu_extensions.py
index 3665100621f..0938ece8e13 100644
--- a/src/freedreno/vulkan/tu_extensions.py
+++ b/src/freedreno/vulkan/tu_extensions.py
@@ -117,6 +117,7 @@ EXTENSIONS = [
     Extension('VK_KHR_spirv_1_4',                         1, True),
     Extension('VK_KHR_relaxed_block_layout',              1, True),
     Extension('VK_EXT_robustness2',                       1, True),
+    Extension('VK_EXT_shader_demote_to_helper_invocation', 1, True),
 ]
 
 MAX_API_VERSION = VkVersion(MAX_API_VERSION)
diff --git a/src/freedreno/vulkan/tu_shader.c b/src/freedreno/vulkan/tu_shader.c
index 6c4eff30ce3..f6636cf2d03 100644
--- a/src/freedreno/vulkan/tu_shader.c
+++ b/src/freedreno/vulkan/tu_shader.c
@@ -76,6 +76,7 @@ tu_spirv_to_nir(struct tu_device *dev,
          .float_controls = true,
          .float16 = true,
          .storage_16bit = dev->physical_device->gpu_id >= 650,
+         .demote_to_helper_invocation = true,
       },
    };
 
@@ -185,6 +186,8 @@ tu_spirv_to_nir(struct tu_device *dev,
 
    NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
 
+   NIR_PASS_V(nir, nir_lower_is_helper_invocation);
+
    NIR_PASS_V(nir, nir_lower_system_values);
    NIR_PASS_V(nir, nir_lower_compute_system_values, &compute_sysval_options);
 



More information about the mesa-commit mailing list