[Mesa-dev] [PATCH 10/11] freedreno/ir3: add support for ssbo intrinsics

Ilia Mirkin imirkin at alum.mit.edu
Sat Sep 26 23:33:26 PDT 2015


The encoding for atomics other than inc/dec still needs work, but for now
inc/dec are all we get in practice.

We add an extra fake source register to each memory instruction, pointing
at the last memory op, for the benefit of instruction ordering. This could
be made smarter to work on a per-buffer or even per-address basis, but the
current solution is simple.

Signed-off-by: Ilia Mirkin <imirkin at alum.mit.edu>
---
 src/gallium/drivers/freedreno/ir3/ir3.c            |  2 +-
 src/gallium/drivers/freedreno/ir3/ir3.h            | 20 ++++++
 .../drivers/freedreno/ir3/ir3_compiler_nir.c       | 74 ++++++++++++++++++++++
 src/gallium/drivers/freedreno/ir3/ir3_depth.c      |  3 +-
 src/gallium/drivers/freedreno/ir3/ir3_legalize.c   |  4 +-
 5 files changed, 99 insertions(+), 4 deletions(-)

diff --git a/src/gallium/drivers/freedreno/ir3/ir3.c b/src/gallium/drivers/freedreno/ir3/ir3.c
index b24825c..389d9a0 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3.c
@@ -527,7 +527,7 @@ static int emit_cat6(struct ir3_instruction *instr, void *ptr,
 	 * indicate to use the src_off encoding even if offset is zero
 	 * (but then what to do about dst_off?)
 	 */
-	if (instr->cat6.src_offset || (instr->opc == OPC_LDG)) {
+	if (instr->cat6.src_offset || (instr->opc == OPC_LDG || is_atomic(instr))) {
 		instr_cat6a_t *cat6a = ptr;
 
 		cat6->src_off = true;
diff --git a/src/gallium/drivers/freedreno/ir3/ir3.h b/src/gallium/drivers/freedreno/ir3/ir3.h
index 12f2ebe..54974e5 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3.h
+++ b/src/gallium/drivers/freedreno/ir3/ir3.h
@@ -84,6 +84,7 @@ struct ir3_register {
 		 */
 		IR3_REG_SSA    = 0x2000,   /* 'instr' is ptr to assigning instr */
 		IR3_REG_PHI_SRC= 0x4000,   /* phi src, regs[0]->instr points to phi */
+		IR3_REG_FAKE   = 0x8000,   /* fake source, used for ordering */
 
 	} flags;
 	union {
@@ -580,6 +581,21 @@ static inline bool is_load(struct ir3_instruction *instr)
 	return false;
 }
 
+static inline bool
+is_atomic(struct ir3_instruction *instr)	/* true if instr is one of the atomic memory ops listed below */
+{
+	if (!is_mem(instr))	/* opc is only examined for instrs that pass is_mem(); presumably opc values are per-category -- confirm */
+		return false;
+	switch (instr->opc) {
+	case OPC_ATOMIC_ADD:
+	case OPC_ATOMIC_INC:
+	case OPC_ATOMIC_DEC:
+		return true;
+	default:	/* any other memory opcode is not treated as atomic */
+		return false;
+	}
+}
+
 static inline bool is_input(struct ir3_instruction *instr)
 {
 	/* in some cases, ldlv is used to fetch varying without
@@ -1071,6 +1087,10 @@ INSTR2(6, LDLV)
 INSTR2(6, LDG)
 INSTR3(6, STG)
 
+INSTR2(6, ATOMIC_ADD)
+INSTR1(6, ATOMIC_INC)
+INSTR1(6, ATOMIC_DEC)
+
 /* ************************************************************************* */
 /* split this out or find some helper to use.. like main/bitset.h.. */
 
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
index 84f1770..d193da1 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
@@ -121,6 +121,8 @@ struct ir3_compile {
 	 * can bail cleanly and fallback to TGSI compiler f/e
 	 */
 	bool error;
+
+	struct ir3_instruction *last_mem;
 };
 
 
@@ -1233,6 +1235,64 @@ emit_intrinsic_load_ubo(struct ir3_compile *ctx, nir_intrinsic_instr *intr,
 	}
 }
 
+/* handles buffer (SSBO) intrinsics; only loads and atomic add are implemented in the switch below, and the emitted instruction is returned in dst[0]: */
+static void
+emit_intrinsic_buffer(struct ir3_compile *ctx, nir_intrinsic_instr *intr,
+		struct ir3_instruction **dst)
+{
+	struct ir3_block *b = ctx->block;
+	struct ir3_instruction *addr, *load, *src[3];
+	/* Buffer addresses are driver params: */
+	unsigned buf = regid(ctx->so->first_driver_param + IR3_BUFS_OFF, 0);
+	nir_const_value *index = nir_src_as_const_value(intr->src[0]);
+	nir_const_value *add;
+	unsigned offset = intr->const_index[0];
+
+	/* Get the buffer address: */
+	assert(index); /* XXX src[0] (the buffer index) must resolve to a constant value here */
+	addr = create_uniform(ctx, buf + (index ? index->u[0] : 0)); /* the 0 fallback only matters when assert() is compiled out */
+
+	/* Add the dynamic offset (src[1]) to the address for everything except the direct load/store variants: */
+	if (intr->intrinsic != nir_intrinsic_load_ssbo &&
+		intr->intrinsic != nir_intrinsic_store_ssbo)
+		addr = ir3_ADD_S(b, addr, 0, get_src(ctx, &intr->src[1])[0], 0);
+	if (offset > 1024) { /* large constant offsets are folded into the address instead of cat6.src_offset */
+		addr = ir3_ADD_S(b, addr, 0, create_immed(b, offset), 0);
+		offset = 0;
+	}
+
+	switch (intr->intrinsic) {
+	case nir_intrinsic_load_ssbo:
+	case nir_intrinsic_load_ssbo_indirect:
+		load = ir3_LDG(b, addr, 0, create_immed(b, 1), 0);
+		break;
+	case nir_intrinsic_ssbo_atomic_add: /* a constant add of +1 / -1 becomes ATOMIC_INC / ATOMIC_DEC */
+		add = nir_src_as_const_value(intr->src[2]);
+		if (add && add->i[0] == 1) {
+			load = ir3_ATOMIC_INC(b, addr, 0);
+		} else if (add && add->i[0] == -1) {
+			load = ir3_ATOMIC_DEC(b, addr, 0);
+		} else {
+			src[0] = create_immed(b, 0);
+			src[1] = get_src(ctx, &intr->src[2])[0];
+			load = ir3_ATOMIC_ADD(b, addr, 0, create_collect(b, src, 2), 0);
+		}
+		load->flags |= IR3_INSTR_G;
+		array_insert(ctx->ir->keeps, load); /* presumably so the atomic is kept even if its result is unused -- confirm */
+		break;
+	default: /* NOTE(review): the store and other atomic intrinsics that emit_intrinisic() routes here end up in this case */
+		unreachable("unexpected intrinsic");
+	}
+	load->cat6.type = TYPE_U32; /* XXX type is hard-coded to u32 */
+	load->cat6.src_offset = offset;
+	if (ctx->last_mem) /* append a fake SSA src pointing at the previous memory op, used for ordering */
+		ir3_reg_create(load, 0, IR3_REG_SSA | IR3_REG_FAKE)->instr =
+			ctx->last_mem;
+
+	dst[0] = ctx->last_mem = load; /* this instr becomes the op the next memory instr is ordered after */
+}
+
+
 /* handles array reads: */
 static void
 emit_intrinisic_load_var(struct ir3_compile *ctx, nir_intrinsic_instr *intr,
@@ -1488,6 +1548,20 @@ emit_intrinisic(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
 
 		break;
 	}
+	case nir_intrinsic_load_ssbo:
+	case nir_intrinsic_load_ssbo_indirect:
+	case nir_intrinsic_store_ssbo:
+	case nir_intrinsic_store_ssbo_indirect:
+	case nir_intrinsic_ssbo_atomic_add:
+	case nir_intrinsic_ssbo_atomic_min:
+	case nir_intrinsic_ssbo_atomic_max:
+	case nir_intrinsic_ssbo_atomic_and:
+	case nir_intrinsic_ssbo_atomic_or:
+	case nir_intrinsic_ssbo_atomic_xor:
+	case nir_intrinsic_ssbo_atomic_exchange:
+	case nir_intrinsic_ssbo_atomic_comp_swap:
+		emit_intrinsic_buffer(ctx, intr, dst);
+		break;
 	default:
 		compile_error(ctx, "Unhandled intrinsic type: %s\n",
 				nir_intrinsic_infos[intr->intrinsic].name);
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_depth.c b/src/gallium/drivers/freedreno/ir3/ir3_depth.c
index 97df0c2..33afe7f 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_depth.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_depth.c
@@ -60,7 +60,7 @@ int ir3_delayslots(struct ir3_instruction *assigner,
 	 * handled with sync bits
 	 */
 
-	if (is_meta(assigner))
+	if (is_meta(assigner) || consumer->regs[n + 1]->flags & IR3_REG_FAKE)
 		return 0;
 
 	if (writes_addr(assigner))
@@ -121,6 +121,7 @@ ir3_instr_depth(struct ir3_instruction *instr)
 		sd = ir3_delayslots(src, instr, i) + src->depth;
 
 		instr->depth = MAX2(instr->depth, sd);
+		instr->regs[i + 1]->flags &= ~IR3_REG_FAKE;
 	}
 
 	if (!is_meta(instr))
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_legalize.c b/src/gallium/drivers/freedreno/ir3/ir3_legalize.c
index e94293f..8c5fdc0 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_legalize.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_legalize.c
@@ -182,14 +182,14 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block)
 			 */
 			ctx->has_samp = true;
 			regmask_set(&needs_sy, n->regs[0]);
-		} else if (is_load(n)) {
+		} else if (is_load(n) || is_atomic(n)) {
 			regmask_set(&needs_sy, n->regs[0]);
 		}
 
 		/* both tex/sfu appear to not always immediately consume
 		 * their src register(s):
 		 */
-		if (is_tex(n) || is_sfu(n) || is_load(n)) {
+		if (is_tex(n) || is_sfu(n) || is_load(n) || is_atomic(n)) {
 			foreach_src(reg, n) {
 				if (reg_gpr(reg))
 					regmask_set(&needs_ss_war, reg);
-- 
2.4.9



More information about the mesa-dev mailing list