Mesa (main): nir_to_tgsi: Track our TGSI insns in blocks before emitting tokens.
GitLab Mirror
gitlab-mirror at kemper.freedesktop.org
Thu Feb 10 01:12:37 UTC 2022
Module: Mesa
Branch: main
Commit: f4ce3178d99f66cab57c492cc5fe3d3a1788fee1
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=f4ce3178d99f66cab57c492cc5fe3d3a1788fee1
Author: Emma Anholt <emma at anholt.net>
Date: Mon Jan 3 13:45:28 2022 -0800
nir_to_tgsi: Track our TGSI insns in blocks before emitting tokens.
To do register allocation well, we want to have a point before
ureg_insn_emit() to look at the liveness of the values and allocate them
to TGSI temporaries. In order to do that, we have to switch from
ureg_OPCODE() emitting TGSI tokens directly to a new ntt_OPCODE() that
stores the ureg args in a block structure.
Reviewed-by: Ian Romanick <ian.d.romanick at intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14404>
---
src/gallium/auxiliary/nir/nir_to_tgsi.c | 562 +++++++++++++++++++--------
src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h | 1 +
2 files changed, 404 insertions(+), 159 deletions(-)
diff --git a/src/gallium/auxiliary/nir/nir_to_tgsi.c b/src/gallium/auxiliary/nir/nir_to_tgsi.c
index 17776d08786..19d04d62de4 100644
--- a/src/gallium/auxiliary/nir/nir_to_tgsi.c
+++ b/src/gallium/auxiliary/nir/nir_to_tgsi.c
@@ -33,6 +33,28 @@
#include "util/debug.h"
#include "util/u_math.h"
#include "util/u_memory.h"
+#include "util/u_dynarray.h"
+
+struct ntt_insn {
+ enum tgsi_opcode opcode;
+ struct ureg_dst dst[2];
+ struct ureg_src src[4];
+ enum tgsi_texture_type tex_target;
+ enum tgsi_return_type tex_return_type;
+ struct tgsi_texture_offset tex_offset;
+
+ unsigned mem_qualifier;
+ enum pipe_format mem_format;
+
+ bool is_tex : 1;
+ bool is_mem : 1;
+ bool precise : 1;
+};
+
+struct ntt_block {
+ /* Array of struct ntt_insn */
+ struct util_dynarray insns;
+};
struct ntt_compile {
nir_shader *s;
@@ -57,6 +79,15 @@ struct ntt_compile {
nir_instr_liveness *liveness;
+ /* Map from nir_block to ntt_block */
+ struct hash_table *blocks;
+ struct ntt_block *cur_block;
+ unsigned current_if_else;
+ unsigned cf_label;
+
+ /* Whether we're currently emitting instructions for a precise NIR instruction. */
+ bool precise;
+
/* Mappings from driver_location to TGSI input/output number.
*
* We'll be declaring TGSI input/outputs in an arbitrary order, and they get
@@ -70,7 +101,99 @@ struct ntt_compile {
struct ureg_src images[PIPE_MAX_SHADER_IMAGES];
};
+static struct ntt_block *
+ntt_block_from_nir(struct ntt_compile *c, struct nir_block *block)
+{
+ struct hash_entry *entry = _mesa_hash_table_search(c->blocks, block);
+ return entry->data;
+}
+
static void ntt_emit_cf_list(struct ntt_compile *c, struct exec_list *list);
+static void ntt_emit_cf_list_ureg(struct ntt_compile *c, struct exec_list *list);
+
+static struct ntt_insn *
+ntt_insn(struct ntt_compile *c, enum tgsi_opcode opcode,
+ struct ureg_dst dst,
+ struct ureg_src src0, struct ureg_src src1,
+ struct ureg_src src2, struct ureg_src src3)
+{
+ struct ntt_insn insn = {
+ .opcode = opcode,
+ .dst = { dst, ureg_dst_undef() },
+ .src = { src0, src1, src2, src3 },
+ .precise = c->precise,
+ };
+ util_dynarray_append(&c->cur_block->insns, struct ntt_insn, insn);
+ return util_dynarray_top_ptr(&c->cur_block->insns, struct ntt_insn);
+}
+
+#define OP00( op ) \
+static inline void ntt_##op(struct ntt_compile *c) \
+{ \
+ ntt_insn(c, TGSI_OPCODE_##op, ureg_dst_undef(), ureg_src_undef(), ureg_src_undef(), ureg_src_undef(), ureg_src_undef()); \
+}
+
+#define OP01( op ) \
+static inline void ntt_##op(struct ntt_compile *c, \
+ struct ureg_src src0) \
+{ \
+ ntt_insn(c, TGSI_OPCODE_##op, ureg_dst_undef(), src0, ureg_src_undef(), ureg_src_undef(), ureg_src_undef()); \
+}
+
+
+#define OP10( op ) \
+static inline void ntt_##op(struct ntt_compile *c, \
+ struct ureg_dst dst) \
+{ \
+ ntt_insn(c, TGSI_OPCODE_##op, dst, ureg_src_undef(), ureg_src_undef(), ureg_src_undef(), ureg_src_undef()); \
+}
+
+#define OP11( op ) \
+static inline void ntt_##op(struct ntt_compile *c, \
+ struct ureg_dst dst, \
+ struct ureg_src src0) \
+{ \
+ ntt_insn(c, TGSI_OPCODE_##op, dst, src0, ureg_src_undef(), ureg_src_undef(), ureg_src_undef()); \
+}
+
+#define OP12( op ) \
+static inline void ntt_##op(struct ntt_compile *c, \
+ struct ureg_dst dst, \
+ struct ureg_src src0, \
+ struct ureg_src src1) \
+{ \
+ ntt_insn(c, TGSI_OPCODE_##op, dst, src0, src1, ureg_src_undef(), ureg_src_undef()); \
+}
+
+#define OP13( op ) \
+static inline void ntt_##op(struct ntt_compile *c, \
+ struct ureg_dst dst, \
+ struct ureg_src src0, \
+ struct ureg_src src1, \
+ struct ureg_src src2) \
+{ \
+ ntt_insn(c, TGSI_OPCODE_##op, dst, src0, src1, src2, ureg_src_undef()); \
+}
+
+#define OP14( op ) \
+static inline void ntt_##op(struct ntt_compile *c, \
+ struct ureg_dst dst, \
+ struct ureg_src src0, \
+ struct ureg_src src1, \
+ struct ureg_src src2, \
+ struct ureg_src src3) \
+{ \
+ ntt_insn(c, TGSI_OPCODE_##op, dst, src0, src1, src2, src3); \
+}
+
+/* We hand-craft our tex instructions */
+#define OP12_TEX(op)
+#define OP14_TEX(op)
+
+/* Use a template include to generate a correctly-typed ntt_OP()
+ * function for each TGSI opcode:
+ */
+#include "gallium/auxiliary/tgsi/tgsi_opcode_tmp.h"
/**
* Interprets a nir_load_const used as a NIR src as a uint.
@@ -375,7 +498,7 @@ ntt_setup_inputs(struct ntt_compile *c)
struct ureg_dst temp = ureg_DECL_temporary(c->ureg);
if (c->native_integers) {
/* NIR is ~0 front and 0 back, while TGSI is +1 front */
- ureg_SGE(c->ureg, temp, decl, ureg_imm1f(c->ureg, 0));
+ ntt_SGE(c, temp, decl, ureg_imm1f(c->ureg, 0));
} else {
/* tgsi docs say that floating point FACE will be positive for
* frontface and negative for backface, but realistically
@@ -385,7 +508,7 @@ ntt_setup_inputs(struct ntt_compile *c)
* front face).
*/
temp.Saturate = true;
- ureg_MOV(c->ureg, temp, decl);
+ ntt_MOV(c, temp, decl);
}
decl = ureg_src(temp);
@@ -643,9 +766,9 @@ ntt_reladdr(struct ntt_compile *c, struct ureg_src addr, int addr_index)
}
if (c->native_integers)
- ureg_UARL(c->ureg, c->addr_reg[addr_index], addr);
+ ntt_UARL(c, c->addr_reg[addr_index], addr);
else
- ureg_ARL(c->ureg, c->addr_reg[addr_index], addr);
+ ntt_ARL(c, c->addr_reg[addr_index], addr);
return ureg_scalar(ureg_src(c->addr_reg[addr_index]), 0);
}
@@ -781,7 +904,7 @@ ntt_store_def(struct ntt_compile *c, nir_ssa_def *def, struct ureg_src src)
}
}
- ureg_MOV(c->ureg, ntt_get_ssa_def_decl(c, def), src);
+ ntt_MOV(c, ntt_get_ssa_def_decl(c, def), src);
}
static void
@@ -791,7 +914,7 @@ ntt_store(struct ntt_compile *c, nir_dest *dest, struct ureg_src src)
ntt_store_def(c, &dest->ssa, src);
else {
struct ureg_dst dst = ntt_get_dest(c, dest);
- ureg_MOV(c->ureg, dst, src);
+ ntt_MOV(c, dst, src);
}
}
@@ -802,26 +925,18 @@ ntt_emit_scalar(struct ntt_compile *c, unsigned tgsi_op,
struct ureg_src src1)
{
unsigned i;
- int num_src;
/* POW is the only 2-operand scalar op. */
- if (tgsi_op == TGSI_OPCODE_POW) {
- num_src = 2;
- } else {
- num_src = 1;
+ if (tgsi_op != TGSI_OPCODE_POW)
src1 = src0;
- }
for (i = 0; i < 4; i++) {
if (dst.WriteMask & (1 << i)) {
- struct ureg_dst this_dst = dst;
- struct ureg_src srcs[2] = {
- ureg_scalar(src0, i),
- ureg_scalar(src1, i),
- };
- this_dst.WriteMask = (1 << i);
-
- ureg_insn(c->ureg, tgsi_op, &this_dst, 1, srcs, num_src, false);
+ ntt_insn(c, tgsi_op,
+ ureg_writemask(dst, 1 << i),
+ ureg_scalar(src0, i),
+ ureg_scalar(src1, i),
+ ureg_src_undef(), ureg_src_undef());
}
}
}
@@ -836,11 +951,14 @@ ntt_emit_alu(struct ntt_compile *c, nir_alu_instr *instr)
int src_64 = nir_src_bit_size(instr->src[0].src) == 64;
int num_srcs = nir_op_infos[instr->op].num_inputs;
- ureg_set_precise(c->ureg, instr->exact);
+ c->precise = instr->exact;
assert(num_srcs <= ARRAY_SIZE(src));
for (i = 0; i < num_srcs; i++)
src[i] = ntt_get_alu_src(c, instr, i);
+ for (; i < ARRAY_SIZE(src); i++)
+ src[i] = ureg_src_undef();
+
dst = ntt_get_dest(c, &instr->dest.dest);
if (instr->dest.saturate)
@@ -983,8 +1101,8 @@ ntt_emit_alu(struct ntt_compile *c, nir_alu_instr *instr)
bool table_op64 = src_64;
if (instr->op < ARRAY_SIZE(op_map) && op_map[instr->op][table_op64] != 0) {
/* The normal path for NIR to TGSI ALU op translation */
- ureg_insn(c->ureg, op_map[instr->op][table_op64],
- &dst, 1, src, num_srcs, false);
+ ntt_insn(c, op_map[instr->op][table_op64],
+ dst, src[0], src[1], src[2], src[3]);
} else {
/* Special cases for NIR to TGSI ALU op translation. */
@@ -994,7 +1112,7 @@ ntt_emit_alu(struct ntt_compile *c, nir_alu_instr *instr)
switch (instr->op) {
case nir_op_u2u64:
- ureg_AND(c->ureg, dst, ureg_swizzle(src[0],
+ ntt_AND(c, dst, ureg_swizzle(src[0],
TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y),
ureg_imm4u(c->ureg, ~0, 0, ~0, 0));
@@ -1003,29 +1121,29 @@ ntt_emit_alu(struct ntt_compile *c, nir_alu_instr *instr)
case nir_op_i2i32:
case nir_op_u2u32:
assert(src_64);
- ureg_MOV(c->ureg, dst, ureg_swizzle(src[0],
+ ntt_MOV(c, dst, ureg_swizzle(src[0],
TGSI_SWIZZLE_X, TGSI_SWIZZLE_Z,
TGSI_SWIZZLE_X, TGSI_SWIZZLE_X));
break;
case nir_op_fabs:
if (c->options->lower_fabs)
- ureg_MAX(c->ureg, dst, src[0], ureg_negate(src[0]));
+ ntt_MAX(c, dst, src[0], ureg_negate(src[0]));
else
- ureg_MOV(c->ureg, dst, ureg_abs(src[0]));
+ ntt_MOV(c, dst, ureg_abs(src[0]));
break;
case nir_op_fsat:
if (dst_64) {
- ureg_MIN(c->ureg, dst, src[0], ntt_64bit_1f(c));
- ureg_MAX(c->ureg, dst, ureg_src(dst), ureg_imm1u(c->ureg, 0));
+ ntt_MIN(c, dst, src[0], ntt_64bit_1f(c));
+ ntt_MAX(c, dst, ureg_src(dst), ureg_imm1u(c->ureg, 0));
} else {
- ureg_MOV(c->ureg, ureg_saturate(dst), src[0]);
+ ntt_MOV(c, ureg_saturate(dst), src[0]);
}
break;
case nir_op_fneg:
- ureg_MOV(c->ureg, dst, ureg_negate(src[0]));
+ ntt_MOV(c, dst, ureg_negate(src[0]));
break;
/* NOTE: TGSI 32-bit math ops have the old "one source channel
@@ -1034,35 +1152,35 @@ ntt_emit_alu(struct ntt_compile *c, nir_alu_instr *instr)
*/
case nir_op_frcp:
assert(!dst_64);
- ntt_emit_scalar(c, TGSI_OPCODE_RCP, dst, src[0], src[1]);
+ ntt_emit_scalar(c, TGSI_OPCODE_RCP, dst, src[0], ureg_src_undef());
break;
case nir_op_frsq:
assert(!dst_64);
- ntt_emit_scalar(c, TGSI_OPCODE_RSQ, dst, src[0], src[1]);
+ ntt_emit_scalar(c, TGSI_OPCODE_RSQ, dst, src[0], ureg_src_undef());
break;
case nir_op_fsqrt:
assert(!dst_64);
- ntt_emit_scalar(c, TGSI_OPCODE_SQRT, dst, src[0], src[1]);
+ ntt_emit_scalar(c, TGSI_OPCODE_SQRT, dst, src[0], ureg_src_undef());
break;
case nir_op_fexp2:
assert(!dst_64);
- ntt_emit_scalar(c, TGSI_OPCODE_EX2, dst, src[0], src[1]);
+ ntt_emit_scalar(c, TGSI_OPCODE_EX2, dst, src[0], ureg_src_undef());
break;
case nir_op_flog2:
assert(!dst_64);
- ntt_emit_scalar(c, TGSI_OPCODE_LG2, dst, src[0], src[1]);
+ ntt_emit_scalar(c, TGSI_OPCODE_LG2, dst, src[0], ureg_src_undef());
break;
case nir_op_b2f32:
- ureg_AND(c->ureg, dst, src[0], ureg_imm1f(c->ureg, 1.0));
+ ntt_AND(c, dst, src[0], ureg_imm1f(c->ureg, 1.0));
break;
case nir_op_b2f64:
- ureg_AND(c->ureg, dst,
+ ntt_AND(c, dst,
ureg_swizzle(src[0],
TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y),
@@ -1071,24 +1189,24 @@ ntt_emit_alu(struct ntt_compile *c, nir_alu_instr *instr)
case nir_op_f2b32:
if (src_64)
- ureg_DSNE(c->ureg, dst, src[0], ureg_imm1f(c->ureg, 0));
+ ntt_DSNE(c, dst, src[0], ureg_imm1f(c->ureg, 0));
else
- ureg_FSNE(c->ureg, dst, src[0], ureg_imm1f(c->ureg, 0));
+ ntt_FSNE(c, dst, src[0], ureg_imm1f(c->ureg, 0));
break;
case nir_op_i2b32:
if (src_64) {
- ureg_U64SNE(c->ureg, dst, src[0], ureg_imm1u(c->ureg, 0));
+ ntt_U64SNE(c, dst, src[0], ureg_imm1u(c->ureg, 0));
} else
- ureg_USNE(c->ureg, dst, src[0], ureg_imm1u(c->ureg, 0));
+ ntt_USNE(c, dst, src[0], ureg_imm1u(c->ureg, 0));
break;
case nir_op_b2i32:
- ureg_AND(c->ureg, dst, src[0], ureg_imm1u(c->ureg, 1));
+ ntt_AND(c, dst, src[0], ureg_imm1u(c->ureg, 1));
break;
case nir_op_b2i64:
- ureg_AND(c->ureg, dst,
+ ntt_AND(c, dst,
ureg_swizzle(src[0],
TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y),
@@ -1096,21 +1214,21 @@ ntt_emit_alu(struct ntt_compile *c, nir_alu_instr *instr)
break;
case nir_op_fsin:
- ntt_emit_scalar(c, TGSI_OPCODE_SIN, dst, src[0], src[1]);
+ ntt_emit_scalar(c, TGSI_OPCODE_SIN, dst, src[0], ureg_src_undef());
break;
case nir_op_fcos:
- ntt_emit_scalar(c, TGSI_OPCODE_COS, dst, src[0], src[1]);
+ ntt_emit_scalar(c, TGSI_OPCODE_COS, dst, src[0], ureg_src_undef());
break;
case nir_op_fsub:
assert(!dst_64);
- ureg_ADD(c->ureg, dst, src[0], ureg_negate(src[1]));
+ ntt_ADD(c, dst, src[0], ureg_negate(src[1]));
break;
case nir_op_isub:
assert(!dst_64);
- ureg_UADD(c->ureg, dst, src[0], ureg_negate(src[1]));
+ ntt_UADD(c, dst, src[0], ureg_negate(src[1]));
break;
case nir_op_fmod:
@@ -1122,40 +1240,40 @@ ntt_emit_alu(struct ntt_compile *c, nir_alu_instr *instr)
break;
case nir_op_flrp:
- ureg_LRP(c->ureg, dst, src[2], src[1], src[0]);
+ ntt_LRP(c, dst, src[2], src[1], src[0]);
break;
case nir_op_pack_64_2x32_split:
- ureg_MOV(c->ureg, ureg_writemask(dst, TGSI_WRITEMASK_XZ),
+ ntt_MOV(c, ureg_writemask(dst, TGSI_WRITEMASK_XZ),
ureg_swizzle(src[0],
TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y));
- ureg_MOV(c->ureg, ureg_writemask(dst, TGSI_WRITEMASK_YW),
+ ntt_MOV(c, ureg_writemask(dst, TGSI_WRITEMASK_YW),
ureg_swizzle(src[1],
TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y));
break;
case nir_op_unpack_64_2x32_split_x:
- ureg_MOV(c->ureg, dst, ureg_swizzle(src[0],
+ ntt_MOV(c, dst, ureg_swizzle(src[0],
TGSI_SWIZZLE_X, TGSI_SWIZZLE_Z,
TGSI_SWIZZLE_X, TGSI_SWIZZLE_Z));
break;
case nir_op_unpack_64_2x32_split_y:
- ureg_MOV(c->ureg, dst, ureg_swizzle(src[0],
+ ntt_MOV(c, dst, ureg_swizzle(src[0],
TGSI_SWIZZLE_Y, TGSI_SWIZZLE_W,
TGSI_SWIZZLE_Y, TGSI_SWIZZLE_W));
break;
case nir_op_b32csel:
if (nir_src_bit_size(instr->src[1].src) == 64) {
- ureg_UCMP(c->ureg, dst, ureg_swizzle(src[0],
+ ntt_UCMP(c, dst, ureg_swizzle(src[0],
TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y),
src[1], src[2]);
} else {
- ureg_UCMP(c->ureg, dst, src[0], src[1], src[2]);
+ ntt_UCMP(c, dst, src[0], src[1], src[2]);
}
break;
@@ -1176,9 +1294,9 @@ ntt_emit_alu(struct ntt_compile *c, nir_alu_instr *instr)
* We don't use this in general because some hardware (i915 FS) the
* LRP gets expanded to MUL/MAD.
*/
- ureg_LRP(c->ureg, dst, src[0], src[1], src[2]);
+ ntt_LRP(c, dst, src[0], src[1], src[2]);
} else {
- ureg_CMP(c->ureg, dst, ureg_negate(src[0]), src[1], src[2]);
+ ntt_CMP(c, dst, ureg_negate(src[0]), src[1], src[2]);
}
break;
@@ -1207,9 +1325,12 @@ ntt_emit_alu(struct ntt_compile *c, nir_alu_instr *instr)
chan * 2, chan * 2 + 1,
chan * 2, chan * 2 + 1);
- ureg_insn(c->ureg, TGSI_OPCODE_DFRACEXP,
- dsts, 2,
- &chan_src, 1, false);
+ struct ntt_insn *insn = ntt_insn(c, TGSI_OPCODE_DFRACEXP,
+ dsts[0], chan_src,
+ ureg_src_undef(),
+ ureg_src_undef(),
+ ureg_src_undef());
+ insn->dst[1] = dsts[1];
}
ureg_release_temporary(c->ureg, temp);
@@ -1218,7 +1339,7 @@ ntt_emit_alu(struct ntt_compile *c, nir_alu_instr *instr)
case nir_op_ldexp:
assert(dst_64); /* 32bit handled in table. */
- ureg_DLDEXP(c->ureg, dst, src[0],
+ ntt_DLDEXP(c, dst, src[0],
ureg_swizzle(src[1],
TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y));
@@ -1238,7 +1359,7 @@ ntt_emit_alu(struct ntt_compile *c, nir_alu_instr *instr)
/* 64-bit op fixup movs */
if (!ureg_dst_is_undef(real_dst)) {
if (tgsi_64bit_compare) {
- ureg_MOV(c->ureg, real_dst,
+ ntt_MOV(c, real_dst,
ureg_swizzle(ureg_src(dst), 0, 2, 0, 2));
} else {
assert(tgsi_64bit_downconvert);
@@ -1246,7 +1367,7 @@ ntt_emit_alu(struct ntt_compile *c, nir_alu_instr *instr)
uint32_t second_bit = real_dst.WriteMask & ~(1 << (ffs(real_dst.WriteMask) - 1));
if (second_bit)
swizzle[ffs(second_bit) - 1] = 1;
- ureg_MOV(c->ureg, real_dst, ureg_swizzle(ureg_src(dst),
+ ntt_MOV(c, real_dst, ureg_swizzle(ureg_src(dst),
swizzle[0],
swizzle[1],
swizzle[2],
@@ -1255,7 +1376,7 @@ ntt_emit_alu(struct ntt_compile *c, nir_alu_instr *instr)
ureg_release_temporary(c->ureg, dst);
}
- ureg_set_precise(c->ureg, false);
+ c->precise = false;
}
static struct ureg_src
@@ -1345,7 +1466,7 @@ ntt_emit_load_ubo(struct ntt_compile *c, nir_intrinsic_instr *instr)
* subtracting it off here.
*/
addr_temp = ureg_DECL_temporary(c->ureg);
- ureg_UADD(c->ureg, addr_temp, ntt_get_src(c, instr->src[0]), ureg_imm1i(c->ureg, -c->first_ubo));
+ ntt_UADD(c, addr_temp, ntt_get_src(c, instr->src[0]), ureg_imm1i(c->ureg, -c->first_ubo));
src = ureg_src_dimension_indirect(src,
ntt_reladdr(c, ureg_src(addr_temp), 1),
c->first_ubo);
@@ -1375,18 +1496,15 @@ ntt_emit_load_ubo(struct ntt_compile *c, nir_intrinsic_instr *instr)
/* PIPE_CAP_LOAD_CONSTBUF: Not necessarily vec4 aligned, emit a
* TGSI_OPCODE_LOAD instruction from the const file.
*/
- struct ureg_dst dst = ntt_get_dest(c, &instr->dest);
- struct ureg_src srcs[2] = {
- src,
- ntt_get_src(c, instr->src[1]),
- };
- ureg_memory_insn(c->ureg, TGSI_OPCODE_LOAD,
- &dst, 1,
- srcs, ARRAY_SIZE(srcs),
- 0 /* qualifier */,
- 0 /* tex target */,
- 0 /* format: unused */
- );
+ struct ntt_insn *insn =
+ ntt_insn(c, TGSI_OPCODE_LOAD,
+ ntt_get_dest(c, &instr->dest),
+ src, ntt_get_src(c, instr->src[1]),
+ ureg_src_undef(), ureg_src_undef());
+ insn->is_mem = true;
+ insn->tex_target = 0;
+ insn->mem_qualifier = 0;
+ insn->mem_format = 0; /* unused */
}
ureg_release_temporary(c->ureg, addr_temp);
@@ -1441,7 +1559,7 @@ ntt_emit_mem(struct ntt_compile *c, nir_intrinsic_instr *instr,
memory.Index += nir_src_as_uint(instr->src[0]) / 4;
} else {
addr_temp = ureg_DECL_temporary(c->ureg);
- ureg_USHR(c->ureg, addr_temp, ntt_get_src(c, instr->src[0]), ureg_imm1i(c->ureg, 2));
+ ntt_USHR(c, addr_temp, ntt_get_src(c, instr->src[0]), ureg_imm1i(c->ureg, 2));
memory = ureg_src_indirect(memory, ntt_reladdr(c, ureg_src(addr_temp), 2));
}
memory = ureg_src_dimension(memory, nir_intrinsic_base(instr));
@@ -1566,12 +1684,11 @@ ntt_emit_mem(struct ntt_compile *c, nir_intrinsic_instr *instr,
dst = ntt_get_dest(c, &instr->dest);
}
- ureg_memory_insn(c->ureg, opcode,
- &dst, 1,
- src, num_src,
- qualifier,
- TGSI_TEXTURE_BUFFER,
- 0 /* format: unused */);
+ struct ntt_insn *insn = ntt_insn(c, opcode, dst, src[0], src[1], src[2], src[3]);
+ insn->tex_target = TGSI_TEXTURE_BUFFER;
+ insn->mem_qualifier = qualifier;
+ insn->mem_format = 0; /* unused */
+ insn->is_mem = true;
ureg_release_temporary(c->ureg, addr_temp);
}
@@ -1606,8 +1723,8 @@ ntt_emit_image_load_store(struct ntt_compile *c, nir_intrinsic_instr *instr)
if (dim == GLSL_SAMPLER_DIM_MS) {
temp = ureg_DECL_temporary(c->ureg);
- ureg_MOV(c->ureg, temp, coord);
- ureg_MOV(c->ureg, ureg_writemask(temp, 1 << (is_array ? 3 : 2)),
+ ntt_MOV(c, temp, coord);
+ ntt_MOV(c, ureg_writemask(temp, 1 << (is_array ? 3 : 2)),
ureg_scalar(ntt_get_src(c, instr->src[2]), TGSI_SWIZZLE_X));
coord = ureg_src(temp);
}
@@ -1667,10 +1784,11 @@ ntt_emit_image_load_store(struct ntt_compile *c, nir_intrinsic_instr *instr)
unreachable("bad op");
}
- ureg_memory_insn(c->ureg, op, &dst, 1, srcs, num_src,
- ntt_get_access_qualifier(instr),
- target,
- nir_intrinsic_format(instr));
+ struct ntt_insn *insn = ntt_insn(c, op, dst, srcs[0], srcs[1], srcs[2], srcs[3]);
+ insn->tex_target = target;
+ insn->mem_qualifier = ntt_get_access_qualifier(instr);
+ insn->mem_format = nir_intrinsic_format(instr);
+ insn->is_mem = true;
if (!ureg_dst_is_undef(temp))
ureg_release_temporary(c->ureg, temp);
@@ -1751,20 +1869,19 @@ ntt_emit_load_input(struct ntt_compile *c, nir_intrinsic_instr *instr)
if (c->centroid_inputs & (1ull << nir_intrinsic_base(instr))) {
ntt_store(c, &instr->dest, input);
} else {
- ureg_INTERP_CENTROID(c->ureg, ntt_get_dest(c, &instr->dest),
- input);
+ ntt_INTERP_CENTROID(c, ntt_get_dest(c, &instr->dest), input);
}
break;
case nir_intrinsic_load_barycentric_at_sample:
/* We stored the sample in the fake "bary" dest. */
- ureg_INTERP_SAMPLE(c->ureg, ntt_get_dest(c, &instr->dest), input,
+ ntt_INTERP_SAMPLE(c, ntt_get_dest(c, &instr->dest), input,
ntt_get_src(c, instr->src[0]));
break;
case nir_intrinsic_load_barycentric_at_offset:
/* We stored the offset in the fake "bary" dest. */
- ureg_INTERP_OFFSET(c->ureg, ntt_get_dest(c, &instr->dest), input,
+ ntt_INTERP_OFFSET(c, ntt_get_dest(c, &instr->dest), input,
ntt_get_src(c, instr->src[0]));
break;
@@ -1810,7 +1927,7 @@ ntt_emit_store_output(struct ntt_compile *c, nir_intrinsic_instr *instr)
src = ureg_swizzle(src, swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
- ureg_MOV(c->ureg, out, src);
+ ntt_MOV(c, out, src);
}
static void
@@ -1832,7 +1949,7 @@ ntt_emit_load_output(struct ntt_compile *c, nir_intrinsic_instr *instr)
out = ntt_ureg_dst_indirect(c, out, instr->src[0]);
}
- ureg_MOV(c->ureg, ntt_get_dest(c, &instr->dest), ureg_src(out));
+ ntt_MOV(c, ntt_get_dest(c, &instr->dest), ureg_src(out));
}
static void
@@ -1858,7 +1975,7 @@ ntt_emit_load_sysval(struct ntt_compile *c, nir_intrinsic_instr *instr)
switch (instr->intrinsic) {
case nir_intrinsic_load_vertex_id:
case nir_intrinsic_load_instance_id:
- ureg_U2F(c->ureg, ntt_get_dest(c, &instr->dest), sv);
+ ntt_U2F(c, ntt_get_dest(c, &instr->dest), sv);
return;
default:
@@ -1928,7 +2045,7 @@ ntt_emit_intrinsic(struct ntt_compile *c, nir_intrinsic_instr *instr)
break;
case nir_intrinsic_discard:
- ureg_KILL(c->ureg);
+ ntt_KILL(c);
break;
case nir_intrinsic_discard_if: {
@@ -1936,12 +2053,12 @@ ntt_emit_intrinsic(struct ntt_compile *c, nir_intrinsic_instr *instr)
if (c->native_integers) {
struct ureg_dst temp = ureg_writemask(ureg_DECL_temporary(c->ureg), 1);
- ureg_AND(c->ureg, temp, cond, ureg_imm1f(c->ureg, 1.0));
- ureg_KILL_IF(c->ureg, ureg_scalar(ureg_negate(ureg_src(temp)), 0));
+ ntt_AND(c, temp, cond, ureg_imm1f(c->ureg, 1.0));
+ ntt_KILL_IF(c, ureg_scalar(ureg_negate(ureg_src(temp)), 0));
ureg_release_temporary(c->ureg, temp);
} else {
/* For !native_integers, the bool got lowered to 1.0 or 0.0. */
- ureg_KILL_IF(c->ureg, ureg_negate(cond));
+ ntt_KILL_IF(c, ureg_negate(cond));
}
break;
}
@@ -2015,48 +2132,48 @@ ntt_emit_intrinsic(struct ntt_compile *c, nir_intrinsic_instr *instr)
case nir_intrinsic_control_barrier:
case nir_intrinsic_memory_barrier_tcs_patch:
- ureg_BARRIER(c->ureg);
+ ntt_BARRIER(c);
break;
case nir_intrinsic_memory_barrier:
- ureg_MEMBAR(c->ureg, ureg_imm1u(c->ureg,
- TGSI_MEMBAR_SHADER_BUFFER |
- TGSI_MEMBAR_ATOMIC_BUFFER |
- TGSI_MEMBAR_SHADER_IMAGE |
- TGSI_MEMBAR_SHARED));
+ ntt_MEMBAR(c, ureg_imm1u(c->ureg,
+ TGSI_MEMBAR_SHADER_BUFFER |
+ TGSI_MEMBAR_ATOMIC_BUFFER |
+ TGSI_MEMBAR_SHADER_IMAGE |
+ TGSI_MEMBAR_SHARED));
break;
case nir_intrinsic_memory_barrier_atomic_counter:
- ureg_MEMBAR(c->ureg, ureg_imm1u(c->ureg, TGSI_MEMBAR_ATOMIC_BUFFER));
+ ntt_MEMBAR(c, ureg_imm1u(c->ureg, TGSI_MEMBAR_ATOMIC_BUFFER));
break;
case nir_intrinsic_memory_barrier_buffer:
- ureg_MEMBAR(c->ureg, ureg_imm1u(c->ureg, TGSI_MEMBAR_SHADER_BUFFER));
+ ntt_MEMBAR(c, ureg_imm1u(c->ureg, TGSI_MEMBAR_SHADER_BUFFER));
break;
case nir_intrinsic_memory_barrier_image:
- ureg_MEMBAR(c->ureg, ureg_imm1u(c->ureg, TGSI_MEMBAR_SHADER_IMAGE));
+ ntt_MEMBAR(c, ureg_imm1u(c->ureg, TGSI_MEMBAR_SHADER_IMAGE));
break;
case nir_intrinsic_memory_barrier_shared:
- ureg_MEMBAR(c->ureg, ureg_imm1u(c->ureg, TGSI_MEMBAR_SHARED));
+ ntt_MEMBAR(c, ureg_imm1u(c->ureg, TGSI_MEMBAR_SHARED));
break;
case nir_intrinsic_group_memory_barrier:
- ureg_MEMBAR(c->ureg, ureg_imm1u(c->ureg,
- TGSI_MEMBAR_SHADER_BUFFER |
- TGSI_MEMBAR_ATOMIC_BUFFER |
- TGSI_MEMBAR_SHADER_IMAGE |
- TGSI_MEMBAR_SHARED |
- TGSI_MEMBAR_THREAD_GROUP));
+ ntt_MEMBAR(c, ureg_imm1u(c->ureg,
+ TGSI_MEMBAR_SHADER_BUFFER |
+ TGSI_MEMBAR_ATOMIC_BUFFER |
+ TGSI_MEMBAR_SHADER_IMAGE |
+ TGSI_MEMBAR_SHARED |
+ TGSI_MEMBAR_THREAD_GROUP));
break;
case nir_intrinsic_end_primitive:
- ureg_ENDPRIM(c->ureg, ureg_imm1u(c->ureg, nir_intrinsic_stream_id(instr)));
+ ntt_ENDPRIM(c, ureg_imm1u(c->ureg, nir_intrinsic_stream_id(instr)));
break;
case nir_intrinsic_emit_vertex:
- ureg_EMIT(c->ureg, ureg_imm1u(c->ureg, nir_intrinsic_stream_id(instr)));
+ ntt_EMIT(c, ureg_imm1u(c->ureg, nir_intrinsic_stream_id(instr)));
break;
/* In TGSI we don't actually generate the barycentric coords, and emit
@@ -2218,20 +2335,19 @@ ntt_emit_texture(struct ntt_compile *c, nir_tex_instr *instr)
unreachable("unknown texture type");
}
- struct tgsi_texture_offset tex_offsets[4];
- unsigned num_tex_offsets = 0;
+ struct tgsi_texture_offset tex_offset = {
+ .File = TGSI_FILE_NULL
+ };
int tex_offset_src = nir_tex_instr_src_index(instr, nir_tex_src_offset);
if (tex_offset_src >= 0) {
struct ureg_src offset = ntt_get_src(c, instr->src[tex_offset_src].src);
- tex_offsets[0].File = offset.File;
- tex_offsets[0].Index = offset.Index;
- tex_offsets[0].SwizzleX = offset.SwizzleX;
- tex_offsets[0].SwizzleY = offset.SwizzleY;
- tex_offsets[0].SwizzleZ = offset.SwizzleZ;
- tex_offsets[0].Padding = 0;
-
- num_tex_offsets = 1;
+ tex_offset.File = offset.File;
+ tex_offset.Index = offset.Index;
+ tex_offset.SwizzleX = offset.SwizzleX;
+ tex_offset.SwizzleY = offset.SwizzleY;
+ tex_offset.SwizzleZ = offset.SwizzleZ;
+ tex_offset.Padding = 0;
}
struct ureg_dst tex_dst;
@@ -2240,15 +2356,17 @@ ntt_emit_texture(struct ntt_compile *c, nir_tex_instr *instr)
else
tex_dst = dst;
- ureg_tex_insn(c->ureg, tex_opcode,
- &tex_dst, 1,
- target,
- tex_type,
- tex_offsets, num_tex_offsets,
- s.srcs, s.i);
+ while (s.i < 4)
+ s.srcs[s.i++] = ureg_src_undef();
+
+ struct ntt_insn *insn = ntt_insn(c, tex_opcode, tex_dst, s.srcs[0], s.srcs[1], s.srcs[2], s.srcs[3]);
+ insn->tex_target = target;
+ insn->tex_return_type = tex_type;
+ insn->tex_offset = tex_offset;
+ insn->is_tex = true;
if (instr->op == nir_texop_query_levels) {
- ureg_MOV(c->ureg, dst, ureg_scalar(ureg_src(tex_dst), 3));
+ ntt_MOV(c, dst, ureg_scalar(ureg_src(tex_dst), 3));
ureg_release_temporary(c->ureg, tex_dst);
}
}
@@ -2258,11 +2376,11 @@ ntt_emit_jump(struct ntt_compile *c, nir_jump_instr *jump)
{
switch (jump->type) {
case nir_jump_break:
- ureg_BRK(c->ureg);
+ ntt_BRK(c);
break;
case nir_jump_continue:
- ureg_CONT(c->ureg);
+ ntt_CONT(c);
break;
default:
@@ -2326,38 +2444,27 @@ ntt_emit_instr(struct ntt_compile *c, nir_instr *instr)
static void
ntt_emit_if(struct ntt_compile *c, nir_if *if_stmt)
{
- unsigned label;
-
if (c->native_integers)
- ureg_UIF(c->ureg, c->if_cond, &label);
+ ntt_UIF(c, c->if_cond);
else
- ureg_IF(c->ureg, c->if_cond, &label);
+ ntt_IF(c, c->if_cond);
ntt_emit_cf_list(c, &if_stmt->then_list);
if (!nir_cf_list_is_empty_block(&if_stmt->else_list)) {
- ureg_fixup_label(c->ureg, label, ureg_get_instruction_number(c->ureg));
- ureg_ELSE(c->ureg, &label);
+ ntt_ELSE(c);
ntt_emit_cf_list(c, &if_stmt->else_list);
}
- ureg_fixup_label(c->ureg, label, ureg_get_instruction_number(c->ureg));
- ureg_ENDIF(c->ureg);
+ ntt_ENDIF(c);
}
static void
ntt_emit_loop(struct ntt_compile *c, nir_loop *loop)
{
- /* GLSL-to-TGSI never set the begin/end labels to anything, even though nvfx
- * does reference BGNLOOP's. Follow the former behavior unless something comes up
- * with a need.
- */
- unsigned begin_label;
- ureg_BGNLOOP(c->ureg, &begin_label);
+ ntt_BGNLOOP(c);
ntt_emit_cf_list(c, &loop->body);
-
- unsigned end_label;
- ureg_ENDLOOP(c->ureg, &end_label);
+ ntt_ENDLOOP(c);
}
static void
@@ -2392,9 +2499,20 @@ ntt_src_live_interval_end_cb(nir_src *src, void *state)
static void
ntt_emit_block(struct ntt_compile *c, nir_block *block)
{
+ struct ntt_block *ntt_block = ntt_block_from_nir(c, block);
+ c->cur_block = ntt_block;
+
nir_foreach_instr(instr, block) {
ntt_emit_instr(c, instr);
+ /* Sanity check that we didn't accidentally ureg_OPCODE() instead of ntt_OPCODE(). */
+ if (ureg_get_instruction_number(c->ureg) != 0) {
+ fprintf(stderr, "Emitted ureg insn during: ");
+ nir_print_instr(instr, stderr);
+ fprintf(stderr, "\n");
+ unreachable("emitted ureg insn");
+ }
+
nir_foreach_src(instr, ntt_src_live_interval_end_cb, c);
}
@@ -2441,6 +2559,117 @@ ntt_emit_cf_list(struct ntt_compile *c, struct exec_list *list)
}
}
+static void
+ntt_emit_block_ureg(struct ntt_compile *c, struct nir_block *block)
+{
+ struct ntt_block *ntt_block = ntt_block_from_nir(c, block);
+
+ /* Emit the ntt insns to tgsi_ureg. */
+ util_dynarray_foreach(&ntt_block->insns, struct ntt_insn, insn) {
+ const struct tgsi_opcode_info *opcode_info =
+ tgsi_get_opcode_info(insn->opcode);
+
+ switch (insn->opcode) {
+ case TGSI_OPCODE_UIF:
+ ureg_UIF(c->ureg, insn->src[0], &c->cf_label);
+ break;
+
+ case TGSI_OPCODE_IF:
+ ureg_IF(c->ureg, insn->src[0], &c->cf_label);
+ break;
+
+ case TGSI_OPCODE_ELSE:
+ ureg_fixup_label(c->ureg, c->current_if_else, ureg_get_instruction_number(c->ureg));
+ ureg_ELSE(c->ureg, &c->cf_label);
+ c->current_if_else = c->cf_label;
+ break;
+
+ case TGSI_OPCODE_ENDIF:
+ ureg_fixup_label(c->ureg, c->current_if_else, ureg_get_instruction_number(c->ureg));
+ ureg_ENDIF(c->ureg);
+ break;
+
+ case TGSI_OPCODE_BGNLOOP:
+ /* GLSL-to-TGSI never set the begin/end labels to anything, even though nvfx
+ * does reference BGNLOOP's. Follow the former behavior unless something comes up
+ * with a need.
+ */
+ ureg_BGNLOOP(c->ureg, &c->cf_label);
+ break;
+
+ case TGSI_OPCODE_ENDLOOP:
+ ureg_ENDLOOP(c->ureg, &c->cf_label);
+ break;
+
+ default:
+ if (insn->is_tex) {
+ ureg_tex_insn(c->ureg, insn->opcode,
+ insn->dst, opcode_info->num_dst,
+ insn->tex_target, insn->tex_return_type,
+ &insn->tex_offset,
+ insn->tex_offset.File != TGSI_FILE_NULL ? 1 : 0,
+ insn->src, opcode_info->num_src);
+ } else if (insn->is_mem) {
+ ureg_memory_insn(c->ureg, insn->opcode,
+ insn->dst, opcode_info->num_dst,
+ insn->src, opcode_info->num_src,
+ insn->mem_qualifier,
+ insn->tex_target,
+ insn->mem_format);
+ } else {
+ ureg_insn(c->ureg, insn->opcode,
+ insn->dst, opcode_info->num_dst,
+ insn->src, opcode_info->num_src,
+ insn->precise);
+ }
+ }
+ }
+}
+
+static void
+ntt_emit_if_ureg(struct ntt_compile *c, nir_if *if_stmt)
+{
+ /* Note: the last block emitted our IF opcode. */
+
+ int if_stack = c->current_if_else;
+ c->current_if_else = c->cf_label;
+
+ /* Either the then or else block includes the ENDIF, which will fix up the
+ * IF(/ELSE)'s label for jumping
+ */
+ ntt_emit_cf_list_ureg(c, &if_stmt->then_list);
+ ntt_emit_cf_list_ureg(c, &if_stmt->else_list);
+
+ c->current_if_else = if_stack;
+}
+
+static void
+ntt_emit_cf_list_ureg(struct ntt_compile *c, struct exec_list *list)
+{
+ foreach_list_typed(nir_cf_node, node, node, list) {
+ switch (node->type) {
+ case nir_cf_node_block:
+ ntt_emit_block_ureg(c, nir_cf_node_as_block(node));
+ break;
+
+ case nir_cf_node_if:
+ ntt_emit_if_ureg(c, nir_cf_node_as_if(node));
+ break;
+
+ case nir_cf_node_loop:
+ /* GLSL-to-TGSI never set the begin/end labels to anything, even though nvfx
+ * does reference BGNLOOP's. Follow the former behavior unless something comes up
+ * with a need.
+ */
+ ntt_emit_cf_list_ureg(c, &nir_cf_node_as_loop(node)->body);
+ break;
+
+ default:
+ unreachable("unknown CF type");
+ }
+ }
+}
+
static void
ntt_emit_impl(struct ntt_compile *c, nir_function_impl *impl)
{
@@ -2450,11 +2679,30 @@ ntt_emit_impl(struct ntt_compile *c, nir_function_impl *impl)
c->ssa_temp = rzalloc_array(c, struct ureg_src, impl->ssa_alloc);
c->reg_temp = rzalloc_array(c, struct ureg_dst, impl->reg_alloc);
+ /* Set up the struct ntt_blocks to put insns in */
+ c->blocks = _mesa_pointer_hash_table_create(c);
+ nir_foreach_block(block, impl) {
+ struct ntt_block *ntt_block = rzalloc(c->blocks, struct ntt_block);
+ util_dynarray_init(&ntt_block->insns, ntt_block);
+ _mesa_hash_table_insert(c->blocks, block, ntt_block);
+ }
+
+ c->cur_block = ntt_block_from_nir(c, nir_start_block(impl));
+ ntt_setup_inputs(c);
+ ntt_setup_outputs(c);
+ ntt_setup_uniforms(c);
+
ntt_setup_registers(c, &impl->registers);
+
+ /* Emit the ntt insns */
ntt_emit_cf_list(c, &impl->body);
+ /* Turn the ntt insns into actual TGSI tokens */
+ ntt_emit_cf_list_ureg(c, &impl->body);
+
ralloc_free(c->liveness);
c->liveness = NULL;
+
}
static int
@@ -3200,10 +3448,6 @@ const void *nir_to_tgsi_options(struct nir_shader *s,
c->ureg = ureg_create(pipe_shader_type_from_mesa(s->info.stage));
ureg_setup_shader_info(c->ureg, &s->info);
- ntt_setup_inputs(c);
- ntt_setup_outputs(c);
- ntt_setup_uniforms(c);
-
if (s->info.stage == MESA_SHADER_FRAGMENT) {
/* The draw module's polygon stipple layer doesn't respect the chosen
* coordinate mode, so leave it as unspecified unless we're actually
diff --git a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h
index 1aff513813d..272d865a2db 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h
@@ -210,6 +210,7 @@ OP12(INTERP_OFFSET)
#undef OP11
#undef OP12
#undef OP13
+#undef OP14
#undef OP00_LBL
#undef OP01_LBL
More information about the mesa-commit
mailing list