[Mesa-dev] [PATCH v2 5/6] nvc0/ir: add support for new bitfield manipulation opcodes
Ilia Mirkin
imirkin at alum.mit.edu
Sat Apr 26 09:17:00 PDT 2014
This adds support for the following TGSI opcodes:
IBFE, UBFE, BFI, LSB, IMSB, UMSB, BREV, POPC
all of which are required for ARB_gpu_shader5 (ARB_gs5) support.
Signed-off-by: Ilia Mirkin <imirkin at alum.mit.edu>
---
src/gallium/drivers/nouveau/codegen/nv50_ir.h | 2 +
.../drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp | 35 ++++++++++++
.../drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp | 17 ++++++
.../drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp | 64 +++++++++++++++++++++-
.../drivers/nouveau/codegen/nv50_ir_print.cpp | 1 +
.../drivers/nouveau/codegen/nv50_ir_target.cpp | 5 +-
.../nouveau/codegen/nv50_ir_target_nvc0.cpp | 7 ++-
7 files changed, 127 insertions(+), 4 deletions(-)
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
index c57729e..919d3a4 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
@@ -143,6 +143,7 @@ enum operation
OP_POPCNT, // bitcount(src0 & src1)
OP_INSBF, // insert first src1[8:15] bits of src0 into src2 at src1[0:7]
OP_EXTBF, // place bits [K,K+N) of src0 into dst, src1 = 0xNNKK
+ OP_BFIND, // find highest/lowest set bit
OP_PERMT, // dst = bytes from src2,src0 selected by src1 (nvc0's src order)
OP_ATOM,
OP_BAR, // execution barrier, sources = { id, thread count, predicate }
@@ -171,6 +172,7 @@ enum operation
#define NV50_IR_SUBOP_TEXBAR(n) n
#define NV50_IR_SUBOP_MOV_FINAL 1
#define NV50_IR_SUBOP_EXTBF_REV 1
+#define NV50_IR_SUBOP_BFIND_SAMT 1
#define NV50_IR_SUBOP_PERMT_F4E 1
#define NV50_IR_SUBOP_PERMT_B4E 2
#define NV50_IR_SUBOP_PERMT_RC8 3
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
index c258b6b..63d5525 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
@@ -94,6 +94,8 @@ private:
void emitLogicOp(const Instruction *, uint8_t subOp);
void emitPOPC(const Instruction *);
void emitINSBF(const Instruction *);
+ void emitEXTBF(const Instruction *);
+ void emitBFIND(const Instruction *);
void emitShift(const Instruction *);
void emitSFnOp(const Instruction *, uint8_t subOp);
@@ -696,6 +698,30 @@ CodeEmitterGK110::emitINSBF(const Instruction *i)
}
void
+CodeEmitterGK110::emitEXTBF(const Instruction *i)
+{
+ emitForm_21(i, 0x600, 0xc00);
+
+ if (i->dType == TYPE_S32)
+ code[1] |= 0x80000;
+ if (i->subOp == NV50_IR_SUBOP_EXTBF_REV)
+ code[1] |= 0x800;
+}
+
+void
+CodeEmitterGK110::emitBFIND(const Instruction *i)
+{
+ emitForm_21(i, 0x618, 0xc18);
+
+ if (i->dType == TYPE_S32)
+ code[1] |= 0x80000;
+ if (i->src(0).mod == Modifier(NV50_IR_MOD_NOT))
+ code[1] |= 0x800;
+ if (i->subOp == NV50_IR_SUBOP_BFIND_SAMT)
+ code[1] |= 0x1000;
+}
+
+void
CodeEmitterGK110::emitShift(const Instruction *i)
{
if (i->op == OP_SHR) {
@@ -1725,6 +1751,15 @@ CodeEmitterGK110::emitInstruction(Instruction *insn)
case OP_POPCNT:
emitPOPC(insn);
break;
+ case OP_INSBF:
+ emitINSBF(insn);
+ break;
+ case OP_EXTBF:
+ emitEXTBF(insn);
+ break;
+ case OP_BFIND:
+ emitBFIND(insn);
+ break;
case OP_JOIN:
emitNOP(insn);
insn->join = 1;
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
index cef92cf..11a7c2b 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
@@ -104,6 +104,7 @@ private:
void emitPOPC(const Instruction *);
void emitINSBF(const Instruction *);
void emitEXTBF(const Instruction *);
+ void emitBFIND(const Instruction *);
void emitPERMT(const Instruction *);
void emitShift(const Instruction *);
@@ -804,6 +805,19 @@ CodeEmitterNVC0::emitEXTBF(const Instruction *i)
}
void
+CodeEmitterNVC0::emitBFIND(const Instruction *i)
+{
+ emitForm_B(i, HEX64(78000000, 00000003));
+
+ if (i->dType == TYPE_S32)
+ code[0] |= 1 << 5;
+ if (i->src(0).mod == Modifier(NV50_IR_MOD_NOT))
+ code[0] |= 1 << 8;
+ if (i->subOp == NV50_IR_SUBOP_BFIND_SAMT)
+ code[0] |= 1 << 6;
+}
+
+void
CodeEmitterNVC0::emitPERMT(const Instruction *i)
{
emitForm_A(i, HEX64(24000000, 00000004));
@@ -2382,6 +2396,9 @@ CodeEmitterNVC0::emitInstruction(Instruction *insn)
case OP_EXTBF:
emitEXTBF(insn);
break;
+ case OP_BFIND:
+ emitBFIND(insn);
+ break;
case OP_PERMT:
emitPERMT(insn);
break;
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
index fc418bf..05a79a3 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
@@ -418,6 +418,8 @@ nv50_ir::DataType Instruction::inferSrcType() const
case TGSI_OPCODE_ATOMXOR:
case TGSI_OPCODE_ATOMUMIN:
case TGSI_OPCODE_ATOMUMAX:
+ case TGSI_OPCODE_UBFE:
+ case TGSI_OPCODE_UMSB:
return nv50_ir::TYPE_U32;
case TGSI_OPCODE_I2F:
case TGSI_OPCODE_IDIV:
@@ -434,6 +436,8 @@ nv50_ir::DataType Instruction::inferSrcType() const
case TGSI_OPCODE_UARL:
case TGSI_OPCODE_ATOMIMIN:
case TGSI_OPCODE_ATOMIMAX:
+ case TGSI_OPCODE_IBFE:
+ case TGSI_OPCODE_IMSB:
return nv50_ir::TYPE_S32;
default:
return nv50_ir::TYPE_F32;
@@ -625,6 +629,15 @@ static nv50_ir::operation translateOpcode(uint opcode)
NV50_IR_OPCODE_CASE(TXB2, TXB);
NV50_IR_OPCODE_CASE(TXL2, TXL);
+ NV50_IR_OPCODE_CASE(IBFE, EXTBF);
+ NV50_IR_OPCODE_CASE(UBFE, EXTBF);
+ NV50_IR_OPCODE_CASE(BFI, INSBF);
+ NV50_IR_OPCODE_CASE(BREV, EXTBF);
+ NV50_IR_OPCODE_CASE(POPC, POPCNT);
+ NV50_IR_OPCODE_CASE(LSB, BFIND);
+ NV50_IR_OPCODE_CASE(IMSB, BFIND);
+ NV50_IR_OPCODE_CASE(UMSB, BFIND);
+
NV50_IR_OPCODE_CASE(END, EXIT);
default:
@@ -2137,7 +2150,7 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn)
Instruction *geni;
Value *dst0[4], *rDst0[4];
- Value *src0, *src1, *src2;
+ Value *src0, *src1, *src2, *src3;
Value *val0, *val1;
int c;
@@ -2688,6 +2701,55 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn)
case TGSI_OPCODE_ATOMIMAX:
handleATOM(dst0, dstTy, tgsi::opcodeToSubOp(tgsi.getOpcode()));
break;
+ case TGSI_OPCODE_IBFE:
+ case TGSI_OPCODE_UBFE:
+ FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
+ src0 = fetchSrc(0, c);
+ src1 = fetchSrc(1, c);
+ src2 = fetchSrc(2, c);
+ mkOp3(OP_INSBF, TYPE_U32, src1, src2, mkImm(0x808), src1);
+ mkOp2(OP_EXTBF, dstTy, dst0[c], src0, src1);
+ }
+ break;
+ case TGSI_OPCODE_BFI:
+ FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
+ src0 = fetchSrc(0, c);
+ src1 = fetchSrc(1, c);
+ src2 = fetchSrc(2, c);
+ src3 = fetchSrc(3, c);
+ mkOp3(OP_INSBF, TYPE_U32, src2, src3, mkImm(0x808), src2);
+ mkOp3(OP_INSBF, TYPE_U32, dst0[c], src1, src2, src0);
+ }
+ break;
+ case TGSI_OPCODE_LSB:
+ FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
+ src0 = fetchSrc(0, c);
+ geni = mkOp2(OP_EXTBF, TYPE_U32, src0, src0, mkImm(0x2000));
+ geni->subOp = NV50_IR_SUBOP_EXTBF_REV;
+ geni = mkOp1(OP_BFIND, TYPE_U32, dst0[c], src0);
+ geni->subOp = NV50_IR_SUBOP_BFIND_SAMT;
+ }
+ break;
+ case TGSI_OPCODE_IMSB:
+ case TGSI_OPCODE_UMSB:
+ FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
+ src0 = fetchSrc(0, c);
+ mkOp1(OP_BFIND, srcTy, dst0[c], src0);
+ }
+ break;
+ case TGSI_OPCODE_BREV:
+ FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
+ src0 = fetchSrc(0, c);
+ geni = mkOp2(OP_EXTBF, TYPE_U32, dst0[c], src0, mkImm(0x2000));
+ geni->subOp = NV50_IR_SUBOP_EXTBF_REV;
+ }
+ break;
+ case TGSI_OPCODE_POPC:
+ FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
+ src0 = fetchSrc(0, c);
+ mkOp2(OP_POPCNT, TYPE_U32, dst0[c], src0, src0);
+ }
+ break;
default:
ERROR("unhandled TGSI opcode: %u\n", tgsi.getOpcode());
assert(0);
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
index 42013e5..f788c72 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
@@ -173,6 +173,7 @@ const char *operationStr[OP_LAST + 1] =
"popcnt",
"insbf",
"extbf",
+ "bfind",
"permt",
"atom",
"bar",
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp
index f479cf4..4ca5687 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp
@@ -50,7 +50,7 @@ const uint8_t Target::operationSrcNr[] =
0, // TEXBAR
1, 1, // DFDX, DFDY
1, 2, 1, 2, 0, 0, // RDSV, WRSV, PIXLD, QUADOP, QUADON, QUADPOP
- 2, 3, 2, 3, // POPCNT, INSBF, EXTBF, PERMT
+ 2, 3, 2, 1, 3, // POPCNT, INSBF, EXTBF, BFIND, PERMT
2, 2, // ATOM, BAR
2, 2, 2, 2, 3, 2, // VADD, VAVG, VMIN, VMAX, VSAD, VSET,
2, 2, 2, 1, // VSHR, VSHL, VSEL, CCTL
@@ -115,8 +115,9 @@ const OpClass Target::operationClass[] =
// DFDX, DFDY, RDSV, WRSV; PIXLD, QUADOP, QUADON, QUADPOP
OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER,
OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_CONTROL, OPCLASS_CONTROL,
- // POPCNT, INSBF, EXTBF, PERMT
+ // POPCNT, INSBF, EXTBF, BFIND; PERMT
OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD,
+ OPCLASS_BITFIELD,
// ATOM, BAR
OPCLASS_ATOMIC, OPCLASS_CONTROL,
// VADD, VAVG, VMIN, VMAX
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
index c747f3e..395d5b5 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
@@ -124,7 +124,10 @@ static const struct opProperties _initProps[] =
{ OP_DFDX, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0 },
{ OP_DFDY, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0 },
{ OP_CALL, 0x0, 0x0, 0x0, 0x0, 0x1, 0x0 },
- { OP_INSBF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x4 },
+ { OP_POPCNT, 0x0, 0x0, 0x3, 0x0, 0x2, 0x2 },
+ { OP_INSBF, 0x0, 0x0, 0x0, 0x0, 0x6, 0x2 },
+ { OP_EXTBF, 0x0, 0x0, 0x0, 0x0, 0x2, 0x2 },
+ { OP_BFIND, 0x0, 0x0, 0x1, 0x0, 0x1, 0x1 },
{ OP_PERMT, 0x0, 0x0, 0x0, 0x0, 0x6, 0x2 },
{ OP_SET_AND, 0x3, 0x3, 0x0, 0x0, 0x2, 0x2 },
{ OP_SET_OR, 0x3, 0x3, 0x0, 0x0, 0x2, 0x2 },
@@ -393,6 +396,8 @@ TargetNVC0::isModSupported(const Instruction *insn, int s, Modifier mod) const
case OP_AND:
case OP_OR:
case OP_XOR:
+ case OP_POPCNT:
+ case OP_BFIND:
break;
case OP_SET:
if (insn->sType != TYPE_F32)
--
1.8.3.2
More information about the mesa-dev
mailing list