[Mesa-dev] [PATCH v2 5/6] nvc0/ir: add support for new bitfield manipulation opcodes

Ilia Mirkin imirkin at alum.mit.edu
Sat Apr 26 09:17:00 PDT 2014


This adds support for the following TGSI opcodes:

IBFE, UBFE, BFI, LSB, IMSB, UMSB, BREV, POPC

All of these are required for ARB_gpu_shader5 (ARB_gs5) support.

Signed-off-by: Ilia Mirkin <imirkin at alum.mit.edu>
---
 src/gallium/drivers/nouveau/codegen/nv50_ir.h      |  2 +
 .../drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp | 35 ++++++++++++
 .../drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp  | 17 ++++++
 .../drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp  | 64 +++++++++++++++++++++-
 .../drivers/nouveau/codegen/nv50_ir_print.cpp      |  1 +
 .../drivers/nouveau/codegen/nv50_ir_target.cpp     |  5 +-
 .../nouveau/codegen/nv50_ir_target_nvc0.cpp        |  7 ++-
 7 files changed, 127 insertions(+), 4 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
index c57729e..919d3a4 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
@@ -143,6 +143,7 @@ enum operation
    OP_POPCNT, // bitcount(src0 & src1)
    OP_INSBF,  // insert first src1[8:15] bits of src0 into src2 at src1[0:7]
    OP_EXTBF,  // place bits [K,K+N) of src0 into dst, src1 = 0xNNKK
+   OP_BFIND,  // find highest/lowest set bit
    OP_PERMT,  // dst = bytes from src2,src0 selected by src1 (nvc0's src order)
    OP_ATOM,
    OP_BAR,    // execution barrier, sources = { id, thread count, predicate }
@@ -171,6 +172,7 @@ enum operation
 #define NV50_IR_SUBOP_TEXBAR(n)    n
 #define NV50_IR_SUBOP_MOV_FINAL    1
 #define NV50_IR_SUBOP_EXTBF_REV    1
+#define NV50_IR_SUBOP_BFIND_SAMT   1
 #define NV50_IR_SUBOP_PERMT_F4E    1
 #define NV50_IR_SUBOP_PERMT_B4E    2
 #define NV50_IR_SUBOP_PERMT_RC8    3
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
index c258b6b..63d5525 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
@@ -94,6 +94,8 @@ private:
    void emitLogicOp(const Instruction *, uint8_t subOp);
    void emitPOPC(const Instruction *);
    void emitINSBF(const Instruction *);
+   void emitEXTBF(const Instruction *);
+   void emitBFIND(const Instruction *);
    void emitShift(const Instruction *);
 
    void emitSFnOp(const Instruction *, uint8_t subOp);
@@ -696,6 +698,30 @@ CodeEmitterGK110::emitINSBF(const Instruction *i)
 }
 
 void
+CodeEmitterGK110::emitEXTBF(const Instruction *i)
+{
+   emitForm_21(i, 0x600, 0xc00);
+
+   if (i->dType == TYPE_S32)
+      code[1] |= 0x80000;
+   if (i->subOp == NV50_IR_SUBOP_EXTBF_REV)
+      code[1] |= 0x800;
+}
+
+void
+CodeEmitterGK110::emitBFIND(const Instruction *i)
+{
+   emitForm_21(i, 0x618, 0xc18);
+
+   if (i->dType == TYPE_S32)
+      code[1] |= 0x80000;
+   if (i->src(0).mod == Modifier(NV50_IR_MOD_NOT))
+      code[1] |= 0x800;
+   if (i->subOp == NV50_IR_SUBOP_BFIND_SAMT)
+      code[1] |= 0x1000;
+}
+
+void
 CodeEmitterGK110::emitShift(const Instruction *i)
 {
    if (i->op == OP_SHR) {
@@ -1725,6 +1751,15 @@ CodeEmitterGK110::emitInstruction(Instruction *insn)
    case OP_POPCNT:
       emitPOPC(insn);
       break;
+   case OP_INSBF:
+      emitINSBF(insn);
+      break;
+   case OP_EXTBF:
+      emitEXTBF(insn);
+      break;
+   case OP_BFIND:
+      emitBFIND(insn);
+      break;
    case OP_JOIN:
       emitNOP(insn);
       insn->join = 1;
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
index cef92cf..11a7c2b 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
@@ -104,6 +104,7 @@ private:
    void emitPOPC(const Instruction *);
    void emitINSBF(const Instruction *);
    void emitEXTBF(const Instruction *);
+   void emitBFIND(const Instruction *);
    void emitPERMT(const Instruction *);
    void emitShift(const Instruction *);
 
@@ -804,6 +805,19 @@ CodeEmitterNVC0::emitEXTBF(const Instruction *i)
 }
 
 void
+CodeEmitterNVC0::emitBFIND(const Instruction *i)
+{
+   emitForm_B(i, HEX64(78000000, 00000003));
+
+   if (i->dType == TYPE_S32)
+      code[0] |= 1 << 5;
+   if (i->src(0).mod == Modifier(NV50_IR_MOD_NOT))
+      code[0] |= 1 << 8;
+   if (i->subOp == NV50_IR_SUBOP_BFIND_SAMT)
+      code[0] |= 1 << 6;
+}
+
+void
 CodeEmitterNVC0::emitPERMT(const Instruction *i)
 {
    emitForm_A(i, HEX64(24000000, 00000004));
@@ -2382,6 +2396,9 @@ CodeEmitterNVC0::emitInstruction(Instruction *insn)
    case OP_EXTBF:
       emitEXTBF(insn);
       break;
+   case OP_BFIND:
+      emitBFIND(insn);
+      break;
    case OP_PERMT:
       emitPERMT(insn);
       break;
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
index fc418bf..05a79a3 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
@@ -418,6 +418,8 @@ nv50_ir::DataType Instruction::inferSrcType() const
    case TGSI_OPCODE_ATOMXOR:
    case TGSI_OPCODE_ATOMUMIN:
    case TGSI_OPCODE_ATOMUMAX:
+   case TGSI_OPCODE_UBFE:
+   case TGSI_OPCODE_UMSB:
       return nv50_ir::TYPE_U32;
    case TGSI_OPCODE_I2F:
    case TGSI_OPCODE_IDIV:
@@ -434,6 +436,8 @@ nv50_ir::DataType Instruction::inferSrcType() const
    case TGSI_OPCODE_UARL:
    case TGSI_OPCODE_ATOMIMIN:
    case TGSI_OPCODE_ATOMIMAX:
+   case TGSI_OPCODE_IBFE:
+   case TGSI_OPCODE_IMSB:
       return nv50_ir::TYPE_S32;
    default:
       return nv50_ir::TYPE_F32;
@@ -625,6 +629,15 @@ static nv50_ir::operation translateOpcode(uint opcode)
    NV50_IR_OPCODE_CASE(TXB2, TXB);
    NV50_IR_OPCODE_CASE(TXL2, TXL);
 
+   NV50_IR_OPCODE_CASE(IBFE, EXTBF);
+   NV50_IR_OPCODE_CASE(UBFE, EXTBF);
+   NV50_IR_OPCODE_CASE(BFI, INSBF);
+   NV50_IR_OPCODE_CASE(BREV, EXTBF);
+   NV50_IR_OPCODE_CASE(POPC, POPCNT);
+   NV50_IR_OPCODE_CASE(LSB, BFIND);
+   NV50_IR_OPCODE_CASE(IMSB, BFIND);
+   NV50_IR_OPCODE_CASE(UMSB, BFIND);
+
    NV50_IR_OPCODE_CASE(END, EXIT);
 
    default:
@@ -2137,7 +2150,7 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn)
    Instruction *geni;
 
    Value *dst0[4], *rDst0[4];
-   Value *src0, *src1, *src2;
+   Value *src0, *src1, *src2, *src3;
    Value *val0, *val1;
    int c;
 
@@ -2688,6 +2701,55 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn)
    case TGSI_OPCODE_ATOMIMAX:
       handleATOM(dst0, dstTy, tgsi::opcodeToSubOp(tgsi.getOpcode()));
       break;
+   case TGSI_OPCODE_IBFE:
+   case TGSI_OPCODE_UBFE:
+      FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
+         src0 = fetchSrc(0, c);
+         src1 = fetchSrc(1, c);
+         src2 = fetchSrc(2, c);
+         mkOp3(OP_INSBF, TYPE_U32, src1, src2, mkImm(0x808), src1);
+         mkOp2(OP_EXTBF, dstTy, dst0[c], src0, src1);
+      }
+      break;
+   case TGSI_OPCODE_BFI:
+      FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
+         src0 = fetchSrc(0, c);
+         src1 = fetchSrc(1, c);
+         src2 = fetchSrc(2, c);
+         src3 = fetchSrc(3, c);
+         mkOp3(OP_INSBF, TYPE_U32, src2, src3, mkImm(0x808), src2);
+         mkOp3(OP_INSBF, TYPE_U32, dst0[c], src1, src2, src0);
+      }
+      break;
+   case TGSI_OPCODE_LSB:
+      FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
+         src0 = fetchSrc(0, c);
+         geni = mkOp2(OP_EXTBF, TYPE_U32, src0, src0, mkImm(0x2000));
+         geni->subOp = NV50_IR_SUBOP_EXTBF_REV;
+         geni = mkOp1(OP_BFIND, TYPE_U32, dst0[c], src0);
+         geni->subOp = NV50_IR_SUBOP_BFIND_SAMT;
+      }
+      break;
+   case TGSI_OPCODE_IMSB:
+   case TGSI_OPCODE_UMSB:
+      FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
+         src0 = fetchSrc(0, c);
+         mkOp1(OP_BFIND, srcTy, dst0[c], src0);
+      }
+      break;
+   case TGSI_OPCODE_BREV:
+      FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
+         src0 = fetchSrc(0, c);
+         geni = mkOp2(OP_EXTBF, TYPE_U32, dst0[c], src0, mkImm(0x2000));
+         geni->subOp = NV50_IR_SUBOP_EXTBF_REV;
+      }
+      break;
+   case TGSI_OPCODE_POPC:
+      FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
+         src0 = fetchSrc(0, c);
+         mkOp2(OP_POPCNT, TYPE_U32, dst0[c], src0, src0);
+      }
+      break;
    default:
       ERROR("unhandled TGSI opcode: %u\n", tgsi.getOpcode());
       assert(0);
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
index 42013e5..f788c72 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
@@ -173,6 +173,7 @@ const char *operationStr[OP_LAST + 1] =
    "popcnt",
    "insbf",
    "extbf",
+   "bfind",
    "permt",
    "atom",
    "bar",
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp
index f479cf4..4ca5687 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp
@@ -50,7 +50,7 @@ const uint8_t Target::operationSrcNr[] =
    0,                      // TEXBAR
    1, 1,                   // DFDX, DFDY
    1, 2, 1, 2, 0, 0,       // RDSV, WRSV, PIXLD, QUADOP, QUADON, QUADPOP
-   2, 3, 2, 3,             // POPCNT, INSBF, EXTBF, PERMT
+   2, 3, 2, 1, 3,          // POPCNT, INSBF, EXTBF, BFIND, PERMT
    2, 2,                   // ATOM, BAR
    2, 2, 2, 2, 3, 2,       // VADD, VAVG, VMIN, VMAX, VSAD, VSET,
    2, 2, 2, 1,             // VSHR, VSHL, VSEL, CCTL
@@ -115,8 +115,9 @@ const OpClass Target::operationClass[] =
    // DFDX, DFDY, RDSV, WRSV; PIXLD, QUADOP, QUADON, QUADPOP
    OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER,
    OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_CONTROL, OPCLASS_CONTROL,
-   // POPCNT, INSBF, EXTBF, PERMT
+   // POPCNT, INSBF, EXTBF, BFIND; PERMT
    OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD,
+   OPCLASS_BITFIELD,
    // ATOM, BAR
    OPCLASS_ATOMIC, OPCLASS_CONTROL,
    // VADD, VAVG, VMIN, VMAX
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
index c747f3e..395d5b5 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
@@ -124,7 +124,10 @@ static const struct opProperties _initProps[] =
    { OP_DFDX,   0x1, 0x0, 0x0, 0x0, 0x0, 0x0 },
    { OP_DFDY,   0x1, 0x0, 0x0, 0x0, 0x0, 0x0 },
    { OP_CALL,   0x0, 0x0, 0x0, 0x0, 0x1, 0x0 },
-   { OP_INSBF,  0x0, 0x0, 0x0, 0x0, 0x0, 0x4 },
+   { OP_POPCNT, 0x0, 0x0, 0x3, 0x0, 0x2, 0x2 },
+   { OP_INSBF,  0x0, 0x0, 0x0, 0x0, 0x6, 0x2 },
+   { OP_EXTBF,  0x0, 0x0, 0x0, 0x0, 0x2, 0x2 },
+   { OP_BFIND,  0x0, 0x0, 0x1, 0x0, 0x1, 0x1 },
    { OP_PERMT,  0x0, 0x0, 0x0, 0x0, 0x6, 0x2 },
    { OP_SET_AND, 0x3, 0x3, 0x0, 0x0, 0x2, 0x2 },
    { OP_SET_OR, 0x3, 0x3, 0x0, 0x0, 0x2, 0x2 },
@@ -393,6 +396,8 @@ TargetNVC0::isModSupported(const Instruction *insn, int s, Modifier mod) const
       case OP_AND:
       case OP_OR:
       case OP_XOR:
+      case OP_POPCNT:
+      case OP_BFIND:
          break;
       case OP_SET:
          if (insn->sType != TYPE_F32)
-- 
1.8.3.2



More information about the mesa-dev mailing list