[Mesa-dev] [PATCH 10/11] nv50/ir: add atomics support on shared memory for Kepler

Samuel Pitoiset samuel.pitoiset at gmail.com
Sat Feb 27 14:02:06 UTC 2016

Signed-off-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
 .../nouveau/codegen/nv50_ir_lowering_nvc0.cpp      | 112 ++++++++++++++++++++-
 .../nouveau/codegen/nv50_ir_lowering_nvc0.h        |   1 +
 2 files changed, 112 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
index 2928963..0a4e494 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
@@ -1034,6 +1034,112 @@ NVC0LoweringPass::handleSUQ(Instruction *suq)
+NVC0LoweringPass::handleSharedATOMNVE4(Instruction *atom)
+   assert(atom->src(0).getFile() == FILE_MEMORY_SHARED);
+   BasicBlock *currBB = atom->bb;
+   BasicBlock *tryLockBB = atom->bb->splitBefore(atom, false);
+   BasicBlock *joinBB = atom->bb->splitAfter(atom);
+   BasicBlock *setAndUnlockBB = new BasicBlock(func);
+   BasicBlock *failLockBB = new BasicBlock(func);
+   bld.setPosition(currBB, true);
+   assert(!currBB->joinAt);
+   currBB->joinAt = bld.mkFlow(OP_JOINAT, joinBB, CC_ALWAYS, NULL);
+   Instruction *pred = bld.mkMov(bld.getSSA(1, FILE_PREDICATE),
+                                 bld.mkImm(0), TYPE_U32);
+   bld.mkFlow(OP_BRA, tryLockBB, CC_ALWAYS, NULL);
+   currBB->cfg.attach(&tryLockBB->cfg, Graph::Edge::TREE);
+   bld.setPosition(tryLockBB, true);
+   Instruction *ld =
+      bld.mkLoad(TYPE_U32, atom->getDef(0),
+                 bld.mkSymbol(FILE_MEMORY_SHARED, 0, TYPE_U32, 0), NULL);
+   ld->setDef(1, bld.getSSA(1, FILE_PREDICATE));
+   ld->subOp = NV50_IR_SUBOP_LOAD_LOCKED;
+   bld.mkFlow(OP_BRA, setAndUnlockBB, CC_P, ld->getDef(1));
+   bld.mkFlow(OP_BRA, failLockBB, CC_ALWAYS, NULL);
+   tryLockBB->cfg.attach(&failLockBB->cfg, Graph::Edge::CROSS);
+   tryLockBB->cfg.attach(&setAndUnlockBB->cfg, Graph::Edge::TREE);
+   tryLockBB->cfg.detach(&joinBB->cfg);
+   bld.remove(atom);
+   bld.setPosition(setAndUnlockBB, true);
+   Value *stVal;
+   if (atom->subOp == NV50_IR_SUBOP_ATOM_EXCH) {
+      // Read the old value, and write the new one.
+      stVal = atom->getSrc(1);
+   } else if (atom->subOp == NV50_IR_SUBOP_ATOM_CAS) {
+      CmpInstruction *set =
+         bld.mkCmp(OP_SET, CC_EQ, TYPE_U32, bld.getSSA(),
+                   TYPE_U32, ld->getDef(0), atom->getSrc(1));
+      CmpInstruction *slct =
+         bld.mkCmp(OP_SLCT, CC_NE, TYPE_U32, bld.getSSA(),
+                   TYPE_U32, atom->getSrc(2), ld->getDef(0), set->getDef(0));
+      stVal = slct->getDef(0);
+   } else {
+      operation op;
+      switch (atom->subOp) {
+      case NV50_IR_SUBOP_ATOM_ADD:
+         op = OP_ADD;
+         break;
+      case NV50_IR_SUBOP_ATOM_AND:
+         op = OP_AND;
+         break;
+      case NV50_IR_SUBOP_ATOM_OR:
+         op = OP_OR;
+         break;
+      case NV50_IR_SUBOP_ATOM_XOR:
+         op = OP_XOR;
+         break;
+      case NV50_IR_SUBOP_ATOM_MIN:
+         op = OP_MIN;
+         break;
+      case NV50_IR_SUBOP_ATOM_MAX:
+         op = OP_MAX;
+         break;
+      default:
+         assert(0);
+      }
+      Instruction *i =
+         bld.mkOp2(op, atom->dType, bld.getSSA(), ld->getDef(0),
+                   atom->getSrc(1));
+      stVal = i->getDef(0);
+   }
+   Instruction *st =
+      bld.mkStore(OP_STORE, TYPE_U32,
+                  bld.mkSymbol(FILE_MEMORY_SHARED, 0, TYPE_U32, 0),
+                  NULL, stVal);
+   st->setDef(0, pred->getDef(0));
+   bld.mkFlow(OP_BRA, failLockBB, CC_ALWAYS, NULL);
+   setAndUnlockBB->cfg.attach(&failLockBB->cfg, Graph::Edge::TREE);
+   // Loop back and retry the lock until the store has been performed.
+   bld.setPosition(failLockBB, true);
+   bld.mkFlow(OP_BRA, tryLockBB, CC_NOT_P, pred->getDef(0));
+   bld.mkFlow(OP_BRA, joinBB, CC_ALWAYS, NULL);
+   failLockBB->cfg.attach(&tryLockBB->cfg, Graph::Edge::BACK);
+   failLockBB->cfg.attach(&joinBB->cfg, Graph::Edge::TREE);
+   bld.setPosition(joinBB, false);
+   bld.mkFlow(OP_JOIN, NULL, CC_ALWAYS, NULL)->fixed = 1;
 NVC0LoweringPass::handleSharedATOM(Instruction *atom)
    assert(atom->src(0).getFile() == FILE_MEMORY_SHARED);
@@ -1138,7 +1244,11 @@ NVC0LoweringPass::handleATOM(Instruction *atom)
       sv = SV_LBASE;
-      handleSharedATOM(atom);
+      if (targ->getChipset() >= NVISA_GK104_CHIPSET) {
+         handleSharedATOMNVE4(atom);
+      } else {
+         handleSharedATOM(atom);
+      }
       return true;
       assert(atom->src(0).getFile() == FILE_MEMORY_GLOBAL);
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
index 3872f52..cf3e98f 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
@@ -106,6 +106,7 @@ protected:
    bool handleCasExch(Instruction *, bool needCctl);
    void handleSurfaceOpNVE4(TexInstruction *);
    void handleSharedATOM(Instruction *);
+   void handleSharedATOMNVE4(Instruction *);
    void checkPredicate(Instruction *);

More information about the mesa-dev mailing list