[Mesa-dev] [PATCH 11/12] nv50/ir: add atomics support on shared memory for Fermi
Samuel Pitoiset
samuel.pitoiset at gmail.com
Sun Feb 7 09:47:41 UTC 2016
On 02/07/2016 12:23 AM, Ilia Mirkin wrote:
> On Sat, Feb 6, 2016 at 5:38 PM, Samuel Pitoiset
> <samuel.pitoiset at gmail.com> wrote:
>> Signed-off-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
>> ---
>> .../drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp | 1 +
>> .../nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 102 ++++++++++++++++++++-
>> .../nouveau/codegen/nv50_ir_lowering_nvc0.h | 1 +
>> 3 files changed, 102 insertions(+), 2 deletions(-)
>>
>> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
>> index f6605eb..42b2a84 100644
>> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
>> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
>> @@ -398,6 +398,7 @@ CodeEmitterNVC0::emitForm_A(const Instruction *i, uint64_t opc)
>> srcId(i->src(s), s ? ((s == 2) ? 49 : s1) : 20);
>> break;
>> default:
>> + srcId(i->src(s), 49);
>
> Yeah... no :) I'd want to see some asserts here to make sure that
> this is what you think it is. Also, as I recall, this is related to
> SELP emission, not to anything here.
Oh right, I forgot to clean up this part. :-)
>
>> // ignore here, can be predicate or flags, but must not be address
>> break;
>> }
>> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
>> index e7cb54b..243e23a 100644
>> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
>> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
>> @@ -1033,6 +1033,99 @@ NVC0LoweringPass::handleSUQ(Instruction *suq)
>> return true;
>> }
>>
>> +void
>> +NVC0LoweringPass::handleSharedATOM(Instruction *atom)
>> +{
>> + assert(atom->src(0).getFile() == FILE_MEMORY_SHARED);
>> +
>> + BasicBlock *currBB = atom->bb;
>> + BasicBlock *tryLockAndSetBB = atom->bb->splitBefore(atom, false);
>> + BasicBlock *joinBB = atom->bb->splitAfter(atom);
>> +
>> + bld.setPosition(currBB, true);
>> + assert(!currBB->joinAt);
>> + currBB->joinAt = bld.mkFlow(OP_JOINAT, joinBB, CC_ALWAYS, NULL);
>> +
>> + bld.mkFlow(OP_BRA, tryLockAndSetBB, CC_ALWAYS, NULL);
>> + currBB->cfg.attach(&tryLockAndSetBB->cfg, Graph::Edge::TREE);
>> +
>> + bld.setPosition(tryLockAndSetBB, true);
>> +
>> + Instruction *ld =
>> + bld.mkLoad(TYPE_U32, atom->getDef(0),
>> + bld.mkSymbol(FILE_MEMORY_SHARED, 0, TYPE_U32, 0), NULL);
>> + ld->setDef(1, bld.getSSA(1, FILE_PREDICATE));
>> + ld->subOp = NV50_IR_SUBOP_LOAD_LOCKED;
>> +
>> + Value *stVal;
>> + if (atom->subOp == NV50_IR_SUBOP_ATOM_EXCH) {
>> + // Read the old value, and write the new one.
>> + stVal = atom->getSrc(1);
>> + } else if (atom->subOp == NV50_IR_SUBOP_ATOM_CAS) {
>> + CmpInstruction *set =
>> + bld.mkCmp(OP_SET, CC_EQ, TYPE_U32, bld.getSSA(1, FILE_PREDICATE),
>> + TYPE_U32, ld->getDef(0), atom->getSrc(1));
>> + set->setPredicate(CC_P, ld->getDef(1));
>> +
>> + CmpInstruction *selp =
>> + bld.mkCmp(OP_SELP, CC_NOT_P, TYPE_U32, bld.getSSA(4, FILE_ADDRESS),
>> + TYPE_U32, ld->getDef(0), atom->getSrc(2),
>> + set->getDef(0));
>> + selp->setPredicate(CC_P, ld->getDef(1));
>> +
>> + stVal = selp->getDef(0);
>> + } else {
>> + operation op;
>> +
>> + switch (atom->subOp) {
>> + case NV50_IR_SUBOP_ATOM_ADD:
>> + op = OP_ADD;
>> + break;
>> + case NV50_IR_SUBOP_ATOM_AND:
>> + op = OP_AND;
>> + break;
>> + case NV50_IR_SUBOP_ATOM_OR:
>> + op = OP_OR;
>> + break;
>> + case NV50_IR_SUBOP_ATOM_XOR:
>> + op = OP_XOR;
>> + break;
>> + case NV50_IR_SUBOP_ATOM_MIN:
>> + op = OP_MIN;
>> + break;
>> + case NV50_IR_SUBOP_ATOM_MAX:
>> + op = OP_MAX;
>> + break;
>> + default:
>> + assert(0);
>> + }
>> +
>> + Instruction *i =
>> + bld.mkOp2(op, atom->dType, bld.getSSA(4, FILE_ADDRESS), ld->getDef(0),
>> + atom->getSrc(1));
>
> Why is this FILE_ADDRESS? This is just a regular operation, nothing to
> do with address registers. Just bld.getSSA() should be fine here.
Ok.
>
>> + i->setPredicate(CC_P, ld->getDef(1));
>> +
>> + stVal = i->getDef(0);
>> + }
>> +
>> + Instruction *st =
>> + bld.mkStore(OP_STORE, TYPE_U32,
>> + bld.mkSymbol(FILE_MEMORY_SHARED, 0, TYPE_U32, 0),
>> + NULL, stVal);
>> + st->setPredicate(CC_P, ld->getDef(1));
>> + st->subOp = NV50_IR_SUBOP_STORE_UNLOCKED;
>> +
>> + // Loop until the lock is acquired.
>> + bld.mkFlow(OP_BRA, tryLockAndSetBB, CC_NOT_P, ld->getDef(1));
>> + tryLockAndSetBB->cfg.attach(&tryLockAndSetBB->cfg, Graph::Edge::BACK);
>> + bld.mkFlow(OP_BRA, joinBB, CC_ALWAYS, NULL);
>
> You need an edge to the joinBB as well, no? (a CROSS edge, I guess).
Mmmh... Yeah, probably.
>
>> +
>> + bld.remove(atom);
>> +
>> + bld.setPosition(joinBB, false);
>> + bld.mkFlow(OP_JOIN, NULL, CC_ALWAYS, NULL)->fixed = 1;
>> +}
>> +
>> bool
>> NVC0LoweringPass::handleATOM(Instruction *atom)
>> {
>> @@ -1044,8 +1137,8 @@ NVC0LoweringPass::handleATOM(Instruction *atom)
>> sv = SV_LBASE;
>> break;
>> case FILE_MEMORY_SHARED:
>> - sv = SV_SBASE;
>> - break;
>> + handleSharedATOM(atom);
>> + return true;
>> default:
>> assert(atom->src(0).getFile() == FILE_MEMORY_GLOBAL);
>> base = loadResInfo64(ind, atom->getSrc(0)->reg.fileIndex * 16);
>> @@ -1072,6 +1165,11 @@ NVC0LoweringPass::handleATOM(Instruction *atom)
>> bool
>> NVC0LoweringPass::handleCasExch(Instruction *cas, bool needCctl)
>> {
>> + if (cas->src(0).getFile() == FILE_MEMORY_SHARED) {
>> + // ATOM_CAS and ATOM_EXCH are handled in handleSharedATOM().
>> + return false;
>> + }
>> +
>> if (cas->subOp != NV50_IR_SUBOP_ATOM_CAS &&
>> cas->subOp != NV50_IR_SUBOP_ATOM_EXCH)
>> return false;
>> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
>> index 09ec7e6..6eb8aff 100644
>> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
>> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
>> @@ -105,6 +105,7 @@ protected:
>> bool handleATOM(Instruction *);
>> bool handleCasExch(Instruction *, bool needCctl);
>> void handleSurfaceOpNVE4(TexInstruction *);
>> + void handleSharedATOM(Instruction *);
>>
>> void checkPredicate(Instruction *);
>>
>> --
>> 2.6.4
>>
>> _______________________________________________
>> mesa-dev mailing list
>> mesa-dev at lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
More information about the mesa-dev
mailing list