[Mesa-dev] [PATCH] nvc0/lowering: Handle conversions to U64/S64 manually
Pierre Moreau
pierre.morrow at free.fr
Sat Mar 19 21:56:57 UTC 2016
Generating a `cvt u32 $r0 u64 $r1d` or a `cvt u64 $r0d u32 $r2` makes the GPU
unhappy. Instead, manually handle the conversion between 64-bit and 32-bit
values, and use `cvt` to convert between the original target (resp. source)
and the 32-bit value. This happens to be the behaviour of NVIDIA's driver.
Signed-off-by: Pierre Moreau <pierre.morrow at free.fr>
---
.../nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 59 ++++++++++++++++++++++
.../nouveau/codegen/nv50_ir_lowering_nvc0.h | 1 +
2 files changed, 60 insertions(+)
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
index 2719f2c..c419a68 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
@@ -1859,6 +1859,63 @@ NVC0LoweringPass::handleOUT(Instruction *i)
return true;
}
+/**
+ * Lower integer OP_CVT instructions whose source or destination is 64-bit.
+ *
+ * Only integer conversions between types of the same signedness are
+ * rewritten; every other conversion is left untouched.
+ *
+ *  - 64-bit source: the value is clamped to the 32-bit range first when
+ *    i->saturate is set, then split into two 32-bit halves. The low half
+ *    either becomes the result directly (32-bit destination, `i` is deleted)
+ *    or becomes the new 32-bit source of `i`.
+ *  - 64-bit destination: `i` is narrowed to produce a 32-bit value (or is
+ *    deleted when the source is already 32 bits wide) and the 64-bit result
+ *    is built by merging that value with a zero- or sign-extended high word.
+ *
+ * @param i  the OP_CVT instruction to lower; it may be deleted by this call
+ * @return   true in all cases
+ */
+bool
+NVC0LoweringPass::handleCVT(Instruction *i)
+{
+   // Nothing to lower here. Return true like the neighbouring handlers do.
+   // NOTE(review): a false return from visit() is believed to stop the
+   // processing of the remaining instructions -- confirm against Pass::doRun.
+   if (isFloatType(i->dType) || isFloatType(i->sType) ||
+       (isSignedIntType(i->dType) xor isSignedIntType(i->sType)))
+      return true;
+
+   if (typeSizeof(i->sType) == 8) {
+      Value *toSplit = i->getSrc(0);
+      if (i->saturate) {
+         // Clamp to the 32-bit range before truncating. The bounds are
+         // spelled out as 64-bit constants so that the signed minimum is
+         // sign-extended to the full width of the 64-bit MIN/MAX.
+         const bool srcSigned = isSignedType(i->sType);
+         const uint64_t lowerBound = srcSigned ?
+            static_cast<uint64_t>(static_cast<int64_t>(INT32_MIN)) :
+            static_cast<uint64_t>(0);
+         const uint64_t upperBound = srcSigned ?
+            static_cast<uint64_t>(INT32_MAX) :
+            static_cast<uint64_t>(UINT32_MAX);
+         Value *minValue = bld.loadImm(bld.getSSA(8), lowerBound);
+         Value *maxValue = bld.loadImm(bld.getSSA(8), upperBound);
+
+         Value *minRes = bld.mkOp2v(OP_MAX, i->sType, bld.getSSA(8), toSplit,
+                                    minValue);
+         toSplit = bld.mkOp2v(OP_MIN, i->sType, bld.getSSA(8), minRes,
+                              maxValue);
+      }
+
+      // Only the low 32 bits of the (possibly clamped) source are kept.
+      Value *value32[2] = { bld.getSSA(), bld.getSSA() };
+      bld.mkSplit(value32, 4, toSplit);
+      if (typeSizeof(i->dType) == 4) {
+         // 64-bit -> 32-bit: the low half already is the result.
+         bld.mkMov(i->getDef(0), value32[0], i->dType);
+         delete_Instruction(prog, i);
+         return true;
+      }
+
+      // Otherwise keep the cvt, but feed it the 32-bit low half.
+      i->setSrc(0, bld.getSSA());
+      i->sType = isSignedIntType(i->dType) ? TYPE_S32 : TYPE_U32;
+      bld.mkMov(i->getSrc(0), value32[0], i->sType);
+   } else if (typeSizeof(i->dType) == 8) {
+      bld.setPosition(i, true);
+      Value *res = i->getDef(0);
+      Value *low32 = i->getSrc(0);
+      const DataType resType = i->dType;
+      const bool srcIs32 = typeSizeof(i->sType) == 4;
+
+      if (typeSizeof(i->sType) <= 2) {
+         // Let the cvt widen the 8/16-bit source to 32 bits only.
+         i->dType = isSignedIntType(i->dType) ? TYPE_S32 : TYPE_U32;
+         low32 = bld.getSSA();
+         i->setDef(0, low32);
+      }
+
+      // High word: zero for unsigned sources; for signed sources replicate
+      // the sign bit of the low word (arithmetic shift right by 31) rather
+      // than forcing all ones, which is only right for negative values.
+      // This is emitted before `i` may be deleted below, keeping the
+      // original order of "emit first, delete afterwards".
+      Value *high32;
+      if (isSignedType(i->sType)) {
+         high32 = bld.mkOp2v(OP_SHR, TYPE_S32, bld.getSSA(), low32,
+                             bld.loadImm(bld.getSSA(), 31u));
+      } else {
+         high32 = bld.loadImm(bld.getSSA(), 0u);
+      }
+
+      // A 32-bit source needs no cvt at all: its value is the low word.
+      if (srcIs32)
+         delete_Instruction(prog, i);
+
+      Value *merged64 = bld.mkOp2v(OP_MERGE, resType, bld.getSSA(8), low32,
+                                   high32);
+      bld.mkMov(res, merged64, resType);
+   }
+
+   return true;
+}
+
// Generate a binary predicate if an instruction is predicated by
// e.g. an f32 value.
void
@@ -1894,6 +1951,8 @@ NVC0LoweringPass::visit(Instruction *i)
checkPredicate(i);
switch (i->op) {
+ case OP_CVT:
+ return handleCVT(i);
case OP_TEX:
case OP_TXB:
case OP_TXL:
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
index 6eb8aff..9fc24d9 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
@@ -96,6 +96,7 @@ protected:
bool handleMOD(Instruction *);
bool handleSQRT(Instruction *);
bool handlePOW(Instruction *);
+ bool handleCVT(Instruction *);
bool handleTEX(TexInstruction *);
bool handleTXD(TexInstruction *);
bool handleTXQ(TexInstruction *);
--
2.7.4
More information about the mesa-dev
mailing list