[Mesa-dev] [PATCH v3 07/12] nvc0/ir: add support for compute UBOs on Kepler
Samuel Pitoiset
samuel.pitoiset at gmail.com
Fri Apr 1 16:56:03 UTC 2016
Make sure to avoid out of bounds access in presence of indirect
array indexing by loading the size from the driver constant buffer.
Changes from v2:
- add a todo for clamping the offset to the max possible const buf
- rework the lowering pass
Signed-off-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
---
.../nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 55 +++++++++++++++++++++-
.../nouveau/codegen/nv50_ir_lowering_nvc0.h | 3 ++
2 files changed, 57 insertions(+), 1 deletion(-)
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
index 850147b..5a08fc7 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
@@ -1322,6 +1322,24 @@ NVC0LoweringPass::loadBufLength32(Value *ptr, uint32_t off)
}
inline Value *
+NVC0LoweringPass::loadUboInfo32(Value *ptr, uint32_t off)
+{
+ return loadResInfo32(ptr, off, prog->driver->io.uboInfoBase);
+}
+
+inline Value *
+NVC0LoweringPass::loadUboInfo64(Value *ptr, uint32_t off)
+{
+ return loadResInfo64(ptr, off, prog->driver->io.uboInfoBase);
+}
+
+inline Value *
+NVC0LoweringPass::loadUboLength32(Value *ptr, uint32_t off)
+{
+ return loadResLength32(ptr, off, prog->driver->io.uboInfoBase);
+}
+
+inline Value *
NVC0LoweringPass::loadMsInfo32(Value *ptr, uint32_t off)
{
uint8_t b = prog->driver->io.msInfoCBSlot;
@@ -1711,7 +1729,42 @@ NVC0LoweringPass::handleLDST(Instruction *i)
assert(prog->getType() != Program::TYPE_FRAGMENT); // INTERP
}
} else if (i->src(0).getFile() == FILE_MEMORY_CONST) {
- if (i->src(0).isIndirect(1)) {
+ if (targ->getChipset() >= NVISA_GK104_CHIPSET &&
+ prog->getType() == Program::TYPE_COMPUTE) {
+ // The launch descriptor only allows to set up 8 CBs, but OpenGL
+ // requires at least 12 UBOs. To bypass this limitation, we store the
+ // addrs into the driver constbuf and we directly load from the global
+ // memory.
+ int8_t fileIndex = i->getSrc(0)->reg.fileIndex - 1;
+ Value *ind = i->getIndirect(0, 1);
+ Value *ptr = loadUboInfo64(ind, fileIndex * 16);
+
+ // TODO: clamp the offset to the maximum number of const buf.
+ if (i->src(0).isIndirect(1)) {
+ Value *offset = bld.loadImm(NULL, i->getSrc(0)->reg.data.offset + typeSizeof(i->sType));
+ Value *length = loadUboLength32(ind, fileIndex * 16);
+ Value *pred = new_LValue(func, FILE_PREDICATE);
+ if (i->src(0).isIndirect(0)) {
+ bld.mkOp2(OP_ADD, TYPE_U64, ptr, ptr, i->getIndirect(0, 0));
+ bld.mkOp2(OP_ADD, TYPE_U32, offset, offset, i->getIndirect(0, 0));
+ }
+ i->getSrc(0)->reg.file = FILE_MEMORY_GLOBAL;
+ i->setIndirect(0, 1, NULL);
+ i->setIndirect(0, 0, ptr);
+ bld.mkCmp(OP_SET, CC_GT, TYPE_U32, pred, TYPE_U32, offset, length);
+ i->setPredicate(CC_NOT_P, pred);
+ if (i->defExists(0)) {
+ bld.mkMov(i->getDef(0), bld.mkImm(0));
+ }
+ } else if (i->getSrc(0)->reg.fileIndex > 0) {
+ if (i->src(0).isIndirect(0)) {
+ bld.mkOp2(OP_ADD, TYPE_U64, ptr, ptr, i->getIndirect(0, 0));
+ }
+ i->getSrc(0)->reg.file = FILE_MEMORY_GLOBAL;
+ i->setIndirect(0, 1, NULL);
+ i->setIndirect(0, 0, ptr);
+ }
+ } else if (i->src(0).isIndirect(1)) {
Value *ptr;
if (i->src(0).isIndirect(0))
ptr = bld.mkOp3v(OP_INSBF, TYPE_U32, bld.getSSA(),
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
index be81d29..aa19249 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
@@ -127,6 +127,9 @@ private:
Value *loadBufInfo32(Value *ptr, uint32_t off);
Value *loadBufInfo64(Value *ptr, uint32_t off);
Value *loadBufLength32(Value *ptr, uint32_t off);
+ Value *loadUboInfo32(Value *ptr, uint32_t off);
+ Value *loadUboInfo64(Value *ptr, uint32_t off);
+ Value *loadUboLength32(Value *ptr, uint32_t off);
Value *loadMsInfo32(Value *ptr, uint32_t off);
Value *loadTexHandle(Value *ptr, unsigned int slot);
--
2.7.4
More information about the mesa-dev
mailing list