[Mesa-dev] [PATCH 05/11] nvc0: allow using more than 7 UBOs for compute on Kepler

Samuel Pitoiset samuel.pitoiset at gmail.com
Sat Feb 27 14:02:01 UTC 2016


The launch descriptor only allows 8 CBs to be set up, but OpenGL
requires at least 14 UBOs. To work around this limitation, we store
the addresses of the remaining UBOs in the driver constbuf and load
directly from global memory.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
---
 .../drivers/nouveau/codegen/nv50_ir_driver.h       |  1 +
 .../nouveau/codegen/nv50_ir_lowering_nvc0.cpp      | 22 +++++++++++++++++++
 src/gallium/drivers/nouveau/nvc0/nvc0_context.h    |  6 +++++-
 src/gallium/drivers/nouveau/nvc0/nvc0_program.c    |  1 +
 src/gallium/drivers/nouveau/nvc0/nve4_compute.c    | 25 ++++++++++++++++++++++
 5 files changed, 54 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
index 479e426..a66aa67 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
@@ -183,6 +183,7 @@ struct nv50_ir_prog_info
       uint16_t sampleInfoBase;   /* base address for sample positions */
       uint8_t msInfoCBSlot;      /* cX[] used for multisample info */
       uint16_t msInfoBase;       /* base address for multisample info */
+      uint16_t uboInfoBase;      /* base address for compute UBOs (gk104+) */
    } io;
 
    /* driver callback to assign input/output locations */
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
index d6dfed3..2928963 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
@@ -1997,6 +1997,28 @@ NVC0LoweringPass::visit(Instruction *i)
             i->setIndirect(0, 0, ptr);
             i->subOp = NV50_IR_SUBOP_LDC_IS;
          }
+
+         if (targ->getChipset() >= NVISA_GK104_CHIPSET &&
+             prog->getType() == Program::TYPE_COMPUTE) {
+            /* The launch descriptor only allows 8 CBs to be set up, but OpenGL
+             * requires at least 14 UBOs. To work around this limitation, we
+             * store the addresses in the driver constbuf and load directly
+             * from global memory. */
+            if (i->getSrc(0)->reg.fileIndex >= 7) {
+               uint32_t addr = prog->driver->io.uboInfoBase;
+               uint8_t b = prog->driver->io.resInfoCBSlot;
+
+               addr += (i->getSrc(0)->reg.fileIndex % 7) * 0x8;
+
+               Instruction *ld = bld.mkLoad(TYPE_U64, bld.getSSA(8, FILE_GPR),
+                  bld.mkSymbol(FILE_MEMORY_CONST, b, TYPE_U32, addr), NULL);
+
+               bld.mkLoad(i->dType, i->getDef(0),
+                          bld.mkSymbol(FILE_MEMORY_GLOBAL, 0, TYPE_U32, 0),
+                                       ld->getDef(0));
+               bld.remove(i);
+            }
+         }
       } else if (i->src(0).getFile() == FILE_SHADER_OUTPUT) {
          assert(prog->getType() == Program::TYPE_TESSELLATION_CONTROL);
          i->op = OP_VFETCH;
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
index dcb0bda..06c1fc6 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
@@ -91,7 +91,8 @@
 #define NVC0_BIND_CP_SCREEN      51
 #define NVC0_BIND_CP_QUERY       52
 #define NVC0_BIND_CP_BUF         53
-#define NVC0_BIND_CP_COUNT       54
+#define NVC0_BIND_CP_UBO         54
+#define NVC0_BIND_CP_COUNT       55
 
 /* bufctx for other operations */
 #define NVC0_BIND_2D            0
@@ -116,6 +117,9 @@
 /* 8 sets of 32-buts pairs MS offsets */
 #define NVC0_CB_AUX_MS_INFO         0x100 /* CP */
 #define NVC0_CB_AUX_MS_SIZE         (8 * 2 * 4)
+/* 7 sets of 32-bit integer pairs (64-bit UBO addresses) */
+#define NVC0_CB_AUX_UBO_INFO        0x140 /* CP */
+#define NVC0_CB_AUX_UBO_SIZE        (7 * 2 * 4)
 /* 8 sets of 32-bits integer pairs sample offsets */
 #define NVC0_CB_AUX_SAMPLE_INFO     0x180 /* FP */
 #define NVC0_CB_AUX_SAMPLE_SIZE     (8 * 4 * 2)
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
index afb909c..aba0eda 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
@@ -544,6 +544,7 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset,
          info->io.texBindBase = NVC0_CB_AUX_TEX_INFO(0);
          info->io.suInfoBase = NVC0_CB_AUX_SUF_INFO(0);
          info->prop.cp.gridInfoBase = NVC0_CB_AUX_GRID_INFO;
+         info->io.uboInfoBase = NVC0_CB_AUX_UBO_INFO;
       } else {
          info->io.resInfoCBSlot = 15;
          info->io.suInfoBase = 0; /* TODO */
diff --git a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
index 557dbdc..2640e0f 100644
--- a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
+++ b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
@@ -486,7 +486,9 @@ nve4_compute_setup_launch_desc(struct nvc0_context *nvc0,
                                const uint *grid_layout)
 {
    const struct nvc0_screen *screen = nvc0->screen;
+   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
    const struct nvc0_program *cp = nvc0->compprog;
+   uint32_t address;
    unsigned i;
 
    nve4_cp_launch_desc_init_default(desc);
@@ -521,6 +523,29 @@ nve4_compute_setup_launch_desc(struct nvc0_context *nvc0,
    }
    nve4_cp_launch_desc_set_cb(desc, 7, screen->uniform_bo,
                               NVC0_CB_AUX_INFO(5), 1 << 10);
+
+   address = nvc0->screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5);
+
+   BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
+   PUSH_DATAh(push, address + NVC0_CB_AUX_UBO_INFO);
+   PUSH_DATA (push, address + NVC0_CB_AUX_UBO_INFO);
+   BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);
+   PUSH_DATA (push, 7 * 2 * 4);
+   PUSH_DATA (push, 0x1);
+   BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + 7 * 2);
+   PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1));
+
+   for (; i < NVC0_MAX_PIPE_CONSTBUFS; i++) {
+      struct nv04_resource *res = nv04_resource(nvc0->constbuf[5][i].u.buf);
+      if (res) {
+         PUSH_DATA (push, res->address + nvc0->constbuf[5][i].offset);
+         PUSH_DATAh(push, res->address + nvc0->constbuf[5][i].offset);
+         BCTX_REFN(nvc0->bufctx_cp, CP_UBO, res, RD);
+      } else {
+         PUSH_DATA (push, 0);
+         PUSH_DATA (push, 0);
+      }
+   }
 }
 
 static inline struct nve4_cp_launch_desc *
-- 
2.7.1



More information about the mesa-dev mailing list