[Mesa-dev] [PATCH v2 01/13] nvc0: bind driver cb for compute on c7[] for Kepler

Samuel Pitoiset samuel.pitoiset at gmail.com
Fri Apr 1 14:20:09 UTC 2016



On 04/01/2016 07:21 AM, Ilia Mirkin wrote:
> As I recall, we already upload ms offsets for fs. Perhaps reuse the same
> spot for these CP ones? What are they used for, anyways? Can't think of
> anything offhand, but perhaps opencl needs something funky?

Where do we upload the MS offsets for fs? I ask because msInfoBase is
set to 0 for 3D.

For compute, they are used in NVC0LoweringPass::adjustCoordinatesMS(),
which is called from processSurfaceCoordsNVE4(), so I think they are
useful for compute. :-)

>
> On Mar 31, 2016 12:09 PM, "Samuel Pitoiset" <samuel.pitoiset at gmail.com
> <mailto:samuel.pitoiset at gmail.com>> wrote:
>
>     Instead of using the screen->parm buffer object, which will be removed,
>     upload auxiliary constants to uniform_bo to be consistent with
>     what we already do for Fermi.
>
>     This breaks surface support (for compute only), but it will be
>     properly re-introduced later for ARB_shader_image_load_store.
>
>     Signed-off-by: Samuel Pitoiset <samuel.pitoiset at gmail.com
>     <mailto:samuel.pitoiset at gmail.com>>
>     ---
>       src/gallium/drivers/nouveau/nvc0/nvc0_context.h |  6 ++++
>       src/gallium/drivers/nouveau/nvc0/nvc0_program.c | 11 ++++---
>       src/gallium/drivers/nouveau/nvc0/nve4_compute.c | 40
>     ++++++++++++++++---------
>       src/gallium/drivers/nouveau/nvc0/nve4_compute.h | 25 ----------------
>       4 files changed, 37 insertions(+), 45 deletions(-)
>
>     diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
>     b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
>     index 31e1272..34dff6a 100644
>     --- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
>     +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
>     @@ -113,6 +113,12 @@
>       /* 8 user clip planes, at 4 32-bits floats each */
>       #define NVC0_CB_AUX_UCP_INFO        0x100
>       #define NVC0_CB_AUX_UCP_SIZE        (PIPE_MAX_CLIP_PLANES * 4 * 4)
>     +/* 8 sets of 32-bits pairs MS offsets */
>     +#define NVC0_CB_AUX_MS_INFO         0x100 /* CP */
>     +#define NVC0_CB_AUX_MS_SIZE         (8 * 2 * 4)
>     +/* block/grid size, at 3 32-bits integers each and gridid */
>     +#define NVC0_CB_AUX_GRID_INFO       0x140 /* CP */
>     +#define NVC0_CB_AUX_GRID_SIZE       (7 * 4)
>       /* 8 sets of 32-bits integer pairs sample offsets */
>       #define NVC0_CB_AUX_SAMPLE_INFO     0x180 /* FP */
>       #define NVC0_CB_AUX_SAMPLE_SIZE     (8 * 4 * 2)
>     diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
>     b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
>     index a3433f4..d76b48f 100644
>     --- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
>     +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
>     @@ -540,17 +540,16 @@ nvc0_program_translate(struct nvc0_program
>     *prog, uint16_t chipset,
>
>          if (prog->type == PIPE_SHADER_COMPUTE) {
>             if (chipset >= NVISA_GK104_CHIPSET) {
>     -         info->io.auxCBSlot = 0;
>     -         info->io.texBindBase = NVE4_CP_INPUT_TEX(0);
>     -         info->io.suInfoBase = NVE4_CP_INPUT_SUF(0);
>     -         info->prop.cp.gridInfoBase = NVE4_CP_INPUT_GRID_INFO(0);
>     +         info->io.auxCBSlot = 7;
>     +         info->io.texBindBase = NVC0_CB_AUX_TEX_INFO(0);
>     +         info->prop.cp.gridInfoBase = NVC0_CB_AUX_GRID_INFO;
>                info->io.bufInfoBase = 0; /* TODO */
>             } else {
>                info->io.bufInfoBase = NVC0_CB_AUX_BUF_INFO(0);
>     -         info->io.suInfoBase = 0; /* TODO */
>             }
>             info->io.msInfoCBSlot = 0;
>     -      info->io.msInfoBase = NVE4_CP_INPUT_MS_OFFSETS;
>     +      info->io.msInfoBase = NVC0_CB_AUX_MS_INFO;
>     +      info->io.suInfoBase = 0; /* TODO */
>          } else {
>             if (chipset >= NVISA_GK104_CHIPSET) {
>                info->io.texBindBase = NVC0_CB_AUX_TEX_INFO(0);
>     diff --git a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
>     b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
>     index b3d8414..cae4838 100644
>     --- a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
>     +++ b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
>     @@ -41,6 +41,7 @@ nve4_screen_compute_setup(struct nvc0_screen *screen,
>          int i;
>          int ret;
>          uint32_t obj_class;
>     +   uint64_t address;
>
>          switch (dev->chipset & ~0xf) {
>          case 0x100:
>     @@ -65,7 +66,7 @@ nve4_screen_compute_setup(struct nvc0_screen *screen,
>             return ret;
>          }
>
>     -   ret = nouveau_bo_new(dev, NV_VRAM_DOMAIN(&screen->base), 0,
>     NVE4_CP_PARAM_SIZE, NULL,
>     +   ret = nouveau_bo_new(dev, NV_VRAM_DOMAIN(&screen->base), 0, 1 <<
>     12, NULL,
>                               &screen->parm);
>          if (ret)
>             return ret;
>     @@ -128,15 +129,17 @@ nve4_screen_compute_setup(struct nvc0_screen
>     *screen,
>          }
>
>          BEGIN_NVC0(push, NVE4_CP(TEX_CB_INDEX), 1);
>     -   PUSH_DATA (push, 0); /* does not interefere with 3D */
>     +   PUSH_DATA (push, 7); /* does not interfere with 3D */
>
>          if (obj_class == NVF0_COMPUTE_CLASS)
>             IMMED_NVC0(push, SUBC_CP(0x02c4), 1);
>
>     +   address = screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5);
>     +
>          /* MS sample coordinate offsets: these do not work with _ALT
>     modes ! */
>          BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
>     -   PUSH_DATAh(push, screen->parm->offset + NVE4_CP_INPUT_MS_OFFSETS);
>     -   PUSH_DATA (push, screen->parm->offset + NVE4_CP_INPUT_MS_OFFSETS);
>     +   PUSH_DATAh(push, address + NVC0_CB_AUX_MS_INFO);
>     +   PUSH_DATA (push, address + NVC0_CB_AUX_MS_INFO);
>          BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);
>          PUSH_DATA (push, 64);
>          PUSH_DATA (push, 1);
>     @@ -159,7 +162,7 @@ nve4_screen_compute_setup(struct nvc0_screen
>     *screen,
>          PUSH_DATA (push, 3); /* 7 */
>          PUSH_DATA (push, 1);
>
>     -#ifdef DEBUG
>     +#ifdef NOUVEAU_NVE4_MP_TRAP_HANDLER
>          BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
>          PUSH_DATAh(push, screen->parm->offset +
>     NVE4_CP_INPUT_TRAP_INFO_PTR);
>          PUSH_DATA (push, screen->parm->offset +
>     NVE4_CP_INPUT_TRAP_INFO_PTR);
>     @@ -194,6 +197,9 @@ nve4_compute_validate_surfaces(struct
>     nvc0_context *nvc0)
>          uint32_t mask;
>          unsigned i;
>          const unsigned t = 1;
>     +   uint64_t address;
>     +
>     +   address = screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5);
>
>          mask = nvc0->surfaces_dirty[t];
>          while (mask) {
>     @@ -205,8 +211,8 @@ nve4_compute_validate_surfaces(struct
>     nvc0_context *nvc0)
>              * directly instead of via binding points, so we have to
>     supply them.
>              */
>             BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
>     -      PUSH_DATAh(push, screen->parm->offset + NVE4_CP_INPUT_SUF(i));
>     -      PUSH_DATA (push, screen->parm->offset + NVE4_CP_INPUT_SUF(i));
>     +      PUSH_DATAh(push, address + NVC0_CB_AUX_BUF_INFO(i));
>     +      PUSH_DATA (push, address + NVC0_CB_AUX_BUF_INFO(i));
>             BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);
>             PUSH_DATA (push, 64);
>             PUSH_DATA (push, 1);
>     @@ -271,6 +277,7 @@ static void
>       nve4_compute_set_tex_handles(struct nvc0_context *nvc0)
>       {
>          struct nouveau_pushbuf *push = nvc0->base.pushbuf;
>     +   struct nvc0_screen *screen = nvc0->screen;
>          uint64_t address;
>          const unsigned s = nvc0_shader_stage(PIPE_SHADER_COMPUTE);
>          unsigned i, n;
>     @@ -282,11 +289,11 @@ nve4_compute_set_tex_handles(struct
>     nvc0_context *nvc0)
>          n = util_logbase2(dirty) + 1 - i;
>          assert(n);
>
>     -   address = nvc0->screen->parm->offset + NVE4_CP_INPUT_TEX(i);
>     +   address = screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s);
>
>          BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
>     -   PUSH_DATAh(push, address);
>     -   PUSH_DATA (push, address);
>     +   PUSH_DATAh(push, address + NVC0_CB_AUX_TEX_INFO(i));
>     +   PUSH_DATA (push, address + NVC0_CB_AUX_TEX_INFO(i));
>          BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);
>          PUSH_DATA (push, n * 4);
>          PUSH_DATA (push, 0x1);
>     @@ -334,6 +341,9 @@ nve4_compute_upload_input(struct nvc0_context
>     *nvc0, const void *input,
>          struct nvc0_screen *screen = nvc0->screen;
>          struct nouveau_pushbuf *push = nvc0->base.pushbuf;
>          struct nvc0_program *cp = nvc0->compprog;
>     +   uint64_t address;
>     +
>     +   address = screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5);
>
>          if (cp->parm_size) {
>             BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
>     @@ -347,8 +357,8 @@ nve4_compute_upload_input(struct nvc0_context
>     *nvc0, const void *input,
>             PUSH_DATAp(push, input, cp->parm_size / 4);
>          }
>          BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
>     -   PUSH_DATAh(push, screen->parm->offset + NVE4_CP_INPUT_GRID_INFO(0));
>     -   PUSH_DATA (push, screen->parm->offset + NVE4_CP_INPUT_GRID_INFO(0));
>     +   PUSH_DATAh(push, address + NVC0_CB_AUX_GRID_INFO);
>     +   PUSH_DATA (push, address + NVC0_CB_AUX_GRID_INFO);
>          BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);
>          PUSH_DATA (push, 7 * 4);
>          PUSH_DATA (push, 0x1);
>     @@ -408,7 +418,9 @@ nve4_compute_setup_launch_desc(struct
>     nvc0_context *nvc0,
>             if (nvc0->constbuf[s][i].u.buf)
>                nve4_cp_launch_desc_set_ctx_cb(desc, i + 1,
>     &nvc0->constbuf[s][i]);
>          }
>     -   nve4_cp_launch_desc_set_cb(desc, 0, screen->parm, 0,
>     NVE4_CP_INPUT_SIZE);
>     +   nve4_cp_launch_desc_set_cb(desc, 0, screen->parm, 0, 1 << 12);
>     +   nve4_cp_launch_desc_set_cb(desc, 7, screen->uniform_bo,
>     +                              NVC0_CB_AUX_INFO(5), 1 << 10);
>       }
>
>       static inline struct nve4_cp_launch_desc *
>     @@ -495,7 +507,7 @@ nve4_compute_validate_textures(struct
>     nvc0_context *nvc0)
>          struct nouveau_pushbuf *push = nvc0->base.pushbuf;
>          const unsigned s = 5;
>          unsigned i;
>     -   uint32_t commands[2][NVE4_CP_INPUT_TEX_MAX];
>     +   uint32_t commands[2][32];
>          unsigned n[2] = { 0, 0 };
>
>          for (i = 0; i < nvc0->num_textures[s]; ++i) {
>     diff --git a/src/gallium/drivers/nouveau/nvc0/nve4_compute.h
>     b/src/gallium/drivers/nouveau/nvc0/nve4_compute.h
>     index 84f8593..dcafbed 100644
>     --- a/src/gallium/drivers/nouveau/nvc0/nve4_compute.h
>     +++ b/src/gallium/drivers/nouveau/nvc0/nve4_compute.h
>     @@ -4,31 +4,6 @@
>
>       #include "nvc0/nve4_compute.xml.h"
>
>     -/* Input space is implemented as c0[], to which we bind the
>     screen->parm bo.
>     - */
>     -#define NVE4_CP_INPUT_USER           0x0000
>     -#define NVE4_CP_INPUT_USER_LIMIT     0x1000
>     -#define NVE4_CP_INPUT_GRID_INFO(i)  (0x1000 + (i) * 4)
>     -#define NVE4_CP_INPUT_NTID(i)       (0x1000 + (i) * 4)
>     -#define NVE4_CP_INPUT_NCTAID(i)     (0x100c + (i) * 4)
>     -#define NVE4_CP_INPUT_GRIDID         0x1018
>     -#define NVE4_CP_INPUT_TEX(i)        (0x1040 + (i) * 4)
>     -#define NVE4_CP_INPUT_TEX_STRIDE     4
>     -#define NVE4_CP_INPUT_TEX_MAX        32
>     -#define NVE4_CP_INPUT_MS_OFFSETS     0x10c0
>     -#define NVE4_CP_INPUT_SUF_STRIDE     64
>     -#define NVE4_CP_INPUT_SUF(i)        (0x1100 + (i) *
>     NVE4_CP_INPUT_SUF_STRIDE)
>     -#define NVE4_CP_INPUT_SUF_MAX        32
>     -#define NVE4_CP_INPUT_TRAP_INFO_PTR  0x1900
>     -#define NVE4_CP_INPUT_TEMP_PTR       0x1908
>     -#define NVE4_CP_INPUT_MP_TEMP_SIZE   0x1910
>     -#define NVE4_CP_INPUT_WARP_TEMP_SIZE 0x1914
>     -#define NVE4_CP_INPUT_CSTACK_SIZE    0x1918
>     -#define NVE4_CP_INPUT_SIZE           0x1a00
>     -#define NVE4_CP_PARAM_TRAP_INFO      0x2000
>     -#define NVE4_CP_PARAM_TRAP_INFO_SZ  (1 << 16)
>     -#define NVE4_CP_PARAM_SIZE          (NVE4_CP_PARAM_TRAP_INFO + (1
>     << 16))
>     -
>       struct nve4_cp_launch_desc
>       {
>          u32 unk0[8];
>     --
>     2.7.4
>
>     _______________________________________________
>     mesa-dev mailing list
>     mesa-dev at lists.freedesktop.org <mailto:mesa-dev at lists.freedesktop.org>
>     https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>

-- 
-Samuel


More information about the mesa-dev mailing list