[Mesa-dev] [PATCH v2 01/13] nvc0: bind driver cb for compute on c7[] for Kepler

Ilia Mirkin imirkin at alum.mit.edu
Fri Apr 1 05:21:04 UTC 2016


As I recall, we already upload MS offsets for FS. Perhaps reuse the same
spot for these CP ones? What are they used for, anyway? I can't think of
anything offhand, but perhaps OpenCL needs something funky?
On Mar 31, 2016 12:09 PM, "Samuel Pitoiset" <samuel.pitoiset at gmail.com>
wrote:

> Instead of using the screen->parm buffer object, which will be removed,
> upload auxiliary constants to uniform_bo to be consistent with
> what we already do for Fermi.
>
> This breaks surface support (for compute only), but it will be
> properly re-introduced later for ARB_shader_image_load_store.
>
> Signed-off-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
> ---
>  src/gallium/drivers/nouveau/nvc0/nvc0_context.h |  6 ++++
>  src/gallium/drivers/nouveau/nvc0/nvc0_program.c | 11 ++++---
>  src/gallium/drivers/nouveau/nvc0/nve4_compute.c | 40
> ++++++++++++++++---------
>  src/gallium/drivers/nouveau/nvc0/nve4_compute.h | 25 ----------------
>  4 files changed, 37 insertions(+), 45 deletions(-)
>
> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
> b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
> index 31e1272..34dff6a 100644
> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
> @@ -113,6 +113,12 @@
>  /* 8 user clip planes, at 4 32-bits floats each */
>  #define NVC0_CB_AUX_UCP_INFO        0x100
>  #define NVC0_CB_AUX_UCP_SIZE        (PIPE_MAX_CLIP_PLANES * 4 * 4)
> +/* 8 sets of 32-bits pairs MS offsets */
> +#define NVC0_CB_AUX_MS_INFO         0x100 /* CP */
> +#define NVC0_CB_AUX_MS_SIZE         (8 * 2 * 4)
> +/* block/grid size, at 3 32-bits integers each and gridid */
> +#define NVC0_CB_AUX_GRID_INFO       0x140 /* CP */
> +#define NVC0_CB_AUX_GRID_SIZE       (7 * 4)
>  /* 8 sets of 32-bits integer pairs sample offsets */
>  #define NVC0_CB_AUX_SAMPLE_INFO     0x180 /* FP */
>  #define NVC0_CB_AUX_SAMPLE_SIZE     (8 * 4 * 2)
> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
> b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
> index a3433f4..d76b48f 100644
> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
> @@ -540,17 +540,16 @@ nvc0_program_translate(struct nvc0_program *prog,
> uint16_t chipset,
>
>     if (prog->type == PIPE_SHADER_COMPUTE) {
>        if (chipset >= NVISA_GK104_CHIPSET) {
> -         info->io.auxCBSlot = 0;
> -         info->io.texBindBase = NVE4_CP_INPUT_TEX(0);
> -         info->io.suInfoBase = NVE4_CP_INPUT_SUF(0);
> -         info->prop.cp.gridInfoBase = NVE4_CP_INPUT_GRID_INFO(0);
> +         info->io.auxCBSlot = 7;
> +         info->io.texBindBase = NVC0_CB_AUX_TEX_INFO(0);
> +         info->prop.cp.gridInfoBase = NVC0_CB_AUX_GRID_INFO;
>           info->io.bufInfoBase = 0; /* TODO */
>        } else {
>           info->io.bufInfoBase = NVC0_CB_AUX_BUF_INFO(0);
> -         info->io.suInfoBase = 0; /* TODO */
>        }
>        info->io.msInfoCBSlot = 0;
> -      info->io.msInfoBase = NVE4_CP_INPUT_MS_OFFSETS;
> +      info->io.msInfoBase = NVC0_CB_AUX_MS_INFO;
> +      info->io.suInfoBase = 0; /* TODO */
>     } else {
>        if (chipset >= NVISA_GK104_CHIPSET) {
>           info->io.texBindBase = NVC0_CB_AUX_TEX_INFO(0);
> diff --git a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
> b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
> index b3d8414..cae4838 100644
> --- a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
> +++ b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
> @@ -41,6 +41,7 @@ nve4_screen_compute_setup(struct nvc0_screen *screen,
>     int i;
>     int ret;
>     uint32_t obj_class;
> +   uint64_t address;
>
>     switch (dev->chipset & ~0xf) {
>     case 0x100:
> @@ -65,7 +66,7 @@ nve4_screen_compute_setup(struct nvc0_screen *screen,
>        return ret;
>     }
>
> -   ret = nouveau_bo_new(dev, NV_VRAM_DOMAIN(&screen->base), 0,
> NVE4_CP_PARAM_SIZE, NULL,
> +   ret = nouveau_bo_new(dev, NV_VRAM_DOMAIN(&screen->base), 0, 1 << 12,
> NULL,
>                          &screen->parm);
>     if (ret)
>        return ret;
> @@ -128,15 +129,17 @@ nve4_screen_compute_setup(struct nvc0_screen *screen,
>     }
>
>     BEGIN_NVC0(push, NVE4_CP(TEX_CB_INDEX), 1);
> -   PUSH_DATA (push, 0); /* does not interefere with 3D */
> +   PUSH_DATA (push, 7); /* does not interfere with 3D */
>
>     if (obj_class == NVF0_COMPUTE_CLASS)
>        IMMED_NVC0(push, SUBC_CP(0x02c4), 1);
>
> +   address = screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5);
> +
>     /* MS sample coordinate offsets: these do not work with _ALT modes ! */
>     BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
> -   PUSH_DATAh(push, screen->parm->offset + NVE4_CP_INPUT_MS_OFFSETS);
> -   PUSH_DATA (push, screen->parm->offset + NVE4_CP_INPUT_MS_OFFSETS);
> +   PUSH_DATAh(push, address + NVC0_CB_AUX_MS_INFO);
> +   PUSH_DATA (push, address + NVC0_CB_AUX_MS_INFO);
>     BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);
>     PUSH_DATA (push, 64);
>     PUSH_DATA (push, 1);
> @@ -159,7 +162,7 @@ nve4_screen_compute_setup(struct nvc0_screen *screen,
>     PUSH_DATA (push, 3); /* 7 */
>     PUSH_DATA (push, 1);
>
> -#ifdef DEBUG
> +#ifdef NOUVEAU_NVE4_MP_TRAP_HANDLER
>     BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
>     PUSH_DATAh(push, screen->parm->offset + NVE4_CP_INPUT_TRAP_INFO_PTR);
>     PUSH_DATA (push, screen->parm->offset + NVE4_CP_INPUT_TRAP_INFO_PTR);
> @@ -194,6 +197,9 @@ nve4_compute_validate_surfaces(struct nvc0_context
> *nvc0)
>     uint32_t mask;
>     unsigned i;
>     const unsigned t = 1;
> +   uint64_t address;
> +
> +   address = screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5);
>
>     mask = nvc0->surfaces_dirty[t];
>     while (mask) {
> @@ -205,8 +211,8 @@ nve4_compute_validate_surfaces(struct nvc0_context
> *nvc0)
>         * directly instead of via binding points, so we have to supply
> them.
>         */
>        BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
> -      PUSH_DATAh(push, screen->parm->offset + NVE4_CP_INPUT_SUF(i));
> -      PUSH_DATA (push, screen->parm->offset + NVE4_CP_INPUT_SUF(i));
> +      PUSH_DATAh(push, address + NVC0_CB_AUX_BUF_INFO(i));
> +      PUSH_DATA (push, address + NVC0_CB_AUX_BUF_INFO(i));
>        BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);
>        PUSH_DATA (push, 64);
>        PUSH_DATA (push, 1);
> @@ -271,6 +277,7 @@ static void
>  nve4_compute_set_tex_handles(struct nvc0_context *nvc0)
>  {
>     struct nouveau_pushbuf *push = nvc0->base.pushbuf;
> +   struct nvc0_screen *screen = nvc0->screen;
>     uint64_t address;
>     const unsigned s = nvc0_shader_stage(PIPE_SHADER_COMPUTE);
>     unsigned i, n;
> @@ -282,11 +289,11 @@ nve4_compute_set_tex_handles(struct nvc0_context
> *nvc0)
>     n = util_logbase2(dirty) + 1 - i;
>     assert(n);
>
> -   address = nvc0->screen->parm->offset + NVE4_CP_INPUT_TEX(i);
> +   address = screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s);
>
>     BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
> -   PUSH_DATAh(push, address);
> -   PUSH_DATA (push, address);
> +   PUSH_DATAh(push, address + NVC0_CB_AUX_TEX_INFO(i));
> +   PUSH_DATA (push, address + NVC0_CB_AUX_TEX_INFO(i));
>     BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);
>     PUSH_DATA (push, n * 4);
>     PUSH_DATA (push, 0x1);
> @@ -334,6 +341,9 @@ nve4_compute_upload_input(struct nvc0_context *nvc0,
> const void *input,
>     struct nvc0_screen *screen = nvc0->screen;
>     struct nouveau_pushbuf *push = nvc0->base.pushbuf;
>     struct nvc0_program *cp = nvc0->compprog;
> +   uint64_t address;
> +
> +   address = screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5);
>
>     if (cp->parm_size) {
>        BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
> @@ -347,8 +357,8 @@ nve4_compute_upload_input(struct nvc0_context *nvc0,
> const void *input,
>        PUSH_DATAp(push, input, cp->parm_size / 4);
>     }
>     BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
> -   PUSH_DATAh(push, screen->parm->offset + NVE4_CP_INPUT_GRID_INFO(0));
> -   PUSH_DATA (push, screen->parm->offset + NVE4_CP_INPUT_GRID_INFO(0));
> +   PUSH_DATAh(push, address + NVC0_CB_AUX_GRID_INFO);
> +   PUSH_DATA (push, address + NVC0_CB_AUX_GRID_INFO);
>     BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);
>     PUSH_DATA (push, 7 * 4);
>     PUSH_DATA (push, 0x1);
> @@ -408,7 +418,9 @@ nve4_compute_setup_launch_desc(struct nvc0_context
> *nvc0,
>        if (nvc0->constbuf[s][i].u.buf)
>           nve4_cp_launch_desc_set_ctx_cb(desc, i + 1,
> &nvc0->constbuf[s][i]);
>     }
> -   nve4_cp_launch_desc_set_cb(desc, 0, screen->parm, 0,
> NVE4_CP_INPUT_SIZE);
> +   nve4_cp_launch_desc_set_cb(desc, 0, screen->parm, 0, 1 << 12);
> +   nve4_cp_launch_desc_set_cb(desc, 7, screen->uniform_bo,
> +                              NVC0_CB_AUX_INFO(5), 1 << 10);
>  }
>
>  static inline struct nve4_cp_launch_desc *
> @@ -495,7 +507,7 @@ nve4_compute_validate_textures(struct nvc0_context
> *nvc0)
>     struct nouveau_pushbuf *push = nvc0->base.pushbuf;
>     const unsigned s = 5;
>     unsigned i;
> -   uint32_t commands[2][NVE4_CP_INPUT_TEX_MAX];
> +   uint32_t commands[2][32];
>     unsigned n[2] = { 0, 0 };
>
>     for (i = 0; i < nvc0->num_textures[s]; ++i) {
> diff --git a/src/gallium/drivers/nouveau/nvc0/nve4_compute.h
> b/src/gallium/drivers/nouveau/nvc0/nve4_compute.h
> index 84f8593..dcafbed 100644
> --- a/src/gallium/drivers/nouveau/nvc0/nve4_compute.h
> +++ b/src/gallium/drivers/nouveau/nvc0/nve4_compute.h
> @@ -4,31 +4,6 @@
>
>  #include "nvc0/nve4_compute.xml.h"
>
> -/* Input space is implemented as c0[], to which we bind the screen->parm
> bo.
> - */
> -#define NVE4_CP_INPUT_USER           0x0000
> -#define NVE4_CP_INPUT_USER_LIMIT     0x1000
> -#define NVE4_CP_INPUT_GRID_INFO(i)  (0x1000 + (i) * 4)
> -#define NVE4_CP_INPUT_NTID(i)       (0x1000 + (i) * 4)
> -#define NVE4_CP_INPUT_NCTAID(i)     (0x100c + (i) * 4)
> -#define NVE4_CP_INPUT_GRIDID         0x1018
> -#define NVE4_CP_INPUT_TEX(i)        (0x1040 + (i) * 4)
> -#define NVE4_CP_INPUT_TEX_STRIDE     4
> -#define NVE4_CP_INPUT_TEX_MAX        32
> -#define NVE4_CP_INPUT_MS_OFFSETS     0x10c0
> -#define NVE4_CP_INPUT_SUF_STRIDE     64
> -#define NVE4_CP_INPUT_SUF(i)        (0x1100 + (i) *
> NVE4_CP_INPUT_SUF_STRIDE)
> -#define NVE4_CP_INPUT_SUF_MAX        32
> -#define NVE4_CP_INPUT_TRAP_INFO_PTR  0x1900
> -#define NVE4_CP_INPUT_TEMP_PTR       0x1908
> -#define NVE4_CP_INPUT_MP_TEMP_SIZE   0x1910
> -#define NVE4_CP_INPUT_WARP_TEMP_SIZE 0x1914
> -#define NVE4_CP_INPUT_CSTACK_SIZE    0x1918
> -#define NVE4_CP_INPUT_SIZE           0x1a00
> -#define NVE4_CP_PARAM_TRAP_INFO      0x2000
> -#define NVE4_CP_PARAM_TRAP_INFO_SZ  (1 << 16)
> -#define NVE4_CP_PARAM_SIZE          (NVE4_CP_PARAM_TRAP_INFO + (1 << 16))
> -
>  struct nve4_cp_launch_desc
>  {
>     u32 unk0[8];
> --
> 2.7.4
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20160401/2219f9b0/attachment.html>


More information about the mesa-dev mailing list