[Mesa-dev] [PATCH v2 4/4] nouveau: Add support for SV_WORK_DIM

Ilia Mirkin imirkin at alum.mit.edu
Wed Jun 29 13:41:02 UTC 2016


On Wed, Jun 29, 2016 at 8:37 AM, Hans de Goede <hdegoede at redhat.com> wrote:
> Add support for SV_WORK_DIM for nvc0 and nve4.
>
> Signed-off-by: Hans de Goede <hdegoede at redhat.com>
> ---
> Changes in v2
> -Use new NVC0_CB_AUX_GRID_INFO(i) version
> Changes in v1 (first non RFC posting):
> -Adjust NVC0_CB_AUX_GRID_SIZE for the extra value in grid-info
> -Use NVC0_CB_AUX_GRID_SIZE instead of a hardcoded value when
>  uploading the grid info
> -Also implement SV_WORK_DIM for nvc0
> ---
>  src/gallium/drivers/nouveau/codegen/nv50_ir.h      |  1 +
>  .../drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp  |  1 +
>  .../nouveau/codegen/nv50_ir_lowering_nvc0.cpp      |  2 ++
>  .../nouveau/codegen/nv50_ir_target_nvc0.cpp        |  1 +
>  src/gallium/drivers/nouveau/nvc0/nvc0_compute.c    | 24 ++++++++++++++++------
>  src/gallium/drivers/nouveau/nvc0/nvc0_context.h    |  4 ++--
>  src/gallium/drivers/nouveau/nvc0/nvc0_program.c    |  2 +-
>  src/gallium/drivers/nouveau/nvc0/nve4_compute.c    |  7 ++++---
>  8 files changed, 30 insertions(+), 12 deletions(-)
>
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
> index 94e54bb..41804b6 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
> @@ -461,6 +461,7 @@ enum SVSemantic
>     SV_BASEVERTEX,
>     SV_BASEINSTANCE,
>     SV_DRAWID,
> +   SV_WORK_DIM,
>     SV_UNDEFINED,
>     SV_LAST
>  };
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
> index ed3249e..7695511 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
> @@ -408,6 +408,7 @@ static nv50_ir::SVSemantic translateSysVal(uint sysval)
>     case TGSI_SEMANTIC_BASEVERTEX: return nv50_ir::SV_BASEVERTEX;
>     case TGSI_SEMANTIC_BASEINSTANCE: return nv50_ir::SV_BASEINSTANCE;
>     case TGSI_SEMANTIC_DRAWID:     return nv50_ir::SV_DRAWID;
> +   case TGSI_SEMANTIC_WORK_DIM:   return nv50_ir::SV_WORK_DIM;
>     default:
>        assert(0);
>        return nv50_ir::SV_CLOCK;
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
> index 67bd73b..e9c3f27 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
> @@ -2372,6 +2372,8 @@ NVC0LoweringPass::handleRDSV(Instruction *i)
>           i->setSrc(0, bld.mkImm(sv == SV_GRIDID ? 0 : 1));
>           return true;
>        }
> +      // Fallthrough
> +   case SV_WORK_DIM:
>        addr += prog->driver->prop.cp.gridInfoBase;
>        bld.mkLoad(TYPE_U32, i->getDef(0),
>                   bld.mkSymbol(FILE_MEMORY_CONST, prog->driver->io.auxCBSlot,
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
> index 932ec39..04ac288 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
> @@ -295,6 +295,7 @@ TargetNVC0::getSVAddress(DataFile shaderFile, const Symbol *sym) const
>     case SV_NTID:           return kepler ? (0x00 + idx * 4) : ~0;
>     case SV_NCTAID:         return kepler ? (0x0c + idx * 4) : ~0;
>     case SV_GRIDID:         return kepler ? 0x18 : ~0;
> +   case SV_WORK_DIM:       return 0x1c;
>     case SV_SAMPLE_INDEX:   return 0;
>     case SV_SAMPLE_POS:     return 0;
>     case SV_SAMPLE_MASK:    return 0;
> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
> index 59bbe1e..887fdf2 100644
> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
> @@ -356,7 +356,8 @@ nvc0_state_validate_cp(struct nvc0_context *nvc0, uint32_t mask)
>  }
>
>  static void
> -nvc0_compute_upload_input(struct nvc0_context *nvc0, const void *input)
> +nvc0_compute_upload_input(struct nvc0_context *nvc0,
> +                          const struct pipe_grid_info *info)
>  {
>     struct nouveau_pushbuf *push = nvc0->base.pushbuf;
>     struct nvc0_screen *screen = nvc0->screen;
> @@ -375,13 +376,24 @@ nvc0_compute_upload_input(struct nvc0_context *nvc0, const void *input)
>        /* NOTE: size is limited to 4 KiB, which is < NV04_PFIFO_MAX_PACKET_LEN */
>        BEGIN_1IC0(push, NVC0_CP(CB_POS), 1 + cp->parm_size / 4);
>        PUSH_DATA (push, 0);
> -      PUSH_DATAp(push, input, cp->parm_size / 4);
> +      PUSH_DATAp(push, info->input, cp->parm_size / 4);
>
>        nvc0_compute_invalidate_constbufs(nvc0);
> -
> -      BEGIN_NVC0(push, NVC0_CP(FLUSH), 1);
> -      PUSH_DATA (push, NVC0_COMPUTE_FLUSH_CB);
>     }
> +
> +   BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3);
> +   PUSH_DATA (push, 2048);
> +   PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5));
> +   PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5));
> +   BEGIN_NVC0(push, NVC0_CP(CB_BIND), 1);
> +   PUSH_DATA (push, (15 << 8) | 1);

The above 2 lines (the CB_BIND method and its data word) shouldn't be necessary.

> +   BEGIN_1IC0(push, NVC0_CP(CB_POS), 1 + 1);
> +   /* (7) as we only upload work_dim on nvc0, the rest uses special regs */
> +   PUSH_DATA (push, NVC0_CB_AUX_GRID_INFO(7));
> +   PUSH_DATA (push, info->work_dim);
> +
> +   BEGIN_NVC0(push, NVC0_CP(FLUSH), 1);
> +   PUSH_DATA (push, NVC0_COMPUTE_FLUSH_CB);
>  }
>
>  void
> @@ -398,7 +410,7 @@ nvc0_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info)
>        return;
>     }
>
> -   nvc0_compute_upload_input(nvc0, info->input);
> +   nvc0_compute_upload_input(nvc0, info);
>
>     BEGIN_NVC0(push, NVC0_CP(CP_START_ID), 1);
>     PUSH_DATA (push, nvc0_program_symbol_offset(cp, info->pc));
> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
> index 4868a64..912278d 100644
> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
> @@ -114,9 +114,9 @@
>  /* 8 sets of 32-bits coordinate offsets */
>  #define NVC0_CB_AUX_MS_INFO         0x0a0 /* CP */
>  #define NVC0_CB_AUX_MS_SIZE         (8 * 2 * 4)
> -/* block/grid size, at 3 32-bits integers each and gridid */
> +/* block/grid size, at 3 32-bits integers each, gridid and work_dim */
>  #define NVC0_CB_AUX_GRID_INFO(i)    0x0e0 + (i) * 4 /* CP */
> -#define NVC0_CB_AUX_GRID_SIZE       (7 * 4)
> +#define NVC0_CB_AUX_GRID_SIZE       (8 * 4)
>  /* 8 user clip planes, at 4 32-bits floats each */
>  #define NVC0_CB_AUX_UCP_INFO        0x100
>  #define NVC0_CB_AUX_UCP_SIZE        (PIPE_MAX_CLIP_PLANES * 4 * 4)
> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
> index f151d51..d49614f 100644
> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
> @@ -562,13 +562,13 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset,
>        if (chipset >= NVISA_GK104_CHIPSET) {
>           info->io.auxCBSlot = 7;
>           info->io.texBindBase = NVC0_CB_AUX_TEX_INFO(0);
> -         info->prop.cp.gridInfoBase = NVC0_CB_AUX_GRID_INFO(0);
>           info->io.uboInfoBase = NVC0_CB_AUX_UBO_INFO(0);
>        }
>        info->io.msInfoCBSlot = 0;
>        info->io.msInfoBase = NVC0_CB_AUX_MS_INFO;
>        info->io.bufInfoBase = NVC0_CB_AUX_BUF_INFO(0);
>        info->io.suInfoBase = NVC0_CB_AUX_SU_INFO(0);
> +      info->prop.cp.gridInfoBase = NVC0_CB_AUX_GRID_INFO(0);
>     } else {
>        if (chipset >= NVISA_GK104_CHIPSET) {
>           info->io.texBindBase = NVC0_CB_AUX_TEX_INFO(0);
> diff --git a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
> index 5fddd92..d1cf59a 100644
> --- a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
> +++ b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
> @@ -434,7 +434,7 @@ nve4_compute_upload_input(struct nvc0_context *nvc0,
>     PUSH_DATAh(push, address + NVC0_CB_AUX_GRID_INFO(0));
>     PUSH_DATA (push, address + NVC0_CB_AUX_GRID_INFO(0));
>     BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);
> -   PUSH_DATA (push, 7 * 4);
> +   PUSH_DATA (push, NVC0_CB_AUX_GRID_SIZE);

I would rather this stay an explicit value, i.e. 8 * 4. You're welcome
to throw in a STATIC_ASSERT to make sure the explicit value and
NVC0_CB_AUX_GRID_SIZE are identical. However, the value here has to map
to the number of bytes fed in on the pushbuf, which is easiest to verify
when it's in non-symbolic form.

With these two minor items corrected, this series is

Reviewed-by: Ilia Mirkin <imirkin at alum.mit.edu>

>     PUSH_DATA (push, 0x1);
>
>     if (unlikely(info->indirect)) {
> @@ -444,18 +444,19 @@ nve4_compute_upload_input(struct nvc0_context *nvc0,
>        nouveau_pushbuf_space(push, 16, 0, 1);
>        PUSH_REFN(push, res->bo, NOUVEAU_BO_RD | res->domain);
>
> -      BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + 7);
> +      BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + 8);
>        PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1));
>        PUSH_DATAp(push, info->block, 3);
>        nouveau_pushbuf_data(push, res->bo, offset,
>                             NVC0_IB_ENTRY_1_NO_PREFETCH | 3 * 4);
>     } else {
> -      BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + 7);
> +      BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + 8);
>        PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1));
>        PUSH_DATAp(push, info->block, 3);
>        PUSH_DATAp(push, info->grid, 3);
>     }
>     PUSH_DATA (push, 0);
> +   PUSH_DATA (push, info->work_dim);
>
>     BEGIN_NVC0(push, NVE4_CP(FLUSH), 1);
>     PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CB);
> --
> 2.7.4
>


More information about the mesa-dev mailing list