[Mesa-dev] [RFC 3/3] nouveau: codegen: Add support for SV_WORK_DIM

Samuel Pitoiset samuel.pitoiset at gmail.com
Wed Apr 27 15:41:07 UTC 2016


Yeah, please implement SV_WORK_DIM for Fermi as well.

On 04/27/2016 04:43 PM, Hans de Goede wrote:
> Add support for SV_WORK_DIM.
>
> Note that this is only implemented for nve4 for now, which is why this
> patch is marked as RFC.
>
> Signed-off-by: Hans de Goede <hdegoede at redhat.com>
> ---
>  src/gallium/drivers/nouveau/codegen/nv50_ir.h                 | 1 +
>  src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp     | 1 +
>  src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 2 ++
>  src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp   | 1 +
>  src/gallium/drivers/nouveau/nvc0/nvc0_context.h               | 2 +-
>  src/gallium/drivers/nouveau/nvc0/nve4_compute.c               | 7 ++++---
>  6 files changed, 10 insertions(+), 4 deletions(-)
>
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
> index 94e54bb..41804b6 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
> @@ -461,6 +461,7 @@ enum SVSemantic
>     SV_BASEVERTEX,
>     SV_BASEINSTANCE,
>     SV_DRAWID,
> +   SV_WORK_DIM,
>     SV_UNDEFINED,
>     SV_LAST
>  };
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
> index 3708f37..f75f480 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
> @@ -408,6 +408,7 @@ static nv50_ir::SVSemantic translateSysVal(uint sysval)
>     case TGSI_SEMANTIC_BASEVERTEX: return nv50_ir::SV_BASEVERTEX;
>     case TGSI_SEMANTIC_BASEINSTANCE: return nv50_ir::SV_BASEINSTANCE;
>     case TGSI_SEMANTIC_DRAWID:     return nv50_ir::SV_DRAWID;
> +   case TGSI_SEMANTIC_WORK_DIM:   return nv50_ir::SV_WORK_DIM;
>     default:
>        assert(0);
>        return nv50_ir::SV_CLOCK;
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
> index 3bce962..1785623 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
> @@ -2178,6 +2178,8 @@ NVC0LoweringPass::handleRDSV(Instruction *i)
>           i->setSrc(0, bld.mkImm(sv == SV_GRIDID ? 0 : 1));
>           return true;
>        }
> +      // Fallthrough
> +   case SV_WORK_DIM:
>        addr += prog->driver->prop.cp.gridInfoBase;
>        bld.mkLoad(TYPE_U32, i->getDef(0),
>                   bld.mkSymbol(FILE_MEMORY_CONST, prog->driver->io.auxCBSlot,
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
> index 9e1e7bf..80cb9fd 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
> @@ -293,6 +293,7 @@ TargetNVC0::getSVAddress(DataFile shaderFile, const Symbol *sym) const
>     case SV_NTID:           return kepler ? (0x00 + idx * 4) : ~0;
>     case SV_NCTAID:         return kepler ? (0x0c + idx * 4) : ~0;
>     case SV_GRIDID:         return kepler ? 0x18 : ~0;
> +   case SV_WORK_DIM:       return 0x1c;
>     case SV_SAMPLE_INDEX:   return 0;
>     case SV_SAMPLE_POS:     return 0;
>     case SV_SAMPLE_MASK:    return 0;
> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
> index 7fcbf4a..b6c52d5 100644
> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
> @@ -113,7 +113,7 @@
>  /* 8 sets of 32-bits coordinate offsets */
>  #define NVC0_CB_AUX_MS_INFO         0x0a0 /* CP */
>  #define NVC0_CB_AUX_MS_SIZE         (8 * 2 * 4)
> -/* block/grid size, at 3 32-bits integers each and gridid */
> +/* block/grid size, at 3 32-bits integers each, gridid and work_dim */
>  #define NVC0_CB_AUX_GRID_INFO       0x0e0 /* CP */
>  #define NVC0_CB_AUX_GRID_SIZE       (7 * 4)

You are lucky, because the extra work_dim value fits perfectly here. :-)

>  /* 8 user clip planes, at 4 32-bits floats each */
> diff --git a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
> index 1fe6026..1ecf65c 100644
> --- a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
> +++ b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
> @@ -496,7 +496,7 @@ nve4_compute_upload_input(struct nvc0_context *nvc0,
>     PUSH_DATAh(push, address + NVC0_CB_AUX_GRID_INFO);
>     PUSH_DATA (push, address + NVC0_CB_AUX_GRID_INFO);
>     BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);
> -   PUSH_DATA (push, 7 * 4);
> +   PUSH_DATA (push, 8 * 4);
>     PUSH_DATA (push, 0x1);
>
>     if (unlikely(info->indirect)) {
> @@ -506,18 +506,19 @@ nve4_compute_upload_input(struct nvc0_context *nvc0,
>        nouveau_pushbuf_space(push, 16, 0, 1);
>        PUSH_REFN(push, res->bo, NOUVEAU_BO_RD | res->domain);
>
> -      BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + 7);
> +      BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + 8);
>        PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1));
>        PUSH_DATAp(push, info->block, 3);
>        nouveau_pushbuf_data(push, res->bo, offset,
>                             NVC0_IB_ENTRY_1_NO_PREFETCH | 3 * 4);
>     } else {
> -      BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + 7);
> +      BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + 8);
>        PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1));
>        PUSH_DATAp(push, info->block, 3);
>        PUSH_DATAp(push, info->grid, 3);
>     }
>     PUSH_DATA (push, 0);
> +   PUSH_DATA (push, info->work_dim);
>
>     BEGIN_NVC0(push, NVE4_CP(FLUSH), 1);
>     PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CB);
>

-- 
-Samuel


More information about the mesa-dev mailing list