[Mesa-dev] [PATCH v2 4/4] nouveau: Add support for SV_WORK_DIM

Samuel Pitoiset samuel.pitoiset at gmail.com
Sat Jul 2 10:24:44 UTC 2016



On 07/02/2016 12:23 PM, Hans de Goede wrote:
> Hi,
>
> On 29-06-16 15:41, Ilia Mirkin wrote:
>> On Wed, Jun 29, 2016 at 8:37 AM, Hans de Goede <hdegoede at redhat.com>
>> wrote:
>>> Add support for SV_WORK_DIM for nvc0 and nve4.
>>>
>>> Signed-off-by: Hans de Goede <hdegoede at redhat.com>
>>> ---
>>> Changes in v2
>>> -Use new NVC0_CB_AUX_GRID_INFO(i) version
>>> Changes in v1 (first non RFC posting):
>>> -Adjust NVC0_CB_AUX_GRID_SIZE for the extra value in grid-info
>>> -Use NVC0_CB_AUX_GRID_SIZE instead of a hardcoded value when
>>>  uploading the grid info
>>> -Also implement SV_WORK_DIM for nvc0
>>> ---
>>>  src/gallium/drivers/nouveau/codegen/nv50_ir.h      |  1 +
>>>  .../drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp  |  1 +
>>>  .../nouveau/codegen/nv50_ir_lowering_nvc0.cpp      |  2 ++
>>>  .../nouveau/codegen/nv50_ir_target_nvc0.cpp        |  1 +
>>>  src/gallium/drivers/nouveau/nvc0/nvc0_compute.c    | 24 ++++++++++++++++------
>>>  src/gallium/drivers/nouveau/nvc0/nvc0_context.h    |  4 ++--
>>>  src/gallium/drivers/nouveau/nvc0/nvc0_program.c    |  2 +-
>>>  src/gallium/drivers/nouveau/nvc0/nve4_compute.c    |  7 ++++---
>>>  8 files changed, 30 insertions(+), 12 deletions(-)
>>>
>>> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h
>>> b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
>>> index 94e54bb..41804b6 100644
>>> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h
>>> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
>>> @@ -461,6 +461,7 @@ enum SVSemantic
>>>     SV_BASEVERTEX,
>>>     SV_BASEINSTANCE,
>>>     SV_DRAWID,
>>> +   SV_WORK_DIM,
>>>     SV_UNDEFINED,
>>>     SV_LAST
>>>  };
>>> diff --git
>>> a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
>>> b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
>>> index ed3249e..7695511 100644
>>> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
>>> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
>>> @@ -408,6 +408,7 @@ static nv50_ir::SVSemantic translateSysVal(uint
>>> sysval)
>>>     case TGSI_SEMANTIC_BASEVERTEX: return nv50_ir::SV_BASEVERTEX;
>>>     case TGSI_SEMANTIC_BASEINSTANCE: return nv50_ir::SV_BASEINSTANCE;
>>>     case TGSI_SEMANTIC_DRAWID:     return nv50_ir::SV_DRAWID;
>>> +   case TGSI_SEMANTIC_WORK_DIM:   return nv50_ir::SV_WORK_DIM;
>>>     default:
>>>        assert(0);
>>>        return nv50_ir::SV_CLOCK;
>>> diff --git
>>> a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
>>> b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
>>> index 67bd73b..e9c3f27 100644
>>> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
>>> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
>>> @@ -2372,6 +2372,8 @@ NVC0LoweringPass::handleRDSV(Instruction *i)
>>>           i->setSrc(0, bld.mkImm(sv == SV_GRIDID ? 0 : 1));
>>>           return true;
>>>        }
>>> +      // Fallthrough
>>> +   case SV_WORK_DIM:
>>>        addr += prog->driver->prop.cp.gridInfoBase;
>>>        bld.mkLoad(TYPE_U32, i->getDef(0),
>>>                   bld.mkSymbol(FILE_MEMORY_CONST, prog->driver->io.auxCBSlot,
>>> diff --git
>>> a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
>>> b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
>>> index 932ec39..04ac288 100644
>>> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
>>> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
>>> @@ -295,6 +295,7 @@ TargetNVC0::getSVAddress(DataFile shaderFile,
>>> const Symbol *sym) const
>>>     case SV_NTID:           return kepler ? (0x00 + idx * 4) : ~0;
>>>     case SV_NCTAID:         return kepler ? (0x0c + idx * 4) : ~0;
>>>     case SV_GRIDID:         return kepler ? 0x18 : ~0;
>>> +   case SV_WORK_DIM:       return 0x1c;
>>>     case SV_SAMPLE_INDEX:   return 0;
>>>     case SV_SAMPLE_POS:     return 0;
>>>     case SV_SAMPLE_MASK:    return 0;
>>> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
>>> b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
>>> index 59bbe1e..887fdf2 100644
>>> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
>>> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
>>> @@ -356,7 +356,8 @@ nvc0_state_validate_cp(struct nvc0_context *nvc0,
>>> uint32_t mask)
>>>  }
>>>
>>>  static void
>>> -nvc0_compute_upload_input(struct nvc0_context *nvc0, const void *input)
>>> +nvc0_compute_upload_input(struct nvc0_context *nvc0,
>>> +                          const struct pipe_grid_info *info)
>>>  {
>>>     struct nouveau_pushbuf *push = nvc0->base.pushbuf;
>>>     struct nvc0_screen *screen = nvc0->screen;
>>> @@ -375,13 +376,24 @@ nvc0_compute_upload_input(struct nvc0_context
>>> *nvc0, const void *input)
>>>        /* NOTE: size is limited to 4 KiB, which is < NV04_PFIFO_MAX_PACKET_LEN */
>>>        BEGIN_1IC0(push, NVC0_CP(CB_POS), 1 + cp->parm_size / 4);
>>>        PUSH_DATA (push, 0);
>>> -      PUSH_DATAp(push, input, cp->parm_size / 4);
>>> +      PUSH_DATAp(push, info->input, cp->parm_size / 4);
>>>
>>>        nvc0_compute_invalidate_constbufs(nvc0);
>>> -
>>> -      BEGIN_NVC0(push, NVC0_CP(FLUSH), 1);
>>> -      PUSH_DATA (push, NVC0_COMPUTE_FLUSH_CB);
>>>     }
>>> +
>>> +   BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3);
>>> +   PUSH_DATA (push, 2048);
>>> +   PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5));
>>> +   PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5));
>>> +   BEGIN_NVC0(push, NVC0_CP(CB_BIND), 1);
>>> +   PUSH_DATA (push, (15 << 8) | 1);
>>
>> The above 2 lines shouldn't be necessary.
>>
>>> +   BEGIN_1IC0(push, NVC0_CP(CB_POS), 1 + 1);
>>> +   /* (7) as we only upload work_dim on nvc0, the rest uses special regs */
>>> +   PUSH_DATA (push, NVC0_CB_AUX_GRID_INFO(7));
>>> +   PUSH_DATA (push, info->work_dim);
>>> +
>>> +   BEGIN_NVC0(push, NVC0_CP(FLUSH), 1);
>>> +   PUSH_DATA (push, NVC0_COMPUTE_FLUSH_CB);
>>>  }
>>>
>>>  void
>>> @@ -398,7 +410,7 @@ nvc0_launch_grid(struct pipe_context *pipe, const
>>> struct pipe_grid_info *info)
>>>        return;
>>>     }
>>>
>>> -   nvc0_compute_upload_input(nvc0, info->input);
>>> +   nvc0_compute_upload_input(nvc0, info);
>>>
>>>     BEGIN_NVC0(push, NVC0_CP(CP_START_ID), 1);
>>>     PUSH_DATA (push, nvc0_program_symbol_offset(cp, info->pc));
>>> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
>>> b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
>>> index 4868a64..912278d 100644
>>> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
>>> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
>>> @@ -114,9 +114,9 @@
>>>  /* 8 sets of 32-bits coordinate offsets */
>>>  #define NVC0_CB_AUX_MS_INFO         0x0a0 /* CP */
>>>  #define NVC0_CB_AUX_MS_SIZE         (8 * 2 * 4)
>>> -/* block/grid size, at 3 32-bits integers each and gridid */
>>> +/* block/grid size, at 3 32-bits integers each, gridid and work_dim */
>>>  #define NVC0_CB_AUX_GRID_INFO(i)    0x0e0 + (i) * 4 /* CP */
>>> -#define NVC0_CB_AUX_GRID_SIZE       (7 * 4)
>>> +#define NVC0_CB_AUX_GRID_SIZE       (8 * 4)
>>>  /* 8 user clip planes, at 4 32-bits floats each */
>>>  #define NVC0_CB_AUX_UCP_INFO        0x100
>>>  #define NVC0_CB_AUX_UCP_SIZE        (PIPE_MAX_CLIP_PLANES * 4 * 4)
>>> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
>>> b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
>>> index f151d51..d49614f 100644
>>> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
>>> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
>>> @@ -562,13 +562,13 @@ nvc0_program_translate(struct nvc0_program
>>> *prog, uint16_t chipset,
>>>        if (chipset >= NVISA_GK104_CHIPSET) {
>>>           info->io.auxCBSlot = 7;
>>>           info->io.texBindBase = NVC0_CB_AUX_TEX_INFO(0);
>>> -         info->prop.cp.gridInfoBase = NVC0_CB_AUX_GRID_INFO(0);
>>>           info->io.uboInfoBase = NVC0_CB_AUX_UBO_INFO(0);
>>>        }
>>>        info->io.msInfoCBSlot = 0;
>>>        info->io.msInfoBase = NVC0_CB_AUX_MS_INFO;
>>>        info->io.bufInfoBase = NVC0_CB_AUX_BUF_INFO(0);
>>>        info->io.suInfoBase = NVC0_CB_AUX_SU_INFO(0);
>>> +      info->prop.cp.gridInfoBase = NVC0_CB_AUX_GRID_INFO(0);
>>>     } else {
>>>        if (chipset >= NVISA_GK104_CHIPSET) {
>>>           info->io.texBindBase = NVC0_CB_AUX_TEX_INFO(0);
>>> diff --git a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
>>> b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
>>> index 5fddd92..d1cf59a 100644
>>> --- a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
>>> +++ b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
>>> @@ -434,7 +434,7 @@ nve4_compute_upload_input(struct nvc0_context *nvc0,
>>>     PUSH_DATAh(push, address + NVC0_CB_AUX_GRID_INFO(0));
>>>     PUSH_DATA (push, address + NVC0_CB_AUX_GRID_INFO(0));
>>>     BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);
>>> -   PUSH_DATA (push, 7 * 4);
>>> +   PUSH_DATA (push, NVC0_CB_AUX_GRID_SIZE);
>>
>> I would rather this stay an explicit value, i.e. 8 * 4. You're welcome
>> to throw in a STATIC_ASSERT to make sure the two are identical.
>> However the value here has to map to the number of bytes fed in on the
>> pushbuf, which is easiest to verify when it's in non-symbolic form.
>>
>> With these two minor items corrected, this series is
>>
>> Reviewed-by: Ilia Mirkin <imirkin at alum.mit.edu>
>
> Thanks, pushed with the 2 items corrected and I've also pushed the
> compiler warnings series.

Nice, thanks!

>
> Regards,
>
> Hans
>
>
>
>>
>>>     PUSH_DATA (push, 0x1);
>>>
>>>     if (unlikely(info->indirect)) {
>>> @@ -444,18 +444,19 @@ nve4_compute_upload_input(struct nvc0_context
>>> *nvc0,
>>>        nouveau_pushbuf_space(push, 16, 0, 1);
>>>        PUSH_REFN(push, res->bo, NOUVEAU_BO_RD | res->domain);
>>>
>>> -      BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + 7);
>>> +      BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + 8);
>>>        PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1));
>>>        PUSH_DATAp(push, info->block, 3);
>>>        nouveau_pushbuf_data(push, res->bo, offset,
>>>                             NVC0_IB_ENTRY_1_NO_PREFETCH | 3 * 4);
>>>     } else {
>>> -      BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + 7);
>>> +      BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + 8);
>>>        PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1));
>>>        PUSH_DATAp(push, info->block, 3);
>>>        PUSH_DATAp(push, info->grid, 3);
>>>     }
>>>     PUSH_DATA (push, 0);
>>> +   PUSH_DATA (push, info->work_dim);
>>>
>>>     BEGIN_NVC0(push, NVE4_CP(FLUSH), 1);
>>>     PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CB);
>>> --
>>> 2.7.4
>>>


More information about the mesa-dev mailing list