[Mesa-dev] [PATCH 3/3] nouveau: Add support for SV_WORK_DIM

Samuel Pitoiset samuel.pitoiset at gmail.com
Thu Apr 28 14:14:09 UTC 2016



On 04/28/2016 04:12 PM, Samuel Pitoiset wrote:
>
>
> On 04/28/2016 04:05 PM, Hans de Goede wrote:
>> Add support for SV_WORK_DIM for nvc0 and nve4.
>>
>> Signed-off-by: Hans de Goede <hdegoede at redhat.com>
>> ---
>> Changes in v1 (first non RFC posting):
>> -Adjust NVC0_CB_AUX_GRID_SIZE for the extra value in grid-info
>> -Use NVC0_CB_AUX_GRID_SIZE instead of a hardcoded value when
>>  uploading the grid info
>> -Also implement SV_WORK_DIM for nvc0
>> ---
>>  src/gallium/drivers/nouveau/codegen/nv50_ir.h      |  1 +
>>  .../drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp  |  1 +
>>  .../nouveau/codegen/nv50_ir_lowering_nvc0.cpp      |  2 ++
>>  .../nouveau/codegen/nv50_ir_target_nvc0.cpp        |  1 +
>>  src/gallium/drivers/nouveau/nvc0/nvc0_compute.c    | 24
>> ++++++++++++++++------
>>  src/gallium/drivers/nouveau/nvc0/nvc0_context.h    |  4 ++--
>>  src/gallium/drivers/nouveau/nvc0/nvc0_program.c    |  2 +-
>>  src/gallium/drivers/nouveau/nvc0/nve4_compute.c    |  7 ++++---
>>  8 files changed, 30 insertions(+), 12 deletions(-)
>>
>> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h
>> b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
>> index 94e54bb..41804b6 100644
>> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h
>> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
>> @@ -461,6 +461,7 @@ enum SVSemantic
>>     SV_BASEVERTEX,
>>     SV_BASEINSTANCE,
>>     SV_DRAWID,
>> +   SV_WORK_DIM,
>>     SV_UNDEFINED,
>>     SV_LAST
>>  };
>> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
>> b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
>> index 3708f37..f75f480 100644
>> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
>> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
>> @@ -408,6 +408,7 @@ static nv50_ir::SVSemantic translateSysVal(uint
>> sysval)
>>     case TGSI_SEMANTIC_BASEVERTEX: return nv50_ir::SV_BASEVERTEX;
>>     case TGSI_SEMANTIC_BASEINSTANCE: return nv50_ir::SV_BASEINSTANCE;
>>     case TGSI_SEMANTIC_DRAWID:     return nv50_ir::SV_DRAWID;
>> +   case TGSI_SEMANTIC_WORK_DIM:   return nv50_ir::SV_WORK_DIM;
>>     default:
>>        assert(0);
>>        return nv50_ir::SV_CLOCK;
>> diff --git
>> a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
>> b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
>> index 3bce962..1785623 100644
>> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
>> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
>> @@ -2178,6 +2178,8 @@ NVC0LoweringPass::handleRDSV(Instruction *i)
>>           i->setSrc(0, bld.mkImm(sv == SV_GRIDID ? 0 : 1));
>>           return true;
>>        }
>> +      // Fallthrough
>> +   case SV_WORK_DIM:
>>        addr += prog->driver->prop.cp.gridInfoBase;
>>        bld.mkLoad(TYPE_U32, i->getDef(0),
>>                   bld.mkSymbol(FILE_MEMORY_CONST,
>> prog->driver->io.auxCBSlot,
>> diff --git
>> a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
>> b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
>> index 9e1e7bf..80cb9fd 100644
>> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
>> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
>> @@ -293,6 +293,7 @@ TargetNVC0::getSVAddress(DataFile shaderFile,
>> const Symbol *sym) const
>>     case SV_NTID:           return kepler ? (0x00 + idx * 4) : ~0;
>>     case SV_NCTAID:         return kepler ? (0x0c + idx * 4) : ~0;
>>     case SV_GRIDID:         return kepler ? 0x18 : ~0;
>> +   case SV_WORK_DIM:       return 0x1c;
>>     case SV_SAMPLE_INDEX:   return 0;
>>     case SV_SAMPLE_POS:     return 0;
>>     case SV_SAMPLE_MASK:    return 0;
>> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
>> b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
>> index bbc8edb..9f85ead 100644
>> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
>> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
>> @@ -284,7 +284,8 @@ nvc0_state_validate_cp(struct nvc0_context *nvc0,
>> uint32_t mask)
>>  }
>>
>>  static void
>> -nvc0_compute_upload_input(struct nvc0_context *nvc0, const void *input)
>> +nvc0_compute_upload_input(struct nvc0_context *nvc0,
>> +                          const struct pipe_grid_info *info)
>>  {
>>     struct nouveau_pushbuf *push = nvc0->base.pushbuf;
>>     struct nvc0_screen *screen = nvc0->screen;
>> @@ -303,11 +304,22 @@ nvc0_compute_upload_input(struct nvc0_context
>> *nvc0, const void *input)
>>        /* NOTE: size is limited to 4 KiB, which is <
>> NV04_PFIFO_MAX_PACKET_LEN */
>>        BEGIN_1IC0(push, NVC0_CP(CB_POS), 1 + cp->parm_size / 4);
>>        PUSH_DATA (push, 0);
>> -      PUSH_DATAp(push, input, cp->parm_size / 4);
>> -
>> -      BEGIN_NVC0(push, NVC0_CP(FLUSH), 1);
>> -      PUSH_DATA (push, NVC0_COMPUTE_FLUSH_CB);
>> +      PUSH_DATAp(push, info->input, cp->parm_size / 4);
>>     }
>> +
>> +   BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3);
>> +   PUSH_DATA (push, 2048);
>> +   PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5));
>> +   PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5));
>> +   BEGIN_NVC0(push, NVC0_CP(CB_BIND), 1);
>> +   PUSH_DATA (push, (15 << 8) | 1);
>> +   BEGIN_1IC0(push, NVC0_CP(CB_POS), 1 + 1);
>> +   /* + 0x1c as we only upload work_dim on nvc0, the rest uses
>> special regs */
>> +   PUSH_DATA (push, NVC0_CB_AUX_GRID_INFO + 0x1c);
>
> Well, I would prefer to see NVC0_CB_AUX_GRID_INFO(3) here to avoid this
> magic offset. You also need to change:
>
> #define NVC0_CB_AUX_GRID_INFO       0x0e0 /* CP */
>
> into
>
> #define NVC0_CB_AUX_GRID_INFO(i)    (0x0e0 + (i) * 0x4) /* CP */
>
> The use of NVC0_CB_AUX_GRID_INFO in nvc0_program.c needs to be updated as well.

Err, the index should be 7 actually, not 3 (0x1c / 4 = 7), but you get the idea. :-)

>
>> +   PUSH_DATA (push, info->work_dim);
>> +
>> +   BEGIN_NVC0(push, NVC0_CP(FLUSH), 1);
>> +   PUSH_DATA (push, NVC0_COMPUTE_FLUSH_CB);
>>  }
>>
>>  void
>> @@ -325,7 +337,7 @@ nvc0_launch_grid(struct pipe_context *pipe, const
>> struct pipe_grid_info *info)
>>        return;
>>     }
>>
>> -   nvc0_compute_upload_input(nvc0, info->input);
>> +   nvc0_compute_upload_input(nvc0, info);
>>
>>     BEGIN_NVC0(push, NVC0_CP(CP_START_ID), 1);
>>     PUSH_DATA (push, nvc0_program_symbol_offset(cp, info->pc));
>> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
>> b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
>> index 7fcbf4a..7d25c46 100644
>> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
>> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
>> @@ -113,9 +113,9 @@
>>  /* 8 sets of 32-bits coordinate offsets */
>>  #define NVC0_CB_AUX_MS_INFO         0x0a0 /* CP */
>>  #define NVC0_CB_AUX_MS_SIZE         (8 * 2 * 4)
>> -/* block/grid size, at 3 32-bits integers each and gridid */
>> +/* block/grid size, at 3 32-bits integers each, gridid and work_dim */
>>  #define NVC0_CB_AUX_GRID_INFO       0x0e0 /* CP */
>> -#define NVC0_CB_AUX_GRID_SIZE       (7 * 4)
>> +#define NVC0_CB_AUX_GRID_SIZE       (8 * 4)
>>  /* 8 user clip planes, at 4 32-bits floats each */
>>  #define NVC0_CB_AUX_UCP_INFO        0x100
>>  #define NVC0_CB_AUX_UCP_SIZE        (PIPE_MAX_CLIP_PLANES * 4 * 4)
>> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
>> b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
>> index ca6349c..126a038 100644
>> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
>> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
>> @@ -549,7 +549,6 @@ nvc0_program_translate(struct nvc0_program *prog,
>> uint16_t chipset,
>>        if (chipset >= NVISA_GK104_CHIPSET) {
>>           info->io.auxCBSlot = 7;
>>           info->io.texBindBase = NVC0_CB_AUX_TEX_INFO(0);
>> -         info->prop.cp.gridInfoBase = NVC0_CB_AUX_GRID_INFO;
>>           info->io.uboInfoBase = NVC0_CB_AUX_UBO_INFO(0);
>>           info->io.suInfoBase = NVC0_CB_AUX_SU_INFO(0);
>>        } else {
>> @@ -558,6 +557,7 @@ nvc0_program_translate(struct nvc0_program *prog,
>> uint16_t chipset,
>>        info->io.msInfoCBSlot = 0;
>>        info->io.msInfoBase = NVC0_CB_AUX_MS_INFO;
>>        info->io.bufInfoBase = NVC0_CB_AUX_BUF_INFO(0);
>> +      info->prop.cp.gridInfoBase = NVC0_CB_AUX_GRID_INFO;
>>     } else {
>>        if (chipset >= NVISA_GK104_CHIPSET) {
>>           info->io.texBindBase = NVC0_CB_AUX_TEX_INFO(0);
>> diff --git a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
>> b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
>> index 1fe6026..b6496d3 100644
>> --- a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
>> +++ b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
>> @@ -496,7 +496,7 @@ nve4_compute_upload_input(struct nvc0_context *nvc0,
>>     PUSH_DATAh(push, address + NVC0_CB_AUX_GRID_INFO);
>>     PUSH_DATA (push, address + NVC0_CB_AUX_GRID_INFO);
>>     BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);
>> -   PUSH_DATA (push, 7 * 4);
>> +   PUSH_DATA (push, NVC0_CB_AUX_GRID_SIZE);
>>     PUSH_DATA (push, 0x1);
>>
>>     if (unlikely(info->indirect)) {
>> @@ -506,18 +506,19 @@ nve4_compute_upload_input(struct nvc0_context
>> *nvc0,
>>        nouveau_pushbuf_space(push, 16, 0, 1);
>>        PUSH_REFN(push, res->bo, NOUVEAU_BO_RD | res->domain);
>>
>> -      BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + 7);
>> +      BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + 8);
>>        PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1));
>>        PUSH_DATAp(push, info->block, 3);
>>        nouveau_pushbuf_data(push, res->bo, offset,
>>                             NVC0_IB_ENTRY_1_NO_PREFETCH | 3 * 4);
>>     } else {
>> -      BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + 7);
>> +      BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + 8);
>>        PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1));
>>        PUSH_DATAp(push, info->block, 3);
>>        PUSH_DATAp(push, info->grid, 3);
>>     }
>>     PUSH_DATA (push, 0);
>> +   PUSH_DATA (push, info->work_dim);
>>
>>     BEGIN_NVC0(push, NVE4_CP(FLUSH), 1);
>>     PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CB);
>>
>

-- 
-Samuel


More information about the mesa-dev mailing list