[Mesa-dev] [PATCH 02/11] nvc0: bind driver cb for compute on c7[] for Kepler

Samuel Pitoiset samuel.pitoiset at gmail.com
Mon Feb 29 17:26:04 UTC 2016



On 02/27/2016 10:50 PM, Ilia Mirkin wrote:
> I think you're trying to resolve conflicts with images here again...
> please do that separately, and in a way that makes images available
> everywhere, not just compute. I don't think this needs to be part of
> this series though.

I removed the first patch of the series.

>
> On Sat, Feb 27, 2016 at 9:01 AM, Samuel Pitoiset
> <samuel.pitoiset at gmail.com> wrote:
>> Signed-off-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
>> ---
>>   .../nouveau/codegen/nv50_ir_lowering_nvc0.cpp      |  2 +-
>>   src/gallium/drivers/nouveau/nvc0/nvc0_context.h    | 11 ++++++-
>>   src/gallium/drivers/nouveau/nvc0/nvc0_program.c    | 10 +++---
>>   src/gallium/drivers/nouveau/nvc0/nve4_compute.c    | 38 ++++++++++++++--------
>>   src/gallium/drivers/nouveau/nvc0/nve4_compute.h    | 25 --------------
>>   5 files changed, 41 insertions(+), 45 deletions(-)
>>
>> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
>> index 8abdd93..d6dfed3 100644
>> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
>> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
>> @@ -1734,7 +1734,7 @@ NVC0LoweringPass::handleRDSV(Instruction *i)
>>         }
>>         addr += prog->driver->prop.cp.gridInfoBase;
>>         bld.mkLoad(TYPE_U32, i->getDef(0),
>> -                 bld.mkSymbol(FILE_MEMORY_CONST, 0, TYPE_U32, addr), NULL);
>> +                 bld.mkSymbol(FILE_MEMORY_CONST, 7, TYPE_U32, addr), NULL);
>>         break;
>>      case SV_SAMPLE_INDEX:
>>         // TODO: Properly pass source as an address in the PIX address space
>> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
>> index 203e479..dcb0bda 100644
>> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
>> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
>> @@ -102,7 +102,7 @@
>>   #define NVC0_CB_USR_INFO(s)         (s << 16)
>>   #define NVC0_CB_USR_SIZE            (6 << 16)
>>   /* 6 driver constbuts, at 1K each */
>> -#define NVC0_CB_AUX_INFO(s)         NVC0_CB_USR_SIZE + (s << 10)
>> +#define NVC0_CB_AUX_INFO(s)         NVC0_CB_USR_SIZE + (s << 12)
>>   #define NVC0_CB_AUX_SIZE            (6 << 10)
>>   /* TIC/TSC entries (6 user clip planes, base instance id) */
>>   #define NVC0_CB_AUX_TXC_INFO        0x000
>> @@ -113,14 +113,23 @@
>>   /* 8 user clip planes, at 4 32-bits floats each */
>>   #define NVC0_CB_AUX_UCP_INFO        0x100
>>   #define NVC0_CB_AUX_UCP_SIZE        (PIPE_MAX_CLIP_PLANES * 4 * 4)
>> +/* 8 sets of 32-bits pairs MS offsets */
>> +#define NVC0_CB_AUX_MS_INFO         0x100 /* CP */
>> +#define NVC0_CB_AUX_MS_SIZE         (8 * 2 * 4)
>>   /* 8 sets of 32-bits integer pairs sample offsets */
>>   #define NVC0_CB_AUX_SAMPLE_INFO     0x180 /* FP */
>>   #define NVC0_CB_AUX_SAMPLE_SIZE     (8 * 4 * 2)
>>   /* draw parameters (index bais, base instance, drawid) */
>>   #define NVC0_CB_AUX_DRAW_INFO       0x180 /* VP */
>> +/* block/grid size, at 3 32-bits integers each and gridid */
>> +#define NVC0_CB_AUX_GRID_INFO       0x180 /* CP */
>> +#define NVC0_CB_AUX_GRID_SIZE       (7 * 4)
>>   /* 32 user buffers, at 4 32-bits integers each */
>>   #define NVC0_CB_AUX_BUF_INFO(i)     0x200 + (i) * 4 * 4
>>   #define NVC0_CB_AUX_BUF_SIZE        (NVC0_MAX_BUFFERS * 4 * 4)
>> +/* 32 surfaces, at 16 32-bits integers each */
>> +#define NVC0_CB_AUX_SUF_INFO(i)     0x400 + (i) * 16 * 4
>> +#define NVC0_CB_AUX_SUF_SIZE        (32 * 16 * 4)
>>   /* 4 32-bits floats for the vertex runout, put at the end */
>>   #define NVC0_CB_AUX_RUNOUT_INFO     NVC0_CB_USR_SIZE + NVC0_CB_AUX_SIZE
>>
>> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
>> index d01de73..8f1f942 100644
>> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
>> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
>> @@ -540,10 +540,10 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset,
>>
>>      if (prog->type == PIPE_SHADER_COMPUTE) {
>>         if (chipset >= NVISA_GK104_CHIPSET) {
>> -         info->io.resInfoCBSlot = 0;
>> -         info->io.texBindBase = NVE4_CP_INPUT_TEX(0);
>> -         info->io.suInfoBase = NVE4_CP_INPUT_SUF(0);
>> -         info->prop.cp.gridInfoBase = NVE4_CP_INPUT_GRID_INFO(0);
>> +         info->io.resInfoCBSlot = 7;
>> +         info->io.texBindBase = NVC0_CB_AUX_TEX_INFO(0);
>> +         info->io.suInfoBase = NVC0_CB_AUX_SUF_INFO(0);
>> +         info->prop.cp.gridInfoBase = NVC0_CB_AUX_GRID_INFO;
>>            info->io.bufInfoBase = 0; /* TODO */
>>         } else {
>>            info->io.resInfoCBSlot = 15;
>> @@ -551,7 +551,7 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset,
>>            info->io.suInfoBase = 0; /* TODO */
>>         }
>>         info->io.msInfoCBSlot = 0;
>> -      info->io.msInfoBase = NVE4_CP_INPUT_MS_OFFSETS;
>> +      info->io.msInfoBase = NVC0_CB_AUX_MS_INFO;
>>      } else {
>>         if (chipset >= NVISA_GK104_CHIPSET) {
>>            info->io.texBindBase = NVC0_CB_AUX_TEX_INFO(0);
>> diff --git a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
>> index 4a4e836..b2059bb 100644
>> --- a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
>> +++ b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
>> @@ -41,6 +41,7 @@ nve4_screen_compute_setup(struct nvc0_screen *screen,
>>      int i;
>>      int ret;
>>      uint32_t obj_class;
>> +   uint32_t address;
>>
>>      switch (dev->chipset & ~0xf) {
>>      case 0x100:
>> @@ -65,7 +66,7 @@ nve4_screen_compute_setup(struct nvc0_screen *screen,
>>         return ret;
>>      }
>>
>> -   ret = nouveau_bo_new(dev, NV_VRAM_DOMAIN(&screen->base), 0, NVE4_CP_PARAM_SIZE, NULL,
>> +   ret = nouveau_bo_new(dev, NV_VRAM_DOMAIN(&screen->base), 0, 1 << 12, NULL,
>>                           &screen->parm);
>>      if (ret)
>>         return ret;
>> @@ -128,15 +129,17 @@ nve4_screen_compute_setup(struct nvc0_screen *screen,
>>      }
>>
>>      BEGIN_NVC0(push, NVE4_CP(TEX_CB_INDEX), 1);
>> -   PUSH_DATA (push, 0); /* does not interefere with 3D */
>> +   PUSH_DATA (push, 7); /* does not interfere with 3D */
>>
>>      if (obj_class == NVF0_COMPUTE_CLASS)
>>         IMMED_NVC0(push, SUBC_CP(0x02c4), 1);
>>
>> +   address = screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5);
>> +
>>      /* MS sample coordinate offsets: these do not work with _ALT modes ! */
>>      BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
>> -   PUSH_DATAh(push, screen->parm->offset + NVE4_CP_INPUT_MS_OFFSETS);
>> -   PUSH_DATA (push, screen->parm->offset + NVE4_CP_INPUT_MS_OFFSETS);
>> +   PUSH_DATAh(push, address + NVC0_CB_AUX_MS_INFO);
>> +   PUSH_DATA (push, address + NVC0_CB_AUX_MS_INFO);
>>      BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);
>>      PUSH_DATA (push, 64);
>>      PUSH_DATA (push, 1);
>> @@ -159,7 +162,7 @@ nve4_screen_compute_setup(struct nvc0_screen *screen,
>>      PUSH_DATA (push, 3); /* 7 */
>>      PUSH_DATA (push, 1);
>>
>> -#ifdef DEBUG
>> +#ifdef NOUVEAU_NVE4_MP_TRAP_HANDLER
>>      BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
>>      PUSH_DATAh(push, screen->parm->offset + NVE4_CP_INPUT_TRAP_INFO_PTR);
>>      PUSH_DATA (push, screen->parm->offset + NVE4_CP_INPUT_TRAP_INFO_PTR);
>> @@ -194,6 +197,9 @@ nve4_compute_validate_surfaces(struct nvc0_context *nvc0)
>>      uint32_t mask;
>>      unsigned i;
>>      const unsigned t = 1;
>> +   uint32_t address;
>> +
>> +   address = screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5);
>>
>>      mask = nvc0->surfaces_dirty[t];
>>      while (mask) {
>> @@ -205,8 +211,8 @@ nve4_compute_validate_surfaces(struct nvc0_context *nvc0)
>>          * directly instead of via binding points, so we have to supply them.
>>          */
>>         BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
>> -      PUSH_DATAh(push, screen->parm->offset + NVE4_CP_INPUT_SUF(i));
>> -      PUSH_DATA (push, screen->parm->offset + NVE4_CP_INPUT_SUF(i));
>> +      PUSH_DATAh(push, address + NVC0_CB_AUX_SUF_INFO(i));
>> +      PUSH_DATA (push, address + NVC0_CB_AUX_SUF_INFO(i));
>>         BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);
>>         PUSH_DATA (push, 64);
>>         PUSH_DATA (push, 1);
>> @@ -271,6 +277,7 @@ static void
>>   nve4_compute_set_tex_handles(struct nvc0_context *nvc0)
>>   {
>>      struct nouveau_pushbuf *push = nvc0->base.pushbuf;
>> +   struct nvc0_screen *screen = nvc0->screen;
>>      uint64_t address;
>>      const unsigned s = nvc0_shader_stage(PIPE_SHADER_COMPUTE);
>>      unsigned i, n;
>> @@ -282,11 +289,11 @@ nve4_compute_set_tex_handles(struct nvc0_context *nvc0)
>>      n = util_logbase2(dirty) + 1 - i;
>>      assert(n);
>>
>> -   address = nvc0->screen->parm->offset + NVE4_CP_INPUT_TEX(i);
>> +   address = screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s);
>>
>>      BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
>> -   PUSH_DATAh(push, address);
>> -   PUSH_DATA (push, address);
>> +   PUSH_DATAh(push, address + NVC0_CB_AUX_TEX_INFO(i));
>> +   PUSH_DATA (push, address + NVC0_CB_AUX_TEX_INFO(i));
>>      BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);
>>      PUSH_DATA (push, n * 4);
>>      PUSH_DATA (push, 0x1);
>> @@ -337,6 +344,9 @@ nve4_compute_upload_input(struct nvc0_context *nvc0, const void *input,
>>      struct nvc0_screen *screen = nvc0->screen;
>>      struct nouveau_pushbuf *push = nvc0->base.pushbuf;
>>      struct nvc0_program *cp = nvc0->compprog;
>> +   uint32_t address;
>> +
>> +   address = screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5);
>>
>>      if (cp->parm_size) {
>>         BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
>> @@ -350,8 +360,8 @@ nve4_compute_upload_input(struct nvc0_context *nvc0, const void *input,
>>         PUSH_DATAp(push, input, cp->parm_size / 4);
>>      }
>>      BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
>> -   PUSH_DATAh(push, screen->parm->offset + NVE4_CP_INPUT_GRID_INFO(0));
>> -   PUSH_DATA (push, screen->parm->offset + NVE4_CP_INPUT_GRID_INFO(0));
>> +   PUSH_DATAh(push, address + NVC0_CB_AUX_GRID_INFO);
>> +   PUSH_DATA (push, address + NVC0_CB_AUX_GRID_INFO);
>>      BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);
>>      PUSH_DATA (push, 7 * 4);
>>      PUSH_DATA (push, 0x1);
>> @@ -412,6 +422,8 @@ nve4_compute_setup_launch_desc(struct nvc0_context *nvc0,
>>            nve4_cp_launch_desc_set_ctx_cb(desc, i + 1, &nvc0->constbuf[s][i]);
>>      }
>>      nve4_cp_launch_desc_set_cb(desc, 0, screen->parm, 0, NVE4_CP_INPUT_SIZE);
>> +   nve4_cp_launch_desc_set_cb(desc, 7, screen->uniform_bo,
>> +                              NVC0_CB_AUX_INFO(5), 1 << 10);
>>   }
>>
>>   static inline struct nve4_cp_launch_desc *
>> @@ -498,7 +510,7 @@ nve4_compute_validate_textures(struct nvc0_context *nvc0)
>>      struct nouveau_pushbuf *push = nvc0->base.pushbuf;
>>      const unsigned s = 5;
>>      unsigned i;
>> -   uint32_t commands[2][NVE4_CP_INPUT_TEX_MAX];
>> +   uint32_t commands[2][32];
>>      unsigned n[2] = { 0, 0 };
>>
>>      for (i = 0; i < nvc0->num_textures[s]; ++i) {
>> diff --git a/src/gallium/drivers/nouveau/nvc0/nve4_compute.h b/src/gallium/drivers/nouveau/nvc0/nve4_compute.h
>> index 84f8593..dcafbed 100644
>> --- a/src/gallium/drivers/nouveau/nvc0/nve4_compute.h
>> +++ b/src/gallium/drivers/nouveau/nvc0/nve4_compute.h
>> @@ -4,31 +4,6 @@
>>
>>   #include "nvc0/nve4_compute.xml.h"
>>
>> -/* Input space is implemented as c0[], to which we bind the screen->parm bo.
>> - */
>> -#define NVE4_CP_INPUT_USER           0x0000
>> -#define NVE4_CP_INPUT_USER_LIMIT     0x1000
>> -#define NVE4_CP_INPUT_GRID_INFO(i)  (0x1000 + (i) * 4)
>> -#define NVE4_CP_INPUT_NTID(i)       (0x1000 + (i) * 4)
>> -#define NVE4_CP_INPUT_NCTAID(i)     (0x100c + (i) * 4)
>> -#define NVE4_CP_INPUT_GRIDID         0x1018
>> -#define NVE4_CP_INPUT_TEX(i)        (0x1040 + (i) * 4)
>> -#define NVE4_CP_INPUT_TEX_STRIDE     4
>> -#define NVE4_CP_INPUT_TEX_MAX        32
>> -#define NVE4_CP_INPUT_MS_OFFSETS     0x10c0
>> -#define NVE4_CP_INPUT_SUF_STRIDE     64
>> -#define NVE4_CP_INPUT_SUF(i)        (0x1100 + (i) * NVE4_CP_INPUT_SUF_STRIDE)
>> -#define NVE4_CP_INPUT_SUF_MAX        32
>> -#define NVE4_CP_INPUT_TRAP_INFO_PTR  0x1900
>> -#define NVE4_CP_INPUT_TEMP_PTR       0x1908
>> -#define NVE4_CP_INPUT_MP_TEMP_SIZE   0x1910
>> -#define NVE4_CP_INPUT_WARP_TEMP_SIZE 0x1914
>> -#define NVE4_CP_INPUT_CSTACK_SIZE    0x1918
>> -#define NVE4_CP_INPUT_SIZE           0x1a00
>> -#define NVE4_CP_PARAM_TRAP_INFO      0x2000
>> -#define NVE4_CP_PARAM_TRAP_INFO_SZ  (1 << 16)
>> -#define NVE4_CP_PARAM_SIZE          (NVE4_CP_PARAM_TRAP_INFO + (1 << 16))
>> -
>>   struct nve4_cp_launch_desc
>>   {
>>      u32 unk0[8];
>> --
>> 2.7.1
>>
>> _______________________________________________
>> mesa-dev mailing list
>> mesa-dev at lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev

-- 
-Samuel


More information about the mesa-dev mailing list