[Mesa-dev] [PATCH 5/5] nvc0: add ARB_indirect_parameters support

Samuel Pitoiset samuel.pitoiset at gmail.com
Mon Jan 4 12:21:56 PST 2016



On 01/04/2016 09:18 PM, Ilia Mirkin wrote:
> On Mon, Jan 4, 2016 at 3:04 PM, Samuel Pitoiset
> <samuel.pitoiset at gmail.com> wrote:
>> Did you write piglit tests for this new extension?
>> I don't see any tests which use MultiDrawArraysIndirectCountARB() or
>> MultiDrawElementsIndirectCountARB().
>
> http://patchwork.freedesktop.org/patch/69334/

Okay, good!

>
>>
>>
>> Are you going to submit the rest of the series for nvc0?
>
> Mmmmaybe. It's part of the ARB_multi_draw_indirect stuff.

It would be good to send it if you want someone to have a look at it.

>
>>
>>
>> On 01/02/2016 09:38 PM, Ilia Mirkin wrote:
>>>
>>> I chose to make separate macros for this due to the additional
>>> complexity and extra scratch usage.
>>>
>>> Signed-off-by: Ilia Mirkin <imirkin at alum.mit.edu>
>>> ---
>>>    docs/relnotes/11.2.0.html                          |   1 +
>>>    src/gallium/drivers/nouveau/nvc0/mme/com9097.mme   | 157
>>> +++++++++++++++++++++
>>>    src/gallium/drivers/nouveau/nvc0/mme/com9097.mme.h | 125
>>> ++++++++++++++++
>>>    src/gallium/drivers/nouveau/nvc0/nvc0_macros.h     |   4 +
>>>    src/gallium/drivers/nouveau/nvc0/nvc0_screen.c     |   4 +-
>>>    src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c        |  29 +++-
>>>    6 files changed, 314 insertions(+), 6 deletions(-)
>>>
>>> diff --git a/docs/relnotes/11.2.0.html b/docs/relnotes/11.2.0.html
>>> index d31da8b..616c134 100644
>>> --- a/docs/relnotes/11.2.0.html
>>> +++ b/docs/relnotes/11.2.0.html
>>> @@ -47,6 +47,7 @@ Note: some of the new features are only available with
>>> certain drivers.
>>>    <li>GL_ARB_base_instance on freedreno/a4xx</li>
>>>    <li>GL_ARB_compute_shader on i965</li>
>>>    <li>GL_ARB_copy_image on r600</li>
>>> +<li>GL_ARB_indirect_parameters on nvc0</li>
>>>    <li>GL_ARB_shader_draw_parameters on i965, nvc0</li>
>>>    <li>GL_ARB_tessellation_shader on i965 and r600 (evergreen/cayman
>>> only)</li>
>>>    <li>GL_ARB_texture_buffer_object_rgb32 on freedreno/a4xx</li>
>>> diff --git a/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme
>>> b/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme
>>> index 35355ed..4daa57d 100644
>>> --- a/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme
>>> +++ b/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme
>>> @@ -334,3 +334,160 @@ dai_end:
>>>       mov $r6 (add $r6 1)
>>>       exit maddr 0x50e /* VB_INSTANCE_BASE to restore */
>>>       send $r5
>>> +
>>> +/* NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT_COUNT
>>> + *
>>> + * NOTE: Saves and restores VB_ELEMENT,INSTANCE_BASE.
>>> + * Forcefully sets VERTEX_ID_BASE to the value of VB_ELEMENT_BASE.
>>> + *
>>> + * arg     = mode
>>> + * parm[0] = start_drawid
>>> + * parm[1] = numparams
>>> + * parm[2] = totaldraws
>>> + * parm[3 + 5n + 0] = count
>>> + * parm[3 + 5n + 1] = instance_count
>>> + * parm[3 + 5n + 2] = start
>>> + * parm[3 + 5n + 3] = index_bias
>>> + * parm[3 + 5n + 4] = start_instance
>>> + *
>>> + * SCRATCH[0] = saved VB_ELEMENT_BASE
>>> + * SCRATCH[1] = saved VB_INSTANCE_BASE
>>> + * SCRATCH[2] = draws left
>>> + */
>>> +.section #mme9097_draw_elts_indirect_count
>>> +   read $r6 0x50d /* VB_ELEMENT_BASE */
>>> +   read $r7 0x50e /* VB_INSTANCE_BASE */
>>> +   maddr 0x1d00
>>> +   send $r6 /* SCRATCH[0] = VB_ELEMENT_BASE */
>>> +   send $r7 /* SCRATCH[1] = VB_INSTANCE_BASE */
>>> +   parm $r6 /* start_drawid */
>>> +   parm $r7 /* numparams */
>>> +   parm $r5 /* totaldraws */
>>> +   mov $r5 (sub $r5 $r6) /* draws left */
>>> +   braz $r5 #deic_runout
>>> +   mov $r3 (extrinsrt 0x0 $r5 31 1 0) /* extract high bit */
>>> +   branz $r3 #deic_runout
>>> +   send $r5
>>> +deic_draw_again:
>>> +   parm $r3 /* count */
>>> +   parm $r2 /* instance_count */
>>> +   parm $r4 maddr 0x5f7 /* INDEX_BATCH_FIRST, start */
>>> +   parm $r4 send $r4 /* index_bias, send start */
>>> +   maddr 0x18e3 /* CB_POS */
>>> +   send 0x180 /* 256 + 128 */
>>> +   braz $r2 #deic_end
>>> +   parm $r5 send $r4 /* start_instance, send index_bias */
>>> +   send $r5 /* send start_instance */
>>> +   send $r6 /* draw id */
>>> +   maddr 0x150d /* VB_ELEMENT,INSTANCE_BASE */
>>> +   send $r4
>>> +   send $r5
>>> +   maddr 0x446
>>> +   send $r4
>>> +   mov $r4 0x1
>>> +   mov $r1 (extrinsrt $r1 0x0 0 1 26) /* clear INSTANCE_NEXT */
>>> +deic_again:
>>> +   maddr 0x586 /* VERTEX_BEGIN_GL */
>>> +   send $r1 /* mode */
>>> +   maddr 0x5f8 /* INDEX_BATCH_COUNT */
>>> +   send $r3 /* count */
>>> +   mov $r2 (sub $r2 $r4)
>>> +   maddrsend 0x585 /* VERTEX_END_GL */
>>> +   branz $r2 #deic_again
>>> +   mov $r1 (extrinsrt $r1 $r4 0 1 26) /* set INSTANCE_NEXT */
>>> +deic_end:
>>> +   read $r5 0xd02
>>> +   mov $r5 (add $r5 -1)
>>> +   braz $r5 #deic_runout_check
>>> +   mov $r7 (add $r7 -1)
>>> +   maddr 0xd02
>>> +   send $r5
>>> +   branz $r7 #deic_draw_again
>>> +   mov $r6 (add $r6 1)
>>> +deic_restore:
>>> +   read $r6 0xd00
>>> +   read $r7 0xd01
>>> +   maddr 0x150d /* VB_ELEMENT,INSTANCE_BASE */
>>> +   send $r6
>>> +   send $r7
>>> +   exit maddr 0x446
>>> +   send $r6
>>> +deic_runout:
>>> +   parm $r2
>>> +   parm $r2
>>> +   parm $r2
>>> +   parm $r2
>>> +   parm $r2
>>> +   mov $r7 (add $r7 -1)
>>> +deic_runout_check:
>>> +   branz annul $r7 #deic_runout
>>> +   bra annul #deic_restore
>>> +
>>> +/* NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT_COUNT:
>>> + *
>>> + * NOTE: Saves and restores VB_INSTANCE_BASE.
>>> + *
>>> + * arg     = mode
>>> + * parm[0] = start_drawid
>>> + * parm[1] = numparams
>>> + * parm[2] = totaldraws
>>> + * parm[3 + 4n + 0] = count
>>> + * parm[3 + 4n + 1] = instance_count
>>> + * parm[3 + 4n + 2] = start
>>> + * parm[3 + 4n + 3] = start_instance
>>> + *
>>> + * SCRATCH[0] = saved VB_INSTANCE_BASE
>>> + */
>>> +.section #mme9097_draw_arrays_indirect_count
>>> +   read $r5 0x50e /* VB_INSTANCE_BASE */
>>> +   maddr 0xd00
>>> +   parm $r6 send $r5 /* start_drawid, save VB_INSTANCE_BASE */
>>> +   parm $r7 /* numparams */
>>> +   parm $r5 /* totaldraws */
>>> +   mov $r5 (sub $r5 $r6) /* draws left */
>>> +   braz $r5 #daic_runout
>>> +   mov $r3 (extrinsrt 0x0 $r5 31 1 0) /* extract high bit */
>>> +   branz annul $r3 #daic_runout
>>> +daic_draw_again:
>>> +   parm $r2 /* count */
>>> +   parm $r3 /* instance_count */
>>> +   parm $r4 maddr 0x35d /* VERTEX_BUFFER_FIRST, start */
>>> +   braz $r3 #daic_end
>>> +   parm $r4 send $r4 /* start_instance */
>>> +   maddr 0x18e3 /* CB_POS */
>>> +   send 0x180 /* 256 + 128 */
>>> +   send 0x0 /* send 0 as base_vertex */
>>> +   send $r4 /* send start_instance */
>>> +   send $r6 /* draw id */
>>> +   maddr 0x50e /* VB_INSTANCE_BASE */
>>> +   send $r4
>>> +   mov $r4 0x1
>>> +   mov $r1 (extrinsrt $r1 0x0 0 1 26) /* clear INSTANCE_NEXT */
>>> +daic_again:
>>> +   maddr 0x586 /* VERTEX_BEGIN_GL */
>>> +   send $r1 /* mode */
>>> +   maddr 0x35e /* VERTEX_BUFFER_COUNT */
>>> +   send $r2
>>> +   mov $r3 (sub $r3 $r4)
>>> +   maddrsend 0x585 /* VERTEX_END_GL */
>>> +   branz $r3 #daic_again
>>> +   mov $r1 (extrinsrt $r1 $r4 0 1 26) /* set INSTANCE_NEXT */
>>> +daic_end:
>>> +   mov $r5 (add $r5 -1)
>>> +   braz $r5 #daic_runout_check
>>> +   mov $r7 (add $r7 -1)
>>> +   branz $r7 #daic_draw_again
>>> +   mov $r6 (add $r6 1)
>>> +daic_restore:
>>> +   read $r5 0xd00
>>> +   exit maddr 0x50e /* VB_INSTANCE_BASE to restore */
>>> +   send $r5
>>> +daic_runout:
>>> +   parm $r2
>>> +   parm $r2
>>> +   parm $r2
>>> +   parm $r2
>>> +   mov $r7 (add $r7 -1)
>>> +daic_runout_check:
>>> +   branz annul $r7 #daic_runout
>>> +   bra annul #daic_restore
>>> diff --git a/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme.h
>>> b/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme.h
>>> index 0aebeeb..bf8625e 100644
>>> --- a/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme.h
>>> +++ b/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme.h
>>> @@ -207,3 +207,128 @@ uint32_t mme9097_draw_arrays_indirect[] = {
>>>          0x014380a1,
>>>          0x00002841,
>>>    };
>>> +
>>> +uint32_t mme9097_draw_elts_indirect_count[] = {
>>> +       0x01434615,
>>> +       0x01438715,
>>> +       0x07400021,
>>> +/* 0x000d: deic_draw_again */
>>> +       0x00003041,
>>> +       0x00003841,
>>> +       0x00000601,
>>> +       0x00000701,
>>> +/* 0x001e: deic_again */
>>> +       0x00000501,
>>> +       0x0005ad10,
>>> +/* 0x0026: deic_end */
>>> +       0x000b2807,
>>> +       0x007f4312,
>>> +/* 0x002e: deic_restore */
>>> +       0x000a9817,
>>> +       0x00002841,
>>> +/* 0x0035: deic_runout */
>>> +       0x00000301,
>>> +/* 0x003b: deic_runout_check */
>>> +       0x00000201,
>>> +       0x017dc451,
>>> +       0x00002431,
>>> +       0x0638c021,
>>> +       0x00600041,
>>> +       0x0004d007,
>>> +       0x00002531,
>>> +       0x00002841,
>>> +       0x00003041,
>>> +       0x05434021,
>>> +       0x00002041,
>>> +       0x00002841,
>>> +       0x01118021,
>>> +       0x00002041,
>>> +       0x00004411,
>>> +       0xd0400912,
>>> +       0x01618021,
>>> +       0x00000841,
>>> +       0x017e0021,
>>> +       0x00001841,
>>> +       0x00051210,
>>> +       0x01614071,
>>> +       0xfffe9017,
>>> +       0xd0410912,
>>> +       0x03408515,
>>> +       0xffffed11,
>>> +       0x0004e807,
>>> +       0xffffff11,
>>> +       0x03408021,
>>> +       0x00002841,
>>> +       0xfff87817,
>>> +       0x00007611,
>>> +       0x03400615,
>>> +       0x03404715,
>>> +       0x05434021,
>>> +       0x00003041,
>>> +       0x00003841,
>>> +       0x011180a1,
>>> +       0x00003041,
>>> +       0x00000201,
>>> +       0x00000201,
>>> +       0x00000201,
>>> +       0x00000201,
>>> +       0x00000201,
>>> +       0xffffff11,
>>> +       0xfffeb837,
>>> +       0xfffc8027,
>>> +};
>>> +
>>> +uint32_t mme9097_draw_arrays_indirect_count[] = {
>>> +       0x01438515,
>>> +       0x03400021,
>>> +/* 0x0009: daic_draw_again */
>>> +       0x00002e31,
>>> +       0x00000701,
>>> +       0x00000501,
>>> +/* 0x0017: daic_again */
>>> +       0x0005ad10,
>>> +       0x00086807,
>>> +/* 0x001f: daic_end */
>>> +       0x007f4312,
>>> +       0x0007d837,
>>> +/* 0x0024: daic_restore */
>>> +/* 0x0027: daic_runout */
>>> +       0x00000201,
>>> +       0x00000301,
>>> +/* 0x002c: daic_runout_check */
>>> +       0x00d74451,
>>> +       0x0004d807,
>>> +       0x00002431,
>>> +       0x0638c021,
>>> +       0x00600041,
>>> +       0x00000041,
>>> +       0x00002041,
>>> +       0x00003041,
>>> +       0x01438021,
>>> +       0x00002041,
>>> +       0x00004411,
>>> +       0xd0400912,
>>> +       0x01618021,
>>> +       0x00000841,
>>> +       0x00d78021,
>>> +       0x00001041,
>>> +       0x00051b10,
>>> +       0x01614071,
>>> +       0xfffe9817,
>>> +       0xd0410912,
>>> +       0xffffed11,
>>> +       0x00032807,
>>> +       0xffffff11,
>>> +       0xfff9f817,
>>> +       0x00007611,
>>> +       0x03400515,
>>> +       0x014380a1,
>>> +       0x00002841,
>>> +       0x00000201,
>>> +       0x00000201,
>>> +       0x00000201,
>>> +       0x00000201,
>>> +       0xffffff11,
>>> +       0xfffef837,
>>> +       0xfffdc027,
>>> +};
>>> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_macros.h
>>> b/src/gallium/drivers/nouveau/nvc0/nvc0_macros.h
>>> index bf2798a..27c026b 100644
>>> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_macros.h
>>> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_macros.h
>>> @@ -29,4 +29,8 @@
>>>
>>>    #define NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT                  0x00003840
>>>
>>> +#define NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT_COUNT               0x00003848
>>> +
>>> +#define NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT_COUNT             0x00003850
>>> +
>>>    #endif /* __NVC0_MACROS_H__ */
>>> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
>>> b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
>>> index 3aff551..22f7885 100644
>>> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
>>> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
>>> @@ -196,6 +196,7 @@ nvc0_screen_get_param(struct pipe_screen *pscreen,
>>> enum pipe_cap param)
>>>       case PIPE_CAP_CLEAR_TEXTURE:
>>>       case PIPE_CAP_DRAW_PARAMETERS:
>>>       case PIPE_CAP_MULTI_DRAW_INDIRECT:
>>> +   case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS:
>>>          return 1;
>>>       case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
>>>          return (class_3d >= NVE4_3D_CLASS) ? 1 : 0;
>>> @@ -218,7 +219,6 @@ nvc0_screen_get_param(struct pipe_screen *pscreen,
>>> enum pipe_cap param)
>>>       case PIPE_CAP_VERTEXID_NOBASE:
>>>       case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
>>>       case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
>>> -   case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS:
>>>          return 0;
>>>
>>>       case PIPE_CAP_VENDOR_ID:
>>> @@ -1038,6 +1038,8 @@ nvc0_screen_create(struct nouveau_device *dev)
>>>       MK_MACRO(NVC0_3D_MACRO_POLYGON_MODE_BACK, mme9097_poly_mode_back);
>>>       MK_MACRO(NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT,
>>> mme9097_draw_arrays_indirect);
>>>       MK_MACRO(NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT,
>>> mme9097_draw_elts_indirect);
>>> +   MK_MACRO(NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT_COUNT,
>>> mme9097_draw_arrays_indirect_count);
>>> +   MK_MACRO(NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT_COUNT,
>>> mme9097_draw_elts_indirect_count);
>>>
>>>       BEGIN_NVC0(push, NVC0_3D(RASTERIZE_ENABLE), 1);
>>>       PUSH_DATA (push, 1);
>>> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
>>> b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
>>> index 3dbc1ad..a7e1c85 100644
>>> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
>>> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
>>> @@ -807,12 +807,16 @@ nvc0_draw_indirect(struct nvc0_context *nvc0, const
>>> struct pipe_draw_info *info)
>>>    {
>>>       struct nouveau_pushbuf *push = nvc0->base.pushbuf;
>>>       struct nv04_resource *buf = nv04_resource(info->indirect);
>>> +   struct nv04_resource *buf_count =
>>> nv04_resource(info->indirect_params);
>>>       unsigned size, macro, count = info->indirect_count, drawid =
>>> info->drawid;
>>>       uint32_t offset = buf->offset + info->indirect_offset;
>>>
>>>       /* must make FIFO wait for engines idle before continuing to process
>>> */
>>> -   if (buf->fence_wr && !nouveau_fence_signalled(buf->fence_wr))
>>> +   if ((buf->fence_wr && !nouveau_fence_signalled(buf->fence_wr)) ||
>>> +       (buf_count && buf_count->fence_wr &&
>>> +        !nouveau_fence_signalled(buf_count->fence_wr))) {
>>>          IMMED_NVC0(push, SUBC_3D(NV10_SUBCHAN_REF_CNT), 0);
>>> +   }
>>>
>>>       /* Queue things up to let the macros write params to the driver
>>> constbuf */
>>>       BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
>>> @@ -824,7 +828,10 @@ nvc0_draw_indirect(struct nvc0_context *nvc0, const
>>> struct pipe_draw_info *info)
>>>          assert(nvc0->idxbuf.buffer);
>>>          assert(nouveau_resource_mapped_by_gpu(nvc0->idxbuf.buffer));
>>>          size = 5;
>>> -      macro = NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT;
>>> +      if (buf_count)
>>> +         macro = NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT_COUNT;
>>> +      else
>>> +         macro = NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT;
>>>       } else {
>>>          if (nvc0->state.index_bias) {
>>>             /* index_bias is implied 0 if !info->indexed (really ?) */
>>> @@ -833,7 +840,10 @@ nvc0_draw_indirect(struct nvc0_context *nvc0, const
>>> struct pipe_draw_info *info)
>>>             nvc0->state.index_bias = 0;
>>>          }
>>>          size = 4;
>>> -      macro = NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT;
>>> +      if (buf_count)
>>> +         macro = NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT_COUNT;
>>> +      else
>>> +         macro = NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT;
>>>       }
>>>
>>>       /* If the stride is not the natural stride, we have to stick a
>>> separate
>>> @@ -851,12 +861,21 @@ nvc0_draw_indirect(struct nvc0_context *nvc0, const
>>> struct pipe_draw_info *info)
>>>             pushes = draws;
>>>          }
>>>
>>> -      nouveau_pushbuf_space(push, 8, 0, pushes);
>>> +      nouveau_pushbuf_space(push, 16, 0, pushes + !!buf_count);
>>>          PUSH_REFN(push, buf->bo, NOUVEAU_BO_RD | buf->domain);
>>> -      PUSH_DATA(push, NVC0_FIFO_PKHDR_1I(0, macro, 3 + draws * size));
>>> +      if (buf_count)
>>> +         PUSH_REFN(push, buf_count->bo, NOUVEAU_BO_RD |
>>> buf_count->domain);
>>> +      PUSH_DATA(push,
>>> +                NVC0_FIFO_PKHDR_1I(0, macro, 3 + !!buf_count + draws *
>>> size));
>>>          PUSH_DATA(push, nvc0_prim_gl(info->mode));
>>>          PUSH_DATA(push, drawid);
>>>          PUSH_DATA(push, draws);
>>> +      if (buf_count) {
>>> +         nouveau_pushbuf_data(push,
>>> +                              buf_count->bo,
>>> +                              buf_count->offset +
>>> info->indirect_params_offset,
>>> +                              NVC0_IB_ENTRY_1_NO_PREFETCH | 4);
>>> +      }
>>>          if (pushes == 1) {
>>>             nouveau_pushbuf_data(push,
>>>                                  buf->bo, offset,
>>>
>>


More information about the mesa-dev mailing list