[Mesa-dev] [PATCH 07/12] nvc0: add support for indirect compute on Fermi

Samuel Pitoiset samuel.pitoiset at gmail.com
Sun Feb 7 10:51:50 UTC 2016



On 02/07/2016 05:56 AM, Ilia Mirkin wrote:
> On Sat, Feb 6, 2016 at 6:13 PM, Ilia Mirkin <imirkin at alum.mit.edu> wrote:
>> On Sat, Feb 6, 2016 at 5:38 PM, Samuel Pitoiset
>> <samuel.pitoiset at gmail.com> wrote:
>>> When indirect compute is used, the size of the grid (in blocks) is
>>> stored as three integers inside a buffer. This requires a macro to
>>> set up GRIDDIM_YX and GRIDDIM_Z.
>>>
>>> Signed-off-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
>>> ---
>>>   src/gallium/drivers/nouveau/nvc0/mme/Makefile      |  2 +-
>>>   src/gallium/drivers/nouveau/nvc0/mme/com90c0.mme   | 19 +++++++++++++++++++
>>>   src/gallium/drivers/nouveau/nvc0/mme/com90c0.mme.h | 13 +++++++++++++
>>>   src/gallium/drivers/nouveau/nvc0/nvc0_compute.c    | 18 +++++++++++++++---
>>>   src/gallium/drivers/nouveau/nvc0/nvc0_macros.h     |  2 ++
>>>   src/gallium/drivers/nouveau/nvc0/nvc0_screen.c     |  2 ++
>>>   6 files changed, 52 insertions(+), 4 deletions(-)
>>>   create mode 100644 src/gallium/drivers/nouveau/nvc0/mme/com90c0.mme
>>>   create mode 100644 src/gallium/drivers/nouveau/nvc0/mme/com90c0.mme.h
>>>
>>> diff --git a/src/gallium/drivers/nouveau/nvc0/mme/Makefile b/src/gallium/drivers/nouveau/nvc0/mme/Makefile
>>> index 1c0f583..52fb0a5 100644
>>> --- a/src/gallium/drivers/nouveau/nvc0/mme/Makefile
>>> +++ b/src/gallium/drivers/nouveau/nvc0/mme/Makefile
>>> @@ -1,5 +1,5 @@
>>>   ENVYAS?=envyas
>>> -TARGETS=com9097.mme.h
>>> +TARGETS=com9097.mme.h com90c0.mme.h
>>>
>>>   all: $(TARGETS)
>>>
>>> diff --git a/src/gallium/drivers/nouveau/nvc0/mme/com90c0.mme b/src/gallium/drivers/nouveau/nvc0/mme/com90c0.mme
>>> new file mode 100644
>>> index 0000000..ee7f726
>>> --- /dev/null
>>> +++ b/src/gallium/drivers/nouveau/nvc0/mme/com90c0.mme
>>> @@ -0,0 +1,19 @@
>>> +/* NVC0_COMPUTE_MACRO_LAUNCH_GRID_INDIRECT
>>> + *
>>> + * arg     = num_groups_x
>>> + * parm[0] = num_groups_y
>>> + * parm[1] = num_groups_z
>>> + */
>>> +.section #mme90c0_launch_grid_indirect
>>> +   parm $r2
>>> +   parm $r3
>>> +   mov $r4 (or $r1 $r2)
>>> +   mov $r4 (or $r3 $r4)
>>> +   braz $r4 #fail
>>> +   maddr 0x108e /* GRIDDIM_YX */
>>
>> You can move this up, e.g.
>>
>> parm $r2 maddr 0x108e /* GRIDDIM_YX */
>>
>>> +   mov $r4 (extrshl $r2 $r0 0x10 0x10)
>>
>> If you make this
>>
>> (extrinsrt $r1 $r2 0x0 0x10 0x10)
>
> Oh and even better, do this as part of the computation that precedes
> the braz, that way you save another op :)

Hmm? How can this still be reduced? Currently I have:

.section #mme90c0_launch_grid_indirect
    parm $r2 maddr 0x108e /* GRIDDIM_YX */
    parm $r3
    mov $r4 (or $r1 $r2)
    mov $r4 (or $r3 $r4)
    braz $r4 #fail
    exit send (extrinsrt $r1 $r2 0x0 0x10 0x10) /* (num_groups_y << 16) | num_groups_x */
    send $r3
fail:
    nop
    exit

>
>>
>> then you can make it directly an argument to send, avoiding the separate or.
>>
>>> +   exit send (or $r4 $r1) /* (num_groups_y << 16) | num_groups_x */
>>> +   send $r3
>>> +fail:
>>> +   exit
>>
>> I think you need a nop here.
>>
>>> +
>>> diff --git a/src/gallium/drivers/nouveau/nvc0/mme/com90c0.mme.h b/src/gallium/drivers/nouveau/nvc0/mme/com90c0.mme.h
>>> new file mode 100644
>>
>> I think Emil is going to yell at you about not adding this file to
>> some list somewhere so that make dist picks it up.
>>
>>> index 0000000..89076cf
>>> --- /dev/null
>>> +++ b/src/gallium/drivers/nouveau/nvc0/mme/com90c0.mme.h
>>> @@ -0,0 +1,13 @@
>>> +uint32_t mme90c0_launch_grid_indirect[] = {
>>> +       0x00000201,
>>> +       0x00000301,
>>> +/* 0x0009: fail */
>>> +       0x00128c10,
>>> +       0x00131c10,
>>> +       0x00016007,
>>> +       0x04238021,
>>> +       0x84008413,
>>> +       0x001260c0,
>>> +       0x00001841,
>>> +       0x00000091,
>>> +};
>>> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
>>> index e63bdcb..dbf2148 100644
>>> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
>>> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
>>> @@ -452,9 +452,21 @@ nvc0_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info)
>>>      PUSH_DATA (push, cp->num_gprs);
>>>
>>>      /* grid/block setup */
>>> -   BEGIN_NVC0(push, NVC0_COMPUTE(GRIDDIM_YX), 2);
>>> -   PUSH_DATA (push, (info->grid[1] << 16) | info->grid[0]);
>>> -   PUSH_DATA (push, info->grid[2]);
>>> +   if (unlikely(info->indirect)) {
>>> +      struct nv04_resource *res = nv04_resource(info->indirect);
>>> +      uint32_t offset = res->offset + info->indirect_offset;
>>> +      unsigned macro = NVC0_COMPUTE_MACRO_LAUNCH_GRID_INDIRECT;
>>> +
>>> +      nouveau_pushbuf_space(push, 16, 0, 1);
>>> +      PUSH_REFN(push, res->bo, NOUVEAU_BO_RD | res->domain);
>>> +      PUSH_DATA(push, NVC0_FIFO_PKHDR_1I(1, macro, 3));
>>> +      nouveau_pushbuf_data(push, res->bo, offset,
>>> +                           NVC0_IB_ENTRY_1_NO_PREFETCH | 3 * 4);
>>> +   } else {
>>> +      BEGIN_NVC0(push, NVC0_COMPUTE(GRIDDIM_YX), 2);
>>> +      PUSH_DATA (push, (info->grid[1] << 16) | info->grid[0]);
>>> +      PUSH_DATA (push, info->grid[2]);
>>> +   }
>>>      BEGIN_NVC0(push, NVC0_COMPUTE(BLOCKDIM_YX), 2);
>>>      PUSH_DATA (push, (info->block[1] << 16) | info->block[0]);
>>>      PUSH_DATA (push, info->block[2]);
>>> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_macros.h b/src/gallium/drivers/nouveau/nvc0/nvc0_macros.h
>>> index 49e176c..57262fe 100644
>>> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_macros.h
>>> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_macros.h
>>> @@ -35,4 +35,6 @@
>>>
>>>   #define NVC0_3D_MACRO_QUERY_BUFFER_WRITE                       0x00003858
>>>
>>> +#define NVC0_COMPUTE_MACRO_LAUNCH_GRID_INDIRECT       0x00003860
>>> +
>>>   #endif /* __NVC0_MACROS_H__ */
>>> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
>>> index 84e4253..85be1cc 100644
>>> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
>>> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
>>> @@ -36,6 +36,7 @@
>>>   #include "nvc0/nvc0_screen.h"
>>>
>>>   #include "nvc0/mme/com9097.mme.h"
>>> +#include "nvc0/mme/com90c0.mme.h"
>>>
>>>   static boolean
>>>   nvc0_screen_is_format_supported(struct pipe_screen *pscreen,
>>> @@ -1053,6 +1054,7 @@ nvc0_screen_create(struct nouveau_device *dev)
>>>      MK_MACRO(NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT_COUNT, mme9097_draw_arrays_indirect_count);
>>>      MK_MACRO(NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT_COUNT, mme9097_draw_elts_indirect_count);
>>>      MK_MACRO(NVC0_3D_MACRO_QUERY_BUFFER_WRITE, mme9097_query_buffer_write);
>>> +   MK_MACRO(NVC0_COMPUTE_MACRO_LAUNCH_GRID_INDIRECT, mme90c0_launch_grid_indirect);
>>>
>>>      BEGIN_NVC0(push, NVC0_3D(RASTERIZE_ENABLE), 1);
>>>      PUSH_DATA (push, 1);
>>> --
>>> 2.6.4
>>>
>>> _______________________________________________
>>> mesa-dev mailing list
>>> mesa-dev at lists.freedesktop.org
>>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev


More information about the mesa-dev mailing list