[Mesa-dev] [PATCH 07/12] nvc0: add support for indirect compute on Fermi
Ilia Mirkin
imirkin at alum.mit.edu
Sat Feb 6 23:13:29 UTC 2016
On Sat, Feb 6, 2016 at 5:38 PM, Samuel Pitoiset
<samuel.pitoiset at gmail.com> wrote:
> When indirect compute is used, the size of the grid (in blocks) is
> stored as three integers inside a buffer. This requires a macro to
> set up GRIDDIM_YX and GRIDDIM_Z.
>
> Signed-off-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
> ---
> src/gallium/drivers/nouveau/nvc0/mme/Makefile | 2 +-
> src/gallium/drivers/nouveau/nvc0/mme/com90c0.mme | 19 +++++++++++++++++++
> src/gallium/drivers/nouveau/nvc0/mme/com90c0.mme.h | 13 +++++++++++++
> src/gallium/drivers/nouveau/nvc0/nvc0_compute.c | 18 +++++++++++++++---
> src/gallium/drivers/nouveau/nvc0/nvc0_macros.h | 2 ++
> src/gallium/drivers/nouveau/nvc0/nvc0_screen.c | 2 ++
> 6 files changed, 52 insertions(+), 4 deletions(-)
> create mode 100644 src/gallium/drivers/nouveau/nvc0/mme/com90c0.mme
> create mode 100644 src/gallium/drivers/nouveau/nvc0/mme/com90c0.mme.h
>
> diff --git a/src/gallium/drivers/nouveau/nvc0/mme/Makefile b/src/gallium/drivers/nouveau/nvc0/mme/Makefile
> index 1c0f583..52fb0a5 100644
> --- a/src/gallium/drivers/nouveau/nvc0/mme/Makefile
> +++ b/src/gallium/drivers/nouveau/nvc0/mme/Makefile
> @@ -1,5 +1,5 @@
> ENVYAS?=envyas
> -TARGETS=com9097.mme.h
> +TARGETS=com9097.mme.h com90c0.mme.h
>
> all: $(TARGETS)
>
> diff --git a/src/gallium/drivers/nouveau/nvc0/mme/com90c0.mme b/src/gallium/drivers/nouveau/nvc0/mme/com90c0.mme
> new file mode 100644
> index 0000000..ee7f726
> --- /dev/null
> +++ b/src/gallium/drivers/nouveau/nvc0/mme/com90c0.mme
> @@ -0,0 +1,19 @@
> +/* NVC0_COMPUTE_MACRO_LAUNCH_GRID_INDIRECT
> + *
> + * arg = num_groups_x
> + * parm[0] = num_groups_y
> + * parm[1] = num_groups_z
> + */
> +.section #mme90c0_launch_grid_indirect
> + parm $r2
> + parm $r3
> + mov $r4 (or $r1 $r2)
> + mov $r4 (or $r3 $r4)
> + braz $r4 #fail
> + maddr 0x108e /* GRIDDIM_YX */
You can move this up, e.g.
parm $r2 maddr 0x108e /* GRIDDIM_YX */
> + mov $r4 (extrshl $r2 $r0 0x10 0x10)
If you change this to
(extrinsrt $r1 $r2 0x0 0x10 0x10)
then you can pass it directly as the argument to send, avoiding the separate or.
> + exit send (or $r4 $r1) /* (num_groups_y << 16) | num_groups_x */
> + send $r3
> +fail:
> + exit
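I think you need a nop here: the instruction following an exit is still
executed (just like the "send $r3" after "exit send" above), so the exit
must not be the last instruction of the macro.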
> +
> diff --git a/src/gallium/drivers/nouveau/nvc0/mme/com90c0.mme.h b/src/gallium/drivers/nouveau/nvc0/mme/com90c0.mme.h
> new file mode 100644
I think Emil is going to yell at you about not adding this file to
some list somewhere so that make dist picks it up.
> index 0000000..89076cf
> --- /dev/null
> +++ b/src/gallium/drivers/nouveau/nvc0/mme/com90c0.mme.h
> @@ -0,0 +1,13 @@
> +uint32_t mme90c0_launch_grid_indirect[] = {
> + 0x00000201,
> + 0x00000301,
> +/* 0x0009: fail */
> + 0x00128c10,
> + 0x00131c10,
> + 0x00016007,
> + 0x04238021,
> + 0x84008413,
> + 0x001260c0,
> + 0x00001841,
> + 0x00000091,
> +};
> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
> index e63bdcb..dbf2148 100644
> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
> @@ -452,9 +452,21 @@ nvc0_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info)
> PUSH_DATA (push, cp->num_gprs);
>
> /* grid/block setup */
> - BEGIN_NVC0(push, NVC0_COMPUTE(GRIDDIM_YX), 2);
> - PUSH_DATA (push, (info->grid[1] << 16) | info->grid[0]);
> - PUSH_DATA (push, info->grid[2]);
> + if (unlikely(info->indirect)) {
> + struct nv04_resource *res = nv04_resource(info->indirect);
> + uint32_t offset = res->offset + info->indirect_offset;
> + unsigned macro = NVC0_COMPUTE_MACRO_LAUNCH_GRID_INDIRECT;
> +
> + nouveau_pushbuf_space(push, 16, 0, 1);
> + PUSH_REFN(push, res->bo, NOUVEAU_BO_RD | res->domain);
> + PUSH_DATA(push, NVC0_FIFO_PKHDR_1I(1, macro, 3));
> + nouveau_pushbuf_data(push, res->bo, offset,
> + NVC0_IB_ENTRY_1_NO_PREFETCH | 3 * 4);
> + } else {
> + BEGIN_NVC0(push, NVC0_COMPUTE(GRIDDIM_YX), 2);
> + PUSH_DATA (push, (info->grid[1] << 16) | info->grid[0]);
> + PUSH_DATA (push, info->grid[2]);
> + }
> BEGIN_NVC0(push, NVC0_COMPUTE(BLOCKDIM_YX), 2);
> PUSH_DATA (push, (info->block[1] << 16) | info->block[0]);
> PUSH_DATA (push, info->block[2]);
> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_macros.h b/src/gallium/drivers/nouveau/nvc0/nvc0_macros.h
> index 49e176c..57262fe 100644
> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_macros.h
> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_macros.h
> @@ -35,4 +35,6 @@
>
> #define NVC0_3D_MACRO_QUERY_BUFFER_WRITE 0x00003858
>
> +#define NVC0_COMPUTE_MACRO_LAUNCH_GRID_INDIRECT 0x00003860
> +
> #endif /* __NVC0_MACROS_H__ */
> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
> index 84e4253..85be1cc 100644
> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
> @@ -36,6 +36,7 @@
> #include "nvc0/nvc0_screen.h"
>
> #include "nvc0/mme/com9097.mme.h"
> +#include "nvc0/mme/com90c0.mme.h"
>
> static boolean
> nvc0_screen_is_format_supported(struct pipe_screen *pscreen,
> @@ -1053,6 +1054,7 @@ nvc0_screen_create(struct nouveau_device *dev)
> MK_MACRO(NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT_COUNT, mme9097_draw_arrays_indirect_count);
> MK_MACRO(NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT_COUNT, mme9097_draw_elts_indirect_count);
> MK_MACRO(NVC0_3D_MACRO_QUERY_BUFFER_WRITE, mme9097_query_buffer_write);
> + MK_MACRO(NVC0_COMPUTE_MACRO_LAUNCH_GRID_INDIRECT, mme90c0_launch_grid_indirect);
>
> BEGIN_NVC0(push, NVC0_3D(RASTERIZE_ENABLE), 1);
> PUSH_DATA (push, 1);
> --
> 2.6.4
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
More information about the mesa-dev mailing list