[Mesa-dev] [PATCH] r600g: Implement GL_ARB_draw_indirect

Marek Olšák maraeo at gmail.com
Sun Nov 9 04:53:29 PST 2014


This might not always work, for the following reasons:

These variables shouldn't be used anywhere if info.indirect != NULL:
- info.start
- info.count
- info.index_bias
For example, the translation of 8-bit indices is broken.

The code which uses the following variables has no effect if info.indirect !=
NULL. For clarity, we shouldn't execute that code at all in that case:
- info.start_instance
- info.instance_count

In get_param, you can just use "return the_bool_expression", i.e.
"return family >= CHIP_CEDAR && rscreen->b.info.drm_minor >= 41;".

Marek


On Sat, Nov 8, 2014 at 11:52 PM, Glenn Kennard <glenn.kennard at gmail.com> wrote:
> Requires evergreen/cayman, and updated radeon kernel module.
>
> Signed-off-by: Glenn Kennard <glenn.kennard at gmail.com>
> ---
> See also kernel side patch sent to dri-devel at lists.freedesktop.org
>
>  docs/GL3.txt                                 |  4 +-
>  docs/relnotes/10.4.html                      |  1 +
>  src/gallium/drivers/r600/evergreend.h        |  7 ++-
>  src/gallium/drivers/r600/r600_pipe.c         |  6 ++-
>  src/gallium/drivers/r600/r600_state_common.c | 80 ++++++++++++++++++++++------
>  5 files changed, 77 insertions(+), 21 deletions(-)
>
> diff --git a/docs/GL3.txt b/docs/GL3.txt
> index 2854431..06c52f9 100644
> --- a/docs/GL3.txt
> +++ b/docs/GL3.txt
> @@ -95,7 +95,7 @@ GL 3.3, GLSL 3.30 --- all DONE: i965, nv50, nvc0, r600, radeonsi, llvmpipe, soft
>  GL 4.0, GLSL 4.00:
>
>    GL_ARB_draw_buffers_blend                            DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe)
> -  GL_ARB_draw_indirect                                 DONE (i965, nvc0, radeonsi, llvmpipe, softpipe)
> +  GL_ARB_draw_indirect                                 DONE (i965, nvc0, r600, radeonsi, llvmpipe, softpipe)
>    GL_ARB_gpu_shader5                                   DONE (i965, nvc0)
>    - 'precise' qualifier                                DONE
>    - Dynamically uniform sampler array indices          DONE (r600)
> @@ -159,7 +159,7 @@ GL 4.3, GLSL 4.30:
>    GL_ARB_framebuffer_no_attachments                    not started
>    GL_ARB_internalformat_query2                         not started
>    GL_ARB_invalidate_subdata                            DONE (all drivers)
> -  GL_ARB_multi_draw_indirect                           DONE (i965, nvc0, radeonsi, llvmpipe, softpipe)
> +  GL_ARB_multi_draw_indirect                           DONE (i965, nvc0, r600, radeonsi, llvmpipe, softpipe)
>    GL_ARB_program_interface_query                       not started
>    GL_ARB_robust_buffer_access_behavior                 not started
>    GL_ARB_shader_image_size                             not started
> diff --git a/docs/relnotes/10.4.html b/docs/relnotes/10.4.html
> index d0fbd3b..9c2a491 100644
> --- a/docs/relnotes/10.4.html
> +++ b/docs/relnotes/10.4.html
> @@ -49,6 +49,7 @@ Note: some of the new features are only available with certain drivers.
>  <li>GL_ARB_texture_view on nv50, nvc0</li>
>  <li>GL_ARB_clip_control on llvmpipe, softpipe, r300, r600, radeonsi</li>
>  <li>GL_KHR_context_flush_control on all drivers</li>
> +<li>GL_ARB_draw_indirect, GL_ARB_multi_draw_indirect on r600</li>
>  </ul>
>
>
> diff --git a/src/gallium/drivers/r600/evergreend.h b/src/gallium/drivers/r600/evergreend.h
> index 4989996..b8880c8 100644
> --- a/src/gallium/drivers/r600/evergreend.h
> +++ b/src/gallium/drivers/r600/evergreend.h
> @@ -64,6 +64,8 @@
>  #define R600_TEXEL_PITCH_ALIGNMENT_MASK        0x7
>
>  #define PKT3_NOP                               0x10
> +#define PKT3_SET_BASE                          0x11
> +#define PKT3_INDEX_BUFFER_SIZE                 0x13
>  #define PKT3_DEALLOC_STATE                     0x14
>  #define PKT3_DISPATCH_DIRECT                   0x15
>  #define PKT3_DISPATCH_INDIRECT                 0x16
> @@ -72,12 +74,15 @@
>  #define PKT3_REG_RMW                           0x21
>  #define PKT3_COND_EXEC                         0x22
>  #define PKT3_PRED_EXEC                         0x23
> -#define PKT3_START_3D_CMDBUF                   0x24
> +#define PKT3_DRAW_INDIRECT                     0x24
> +#define PKT3_DRAW_INDEX_INDIRECT               0x25
> +#define PKT3_INDEX_BASE                        0x26
>  #define PKT3_DRAW_INDEX_2                      0x27
>  #define PKT3_CONTEXT_CONTROL                   0x28
>  #define PKT3_DRAW_INDEX_IMMD_BE                0x29
>  #define PKT3_INDEX_TYPE                        0x2A
>  #define PKT3_DRAW_INDEX                        0x2B
> +#define PKT3_DRAW_INDIRECT_MULTI               0x2C
>  #define PKT3_DRAW_INDEX_AUTO                   0x2D
>  #define PKT3_DRAW_INDEX_IMMD                   0x2E
>  #define PKT3_NUM_INSTANCES                     0x2F
> diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c
> index 0b571e4..829deaf 100644
> --- a/src/gallium/drivers/r600/r600_pipe.c
> +++ b/src/gallium/drivers/r600/r600_pipe.c
> @@ -313,6 +313,11 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
>                 return family >= CHIP_CEDAR ? 1 : 0;
>         case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
>                 return family >= CHIP_CEDAR ? 4 : 0;
> +       case PIPE_CAP_DRAW_INDIRECT:
> +               /* needs kernel command checking support to work */
> +               if (family >= CHIP_CEDAR && rscreen->b.info.drm_minor >= 41)
> +                       return 1;
> +               return 0;
>
>         /* Unsupported features. */
>         case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
> @@ -322,7 +327,6 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
>         case PIPE_CAP_VERTEX_COLOR_CLAMPED:
>         case PIPE_CAP_USER_VERTEX_BUFFERS:
>         case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
> -       case PIPE_CAP_DRAW_INDIRECT:
>         case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
>         case PIPE_CAP_SAMPLER_VIEW_TARGET:
>                 return 0;
> diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
> index c3f21cb..649bf24 100644
> --- a/src/gallium/drivers/r600/r600_state_common.c
> +++ b/src/gallium/drivers/r600/r600_state_common.c
> @@ -1362,7 +1362,7 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
>         unsigned i;
>         struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
>
> -       if (!info.count && (info.indexed || !info.count_from_stream_output)) {
> +       if (!info.indirect && !info.count && (info.indexed || !info.count_from_stream_output)) {
>                 return;
>         }
>
> @@ -1391,7 +1391,7 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
>                 ib.offset = rctx->index_buffer.offset + info.start * ib.index_size;
>
>                 /* Translate 8-bit indices to 16-bit. */
> -               if (ib.index_size == 1) {
> +               if (unlikely(ib.index_size == 1)) {
>                         struct pipe_resource *out_buffer = NULL;
>                         unsigned out_offset;
>                         void *ptr;
> @@ -1414,7 +1414,8 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
>                  * and the indices are emitted via PKT3_DRAW_INDEX_IMMD.
>                  * Note: Instanced rendering in combination with immediate indices hangs. */
>                 if (ib.user_buffer && (R600_BIG_ENDIAN || info.instance_count > 1 ||
> -                                      info.count*ib.index_size > 20)) {
> +                                      info.count*ib.index_size > 20 ||
> +                                      info.indirect)) {
>                         u_upload_data(rctx->b.uploader, 0, info.count * ib.index_size,
>                                       ib.user_buffer, &ib.offset, &ib.buffer);
>                         ib.user_buffer = NULL;
> @@ -1521,6 +1522,21 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
>         /* Draw packets. */
>         cs->buf[cs->cdw++] = PKT3(PKT3_NUM_INSTANCES, 0, rctx->b.predicate_drawing);
>         cs->buf[cs->cdw++] = info.instance_count;
> +
> +       if (unlikely(info.indirect)) {
> +               uint64_t va = r600_resource(info.indirect)->gpu_address;
> +               assert(rctx->b.chip_class >= EVERGREEN);
> +               cs->buf[cs->cdw++] = PKT3(0x11 /* PKT3_SET_BASE */, 2, rctx->b.predicate_drawing);
> +               cs->buf[cs->cdw++] = 1; // 1 means DX11 Draw_Index_Indirect Patch Table Base
> +               cs->buf[cs->cdw++] = va;
> +               cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF;
> +
> +               cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, rctx->b.predicate_drawing);
> +               cs->buf[cs->cdw++] = r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx,
> +                                                          (struct r600_resource*)info.indirect,
> +                                                          RADEON_USAGE_READ, RADEON_PRIO_MIN);
> +       }
> +
>         if (info.indexed) {
>                 cs->buf[cs->cdw++] = PKT3(PKT3_INDEX_TYPE, 0, rctx->b.predicate_drawing);
>                 cs->buf[cs->cdw++] = ib.index_size == 4 ?
> @@ -1537,18 +1553,40 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
>                         cs->cdw += size_dw;
>                 } else {
>                         uint64_t va = r600_resource(ib.buffer)->gpu_address + ib.offset;
> -                       cs->buf[cs->cdw++] = PKT3(PKT3_DRAW_INDEX, 3, rctx->b.predicate_drawing);
> -                       cs->buf[cs->cdw++] = va;
> -                       cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF;
> -                       cs->buf[cs->cdw++] = info.count;
> -                       cs->buf[cs->cdw++] = V_0287F0_DI_SRC_SEL_DMA;
> -                       cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, rctx->b.predicate_drawing);
> -                       cs->buf[cs->cdw++] = r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx,
> -                                                                  (struct r600_resource*)ib.buffer,
> -                                                                  RADEON_USAGE_READ, RADEON_PRIO_MIN);
> +
> +                       if (likely(!info.indirect)) {
> +                               cs->buf[cs->cdw++] = PKT3(PKT3_DRAW_INDEX, 3, rctx->b.predicate_drawing);
> +                               cs->buf[cs->cdw++] = va;
> +                               cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF;
> +                               cs->buf[cs->cdw++] = info.count;
> +                               cs->buf[cs->cdw++] = V_0287F0_DI_SRC_SEL_DMA;
> +                               cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, rctx->b.predicate_drawing);
> +                               cs->buf[cs->cdw++] = r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx,
> +                                                                          (struct r600_resource*)ib.buffer,
> +                                                                          RADEON_USAGE_READ, RADEON_PRIO_MIN);
> +                       }
> +                       else {
> +                               uint32_t max_size = (ib.buffer->width0 - ib.offset) / ib.index_size;
> +
> +                               cs->buf[cs->cdw++] = PKT3(0x26 /* PKT3_INDEX_BASE */, 1, rctx->b.predicate_drawing);
> +                               cs->buf[cs->cdw++] = va;
> +                               cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF;
> +
> +                               cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, rctx->b.predicate_drawing);
> +                               cs->buf[cs->cdw++] = r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx,
> +                                                                          (struct r600_resource*)ib.buffer,
> +                                                                          RADEON_USAGE_READ, RADEON_PRIO_MIN);
> +
> +                               cs->buf[cs->cdw++] = PKT3(0x13 /* PKT3_INDEX_BUFFER_SIZE */, 0, rctx->b.predicate_drawing);
> +                               cs->buf[cs->cdw++] = max_size;
> +
> +                               cs->buf[cs->cdw++] = PKT3(0x25 /* PKT3_DRAW_INDEX_INDIRECT */, 1, rctx->b.predicate_drawing);
> +                               cs->buf[cs->cdw++] = info.indirect_offset;
> +                               cs->buf[cs->cdw++] = V_0287F0_DI_SRC_SEL_DMA;
> +                       }
>                 }
>         } else {
> -               if (info.count_from_stream_output) {
> +               if (unlikely(info.count_from_stream_output)) {
>                         struct r600_so_target *t = (struct r600_so_target*)info.count_from_stream_output;
>                         uint64_t va = t->buf_filled_size->gpu_address + t->buf_filled_size_offset;
>
> @@ -1567,10 +1605,18 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
>                                                                    RADEON_PRIO_MIN);
>                 }
>
> -               cs->buf[cs->cdw++] = PKT3(PKT3_DRAW_INDEX_AUTO, 1, rctx->b.predicate_drawing);
> -               cs->buf[cs->cdw++] = info.count;
> -               cs->buf[cs->cdw++] = V_0287F0_DI_SRC_SEL_AUTO_INDEX |
> -                                       (info.count_from_stream_output ? S_0287F0_USE_OPAQUE(1) : 0);
> +               if (likely(!info.indirect)) {
> +                       cs->buf[cs->cdw++] = PKT3(PKT3_DRAW_INDEX_AUTO, 1, rctx->b.predicate_drawing);
> +                       cs->buf[cs->cdw++] = info.count;
> +                       cs->buf[cs->cdw++] = V_0287F0_DI_SRC_SEL_AUTO_INDEX |
> +                                               (info.count_from_stream_output ? S_0287F0_USE_OPAQUE(1) : 0);
> +               }
> +               else {
> +                       cs->buf[cs->cdw++] = PKT3(0x24 /* PKT3_DRAW_INDIRECT */, 1, rctx->b.predicate_drawing);
> +                       cs->buf[cs->cdw++] = info.indirect_offset;
> +                       cs->buf[cs->cdw++] = V_0287F0_DI_SRC_SEL_AUTO_INDEX |
> +                                               (info.count_from_stream_output ? S_0287F0_USE_OPAQUE(1) : 0);
> +               }
>         }
>
>         if (rctx->screen->b.trace_bo) {
> --
> 1.9.1
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev


More information about the mesa-dev mailing list