<div dir="ltr"><div><div><div><div><div>Hello,<br><br></div>You have added some #define constants, but you don't use them everywhere. For instance:<br> cs->buf[cs->cdw++] = PKT3(0x24 /* PKT3_DRAW_INDIRECT */, 1, rctx->b.predicate_drawing);<br></div>instead of simply:<br> cs->buf[cs->cdw++] = PKT3(PKT3_DRAW_INDIRECT, 1, rctx->b.predicate_drawing);<br><br></div>There are 5 instances like that.<br><br></div>Regards.<br><br></div>- Benjamin<br></div><div class="gmail_extra"><br><div class="gmail_quote">2014-11-08 23:52 GMT+01:00 Glenn Kennard <span dir="ltr"><<a href="mailto:glenn.kennard@gmail.com" target="_blank">glenn.kennard@gmail.com</a>></span>:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">Requires evergreen/cayman, and updated radeon kernel module.<br>
<br>
Signed-off-by: Glenn Kennard <<a href="mailto:glenn.kennard@gmail.com">glenn.kennard@gmail.com</a>><br>
---<br>
See also kernel side patch sent to <a href="mailto:dri-devel@lists.freedesktop.org">dri-devel@lists.freedesktop.org</a><br>
<br>
 docs/GL3.txt                                 |  4 +-<br>
 docs/relnotes/10.4.html                      |  1 +<br>
 src/gallium/drivers/r600/evergreend.h        |  7 ++-<br>
 src/gallium/drivers/r600/r600_pipe.c         |  6 ++-<br>
 src/gallium/drivers/r600/r600_state_common.c | 80 ++++++++++++++++++++++------<br>
 5 files changed, 77 insertions(+), 21 deletions(-)<br>
<br>
diff --git a/docs/GL3.txt b/docs/GL3.txt<br>
index 2854431..06c52f9 100644<br>
--- a/docs/GL3.txt<br>
+++ b/docs/GL3.txt<br>
@@ -95,7 +95,7 @@ GL 3.3, GLSL 3.30 --- all DONE: i965, nv50, nvc0, r600, radeonsi, llvmpipe, soft<br>
 GL 4.0, GLSL 4.00:<br>
<br>
   GL_ARB_draw_buffers_blend                            DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe)<br>
-  GL_ARB_draw_indirect                                 DONE (i965, nvc0, radeonsi, llvmpipe, softpipe)<br>
+  GL_ARB_draw_indirect                                 DONE (i965, nvc0, r600, radeonsi, llvmpipe, softpipe)<br>
   GL_ARB_gpu_shader5                                   DONE (i965, nvc0)<br>
   - 'precise' qualifier                                DONE<br>
   - Dynamically uniform sampler array indices          DONE (r600)<br>
@@ -159,7 +159,7 @@ GL 4.3, GLSL 4.30:<br>
   GL_ARB_framebuffer_no_attachments                    not started<br>
   GL_ARB_internalformat_query2                         not started<br>
   GL_ARB_invalidate_subdata                            DONE (all drivers)<br>
-  GL_ARB_multi_draw_indirect                           DONE (i965, nvc0, radeonsi, llvmpipe, softpipe)<br>
+  GL_ARB_multi_draw_indirect                           DONE (i965, nvc0, r600, radeonsi, llvmpipe, softpipe)<br>
   GL_ARB_program_interface_query                       not started<br>
   GL_ARB_robust_buffer_access_behavior                 not started<br>
   GL_ARB_shader_image_size                             not started<br>
diff --git a/docs/relnotes/10.4.html b/docs/relnotes/10.4.html<br>
index d0fbd3b..9c2a491 100644<br>
--- a/docs/relnotes/10.4.html<br>
+++ b/docs/relnotes/10.4.html<br>
@@ -49,6 +49,7 @@ Note: some of the new features are only available with certain drivers.<br>
 <li>GL_ARB_texture_view on nv50, nvc0</li><br>
 <li>GL_ARB_clip_control on llvmpipe, softpipe, r300, r600, radeonsi</li><br>
 <li>GL_KHR_context_flush_control on all drivers</li><br>
+<li>GL_ARB_draw_indirect, GL_ARB_multi_draw_indirect on r600</li><br>
 </ul><br>
<br>
<br>
diff --git a/src/gallium/drivers/r600/evergreend.h b/src/gallium/drivers/r600/evergreend.h<br>
index 4989996..b8880c8 100644<br>
--- a/src/gallium/drivers/r600/evergreend.h<br>
+++ b/src/gallium/drivers/r600/evergreend.h<br>
@@ -64,6 +64,8 @@<br>
 #define R600_TEXEL_PITCH_ALIGNMENT_MASK        0x7<br>
<br>
 #define PKT3_NOP                               0x10<br>
+#define PKT3_SET_BASE                          0x11<br>
+#define PKT3_INDEX_BUFFER_SIZE                 0x13<br>
 #define PKT3_DEALLOC_STATE                     0x14<br>
 #define PKT3_DISPATCH_DIRECT                   0x15<br>
 #define PKT3_DISPATCH_INDIRECT                 0x16<br>
@@ -72,12 +74,15 @@<br>
 #define PKT3_REG_RMW                           0x21<br>
 #define PKT3_COND_EXEC                         0x22<br>
 #define PKT3_PRED_EXEC                         0x23<br>
-#define PKT3_START_3D_CMDBUF                   0x24<br>
+#define PKT3_DRAW_INDIRECT                     0x24<br>
+#define PKT3_DRAW_INDEX_INDIRECT               0x25<br>
+#define PKT3_INDEX_BASE                        0x26<br>
 #define PKT3_DRAW_INDEX_2                      0x27<br>
 #define PKT3_CONTEXT_CONTROL                   0x28<br>
 #define PKT3_DRAW_INDEX_IMMD_BE                0x29<br>
 #define PKT3_INDEX_TYPE                        0x2A<br>
 #define PKT3_DRAW_INDEX                        0x2B<br>
+#define PKT3_DRAW_INDIRECT_MULTI               0x2C<br>
 #define PKT3_DRAW_INDEX_AUTO                   0x2D<br>
 #define PKT3_DRAW_INDEX_IMMD                   0x2E<br>
 #define PKT3_NUM_INSTANCES                     0x2F<br>
diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c<br>
index 0b571e4..829deaf 100644<br>
--- a/src/gallium/drivers/r600/r600_pipe.c<br>
+++ b/src/gallium/drivers/r600/r600_pipe.c<br>
@@ -313,6 +313,11 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)<br>
                return family >= CHIP_CEDAR ? 1 : 0;<br>
        case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:<br>
                return family >= CHIP_CEDAR ? 4 : 0;<br>
+       case PIPE_CAP_DRAW_INDIRECT:<br>
+               /* needs kernel command checking support to work */<br>
+               if (family >= CHIP_CEDAR && rscreen->b.info.drm_minor >= 41)<br>
+                       return 1;<br>
+               return 0;<br>
<br>
        /* Unsupported features. */<br>
        case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:<br>
@@ -322,7 +327,6 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)<br>
        case PIPE_CAP_VERTEX_COLOR_CLAMPED:<br>
        case PIPE_CAP_USER_VERTEX_BUFFERS:<br>
        case PIPE_CAP_TEXTURE_GATHER_OFFSETS:<br>
-       case PIPE_CAP_DRAW_INDIRECT:<br>
        case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:<br>
        case PIPE_CAP_SAMPLER_VIEW_TARGET:<br>
                return 0;<br>
diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c<br>
index c3f21cb..649bf24 100644<br>
--- a/src/gallium/drivers/r600/r600_state_common.c<br>
+++ b/src/gallium/drivers/r600/r600_state_common.c<br>
@@ -1362,7 +1362,7 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info<br>
        unsigned i;<br>
        struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;<br>
<br>
-       if (!info.count && (info.indexed || !info.count_from_stream_output)) {<br>
+       if (!info.indirect && !info.count && (info.indexed || !info.count_from_stream_output)) {<br>
                return;<br>
        }<br>
<br>
@@ -1391,7 +1391,7 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info<br>
                ib.offset = rctx->index_buffer.offset + info.start * ib.index_size;<br>
<br>
                /* Translate 8-bit indices to 16-bit. */<br>
-               if (ib.index_size == 1) {<br>
+               if (unlikely(ib.index_size == 1)) {<br>
                        struct pipe_resource *out_buffer = NULL;<br>
                        unsigned out_offset;<br>
                        void *ptr;<br>
@@ -1414,7 +1414,8 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info<br>
                 * and the indices are emitted via PKT3_DRAW_INDEX_IMMD.<br>
                 * Note: Instanced rendering in combination with immediate indices hangs. */<br>
                if (ib.user_buffer && (R600_BIG_ENDIAN || info.instance_count > 1 ||<br>
-                                      info.count*ib.index_size > 20)) {<br>
+                                      info.count*ib.index_size > 20 ||<br>
+                                      info.indirect)) {<br>
                        u_upload_data(rctx->b.uploader, 0, info.count * ib.index_size,<br>
                                      ib.user_buffer, &ib.offset, &ib.buffer);<br>
                        ib.user_buffer = NULL;<br>
@@ -1521,6 +1522,21 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info<br>
        /* Draw packets. */<br>
        cs->buf[cs->cdw++] = PKT3(PKT3_NUM_INSTANCES, 0, rctx->b.predicate_drawing);<br>
        cs->buf[cs->cdw++] = info.instance_count;<br>
+<br>
+       if (unlikely(info.indirect)) {<br>
+               uint64_t va = r600_resource(info.indirect)->gpu_address;<br>
+               assert(rctx->b.chip_class >= EVERGREEN);<br>
+               cs->buf[cs->cdw++] = PKT3(0x11 /* PKT3_SET_BASE */, 2, rctx->b.predicate_drawing);<br>
+               cs->buf[cs->cdw++] = 1; // 1 means DX11 Draw_Index_Indirect Patch Table Base<br>
+               cs->buf[cs->cdw++] = va;<br>
+               cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF;<br>
+<br>
+               cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, rctx->b.predicate_drawing);<br>
+               cs->buf[cs->cdw++] = r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx,<br>
+                                                          (struct r600_resource*)info.indirect,<br>
+                                                          RADEON_USAGE_READ, RADEON_PRIO_MIN);<br>
+       }<br>
+<br>
        if (info.indexed) {<br>
                cs->buf[cs->cdw++] = PKT3(PKT3_INDEX_TYPE, 0, rctx->b.predicate_drawing);<br>
                cs->buf[cs->cdw++] = ib.index_size == 4 ?<br>
@@ -1537,18 +1553,40 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info<br>
                        cs->cdw += size_dw;<br>
                } else {<br>
                        uint64_t va = r600_resource(ib.buffer)->gpu_address + ib.offset;<br>
-                       cs->buf[cs->cdw++] = PKT3(PKT3_DRAW_INDEX, 3, rctx->b.predicate_drawing);<br>
-                       cs->buf[cs->cdw++] = va;<br>
-                       cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF;<br>
-                       cs->buf[cs->cdw++] = info.count;<br>
-                       cs->buf[cs->cdw++] = V_0287F0_DI_SRC_SEL_DMA;<br>
-                       cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, rctx->b.predicate_drawing);<br>
-                       cs->buf[cs->cdw++] = r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx,<br>
-                                                                  (struct r600_resource*)ib.buffer,<br>
-                                                                  RADEON_USAGE_READ, RADEON_PRIO_MIN);<br>
+<br>
+                       if (likely(!info.indirect)) {<br>
+                               cs->buf[cs->cdw++] = PKT3(PKT3_DRAW_INDEX, 3, rctx->b.predicate_drawing);<br>
+                               cs->buf[cs->cdw++] = va;<br>
+                               cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF;<br>
+                               cs->buf[cs->cdw++] = info.count;<br>
+                               cs->buf[cs->cdw++] = V_0287F0_DI_SRC_SEL_DMA;<br>
+                               cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, rctx->b.predicate_drawing);<br>
+                               cs->buf[cs->cdw++] = r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx,<br>
+                                                                          (struct r600_resource*)ib.buffer,<br>
+                                                                          RADEON_USAGE_READ, RADEON_PRIO_MIN);<br>
+                       }<br>
+                       else {<br>
+                               uint32_t max_size = (ib.buffer->width0 - ib.offset) / ib.index_size;<br>
+<br>
+                               cs->buf[cs->cdw++] = PKT3(0x26 /* PKT3_INDEX_BASE */, 1, rctx->b.predicate_drawing);<br>
+                               cs->buf[cs->cdw++] = va;<br>
+                               cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF;<br>
+<br>
+                               cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, rctx->b.predicate_drawing);<br>
+                               cs->buf[cs->cdw++] = r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx,<br>
+                                                                          (struct r600_resource*)ib.buffer,<br>
+                                                                          RADEON_USAGE_READ, RADEON_PRIO_MIN);<br>
+<br>
+                               cs->buf[cs->cdw++] = PKT3(0x13 /* PKT3_INDEX_BUFFER_SIZE */, 0, rctx->b.predicate_drawing);<br>
+                               cs->buf[cs->cdw++] = max_size;<br>
+<br>
+                               cs->buf[cs->cdw++] = PKT3(0x25 /* PKT3_DRAW_INDEX_INDIRECT */, 1, rctx->b.predicate_drawing);<br>
+                               cs->buf[cs->cdw++] = info.indirect_offset;<br>
+                               cs->buf[cs->cdw++] = V_0287F0_DI_SRC_SEL_DMA;<br>
+                       }<br>
                }<br>
        } else {<br>
-               if (info.count_from_stream_output) {<br>
+               if (unlikely(info.count_from_stream_output)) {<br>
                        struct r600_so_target *t = (struct r600_so_target*)info.count_from_stream_output;<br>
                        uint64_t va = t->buf_filled_size->gpu_address + t->buf_filled_size_offset;<br>
<br>
@@ -1567,10 +1605,18 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info<br>
                                                                   RADEON_PRIO_MIN);<br>
                }<br>
<br>
-               cs->buf[cs->cdw++] = PKT3(PKT3_DRAW_INDEX_AUTO, 1, rctx->b.predicate_drawing);<br>
-               cs->buf[cs->cdw++] = info.count;<br>
-               cs->buf[cs->cdw++] = V_0287F0_DI_SRC_SEL_AUTO_INDEX |<br>
-                                       (info.count_from_stream_output ? S_0287F0_USE_OPAQUE(1) : 0);<br>
+               if (likely(!info.indirect)) {<br>
+                       cs->buf[cs->cdw++] = PKT3(PKT3_DRAW_INDEX_AUTO, 1, rctx->b.predicate_drawing);<br>
+                       cs->buf[cs->cdw++] = info.count;<br>
+                       cs->buf[cs->cdw++] = V_0287F0_DI_SRC_SEL_AUTO_INDEX |<br>
+                                               (info.count_from_stream_output ? S_0287F0_USE_OPAQUE(1) : 0);<br>
+               }<br>
+               else {<br>
+                       cs->buf[cs->cdw++] = PKT3(0x24 /* PKT3_DRAW_INDIRECT */, 1, rctx->b.predicate_drawing);<br>
+                       cs->buf[cs->cdw++] = info.indirect_offset;<br>
+                       cs->buf[cs->cdw++] = V_0287F0_DI_SRC_SEL_AUTO_INDEX |<br>
+                                               (info.count_from_stream_output ? S_0287F0_USE_OPAQUE(1) : 0);<br>
+               }<br>
        }<br>
<br>
        if (rctx->screen->b.trace_bo) {<br>
<span class="HOEnZb"><font color="#888888">--<br>
1.9.1<br>
<br>
_______________________________________________<br>
mesa-dev mailing list<br>
<a href="mailto:mesa-dev@lists.freedesktop.org">mesa-dev@lists.freedesktop.org</a><br>
<a href="http://lists.freedesktop.org/mailman/listinfo/mesa-dev" target="_blank">http://lists.freedesktop.org/mailman/listinfo/mesa-dev</a><br>
</font></span></blockquote></div><br></div>