Mesa (master): zink: implement transform feedback support to finish off opengl 3.0

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Wed Jun 17 22:01:36 UTC 2020


Module: Mesa
Branch: master
Commit: 37778fcd9a352430af0cd3b28a8776479a7c8380
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=37778fcd9a352430af0cd3b28a8776479a7c8380

Author: Mike Blumenkrantz <michael.blumenkrantz at gmail.com>
Date:   Mon Jun  1 14:59:15 2020 -0400

zink: implement transform feedback support to finish off opengl 3.0

this adds:
* context hooks for gallium stream output methods
* handling for xfb-related queries
* barrier management for pausing and resuming xfb

loosely based on patches originally written by Dave Airlie <airlied at redhat.com>

Reviewed-by: Erik Faye-Lund <erik.faye-lund at collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5163>

---

 src/gallium/drivers/zink/zink_blit.c     |   1 +
 src/gallium/drivers/zink/zink_compiler.c |   1 +
 src/gallium/drivers/zink/zink_compiler.h |   2 +
 src/gallium/drivers/zink/zink_context.c  |  82 +++++++++++++++++
 src/gallium/drivers/zink/zink_context.h  |  19 ++++
 src/gallium/drivers/zink/zink_draw.c     | 151 ++++++++++++++++++++++++++++++-
 src/gallium/drivers/zink/zink_query.c    |  89 ++++++++++++++----
 src/gallium/drivers/zink/zink_resource.c |   6 ++
 src/gallium/drivers/zink/zink_resource.h |   2 +
 src/gallium/drivers/zink/zink_screen.c   |   9 +-
 src/gallium/drivers/zink/zink_screen.h   |   9 ++
 11 files changed, 349 insertions(+), 22 deletions(-)

diff --git a/src/gallium/drivers/zink/zink_blit.c b/src/gallium/drivers/zink/zink_blit.c
index 2aeb2247833..74e1024b616 100644
--- a/src/gallium/drivers/zink/zink_blit.c
+++ b/src/gallium/drivers/zink/zink_blit.c
@@ -208,6 +208,7 @@ zink_blit(struct pipe_context *pctx,
    util_blitter_save_fragment_constant_buffer_slot(ctx->blitter, ctx->ubos[PIPE_SHADER_FRAGMENT]);
    util_blitter_save_vertex_buffer_slot(ctx->blitter, ctx->buffers);
    util_blitter_save_sample_mask(ctx->blitter, ctx->gfx_pipeline_state.sample_mask);
+   util_blitter_save_so_targets(ctx->blitter, ctx->num_so_targets, ctx->so_targets);
 
    util_blitter_blit(ctx->blitter, info);
 }
diff --git a/src/gallium/drivers/zink/zink_compiler.c b/src/gallium/drivers/zink/zink_compiler.c
index 465056f85cb..8ffdcc4607c 100644
--- a/src/gallium/drivers/zink/zink_compiler.c
+++ b/src/gallium/drivers/zink/zink_compiler.c
@@ -21,6 +21,7 @@
  * USE OR OTHER DEALINGS IN THE SOFTWARE.
  */
 
+#include "zink_context.h"
 #include "zink_compiler.h"
 #include "zink_program.h"
 #include "zink_screen.h"
diff --git a/src/gallium/drivers/zink/zink_compiler.h b/src/gallium/drivers/zink/zink_compiler.h
index 73bdebec9dd..abc1fbc44f6 100644
--- a/src/gallium/drivers/zink/zink_compiler.h
+++ b/src/gallium/drivers/zink/zink_compiler.h
@@ -55,6 +55,8 @@ struct zink_shader {
 
    shader_info info;
 
+   struct pipe_stream_output_info stream_output;
+
    struct {
       int index;
       int binding;
diff --git a/src/gallium/drivers/zink/zink_context.c b/src/gallium/drivers/zink/zink_context.c
index 05702ada104..83e048216aa 100644
--- a/src/gallium/drivers/zink/zink_context.c
+++ b/src/gallium/drivers/zink/zink_context.c
@@ -366,7 +366,16 @@ zink_set_vertex_buffers(struct pipe_context *pctx,
    if (buffers) {
       for (int i = 0; i < num_buffers; ++i) {
          const struct pipe_vertex_buffer *vb = buffers + i;
+         struct zink_resource *res = zink_resource(vb->buffer.resource);
+
          ctx->gfx_pipeline_state.bindings[start_slot + i].stride = vb->stride;
+         if (res && res->needs_xfb_barrier) {
+            /* if we're binding a previously-used xfb buffer, we need cmd buffer synchronization to ensure
+             * that we use the right buffer data
+             */
+            pctx->flush(pctx, NULL, 0);
+            res->needs_xfb_barrier = false;
+         }
       }
    }
 
@@ -912,6 +921,9 @@ zink_flush(struct pipe_context *pctx,
    struct zink_batch *batch = zink_curr_batch(ctx);
    flush_batch(ctx);
 
+   if (zink_screen(pctx->screen)->have_EXT_transform_feedback && ctx->num_so_targets)
+      ctx->dirty_so_targets = true;
+
    if (pfence)
       zink_fence_reference(zink_screen(pctx->screen),
                            (struct zink_fence **)pfence,
@@ -1014,6 +1026,73 @@ zink_resource_copy_region(struct pipe_context *pctx,
       debug_printf("zink: TODO resource copy\n");
 }
 
+/* create an xfb target wrapping pres, plus a 4-byte counter buffer; returns NULL on allocation failure */
+static struct pipe_stream_output_target *
+zink_create_stream_output_target(struct pipe_context *pctx,
+                                 struct pipe_resource *pres,
+                                 unsigned buffer_offset,
+                                 unsigned buffer_size)
+{
+   struct zink_so_target *t = CALLOC_STRUCT(zink_so_target);
+   if (!t)
+      return NULL;
+
+   /* using PIPE_BIND_CUSTOM here lets us create a custom pipe buffer resource,
+    * which allows us to differentiate and use VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_COUNTER_BUFFER_BIT_EXT
+    * as we must for this case
+    */
+   t->counter_buffer = pipe_buffer_create(pctx->screen, PIPE_BIND_STREAM_OUTPUT | PIPE_BIND_CUSTOM, PIPE_USAGE_DEFAULT, 4);
+   if (!t->counter_buffer) {
+      FREE(t); /* no reference on pres has been taken yet, so nothing else to release */
+      return NULL;
+   }
+
+   t->base.reference.count = 1;
+   t->base.context = pctx;
+   pipe_resource_reference(&t->base.buffer, pres);
+   t->base.buffer_offset = buffer_offset;
+   t->base.buffer_size = buffer_size;
+
+   return &t->base;
+}
+
+static void
+zink_stream_output_target_destroy(struct pipe_context *pctx,
+                                  struct pipe_stream_output_target *psot)
+{
+   struct zink_so_target *target = zink_so_target(psot);
+   pipe_resource_reference(&target->counter_buffer, NULL); /* counter buffer made at creation */
+   pipe_resource_reference(&target->base.buffer, NULL);    /* buffer referenced at creation */
+   FREE(target);
+}
+
+static void
+zink_set_stream_output_targets(struct pipe_context *pctx,
+                               unsigned num_targets,
+                               struct pipe_stream_output_target **targets,
+                               const unsigned *offsets)
+{
+   struct zink_context *ctx = zink_context(pctx);
+   unsigned slot = 0;
+   /* bind the incoming targets, taking a reference on each */
+   for (; slot < num_targets; slot++)
+      pipe_so_target_reference(&ctx->so_targets[slot], targets[slot]);
+   /* release whatever was bound past the new count (everything when unbinding) */
+   for (; slot < ctx->num_so_targets; slot++)
+      pipe_so_target_reference(&ctx->so_targets[slot], NULL);
+   ctx->num_so_targets = num_targets;
+
+   /* when unbinding, xfb_barrier and dirty_so_targets are left untouched */
+   if (!num_targets)
+      return;
+
+   /* emit memory barrier on next draw for synchronization */
+   if (offsets[0] == (unsigned)-1)
+      ctx->xfb_barrier = true;
+   /* TODO: possibly avoid rebinding on resume if resuming from same buffers? */
+   ctx->dirty_so_targets = true;
+}
+
 struct pipe_context *
 zink_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags)
 {
@@ -1063,7 +1142,10 @@ zink_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags)
 
    ctx->base.resource_copy_region = zink_resource_copy_region;
    ctx->base.blit = zink_blit;
+   ctx->base.create_stream_output_target = zink_create_stream_output_target;
+   ctx->base.stream_output_target_destroy = zink_stream_output_target_destroy;
 
+   ctx->base.set_stream_output_targets = zink_set_stream_output_targets;
    ctx->base.flush_resource = zink_flush_resource;
    zink_context_surface_init(&ctx->base);
    zink_context_resource_init(&ctx->base);
diff --git a/src/gallium/drivers/zink/zink_context.h b/src/gallium/drivers/zink/zink_context.h
index 76fa780b9f7..6affea61523 100644
--- a/src/gallium/drivers/zink/zink_context.h
+++ b/src/gallium/drivers/zink/zink_context.h
@@ -57,6 +57,20 @@ zink_sampler_view(struct pipe_sampler_view *pview)
    return (struct zink_sampler_view *)pview;
 }
 
+struct zink_so_target {
+   struct pipe_stream_output_target base; /* must stay first: zink_so_target() casts base pointers to this struct */
+   struct pipe_resource *counter_buffer; /* 4-byte buffer allocated per target in zink_create_stream_output_target() */
+   VkDeviceSize counter_buffer_offset; /* offset passed with counter_buffer to the xfb begin/end and byte-count draw calls */
+   uint32_t stride; /* set in zink_draw_vbo() from the vertex shader's stream_output.stride[] times sizeof(uint32_t) */
+   bool counter_buffer_valid; /* set once an xfb end has been recorded for this target; until then begin gets a NULL counter */
+};
+
+static inline struct zink_so_target *
+zink_so_target(struct pipe_stream_output_target *so_target)
+{
+   return (struct zink_so_target *)so_target; /* plain downcast; relies on base being the first member of zink_so_target */
+}
+
 struct zink_context {
    struct pipe_context base;
    struct slab_child_pool transfer_pool;
@@ -111,6 +125,11 @@ struct zink_context {
    bool queries_disabled;
 
    struct pipe_resource *dummy_buffer;
+
+   uint32_t num_so_targets;
+   struct pipe_stream_output_target *so_targets[PIPE_MAX_SO_OUTPUTS];
+   bool dirty_so_targets;
+   bool xfb_barrier;
 };
 
 static inline struct zink_context *
diff --git a/src/gallium/drivers/zink/zink_draw.c b/src/gallium/drivers/zink/zink_draw.c
index 553579acf64..1699a7b601b 100644
--- a/src/gallium/drivers/zink/zink_draw.c
+++ b/src/gallium/drivers/zink/zink_draw.c
@@ -36,6 +36,100 @@ allocate_descriptor_set(struct zink_screen *screen,
    return desc_set;
 }
 
+static void
+zink_emit_xfb_counter_barrier(struct zink_context *ctx)
+{
+   /* Between the pause and resume there needs to be a memory barrier for the counter buffers
+    * with a source access of VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT
+    * at pipeline stage VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT
+    * to a destination access of VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT
+    * at pipeline stage VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT.
+    *
+    * - from VK_EXT_transform_feedback spec
+    */
+   VkBufferMemoryBarrier barriers[PIPE_MAX_SO_OUTPUTS] = {};
+   unsigned barrier_count = 0;
+
+   for (unsigned i = 0; i < ctx->num_so_targets; i++) {
+      struct zink_so_target *t = zink_so_target(ctx->so_targets[i]);
+      if (t->counter_buffer_valid) {
+         VkBufferMemoryBarrier *b = &barriers[barrier_count++]; /* pack densely; only barrier_count entries are submitted */
+         b->sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
+         b->srcAccessMask = VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT;
+         b->dstAccessMask = VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT;
+         b->buffer = zink_resource(t->counter_buffer)->buffer;
+         b->size = VK_WHOLE_SIZE;
+      }
+   }
+   struct zink_batch *batch = zink_batch_no_rp(ctx);
+   vkCmdPipelineBarrier(batch->cmdbuf,
+      VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT,
+      VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT,
+      0,
+      0, NULL,
+      barrier_count, barriers,
+      0, NULL
+   );
+   ctx->xfb_barrier = false; /* the barrier requested via ctx->xfb_barrier has now been recorded */
+}
+
+static void
+zink_emit_xfb_vertex_input_barrier(struct zink_context *ctx, struct zink_resource *res)
+{
+   /* A pipeline barrier is required between using the buffers as
+    * transform feedback buffers and vertex buffers to
+    * ensure all writes to the transform feedback buffers are visible
+    * when the data is read as vertex attributes.
+    * The source access is VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT
+    * and the destination access is VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT
+    * for the pipeline stages VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT
+    * and VK_PIPELINE_STAGE_VERTEX_INPUT_BIT respectively.
+    *
+    * - 20.3.1. Drawing Transform Feedback
+    */
+   VkBufferMemoryBarrier barriers[1] = {};
+   barriers[0].sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
+   barriers[0].srcAccessMask = VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT; /* buffer-data writes, per the spec text quoted above */
+   barriers[0].dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT;
+   barriers[0].buffer = res->buffer;
+   barriers[0].size = VK_WHOLE_SIZE;
+   struct zink_batch *batch = zink_batch_no_rp(ctx);
+   zink_batch_reference_resoure(batch, res);
+   vkCmdPipelineBarrier(batch->cmdbuf,
+      VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT,
+      VK_PIPELINE_STAGE_VERTEX_INPUT_BIT,
+      0,
+      0, NULL,
+      ARRAY_SIZE(barriers), barriers,
+      0, NULL
+   );
+   res->needs_xfb_barrier = false; /* barrier recorded; the flag is raised again by the next xfb draw into res */
+}
+
+static void
+zink_emit_stream_output_targets(struct pipe_context *pctx)
+{
+   struct zink_context *ctx = zink_context(pctx);
+   struct zink_screen *screen = zink_screen(pctx->screen);
+   struct zink_batch *batch = zink_curr_batch(ctx);
+   VkBuffer vk_bufs[PIPE_MAX_SO_OUTPUTS];
+   VkDeviceSize offs[PIPE_MAX_SO_OUTPUTS], sizes[PIPE_MAX_SO_OUTPUTS];
+
+   /* gather buffer handle, offset and size of every bound target */
+   for (unsigned n = 0; n < ctx->num_so_targets; n++) {
+      struct pipe_stream_output_target *base = ctx->so_targets[n];
+      struct zink_resource *res = zink_resource(base->buffer);
+      vk_bufs[n] = res->buffer;
+      zink_batch_reference_resoure(batch, res);
+      offs[n] = base->buffer_offset;
+      sizes[n] = base->buffer_size;
+   }
+
+   screen->vk_CmdBindTransformFeedbackBuffersEXT(batch->cmdbuf, 0, ctx->num_so_targets,
+                                                 vk_bufs, offs, sizes);
+   ctx->dirty_so_targets = false; /* bindings are now recorded in the current batch */
+}
+
 static void
 zink_bind_vertex_buffers(struct zink_batch *batch, struct zink_context *ctx)
 {
@@ -110,6 +204,9 @@ zink_draw_vbo(struct pipe_context *pctx,
    struct zink_context *ctx = zink_context(pctx);
    struct zink_screen *screen = zink_screen(pctx->screen);
    struct zink_rasterizer_state *rast_state = ctx->rast_state;
+   struct zink_so_target *so_target = zink_so_target(dinfo->count_from_stream_output);
+   VkBuffer counter_buffers[PIPE_MAX_SO_OUTPUTS];
+   VkDeviceSize counter_buffer_offsets[PIPE_MAX_SO_OUTPUTS] = {};
 
    if (dinfo->mode >= PIPE_PRIM_QUADS ||
        dinfo->mode == PIPE_PRIM_LINE_LOOP ||
@@ -175,6 +272,13 @@ zink_draw_vbo(struct pipe_context *pctx,
       if (!shader)
          continue;
 
+      if (i == MESA_SHADER_VERTEX && ctx->num_so_targets) {
+         for (unsigned i = 0; i < ctx->num_so_targets; i++) {
+            struct zink_so_target *t = zink_so_target(ctx->so_targets[i]);
+            t->stride = shader->stream_output.stride[i] * sizeof(uint32_t);
+         }
+      }
+
       for (int j = 0; j < shader->num_bindings; j++) {
          int index = shader->bindings[j].index;
          if (shader->bindings[j].type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER) {
@@ -227,6 +331,16 @@ zink_draw_vbo(struct pipe_context *pctx,
                                VK_IMAGE_LAYOUT_GENERAL);
    }
 
+   if (ctx->xfb_barrier)
+      zink_emit_xfb_counter_barrier(ctx);
+
+   if (ctx->dirty_so_targets)
+      zink_emit_stream_output_targets(pctx);
+
+   if (so_target && zink_resource(so_target->base.buffer)->needs_xfb_barrier)
+      zink_emit_xfb_vertex_input_barrier(ctx, zink_resource(so_target->base.buffer));
+
+
    batch = zink_batch_rp(ctx);
 
    if (batch->descs_left < gfx_program->num_descriptors) {
@@ -295,6 +409,20 @@ zink_draw_vbo(struct pipe_context *pctx,
                            gfx_program->layout, 0, 1, &desc_set, 0, NULL);
    zink_bind_vertex_buffers(batch, ctx);
 
+   if (ctx->num_so_targets) {
+      for (unsigned i = 0; i < ctx->num_so_targets; i++) {
+         struct zink_so_target *t = zink_so_target(ctx->so_targets[i]);
+         struct zink_resource *res = zink_resource(t->counter_buffer);
+         if (t->counter_buffer_valid) {
+            zink_batch_reference_resoure(batch, zink_resource(t->counter_buffer));
+            counter_buffers[i] = res->buffer;
+            counter_buffer_offsets[i] = t->counter_buffer_offset;
+         } else
+            counter_buffers[i] = NULL;
+      }
+      screen->vk_CmdBeginTransformFeedbackEXT(batch->cmdbuf, 0, ctx->num_so_targets, counter_buffers, counter_buffer_offsets);
+   }
+
    if (dinfo->index_size > 0) {
       assert(dinfo->index_size != 1);
       VkIndexType index_type = dinfo->index_size == 2 ? VK_INDEX_TYPE_UINT16 : VK_INDEX_TYPE_UINT32;
@@ -304,9 +432,28 @@ zink_draw_vbo(struct pipe_context *pctx,
       vkCmdDrawIndexed(batch->cmdbuf,
          dinfo->count, dinfo->instance_count,
          dinfo->start, dinfo->index_bias, dinfo->start_instance);
-   } else
-      vkCmdDraw(batch->cmdbuf, dinfo->count, dinfo->instance_count, dinfo->start, dinfo->start_instance);
+   } else {
+      if (so_target && screen->tf_props.transformFeedbackDraw) {
+         zink_batch_reference_resoure(batch, zink_resource(so_target->counter_buffer));
+         screen->vk_CmdDrawIndirectByteCountEXT(batch->cmdbuf, dinfo->instance_count, dinfo->start_instance,
+                                       zink_resource(so_target->counter_buffer)->buffer, so_target->counter_buffer_offset, 0,
+                                       MIN2(so_target->stride, screen->tf_props.maxTransformFeedbackBufferDataStride));
+      }
+      else
+         vkCmdDraw(batch->cmdbuf, dinfo->count, dinfo->instance_count, dinfo->start, dinfo->start_instance);
+   }
 
    if (dinfo->index_size > 0 && dinfo->has_user_indices)
       pipe_resource_reference(&index_buffer, NULL);
+
+   if (ctx->num_so_targets) {
+      for (unsigned i = 0; i < ctx->num_so_targets; i++) {
+         struct zink_so_target *t = zink_so_target(ctx->so_targets[i]);
+         counter_buffers[i] = zink_resource(t->counter_buffer)->buffer;
+         counter_buffer_offsets[i] = t->counter_buffer_offset;
+         t->counter_buffer_valid = true;
+         zink_resource(ctx->so_targets[i]->buffer)->needs_xfb_barrier = true;
+      }
+      screen->vk_CmdEndTransformFeedbackEXT(batch->cmdbuf, 0, ctx->num_so_targets, counter_buffers, counter_buffer_offsets);
+   }
 }
diff --git a/src/gallium/drivers/zink/zink_query.c b/src/gallium/drivers/zink/zink_query.c
index 1d0050306d1..10cb9b0750f 100644
--- a/src/gallium/drivers/zink/zink_query.c
+++ b/src/gallium/drivers/zink/zink_query.c
@@ -15,6 +15,7 @@ struct zink_query {
    unsigned curr_query, num_queries;
 
    VkQueryType vkqtype;
+   unsigned index;
    bool use_64bit;
    bool precise;
 
@@ -37,7 +38,11 @@ convert_query_type(unsigned query_type, bool *use_64bit, bool *precise)
       *use_64bit = true;
       return VK_QUERY_TYPE_TIMESTAMP;
    case PIPE_QUERY_PIPELINE_STATISTICS:
+   case PIPE_QUERY_PRIMITIVES_GENERATED:
       return VK_QUERY_TYPE_PIPELINE_STATISTICS;
+   case PIPE_QUERY_PRIMITIVES_EMITTED:
+      *use_64bit = true;
+      return VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT;
    default:
       debug_printf("unknown query: %s\n",
                    util_str_query_type(query_type, true));
@@ -56,6 +61,7 @@ zink_create_query(struct pipe_context *pctx,
    if (!query)
       return NULL;
 
+   query->index = index;
    query->type = query_type;
    query->vkqtype = convert_query_type(query_type, &query->use_64bit, &query->precise);
    if (query->vkqtype == -1)
@@ -67,6 +73,8 @@ zink_create_query(struct pipe_context *pctx,
    pool_create.sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO;
    pool_create.queryType = query->vkqtype;
    pool_create.queryCount = query->num_queries;
+   if (query_type == PIPE_QUERY_PRIMITIVES_GENERATED)
+     pool_create.pipelineStatistics = VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_PRIMITIVES_BIT;
 
    VkResult status = vkCreateQueryPool(screen->dev, &pool_create, NULL, &query->query_pool);
    if (status != VK_SUCCESS) {
@@ -106,13 +114,20 @@ zink_destroy_query(struct pipe_context *pctx,
 }
 
 static void
-begin_query(struct zink_batch *batch, struct zink_query *q)
+begin_query(struct zink_context *ctx, struct zink_query *q)
 {
    VkQueryControlFlags flags = 0;
+   struct zink_batch *batch = zink_curr_batch(ctx);
    if (q->precise)
       flags |= VK_QUERY_CONTROL_PRECISE_BIT;
-
-   vkCmdBeginQuery(batch->cmdbuf, q->query_pool, q->curr_query, flags);
+   if (q->vkqtype == VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT)
+      zink_screen(ctx->base.screen)->vk_CmdBeginQueryIndexedEXT(batch->cmdbuf,
+                                                                q->query_pool,
+                                                                q->curr_query,
+                                                                flags,
+                                                                q->index);
+   else
+      vkCmdBeginQuery(batch->cmdbuf, q->query_pool, q->curr_query, flags);
 }
 
 static bool
@@ -134,17 +149,22 @@ zink_begin_query(struct pipe_context *pctx,
    vkCmdResetQueryPool(batch->cmdbuf, query->query_pool, 0, MIN2(query->curr_query + 1, query->num_queries));
    query->curr_query = 0;
 
-   begin_query(batch, query);
+   begin_query(ctx, query);
    list_addtail(&query->active_list, &ctx->active_queries);
 
    return true;
 }
 
 static void
-end_query(struct zink_batch *batch, struct zink_query *q)
+end_query(struct zink_context *ctx, struct zink_query *q)
 {
+   struct zink_screen *screen = zink_screen(ctx->base.screen);
+   struct zink_batch *batch = zink_curr_batch(ctx);
    assert(q->type != PIPE_QUERY_TIMESTAMP);
-   vkCmdEndQuery(batch->cmdbuf, q->query_pool, q->curr_query);
+   if (q->vkqtype == VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT)
+      screen->vk_CmdEndQueryIndexedEXT(batch->cmdbuf, q->query_pool, q->curr_query, q->index);
+   else
+      vkCmdEndQuery(batch->cmdbuf, q->query_pool, q->curr_query);
    if (++q->curr_query == q->num_queries) {
       assert(0);
       /* need to reset pool! */
@@ -156,15 +176,15 @@ zink_end_query(struct pipe_context *pctx,
                struct pipe_query *q)
 {
    struct zink_context *ctx = zink_context(pctx);
-   struct zink_batch *batch = zink_curr_batch(ctx);
    struct zink_query *query = (struct zink_query *)q;
 
    if (query->type == PIPE_QUERY_TIMESTAMP) {
       assert(query->curr_query == 0);
+      struct zink_batch *batch = zink_curr_batch(ctx);
       vkCmdWriteTimestamp(batch->cmdbuf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
                           query->query_pool, 0);
    } else {
-      end_query(batch, query);
+      end_query(ctx, query);
       list_delinit(&query->active_list);
    }
 
@@ -194,17 +214,36 @@ zink_get_query_result(struct pipe_context *pctx,
    // union pipe_query_result results[100];
    uint64_t results[100];
    memset(results, 0, sizeof(results));
-   assert(query->curr_query <= ARRAY_SIZE(results));
-   if (vkGetQueryPoolResults(screen->dev, query->query_pool,
-                             0, query->curr_query,
-                             sizeof(results),
-                             results,
-                             sizeof(uint64_t),
-                             flags) != VK_SUCCESS)
-      return false;
+   int num_results;
+   if (query->vkqtype == VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT) {
+      char tf_result[16] = {};
+      /* this query emits 2 values */
+      assert(query->curr_query <= ARRAY_SIZE(results) / 2);
+      num_results = query->curr_query * 2;
+      VkResult status = vkGetQueryPoolResults(screen->dev, query->query_pool,
+                                              0, query->curr_query,
+                                              sizeof(results),
+                                              results,
+                                              sizeof(uint64_t),
+                                              flags);
+      if (status != VK_SUCCESS)
+         return false;
+      memcpy(result, tf_result + (query->type == PIPE_QUERY_PRIMITIVES_GENERATED ? 8 : 0), 8);
+   } else {
+      assert(query->curr_query <= ARRAY_SIZE(results));
+      num_results = query->curr_query;
+      VkResult status = vkGetQueryPoolResults(screen->dev, query->query_pool,
+                                              0, query->curr_query,
+                                              sizeof(results),
+                                              results,
+                                              sizeof(uint64_t),
+                                              flags);
+      if (status != VK_SUCCESS)
+         return false;
+   }
 
    util_query_clear_result(result, query->type);
-   for (int i = 0; i < query->curr_query; ++i) {
+   for (int i = 0; i < num_results; ++i) {
       switch (query->type) {
       case PIPE_QUERY_OCCLUSION_PREDICATE:
       case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
@@ -217,6 +256,18 @@ zink_get_query_result(struct pipe_context *pctx,
       case PIPE_QUERY_OCCLUSION_COUNTER:
          result->u64 += results[i];
          break;
+      case PIPE_QUERY_PRIMITIVES_GENERATED:
+         result->u32 += results[i];
+         break;
+      case PIPE_QUERY_PRIMITIVES_EMITTED:
+         /* A query pool created with this type will capture 2 integers -
+          * numPrimitivesWritten and numPrimitivesNeeded -
+          * for the specified vertex stream output from the last vertex processing stage.
+          * - from VK_EXT_transform_feedback spec
+          */
+         result->u64 += results[i];
+         i++;
+         break;
 
       default:
          debug_printf("unhangled query type: %s\n",
@@ -233,7 +284,7 @@ zink_suspend_queries(struct zink_context *ctx, struct zink_batch *batch)
 {
    struct zink_query *query;
    LIST_FOR_EACH_ENTRY(query, &ctx->active_queries, active_list) {
-      end_query(batch, query);
+      end_query(ctx, query);
    }
 }
 
@@ -243,7 +294,7 @@ zink_resume_queries(struct zink_context *ctx, struct zink_batch *batch)
    struct zink_query *query;
    LIST_FOR_EACH_ENTRY(query, &ctx->active_queries, active_list) {
       vkCmdResetQueryPool(batch->cmdbuf, query->query_pool, query->curr_query, 1);
-      begin_query(batch, query);
+      begin_query(ctx, query);
    }
 }
 
diff --git a/src/gallium/drivers/zink/zink_resource.c b/src/gallium/drivers/zink/zink_resource.c
index a52d8bb6c70..5ae75dba2be 100644
--- a/src/gallium/drivers/zink/zink_resource.c
+++ b/src/gallium/drivers/zink/zink_resource.c
@@ -122,6 +122,12 @@ resource_create(struct pipe_screen *pscreen,
       if (templ->bind & PIPE_BIND_COMMAND_ARGS_BUFFER)
          bci.usage |= VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT;
 
+      if (templ->bind == (PIPE_BIND_STREAM_OUTPUT | PIPE_BIND_CUSTOM)) {
+         bci.usage |= VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_COUNTER_BUFFER_BIT_EXT;
+      } else if (templ->bind & PIPE_BIND_STREAM_OUTPUT) {
+         bci.usage |= VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT;
+      }
+
       if (vkCreateBuffer(screen->dev, &bci, NULL, &res->buffer) !=
           VK_SUCCESS) {
          FREE(res);
diff --git a/src/gallium/drivers/zink/zink_resource.h b/src/gallium/drivers/zink/zink_resource.h
index 65e5e19dc73..9bca4c53a43 100644
--- a/src/gallium/drivers/zink/zink_resource.h
+++ b/src/gallium/drivers/zink/zink_resource.h
@@ -49,6 +49,8 @@ struct zink_resource {
 
    struct sw_displaytarget *dt;
    unsigned dt_stride;
+
+   bool needs_xfb_barrier;
 };
 
 struct zink_transfer {
diff --git a/src/gallium/drivers/zink/zink_screen.c b/src/gallium/drivers/zink/zink_screen.c
index 0fe77d9e2d0..2510715454f 100644
--- a/src/gallium/drivers/zink/zink_screen.c
+++ b/src/gallium/drivers/zink/zink_screen.c
@@ -725,7 +725,14 @@ load_device_extensions(struct zink_screen *screen)
       if (!screen->vk_##x)                                                  \
          return false;                                                      \
    } while (0)
-
+   if (screen->have_EXT_transform_feedback) {
+      GET_PROC_ADDR(CmdBindTransformFeedbackBuffersEXT);
+      GET_PROC_ADDR(CmdBeginTransformFeedbackEXT);
+      GET_PROC_ADDR(CmdEndTransformFeedbackEXT);
+      GET_PROC_ADDR(CmdBeginQueryIndexedEXT);
+      GET_PROC_ADDR(CmdEndQueryIndexedEXT);
+      GET_PROC_ADDR(CmdDrawIndirectByteCountEXT);
+   }
    if (screen->have_KHR_external_memory_fd)
       GET_PROC_ADDR(GetMemoryFdKHR);
 
diff --git a/src/gallium/drivers/zink/zink_screen.h b/src/gallium/drivers/zink/zink_screen.h
index e50a5f32370..e1138c9249f 100644
--- a/src/gallium/drivers/zink/zink_screen.h
+++ b/src/gallium/drivers/zink/zink_screen.h
@@ -48,10 +48,12 @@ struct zink_screen {
    VkPhysicalDeviceProperties props;
    VkPhysicalDeviceFeatures feats;
    VkPhysicalDeviceMemoryProperties mem_props;
+   VkPhysicalDeviceTransformFeedbackPropertiesEXT tf_props;
 
    bool have_KHR_maintenance1;
    bool have_KHR_external_memory_fd;
    bool have_EXT_conditional_rendering;
+   bool have_EXT_transform_feedback;
 
    bool have_X8_D24_UNORM_PACK32;
    bool have_D24_UNORM_S8_UINT;
@@ -62,6 +64,13 @@ struct zink_screen {
    PFN_vkGetMemoryFdKHR vk_GetMemoryFdKHR;
    PFN_vkCmdBeginConditionalRenderingEXT vk_CmdBeginConditionalRenderingEXT;
    PFN_vkCmdEndConditionalRenderingEXT vk_CmdEndConditionalRenderingEXT;
+
+   PFN_vkCmdBindTransformFeedbackBuffersEXT vk_CmdBindTransformFeedbackBuffersEXT;
+   PFN_vkCmdBeginTransformFeedbackEXT vk_CmdBeginTransformFeedbackEXT;
+   PFN_vkCmdEndTransformFeedbackEXT vk_CmdEndTransformFeedbackEXT;
+   PFN_vkCmdBeginQueryIndexedEXT vk_CmdBeginQueryIndexedEXT;
+   PFN_vkCmdEndQueryIndexedEXT vk_CmdEndQueryIndexedEXT;
+   PFN_vkCmdDrawIndirectByteCountEXT vk_CmdDrawIndirectByteCountEXT;
 };
 
 static inline struct zink_screen *



More information about the mesa-commit mailing list