[Mesa-dev] [PATCH v2 11/11] nvc0: add support for ARB_query_buffer_object

Ilia Mirkin imirkin at alum.mit.edu
Sun Jan 31 12:32:04 PST 2016


Signed-off-by: Ilia Mirkin <imirkin at alum.mit.edu>
---
 docs/GL3.txt                                       |   2 +-
 docs/relnotes/11.2.0.html                          |   1 +
 src/gallium/drivers/nouveau/nvc0/mme/com9097.mme   |  50 ++++++++++
 src/gallium/drivers/nouveau/nvc0/mme/com9097.mme.h |  34 +++++++
 src/gallium/drivers/nouveau/nvc0/nvc0_macros.h     |   2 +
 src/gallium/drivers/nouveau/nvc0/nvc0_query.c      |  19 ++++
 src/gallium/drivers/nouveau/nvc0/nvc0_query.h      |   7 ++
 src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c   | 106 +++++++++++++++++++++
 src/gallium/drivers/nouveau/nvc0/nvc0_screen.c     |   3 +-
 9 files changed, 222 insertions(+), 2 deletions(-)

diff --git a/docs/GL3.txt b/docs/GL3.txt
index 7623ada..257fc73 100644
--- a/docs/GL3.txt
+++ b/docs/GL3.txt
@@ -186,7 +186,7 @@ GL 4.4, GLSL 4.40:
   - specified transform/feedback layout                in progress
   - input/output block locations                       DONE
   GL_ARB_multi_bind                                    DONE (all drivers)
-  GL_ARB_query_buffer_object                           not started
+  GL_ARB_query_buffer_object                           DONE (nvc0)
   GL_ARB_texture_mirror_clamp_to_edge                  DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe)
   GL_ARB_texture_stencil8                              DONE (nv50, nvc0, r600, radeonsi, llvmpipe, softpipe)
   GL_ARB_vertex_type_10f_11f_11f_rev                   DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe)
diff --git a/docs/relnotes/11.2.0.html b/docs/relnotes/11.2.0.html
index 404e293..c35ee9a 100644
--- a/docs/relnotes/11.2.0.html
+++ b/docs/relnotes/11.2.0.html
@@ -48,6 +48,7 @@ Note: some of the new features are only available with certain drivers.
 <li>GL_ARB_compute_shader on i965</li>
 <li>GL_ARB_copy_image on r600</li>
 <li>GL_ARB_indirect_parameters on nvc0</li>
+<li>GL_ARB_query_buffer_object on nvc0</li>
 <li>GL_ARB_shader_atomic_counters on nvc0</li>
 <li>GL_ARB_shader_draw_parameters on i965, nvc0</li>
 <li>GL_ARB_shader_storage_buffer_object on nvc0</li>
diff --git a/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme b/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme
index 4daa57d..c3dba96 100644
--- a/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme
+++ b/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme
@@ -491,3 +491,53 @@ daic_runout:
 daic_runout_check:
    branz annul $r7 #daic_runout
    bra annul #daic_restore
+
+/* NVC0_3D_MACRO_QUERY_BUFFER_WRITE:
+ *
+ * This is a combination macro for all of our query buffer object needs.
+ * It has the option to clamp results to a configurable amount, as well as
+ * to write out one or two words.
+ *
+ * We use the query engine to write out the values, and expect the query
+ * address to point to the right place.
+ *
+ * arg = clamp value (0 means unclamped). A clamped result is one written word.
+ * parm[0] = LSB of end value
+ * parm[1] = MSB of end value
+ * parm[2] = LSB of start value
+ * parm[3] = MSB of start value
+ * parm[4] = desired sequence
+ * parm[5] = actual sequence
+ */
+.section #mme9097_query_buffer_write
+   maddrsend 0x44 /* SERIALIZE */
+   parm $r2 /* parm[0]: end LSB */
+   parm $r3 /* parm[1]: end MSB */
+   parm $r4 /* parm[2]: start LSB */
+   parm $r5 maddr 0x16c2 /* parm[3]: start MSB; maddr = QUERY_SEQUENCE */
+   parm $r6 /* parm[4]: desired sequence */
+   parm $r7 /* parm[5]: actual sequence */
+   mov $r6 (sub $r7 $r6) /* actual - desired */
+   mov $r6 (sbb 0x0 0x0) /* if there was underflow, not reached yet */
+   braz annul $r6 #qbw_ready /* no underflow: the result is available */
+   exit /* not reached yet: leave without sending a result */
+qbw_ready:
+   mov $r2 (sub $r2 $r4) /* low word of end - start */
+   braz $r1 #qbw_postclamp /* $r1 is presumably arg (the clamp); 0 = unclamped */
+   mov $r3 (sbb $r3 $r5) /* high word, with borrow (presumably also the braz delay slot) */
+   branz annul $r3 #qbw_clamp /* difference does not fit in 32 bits */
+   mov $r4 (sub $r1 $r2) /* clamp - low word */
+   mov $r4 (sbb 0x0 0x0) /* underflow means low word > clamp */
+   braz annul $r4 #qbw_postclamp /* within range: keep the value */
+qbw_clamp:
+   mov $r2 $r1 /* saturate to the clamp value */
+qbw_postclamp:
+   send $r2 /* first (or only) result word */
+   mov $r4 0x1000
+   branz annul $r1 #qbw_done /* clamped results are a single word */
+   send (extrinsrt 0x0 $r4 0x0 0x10 0x10) /* presumably the QUERY_GET command word */
+   maddr 0x16c2 /* QUERY_SEQUENCE; NOTE(review): is the query address advanced for word 2? */
+   send $r3 /* high result word */
+qbw_done:
+   exit send (extrinsrt 0x0 $r4 0x0 0x10 0x10)
+   maddrsend 0x44 /* SERIALIZE */
diff --git a/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme.h b/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme.h
index bf8625e..ac65d4b 100644
--- a/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme.h
+++ b/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme.h
@@ -332,3 +332,37 @@ uint32_t mme9097_draw_arrays_indirect_count[] = {
 	0xfffef837,
 	0xfffdc027,
 };
+
+uint32_t mme9097_query_buffer_write[] = {
+	0x00110071,
+	0x00000201,
+/* 0x000b: qbw_ready */
+	0x00000301,
+	0x00000401,
+/* 0x0012: qbw_clamp */
+/* 0x0013: qbw_postclamp */
+	0x05b08551,
+	0x00000601,
+/* 0x0019: qbw_done */
+	0x00000701,
+	0x0005be10,
+	0x00060610,
+	0x0000b027,
+	0x00000091,
+	0x00051210,
+	0x0001c807,
+	0x00075b10,
+	0x00011837,
+	0x00048c10,
+	0x00060410,
+	0x0000a027,
+	0x00000a11,
+	0x00001041,
+	0x04000411,
+	0x00010837,
+	0x84010042,
+	0x05b08021,
+	0x00001841,
+	0x840100c2,
+	0x00110071,
+}; /* 27 words; presumably assembler output for #mme9097_query_buffer_write */
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_macros.h b/src/gallium/drivers/nouveau/nvc0/nvc0_macros.h
index 27c026b..49e176c 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_macros.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_macros.h
@@ -33,4 +33,6 @@
 
 #define NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT_COUNT		0x00003850
 
+#define NVC0_3D_MACRO_QUERY_BUFFER_WRITE			0x00003858
+
 #endif /* __NVC0_MACROS_H__ */
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
index 7497317..9b07841 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
@@ -74,6 +74,24 @@ nvc0_get_query_result(struct pipe_context *pipe, struct pipe_query *pq,
 }
 
 static void
+nvc0_get_query_result_resource(struct pipe_context *pipe,
+                               struct pipe_query *pq,
+                               boolean wait,
+                               enum pipe_query_value_type result_type,
+                               int index,
+                               struct pipe_resource *resource,
+                               unsigned offset)
+{
+   struct nvc0_query *q = nvc0_query(pq);
+   if (!q->funcs->get_query_result_resource) {
+      assert(!"Unexpected lack of get_query_result_resource");
+      return;
+   }
+   q->funcs->get_query_result_resource(nvc0_context(pipe), q, wait, result_type,
+                                       index, resource, offset);
+}
+
+static void
 nvc0_render_condition(struct pipe_context *pipe,
                       struct pipe_query *pq,
                       boolean condition, uint mode)
@@ -262,6 +280,7 @@ nvc0_init_query_functions(struct nvc0_context *nvc0)
    pipe->begin_query = nvc0_begin_query;
    pipe->end_query = nvc0_end_query;
    pipe->get_query_result = nvc0_get_query_result;
+   pipe->get_query_result_resource = nvc0_get_query_result_resource;
    pipe->render_condition = nvc0_render_condition;
    nvc0->cond_condmode = NVC0_3D_COND_MODE_ALWAYS;
 }
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query.h b/src/gallium/drivers/nouveau/nvc0/nvc0_query.h
index c46361c..a887b22 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query.h
@@ -14,6 +14,13 @@ struct nvc0_query_funcs {
    void (*end_query)(struct nvc0_context *, struct nvc0_query *);
    boolean (*get_query_result)(struct nvc0_context *, struct nvc0_query *,
                                boolean, union pipe_query_result *);
+   void (*get_query_result_resource)(struct nvc0_context *nvc0,
+                                     struct nvc0_query *q,
+                                     boolean wait,
+                                     enum pipe_query_value_type result_type,
+                                     int index,
+                                     struct pipe_resource *resource,
+                                     unsigned offset);
 };
 
 struct nvc0_query {
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c
index 1bed016..34b4b46 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c
@@ -358,11 +358,117 @@ nvc0_hw_get_query_result(struct nvc0_context *nvc0, struct nvc0_query *q,
    return true;
 }
 
+/* Write a query's result into `resource` at `offset` from the command stream
+ * via the QUERY_BUFFER_WRITE macro; index == -1 instead stores whether the
+ * result is available, which is checked on the CPU and uploaded with push_cb.
+ */
+static void
+nvc0_hw_get_query_result_resource(struct nvc0_context *nvc0,
+                                  struct nvc0_query *q,
+                                  boolean wait,
+                                  enum pipe_query_value_type result_type,
+                                  int index,
+                                  struct pipe_resource *resource,
+                                  unsigned offset)
+{
+   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+   struct nvc0_hw_query *hq = nvc0_hw_query(q);
+   struct nv04_resource *buf = nv04_resource(resource);
+   unsigned stride;
+
+   assert(!hq->funcs || !hq->funcs->get_query_result);
+
+   if (index == -1) {
+      if (hq->state != NVC0_HW_QUERY_STATE_READY)
+         nvc0_hw_query_update(nvc0->screen->base.client, q);
+      /* Word array, not a punned uint64_t: word 0 is the flag on any host. */
+      uint32_t ready[2] = { hq->state == NVC0_HW_QUERY_STATE_READY, 0 };
+      nvc0->base.push_cb(&nvc0->base, buf, offset,
+                         result_type >= QUERY_I64 ? 2 : 1, ready);
+      return;
+   }
+
+   /* We either need to compute a 32- or 64-bit difference between 2 values,
+    * and then store the result as either a 32- or 64-bit value. As such let's
+    * treat all inputs as 64-bit (and just push an extra 0 for the 32-bit
+    * ones), and have one macro that clamps result to i32, u32, or just
+    * outputs the difference (no need to worry about 64-bit clamping).
+    */
+   if (wait)
+      nvc0_hw_query_fifo_wait(push, q);
+   nouveau_pushbuf_space(push, 16, 2, 0);
+   PUSH_REFN (push, hq->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
+   PUSH_REFN (push, buf->bo, buf->domain | NOUVEAU_BO_WR);
+   IMMED_NVC0(push, NVC0_3D(SERIALIZE), 0);
+   BEGIN_NVC0(push, NVC0_3D(QUERY_ADDRESS_HIGH), 2);
+   PUSH_DATAh(push, buf->address + offset);
+   PUSH_DATA (push, buf->address + offset);
+   BEGIN_1IC0(push, NVC0_3D(MACRO_QUERY_BUFFER_WRITE), 7);
+   if (q->type == PIPE_QUERY_OCCLUSION_PREDICATE) /* XXX what if 64-bit? */
+      PUSH_DATA(push, 0x00000001);
+   else if (result_type == QUERY_I32)
+      PUSH_DATA(push, 0x7fffffff);
+   else if (result_type == QUERY_U32)
+      PUSH_DATA(push, 0xffffffff);
+   else
+      PUSH_DATA(push, 0x00000000);
+
+   switch (q->type) {
+   case PIPE_QUERY_SO_STATISTICS:
+      stride = 2;
+      break;
+   case PIPE_QUERY_PIPELINE_STATISTICS:
+      stride = 12;
+      break;
+   default:
+      assert(index == 0);
+      stride = 1;
+      break;
+   }
+
+   if (hq->is64bit) {
+      nouveau_pushbuf_data(push, hq->bo, hq->offset + 16 * index,
+                           8 | NVC0_IB_ENTRY_1_NO_PREFETCH);
+      nouveau_pushbuf_data(push, hq->bo, hq->offset + 16 * (index + stride),
+                           8 | NVC0_IB_ENTRY_1_NO_PREFETCH);
+      if (nouveau_fence_signalled(hq->fence)) {
+         PUSH_DATA(push, 0); /* desired == actual: macro sees it as ready */
+         PUSH_DATA(push, 0);
+      } else {
+         PUSH_DATA(push, hq->fence->sequence); /* desired sequence */
+         nouveau_pushbuf_data(push, nvc0->screen->fence.bo, 0,
+                              4 | NVC0_IB_ENTRY_1_NO_PREFETCH);
+      }
+   } else {
+      nouveau_pushbuf_data(push, hq->bo, hq->offset + 4,
+                           4 | NVC0_IB_ENTRY_1_NO_PREFETCH);
+      PUSH_DATA(push, 0); /* zero high word */
+      nouveau_pushbuf_data(push, hq->bo, hq->offset + 16 + 4,
+                           4 | NVC0_IB_ENTRY_1_NO_PREFETCH);
+      PUSH_DATA(push, 0); /* zero high word */
+      PUSH_DATA(push, hq->sequence); /* desired sequence */
+      if (hq->state != NVC0_HW_QUERY_STATE_READY) {
+         nouveau_pushbuf_data(push, hq->bo, hq->offset,
+                              4 | NVC0_IB_ENTRY_1_NO_PREFETCH);
+      } else {
+         PUSH_DATA(push, hq->sequence);
+      }
+   }
+
+   if (buf->mm) {
+      nouveau_fence_ref(nvc0->screen->base.fence.current, &buf->fence);
+      nouveau_fence_ref(nvc0->screen->base.fence.current, &buf->fence_wr);
+   }
+
+   nvc0->cb_dirty = true;
+}
+
 static const struct nvc0_query_funcs hw_query_funcs = {
    .destroy_query = nvc0_hw_destroy_query,
    .begin_query = nvc0_hw_begin_query,
    .end_query = nvc0_hw_end_query,
    .get_query_result = nvc0_hw_get_query_result,
+   .get_query_result_resource = nvc0_hw_get_query_result_resource, /* query buffer object path */
 };
 
 struct nvc0_query *
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
index 2911a77..4988803 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
@@ -191,6 +191,7 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
    case PIPE_CAP_MULTI_DRAW_INDIRECT:
    case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS:
    case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL:
+   case PIPE_CAP_QUERY_BUFFER_OBJECT:
       return 1;
    case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
       return (class_3d >= NVE4_3D_CLASS) ? 1 : 0;
@@ -217,7 +218,6 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
    case PIPE_CAP_INVALIDATE_BUFFER:
    case PIPE_CAP_GENERATE_MIPMAP:
    case PIPE_CAP_STRING_MARKER:
-   case PIPE_CAP_QUERY_BUFFER_OBJECT:
       return 0;
 
    case PIPE_CAP_VENDOR_ID:
@@ -1047,6 +1047,7 @@ nvc0_screen_create(struct nouveau_device *dev)
    MK_MACRO(NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT, mme9097_draw_elts_indirect);
    MK_MACRO(NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT_COUNT, mme9097_draw_arrays_indirect_count);
    MK_MACRO(NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT_COUNT, mme9097_draw_elts_indirect_count);
+   MK_MACRO(NVC0_3D_MACRO_QUERY_BUFFER_WRITE, mme9097_query_buffer_write);
 
    BEGIN_NVC0(push, NVC0_3D(RASTERIZE_ENABLE), 1);
    PUSH_DATA (push, 1);
-- 
2.4.10



More information about the mesa-dev mailing list