[Mesa-dev] [PATCH v3 2/3] nvc0: rewrite query buffer write macro to output 64-bit predicates

Rhys Perry pendingchaos02 at gmail.com
Tue May 22 23:15:43 UTC 2018


Signed-off-by: Rhys Perry <pendingchaos02 at gmail.com>
---
 src/gallium/drivers/nouveau/nvc0/mme/com9097.mme   | 91 ++++++++++++----------
 src/gallium/drivers/nouveau/nvc0/mme/com9097.mme.h | 64 ++++++++-------
 src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c   | 81 ++++++++++---------
 3 files changed, 133 insertions(+), 103 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme b/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme
index 38c2e86843..0e5ad66f56 100644
--- a/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme
+++ b/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme
@@ -494,62 +494,75 @@ daic_runout_check:
 
 /* NVC0_3D_MACRO_QUERY_BUFFER_WRITE:
  *
- * This is a combination macro for all of our query buffer object needs.
- * It has the option to clamp results to a configurable amount, as well as
+ * This macro writes out a query's result into a resource.
+ * It has the options to clamp the result to a configurable amount and
  * to write out one or two words.
  *
  * We use the query engine to write out the values, and expect the query
  * address to point to the right place.
  *
- * arg = clamp value (0 means unclamped). clamped means just 1 written value.
- * parm[0] = LSB of end value
- * parm[1] = MSB of end value
- * parm[2] = LSB of start value
- * parm[3] = MSB of start value
- * parm[4] = desired sequence
- * parm[5] = actual sequence
- * parm[6] = query high address
+ * Also note that although the result availability is determined at the start,
+ * the macro does not exit on an unavailable result until right before clamping.
+ *
+ * arg = write64 | (clamp<<1)
+ * parm[0] = desired sequence
+ * parm[1] = actual sequence
+ * parm[2] = LSB of end value
+ * parm[3] = MSB of end value
+ * parm[4] = LSB of start value
+ * parm[5] = MSB of start value
+ * parm[6] = clamp value
  * parm[7] = query low address
+ * parm[8] = query high address
  */
 .section #mme9097_query_buffer_write
+/* determine result availability */
+   parm $r2
+   parm $r3
+   mov $r6 (sub $r3 $r2)
+   mov $r6 (sbb 0x0 0x0)
+/* calculate result and write high into $r3 and low into $r2 */
    parm $r2
    parm $r3
    parm $r4
-   parm $r5 maddr 0x16c0 /* QUERY_ADDRESS_HIGH */
-   parm $r6
-   parm $r7
-   mov $r6 (sub $r7 $r6) /* actual - desired */
-   mov $r6 (sbb 0x0 0x0) /* if there was underflow, not reached yet */
-   parm $r7
-   exit braz $r6 #qbw_ready
-   parm $r6
-qbw_ready:
+   parm $r5
    mov $r2 (sub $r2 $r4)
-   braz $r1 #qbw_postclamp
    mov $r3 (sbb $r3 $r5)
-   branz annul $r3 #qbw_clamp
-   mov $r4 (sub $r1 $r2)
-   mov $r4 (sbb 0x0 0x0)
-   braz annul $r4 #qbw_postclamp
-qbw_clamp:
-   mov $r2 $r1
-qbw_postclamp:
-   send $r7
-   send $r6
+   braz $r6 #qbw_available
+   parm $r4 /* clamp value */
+   exit parm $r7 /* result not available - drain remaining parameters and exit */
+   parm $r7
+qbw_available:
+   mov $r6 (extrinsrt 0x0 $r1 1 1 0)
+   braz annul $r6 #qbw_write
+   branz $r3 #qbw_doclamp /* clamp if the high word is set */
+   mov $r7 (sub $r4 $r2)
+   mov $r7 (sbb 0x0 0x0)
+   braz annul $r7 #qbw_write
+qbw_doclamp:
+   mov $r2 $r4
+   mov $r3 0x0
+qbw_write:
+   parm $r5
+   parm $r4 maddr 0x16c0 /* QUERY_ADDRESS_HIGH */
+   send $r4
+   send $r5
    send $r2
-   branz $r1 #qbw_done
-   mov $r4 0x1000
-   send (extrinsrt 0x0 $r4 0x0 0x10 0x10)
+   mov $r6 (extrinsrt 0x0 $r1 0 1 0)
+   braz $r6 #qbw_done
+   mov $r7 0x1000
+   send (extrinsrt 0x0 $r7 0 16 16)
+   /* XXX: things seem to mess up if $r6 is replaced with 0x4 in the add */
+   mov $r6 0x4
+   mov $r5 (add $r5 $r6)
+   mov $r4 (adc $r4 0x0)
    maddr 0x16c0 /* QUERY_ADDRESS_HIGH */
-   mov $r5 0x4
-   mov $r6 (add $r6 $r5)
-   mov $r7 (adc $r7 0x0)
-   send $r7
-   send $r6
+   send $r4
+   send $r5
    send $r3
 qbw_done:
-   exit send (extrinsrt 0x0 $r4 0x0 0x10 0x10)
-   maddrsend 0x44
+   exit send (extrinsrt 0x0 $r7 0 16 16)
+   maddrsend 0x44 /* SERIALIZE */
 
 /* NVC0_3D_MACRO_CONSERVATIVE_RASTER_STATE:
  *
diff --git a/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme.h b/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme.h
index 49c0891114..3ebfda47ee 100644
--- a/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme.h
+++ b/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme.h
@@ -336,41 +336,47 @@ uint32_t mme9097_draw_arrays_indirect_count[] = {
 uint32_t mme9097_query_buffer_write[] = {
 	0x00000201,
 	0x00000301,
-/* 0x000b: qbw_ready */
-	0x00000401,
-	0x05b00551,
-/* 0x0012: qbw_clamp */
-/* 0x0013: qbw_postclamp */
-	0x00000601,
-	0x00000701,
-	0x0005be10,
+	0x00049e10,
+/* 0x000e: qbw_available */
 	0x00060610,
-/* 0x0020: qbw_done */
-	0x00000701,
-	0x0000b087,
-	0x00000601,
+	0x00000201,
+/* 0x0014: qbw_doclamp */
+/* 0x0016: qbw_write */
+	0x00000301,
+	0x00000401,
+	0x00000501,
 	0x00051210,
-	0x0001c807,
+/* 0x0026: qbw_done */
 	0x00075b10,
-	0x00011837,
-	0x00048c10,
-	0x00060410,
-	0x0000a027,
-	0x00000a11,
-	0x00003841,
-	0x00003041,
+	0x00013007,
+	0x00000401,
+	0x00000781,
+	0x00000701,
+	0x00424612,
+	0x0001f027,
+	0x00011817,
+	0x0004a710,
+	0x00060710,
+	0x0000f827,
+	0x00002211,
+	0x00000311,
+	0x00000501,
+	0x05b00451,
+	0x00002041,
+	0x00002841,
 	0x00001041,
-	0x00028817,
-	0x04000411,
-	0x84010042,
+	0x00404612,
+	0x0002b007,
+	0x04000711,
+	0x8401c042,
+	0x00010611,
+	0x0001ad10,
+	0x00022410,
 	0x05b00021,
-	0x00010511,
-	0x00017610,
-	0x00023f10,
-	0x00003841,
-	0x00003041,
+	0x00002041,
+	0x00002841,
 	0x00001841,
-	0x840100c2,
+	0x8401c0c2,
 	0x00110071,
 };
 
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c
index db5f5092ba..835742bbc6 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c
@@ -381,6 +381,8 @@ nvc0_hw_get_query_result_resource(struct nvc0_context *nvc0,
    struct nvc0_hw_query *hq = nvc0_hw_query(q);
    struct nv04_resource *buf = nv04_resource(resource);
    unsigned qoffset = 0, stride;
+   bool predicate = false;
+   uint32_t arg;
 
    assert(!hq->funcs || !hq->funcs->get_query_result);
 
@@ -401,18 +403,27 @@ nvc0_hw_get_query_result_resource(struct nvc0_context *nvc0,
       return;
    }
 
+   switch (q->type) {
+   case PIPE_QUERY_OCCLUSION_PREDICATE:
+   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
+   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
+   case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
+      predicate = true;
+      break;
+   }
+
+   arg = result_type >= PIPE_QUERY_TYPE_I64 ? 1 : 0;
+   /* Only clamp if the output is 32-bit or a predicate; we don't bother
+    * clamping other 64-bit outputs */
+   if ((result_type<PIPE_QUERY_TYPE_I64 || predicate) && index!=-1)
+      arg |= 1 << 1;
+
    /* If the fence guarding this query has not been emitted, that makes a lot
     * of the following logic more complicated.
     */
    if (hq->is64bit && hq->fence->state < NOUVEAU_FENCE_STATE_EMITTED)
       nouveau_fence_emit(hq->fence);
 
-   /* We either need to compute a 32- or 64-bit difference between 2 values,
-    * and then store the result as either a 32- or 64-bit value. As such let's
-    * treat all inputs as 64-bit (and just push an extra 0 for the 32-bit
-    * ones), and have one macro that clamps result to i32, u32, or just
-    * outputs the difference (no need to worry about 64-bit clamping).
-    */
    if (hq->state != NVC0_HW_QUERY_STATE_READY)
       nvc0_hw_query_update(nvc0->screen->base.client, q);
 
@@ -425,22 +436,20 @@ nvc0_hw_get_query_result_resource(struct nvc0_context *nvc0,
    nouveau_pushbuf_space(push, 32, 2, 0);
    PUSH_REFN (push, hq->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
    PUSH_REFN (push, buf->bo, buf->domain | NOUVEAU_BO_WR);
-   BEGIN_1IC0(push, NVC0_3D(MACRO_QUERY_BUFFER_WRITE), 9);
-   switch (q->type) {
-   case PIPE_QUERY_OCCLUSION_PREDICATE:
-   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: /* XXX what if 64-bit? */
-   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
-   case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
-      PUSH_DATA(push, 0x00000001);
-      break;
-   default:
-      if (result_type == PIPE_QUERY_TYPE_I32)
-         PUSH_DATA(push, 0x7fffffff);
-      else if (result_type == PIPE_QUERY_TYPE_U32)
-         PUSH_DATA(push, 0xffffffff);
-      else
-         PUSH_DATA(push, 0x00000000);
-      break;
+   BEGIN_1IC0(push, NVC0_3D(MACRO_QUERY_BUFFER_WRITE), 10);
+   PUSH_DATA(push, arg);
+
+   if (wait || hq->state == NVC0_HW_QUERY_STATE_READY) {
+      PUSH_DATA(push, 0);
+      PUSH_DATA(push, 0);
+   } else if (hq->is64bit) {
+      PUSH_DATA(push, hq->fence->sequence);
+      nouveau_pushbuf_data(push, nvc0->screen->fence.bo, 0,
+                           4 | NVC0_IB_ENTRY_1_NO_PREFETCH);
+   } else {
+      PUSH_DATA(push, hq->sequence);
+      nouveau_pushbuf_data(push, hq->bo, hq->offset,
+                           4 | NVC0_IB_ENTRY_1_NO_PREFETCH);
    }
 
    switch (q->type) {
@@ -460,6 +469,11 @@ nvc0_hw_get_query_result_resource(struct nvc0_context *nvc0,
       break;
    }
 
+   /* We need to compute the difference between 2 values, and then store the
+    * result as either a 32- or 64-bit value. As such let's treat all inputs
+    * as 64-bit (and just push an extra 0 for the 32-bit ones), and clamp
+    * the result to a limit if it's 32-bit or a predicate.
+    */
    if (hq->is64bit || qoffset) {
       nouveau_pushbuf_data(push, hq->bo, hq->offset + qoffset + 16 * index,
                            8 | NVC0_IB_ENTRY_1_NO_PREFETCH);
@@ -480,20 +494,17 @@ nvc0_hw_get_query_result_resource(struct nvc0_context *nvc0,
       PUSH_DATA(push, 0);
    }
 
-   if (wait || hq->state == NVC0_HW_QUERY_STATE_READY) {
-      PUSH_DATA(push, 0);
-      PUSH_DATA(push, 0);
-   } else if (hq->is64bit) {
-      PUSH_DATA(push, hq->fence->sequence);
-      nouveau_pushbuf_data(push, nvc0->screen->fence.bo, 0,
-                           4 | NVC0_IB_ENTRY_1_NO_PREFETCH);
-   } else {
-      PUSH_DATA(push, hq->sequence);
-      nouveau_pushbuf_data(push, hq->bo, hq->offset,
-                           4 | NVC0_IB_ENTRY_1_NO_PREFETCH);
-   }
-   PUSH_DATAh(push, buf->address + offset);
+   if (predicate)
+      PUSH_DATA(push, 0x00000001);
+   else if (result_type == PIPE_QUERY_TYPE_I32)
+      PUSH_DATA(push, 0x7fffffff);
+   else if (result_type == PIPE_QUERY_TYPE_U32)
+      PUSH_DATA(push, 0xffffffff);
+   else
+      PUSH_DATA(push, 0x00000000);
+
    PUSH_DATA (push, buf->address + offset);
+   PUSH_DATAh(push, buf->address + offset);
 
    util_range_add(&buf->valid_buffer_range, offset,
                   offset + (result_type >= PIPE_QUERY_TYPE_I64 ? 8 : 4));
-- 
2.14.3



More information about the mesa-dev mailing list