[Mesa-dev] [PATCH] llvmpipe: handle more queries

sroland at vmware.com sroland at vmware.com
Tue Jun 18 18:58:09 PDT 2013


From: Roland Scheidegger <sroland at vmware.com>

Handle PIPE_QUERY_GPU_FINISHED and PIPE_QUERY_TIMESTAMP_DISJOINT, and
also fill out the ps_invocations and c_primitives fields of
PIPE_QUERY_PIPELINE_STATISTICS (the other fields should already be
handled). Note that ps_invocations isn't pixel exact, only exact to a
16-pixel (4x4) block, but I guess it's better than nothing.
This doesn't really seem to work correctly, but there are probably bugs elsewhere.
Also use a 64-bit counter for occlusion queries.
---
 src/gallium/drivers/llvmpipe/lp_bld_depth.c   |   11 ++++++++---
 src/gallium/drivers/llvmpipe/lp_jit.c         |    2 +-
 src/gallium/drivers/llvmpipe/lp_jit.h         |    2 +-
 src/gallium/drivers/llvmpipe/lp_query.c       |   23 ++++++++++++++++++++---
 src/gallium/drivers/llvmpipe/lp_rast.c        |   19 ++++++++++++++++---
 src/gallium/drivers/llvmpipe/lp_rast_priv.h   |    6 +++++-
 src/gallium/drivers/llvmpipe/lp_setup.c       |    4 ++--
 src/gallium/drivers/llvmpipe/lp_setup_line.c  |   13 ++++++++-----
 src/gallium/drivers/llvmpipe/lp_setup_point.c |   10 +++++++++-
 src/gallium/drivers/llvmpipe/lp_setup_tri.c   |    8 ++++++++
 10 files changed, 78 insertions(+), 20 deletions(-)

diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.c b/src/gallium/drivers/llvmpipe/lp_bld_depth.c
index edb59cc..79891cf 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_depth.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.c
@@ -429,7 +429,7 @@ get_s_shift_and_mask(const struct util_format_description *format_desc,
  * Test the depth mask. Add the number of channel which has none zero mask
  * into the occlusion counter. e.g. maskvalue is {-1, -1, -1, -1}.
  * The counter will add 4.
- * TODO: could get that out of the loop, and need to use 64bit counter.
+ * TODO: could get that out of the fs loop.
  *
  * \param type holds element type of the mask vector.
  * \param maskvalue is the depth test mask.
@@ -458,6 +458,7 @@ lp_build_occlusion_count(struct gallivm_state *gallivm,
                                       LLVMInt32TypeInContext(context), bits);
       count = lp_build_intrinsic_unary(builder, popcntintr,
                                        LLVMInt32TypeInContext(context), bits);
+      count = LLVMBuildZExt(builder, count, LLVMIntTypeInContext(context, 64), "");
    }
    else if(util_cpu_caps.has_avx && type.length == 8) {
       const char *movmskintr = "llvm.x86.avx.movmsk.ps.256";
@@ -468,6 +469,7 @@ lp_build_occlusion_count(struct gallivm_state *gallivm,
                                       LLVMInt32TypeInContext(context), bits);
       count = lp_build_intrinsic_unary(builder, popcntintr,
                                        LLVMInt32TypeInContext(context), bits);
+      count = LLVMBuildZExt(builder, count, LLVMIntTypeInContext(context, 64), "");
    }
    else {
       unsigned i;
@@ -510,8 +512,11 @@ lp_build_occlusion_count(struct gallivm_state *gallivm,
        }
        count = lp_build_intrinsic_unary(builder, popcntintr, counttype, countd);
 
-       if (type.length > 4) {
-          count = LLVMBuildTrunc(builder, count, LLVMIntTypeInContext(context, 32), "");
+       if (type.length > 8) {
+          count = LLVMBuildTrunc(builder, count, LLVMIntTypeInContext(context, 64), "");
+       }
+       else if (type.length < 8) {
+          count = LLVMBuildZExt(builder, count, LLVMIntTypeInContext(context, 64), "");
        }
    }
    newcount = LLVMBuildLoad(builder, counter, "origcount");
diff --git a/src/gallium/drivers/llvmpipe/lp_jit.c b/src/gallium/drivers/llvmpipe/lp_jit.c
index f517b67..fa0f128 100644
--- a/src/gallium/drivers/llvmpipe/lp_jit.c
+++ b/src/gallium/drivers/llvmpipe/lp_jit.c
@@ -195,7 +195,7 @@ lp_jit_create_types(struct lp_fragment_shader_variant *lp)
       LLVMTypeRef elem_types[LP_JIT_THREAD_DATA_COUNT];
       LLVMTypeRef thread_data_type;
 
-      elem_types[LP_JIT_THREAD_DATA_COUNTER] = LLVMInt32TypeInContext(lc);
+      elem_types[LP_JIT_THREAD_DATA_COUNTER] = LLVMInt64TypeInContext(lc);
 
       thread_data_type = LLVMStructTypeInContext(lc, elem_types,
                                                  Elements(elem_types), 0);
diff --git a/src/gallium/drivers/llvmpipe/lp_jit.h b/src/gallium/drivers/llvmpipe/lp_jit.h
index 2ecfde7..30cfaae 100644
--- a/src/gallium/drivers/llvmpipe/lp_jit.h
+++ b/src/gallium/drivers/llvmpipe/lp_jit.h
@@ -164,7 +164,7 @@ enum {
 
 struct lp_jit_thread_data
 {
-   uint32_t vis_counter;
+   uint64_t vis_counter;
 };
 
 
diff --git a/src/gallium/drivers/llvmpipe/lp_query.c b/src/gallium/drivers/llvmpipe/lp_query.c
index 922913d..7fbf5f7 100644
--- a/src/gallium/drivers/llvmpipe/lp_query.c
+++ b/src/gallium/drivers/llvmpipe/lp_query.c
@@ -40,6 +40,7 @@
 #include "lp_query.h"
 #include "lp_screen.h"
 #include "lp_state.h"
+#include "lp_rast.h"
 
 
 static struct llvmpipe_query *llvmpipe_query( struct pipe_query *p )
@@ -128,7 +129,7 @@ llvmpipe_get_query_result(struct pipe_context *pipe,
    case PIPE_QUERY_OCCLUSION_PREDICATE:
       for (i = 0; i < num_threads; i++) {
          /* safer (still not guaranteed) when there's an overflow */
-         *result = *result || pq->count[i];
+         vresult->b = vresult->b || pq->count[i];
       }
       break;
    case PIPE_QUERY_TIMESTAMP:
@@ -140,6 +141,17 @@ llvmpipe_get_query_result(struct pipe_context *pipe,
             *result = os_time_get_nano();
       }
       break;
+   case PIPE_QUERY_TIMESTAMP_DISJOINT: {
+      struct pipe_query_data_timestamp_disjoint *td =
+         (struct pipe_query_data_timestamp_disjoint *)vresult;
+      /* os_get_time_nano return nanoseconds, but we sum all threads */
+      td->frequency = UINT64_C(1000000000) * num_threads;
+      td->disjoint = FALSE;
+   }
+      break;
+   case PIPE_QUERY_GPU_FINISHED:
+      vresult->b = TRUE;
+      break;
    case PIPE_QUERY_PRIMITIVES_GENERATED:
       *result = pq->num_primitives_generated;
       break;
@@ -147,7 +159,7 @@ llvmpipe_get_query_result(struct pipe_context *pipe,
       *result = pq->num_primitives_written;
       break;
    case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
-      *result = pq->so_has_overflown;
+      vresult->b = pq->so_has_overflown;
       break;
    case PIPE_QUERY_SO_STATISTICS: {
       struct pipe_query_data_so_statistics *stats =
@@ -159,8 +171,13 @@ llvmpipe_get_query_result(struct pipe_context *pipe,
    case PIPE_QUERY_PIPELINE_STATISTICS: {
       struct pipe_query_data_pipeline_statistics *stats =
          (struct pipe_query_data_pipeline_statistics *)vresult;
+      /* only ps_invocations come from binned query */
+      for (i = 0; i < num_threads; i++) {
+         pq->stats.ps_invocations += pq->count[i];
+      }
+      pq->stats.ps_invocations *= LP_RASTER_BLOCK_SIZE * LP_RASTER_BLOCK_SIZE;
       *stats = pq->stats;
-   }
+    }
       break;
    default:
       assert(0);
diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c
index d802d53..62a82e3 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast.c
+++ b/src/gallium/drivers/llvmpipe/lp_rast.c
@@ -455,6 +455,10 @@ lp_rast_shade_quads_mask(struct lp_rasterizer_task *task,
     * allocated 4x4 blocks hence need to filter them out here.
     */
    if ((x % TILE_SIZE) < task->width && (y % TILE_SIZE) < task->height) {
+      if (task->query[PIPE_QUERY_PIPELINE_STATISTICS]) {
+         /* not very accurate would need a popcount on the mask */
+         task->ps_invocations++;
+      }
       /* run shader on 4x4 block */
       BEGIN_JIT_CALL(state, task);
       variant->jit_function[RAST_EDGE_TEST](&state->jit_context,
@@ -493,11 +497,14 @@ lp_rast_begin_query(struct lp_rasterizer_task *task,
    case PIPE_QUERY_OCCLUSION_PREDICATE:
       task->thread_data.vis_counter = 0;
       break;
+   case PIPE_QUERY_PIPELINE_STATISTICS:
+      task->ps_invocations = 0;
+      break;
    case PIPE_QUERY_PRIMITIVES_GENERATED:
    case PIPE_QUERY_PRIMITIVES_EMITTED:
    case PIPE_QUERY_SO_STATISTICS:
-   case PIPE_QUERY_PIPELINE_STATISTICS:
    case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
+   case PIPE_QUERY_TIMESTAMP_DISJOINT:
       break;
    default:
       assert(0);
@@ -518,7 +525,9 @@ lp_rast_end_query(struct lp_rasterizer_task *task,
                   const union lp_rast_cmd_arg arg)
 {
    struct llvmpipe_query *pq = arg.query_obj;
-   assert(task->query[pq->type] == pq || pq->type == PIPE_QUERY_TIMESTAMP);
+   assert(task->query[pq->type] == pq ||
+          pq->type == PIPE_QUERY_TIMESTAMP ||
+          pq->type == PIPE_QUERY_GPU_FINISHED);
 
    switch (pq->type) {
    case PIPE_QUERY_OCCLUSION_COUNTER:
@@ -528,11 +537,15 @@ lp_rast_end_query(struct lp_rasterizer_task *task,
    case PIPE_QUERY_TIMESTAMP:
       pq->count[task->thread_index] = os_time_get_nano();
       break;
+   case PIPE_QUERY_PIPELINE_STATISTICS:
+      pq->count[task->thread_index] += task->ps_invocations;
+      break;
    case PIPE_QUERY_PRIMITIVES_GENERATED:
    case PIPE_QUERY_PRIMITIVES_EMITTED:
    case PIPE_QUERY_SO_STATISTICS:
-   case PIPE_QUERY_PIPELINE_STATISTICS:
    case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
+   case PIPE_QUERY_TIMESTAMP_DISJOINT:
+   case PIPE_QUERY_GPU_FINISHED:
       break;
    default:
       assert(0);
diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h
index 6f03023..3048940 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h
+++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h
@@ -99,8 +99,8 @@ struct lp_rasterizer_task
 
    /* occlude counter for visible pixels */
    struct lp_jit_thread_data thread_data;
-   uint64_t query_start;
    struct llvmpipe_query *query[PIPE_QUERY_TYPES];
+   uint64_t ps_invocations;
 
    pipe_semaphore work_ready;
    pipe_semaphore work_done;
@@ -307,6 +307,10 @@ lp_rast_shade_quads_all( struct lp_rasterizer_task *task,
     * allocated 4x4 blocks hence need to filter them out here.
     */
    if ((x % TILE_SIZE) < task->width && (y % TILE_SIZE) < task->height) {
+      if (task->query[PIPE_QUERY_PIPELINE_STATISTICS]) {
+         /* not very accurate would need a popcount on the mask */
+         task->ps_invocations++;
+      }
       /* run shader on 4x4 block */
       BEGIN_JIT_CALL(state, task);
       variant->jit_function[RAST_WHOLE]( &state->jit_context,
diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c
index e2fb257..6b64446 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup.c
@@ -1215,7 +1215,7 @@ lp_setup_begin_query(struct lp_setup_context *setup,
    assert(setup->active_query[pq->type] == NULL);
 
    set_scene_state(setup, SETUP_ACTIVE, "begin_query");
-   
+
    setup->active_query[pq->type] = pq;
 
    /* XXX: It is possible that a query is created before the scene
@@ -1249,7 +1249,7 @@ lp_setup_end_query(struct lp_setup_context *setup, struct llvmpipe_query *pq)
 {
    set_scene_state(setup, SETUP_ACTIVE, "end_query");
 
-   if (pq->type != PIPE_QUERY_TIMESTAMP) {
+   if (pq->type != PIPE_QUERY_TIMESTAMP && pq->type != PIPE_QUERY_GPU_FINISHED) {
       assert(setup->active_query[pq->type] == pq);
       setup->active_query[pq->type] = NULL;
    }
diff --git a/src/gallium/drivers/llvmpipe/lp_setup_line.c b/src/gallium/drivers/llvmpipe/lp_setup_line.c
index a9809a0..4b59bf3 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup_line.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup_line.c
@@ -36,6 +36,7 @@
 #include "lp_rast.h"
 #include "lp_state_fs.h"
 #include "lp_state_setup.h"
+#include "lp_context.h"
 
 #define NUM_CHANNELS 4
 
@@ -541,11 +542,6 @@ try_setup_line( struct lp_setup_context *setup,
       y[3] = subpixel_snap(v1[0][1] + y_offset     - setup->pixel_offset);
    }
 
-
-
-   LP_COUNT(nr_tris);
-
- 
    /* Bounding rectangle (in pixels) */
    {
       /* Yes this is necessary to accurately calculate bounding boxes
@@ -598,6 +594,13 @@ try_setup_line( struct lp_setup_context *setup,
    line->v[1][1] = v2[0][1];
 #endif
 
+   LP_COUNT(nr_tris);
+
+   if (setup->active_query[PIPE_QUERY_PIPELINE_STATISTICS]) {
+      struct llvmpipe_context *lp_context = (struct llvmpipe_context *)setup->pipe;
+      lp_context->pipeline_statistics.c_primitives++;
+   }
+
    /* calculate the deltas */
    plane = GET_PLANES(line);
    plane[0].dcdy = x[0] - x[1];
diff --git a/src/gallium/drivers/llvmpipe/lp_setup_point.c b/src/gallium/drivers/llvmpipe/lp_setup_point.c
index 789caa8..7fe7bc5 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup_point.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup_point.c
@@ -29,13 +29,14 @@
  * Binning code for points
  */
 
-#include "lp_setup_context.h"
 #include "util/u_math.h"
 #include "util/u_memory.h"
+#include "lp_setup_context.h"
 #include "lp_perf.h"
 #include "lp_rast.h"
 #include "lp_state_fs.h"
 #include "lp_state_setup.h"
+#include "lp_context.h"
 #include "tgsi/tgsi_scan.h"
 
 #define NUM_CHANNELS 4
@@ -376,6 +377,13 @@ try_setup_point( struct lp_setup_context *setup,
    point->v[0][1] = v0[0][1];
 #endif
 
+   LP_COUNT(nr_tris);
+
+   if (setup->active_query[PIPE_QUERY_PIPELINE_STATISTICS]) {
+      struct llvmpipe_context *lp_context = (struct llvmpipe_context *)setup->pipe;
+      lp_context->pipeline_statistics.c_primitives++;
+   }
+
    info.v0 = v0;
    info.dx01 = 0;
    info.dx12 = fixed_width;
diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c
index 2ca47bc..bedd16b 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c
@@ -38,6 +38,7 @@
 #include "lp_rast.h"
 #include "lp_state_fs.h"
 #include "lp_state_setup.h"
+#include "lp_context.h"
 
 #define NUM_CHANNELS 4
 
@@ -333,6 +334,11 @@ do_triangle_ccw(struct lp_setup_context *setup,
 
    LP_COUNT(nr_tris);
 
+   if (setup->active_query[PIPE_QUERY_PIPELINE_STATISTICS]) {
+      struct llvmpipe_context *lp_context = (struct llvmpipe_context *)setup->pipe;
+      lp_context->pipeline_statistics.c_primitives++;
+   }
+
    /* Setup parameter interpolants:
     */
    setup->setup.variant->jit_function( v0,
@@ -883,6 +889,8 @@ typedef void (*triangle_func_t)(struct lp_setup_context *setup,
 /**
  * Subdivide this triangle by bisecting edge (v0, v1).
  * \param pv  the provoking vertex (must = v0 or v1 or v2)
+ * TODO: should probably think about non-overflowing arithmetic elsewhere.
+ * This will definitely screw with pipeline counters for instance.
  */
 static void
 subdiv_tri(struct lp_setup_context *setup,
-- 
1.7.9.5


More information about the mesa-dev mailing list