Mesa (master): llvmpipe: rework query logic

Wed Jun 26 21:18:25 UTC 2013

Module: Mesa
Branch: master
Commit: 08203428800554215657f1ebf19d74328103800e
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=08203428800554215657f1ebf19d74328103800e

Author: Roland Scheidegger <sroland at vmware.com>
Date:   Tue Jun 25 23:27:04 2013 +0200

llvmpipe: rework query logic

Previously lp_rast_begin_query commands were always inserted into each bin,
and re-issued if the scene was restarted, while lp_rast_end_query commands
were executed for each still active query at the end of tile rasterization.
Also, the ps_invocations and vis_counter were set to zero when the respective
command was encountered.
This, however, cannot work for multiple queries of the same type (note that
occlusion counter and occlusion predicate queries, while of different types,
were also affected, since both rely on the same vis_counter).
So, change the logic to always set the ps_invocations and vis_counter to zero
at the start of tile rasterization, and then use "start" and "end" per-thread
query values when encountering the begin/end query commands instead, which
should work for multiple queries of the same type. This also means queries do
not have to be reissued in a new scene; however, they still need to be finished
at the end of tile rasterization, so a list of queries still active at the end
of a scene needs to be maintained.
Also, while here, don't bin the queries which don't do anything in
rasterization.
(This change does not actually handle multiple queries of the same type yet,
as the list of active queries is just a simple fixed array and setup can still
only have one query active per type.)

Reviewed-by: Jose Fonseca <jfonseca at vmware.com>

---

 src/gallium/drivers/llvmpipe/lp_query.c     |   15 ++---
 src/gallium/drivers/llvmpipe/lp_query.h     |    3 +-
 src/gallium/drivers/llvmpipe/lp_rast.c      |   56 ++++++-------------
 src/gallium/drivers/llvmpipe/lp_rast_priv.h |    9 +--
 src/gallium/drivers/llvmpipe/lp_scene.h     |    4 +
 src/gallium/drivers/llvmpipe/lp_setup.c     |   81 +++++++++++++++++----------
 src/gallium/drivers/llvmpipe/lp_setup_tri.c |    5 ++
 7 files changed, 92 insertions(+), 81 deletions(-)

diff --git a/src/gallium/drivers/llvmpipe/lp_query.c b/src/gallium/drivers/llvmpipe/lp_query.c
index 1d3edff..38d6b84 100644
--- a/src/gallium/drivers/llvmpipe/lp_query.c
+++ b/src/gallium/drivers/llvmpipe/lp_query.c
@@ -120,19 +120,19 @@ llvmpipe_get_query_result(struct pipe_context *pipe,
    switch (pq->type) {
    case PIPE_QUERY_OCCLUSION_COUNTER:
       for (i = 0; i < num_threads; i++) {
-         *result += pq->count[i];
+         *result += pq->end[i];
       }
       break;
    case PIPE_QUERY_OCCLUSION_PREDICATE:
       for (i = 0; i < num_threads; i++) {
          /* safer (still not guaranteed) when there's an overflow */
-         vresult->b = vresult->b || pq->count[i];
+         vresult->b = vresult->b || pq->end[i];
       }
       break;
    case PIPE_QUERY_TIMESTAMP:
       for (i = 0; i < num_threads; i++) {
-         if (pq->count[i] > *result) {
-            *result = pq->count[i];
+         if (pq->end[i] > *result) {
+            *result = pq->end[i];
          }
          if (*result == 0)
             *result = os_time_get_nano();
@@ -170,7 +170,7 @@ llvmpipe_get_query_result(struct pipe_context *pipe,
          (struct pipe_query_data_pipeline_statistics *)vresult;
       /* only ps_invocations come from binned query */
       for (i = 0; i < num_threads; i++) {
-         pq->stats.ps_invocations += pq->count[i];
+         pq->stats.ps_invocations += pq->end[i];
       }
       pq->stats.ps_invocations *= LP_RASTER_BLOCK_SIZE * LP_RASTER_BLOCK_SIZE;
       *stats = pq->stats;
@@ -200,7 +200,8 @@ llvmpipe_begin_query(struct pipe_context *pipe, struct pipe_query *q)
    }
 
 
-   memset(pq->count, 0, sizeof(pq->count));
+   memset(pq->start, 0, sizeof(pq->start));
+   memset(pq->end, 0, sizeof(pq->end));
    lp_setup_begin_query(llvmpipe->setup, pq);
 
    switch (pq->type) {
@@ -232,8 +233,6 @@ llvmpipe_begin_query(struct pipe_context *pipe, struct pipe_query *q)
       break;
    case PIPE_QUERY_OCCLUSION_COUNTER:
    case PIPE_QUERY_OCCLUSION_PREDICATE:
-      /* Both active at same time will still fail all over the place.
-       * Then again several of each type can be active too... */
       llvmpipe->active_occlusion_query++;
       llvmpipe->dirty |= LP_NEW_OCCLUSION_QUERY;
       break;
diff --git a/src/gallium/drivers/llvmpipe/lp_query.h b/src/gallium/drivers/llvmpipe/lp_query.h
index e29022a..62ad5fd 100644
--- a/src/gallium/drivers/llvmpipe/lp_query.h
+++ b/src/gallium/drivers/llvmpipe/lp_query.h
@@ -42,7 +42,8 @@ struct llvmpipe_context;
 
 
 struct llvmpipe_query {
-   uint64_t count[LP_MAX_THREADS];  /* a counter for each thread */
+   uint64_t start[LP_MAX_THREADS];  /* start count value for each thread */
+   uint64_t end[LP_MAX_THREADS];    /* end count value for each thread */
    struct lp_fence *fence;          /* fence from last scene this was binned in */
    unsigned type;                   /* PIPE_QUERY_* */
    unsigned num_primitives_generated;
diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c
index 62a82e3..871cc50 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast.c
+++ b/src/gallium/drivers/llvmpipe/lp_rast.c
@@ -61,7 +61,6 @@ static void
 lp_rast_begin( struct lp_rasterizer *rast,
                struct lp_scene *scene )
 {
-
    rast->curr_scene = scene;
 
    LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__);
@@ -100,6 +99,9 @@ lp_rast_tile_begin(struct lp_rasterizer_task *task,
    task->height = TILE_SIZE + y * TILE_SIZE > task->scene->fb.height ?
                     task->scene->fb.height - y * TILE_SIZE : TILE_SIZE;
 
+   task->thread_data.vis_counter = 0;
+   task->ps_invocations = 0;
+
    /* reset pointers to color and depth tile(s) */
    memset(task->color_tiles, 0, sizeof(task->color_tiles));
    task->depth_tile = NULL;
@@ -455,10 +457,10 @@ lp_rast_shade_quads_mask(struct lp_rasterizer_task *task,
     * allocated 4x4 blocks hence need to filter them out here.
     */
    if ((x % TILE_SIZE) < task->width && (y % TILE_SIZE) < task->height) {
-      if (task->query[PIPE_QUERY_PIPELINE_STATISTICS]) {
-         /* not very accurate would need a popcount on the mask */
-         task->ps_invocations++;
-      }
+      /* not very accurate would need a popcount on the mask */
+      /* always count this not worth bothering? */
+      task->ps_invocations++;
+
       /* run shader on 4x4 block */
       BEGIN_JIT_CALL(state, task);
       variant->jit_function[RAST_EDGE_TEST](&state->jit_context,
@@ -490,28 +492,18 @@ lp_rast_begin_query(struct lp_rasterizer_task *task,
 {
    struct llvmpipe_query *pq = arg.query_obj;
 
-   assert(task->query[pq->type] == NULL);
-
    switch (pq->type) {
    case PIPE_QUERY_OCCLUSION_COUNTER:
    case PIPE_QUERY_OCCLUSION_PREDICATE:
-      task->thread_data.vis_counter = 0;
+      pq->start[task->thread_index] = task->thread_data.vis_counter;
       break;
    case PIPE_QUERY_PIPELINE_STATISTICS:
-      task->ps_invocations = 0;
-      break;
-   case PIPE_QUERY_PRIMITIVES_GENERATED:
-   case PIPE_QUERY_PRIMITIVES_EMITTED:
-   case PIPE_QUERY_SO_STATISTICS:
-   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
-   case PIPE_QUERY_TIMESTAMP_DISJOINT:
+      pq->start[task->thread_index] = task->ps_invocations;
       break;
    default:
       assert(0);
       break;
    }
-
-   task->query[pq->type] = pq;
 }
 
 
@@ -525,36 +517,26 @@ lp_rast_end_query(struct lp_rasterizer_task *task,
                   const union lp_rast_cmd_arg arg)
 {
    struct llvmpipe_query *pq = arg.query_obj;
-   assert(task->query[pq->type] == pq ||
-          pq->type == PIPE_QUERY_TIMESTAMP ||
-          pq->type == PIPE_QUERY_GPU_FINISHED);
 
    switch (pq->type) {
    case PIPE_QUERY_OCCLUSION_COUNTER:
    case PIPE_QUERY_OCCLUSION_PREDICATE:
-      pq->count[task->thread_index] += task->thread_data.vis_counter;
+      pq->end[task->thread_index] +=
+         task->thread_data.vis_counter - pq->start[task->thread_index];
+      pq->start[task->thread_index] = 0;
       break;
    case PIPE_QUERY_TIMESTAMP:
-      pq->count[task->thread_index] = os_time_get_nano();
+      pq->end[task->thread_index] = os_time_get_nano();
       break;
    case PIPE_QUERY_PIPELINE_STATISTICS:
-      pq->count[task->thread_index] += task->ps_invocations;
-      break;
-   case PIPE_QUERY_PRIMITIVES_GENERATED:
-   case PIPE_QUERY_PRIMITIVES_EMITTED:
-   case PIPE_QUERY_SO_STATISTICS:
-   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
-   case PIPE_QUERY_TIMESTAMP_DISJOINT:
-   case PIPE_QUERY_GPU_FINISHED:
+      pq->end[task->thread_index] +=
+         task->ps_invocations - pq->start[task->thread_index];
+      pq->start[task->thread_index] = 0;
       break;
    default:
       assert(0);
       break;
    }
-
-   if (task->query[pq->type] == pq) {
-      task->query[pq->type] = NULL;
-   }
 }
 
 
@@ -575,10 +557,8 @@ lp_rast_tile_end(struct lp_rasterizer_task *task)
 {
    unsigned i;
 
-   for (i = 0; i < PIPE_QUERY_TYPES; ++i) {
-      if (task->query[i]) {
-         lp_rast_end_query(task, lp_rast_arg_query(task->query[i]));
-      }
+   for (i = 0; i < task->scene->num_active_queries; ++i) {
+      lp_rast_end_query(task, lp_rast_arg_query(task->scene->active_queries[i]));
    }
 
    /* debug */
diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h
index 3048940..b8bc99c 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h
+++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h
@@ -99,7 +99,6 @@ struct lp_rasterizer_task
 
    /* occlude counter for visible pixels */
    struct lp_jit_thread_data thread_data;
-   struct llvmpipe_query *query[PIPE_QUERY_TYPES];
    uint64_t ps_invocations;
 
    pipe_semaphore work_ready;
@@ -307,10 +306,10 @@ lp_rast_shade_quads_all( struct lp_rasterizer_task *task,
     * allocated 4x4 blocks hence need to filter them out here.
     */
    if ((x % TILE_SIZE) < task->width && (y % TILE_SIZE) < task->height) {
-      if (task->query[PIPE_QUERY_PIPELINE_STATISTICS]) {
-         /* not very accurate would need a popcount on the mask */
-         task->ps_invocations++;
-      }
+      /* not very accurate would need a popcount on the mask */
+      /* always count this not worth bothering? */
+      task->ps_invocations++;
+
       /* run shader on 4x4 block */
       BEGIN_JIT_CALL(state, task);
       variant->jit_function[RAST_WHOLE]( &state->jit_context,
diff --git a/src/gallium/drivers/llvmpipe/lp_scene.h b/src/gallium/drivers/llvmpipe/lp_scene.h
index 2d63c00..16f6969 100644
--- a/src/gallium/drivers/llvmpipe/lp_scene.h
+++ b/src/gallium/drivers/llvmpipe/lp_scene.h
@@ -129,6 +129,10 @@ struct lp_scene {
    struct pipe_context *pipe;
    struct lp_fence *fence;
 
+   /* The queries still active at end of scene */
+   struct llvmpipe_query *active_queries[3];
+   unsigned num_active_queries;
+
    /* Framebuffer mappings - valid only between begin_rasterization()
     * and end_rasterization().
     */
diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c
index 6b64446..d2c5325 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup.c
@@ -155,6 +155,23 @@ lp_setup_rasterize_scene( struct lp_setup_context *setup )
    struct lp_scene *scene = setup->scene;
    struct llvmpipe_screen *screen = llvmpipe_screen(scene->pipe->screen);
 
+   scene->num_active_queries = 0;
+   if (setup->active_query[PIPE_QUERY_OCCLUSION_COUNTER]) {
+      scene->active_queries[scene->num_active_queries] =
+         setup->active_query[PIPE_QUERY_OCCLUSION_COUNTER];
+      scene->num_active_queries++;
+   }
+   if (setup->active_query[PIPE_QUERY_OCCLUSION_PREDICATE]) {
+      scene->active_queries[scene->num_active_queries] =
+         setup->active_query[PIPE_QUERY_OCCLUSION_PREDICATE];
+      scene->num_active_queries++;
+   }
+   if (setup->active_query[PIPE_QUERY_PIPELINE_STATISTICS]) {
+      scene->active_queries[scene->num_active_queries] =
+         setup->active_query[PIPE_QUERY_PIPELINE_STATISTICS];
+      scene->num_active_queries++;
+   }
+
    lp_scene_end_binning(scene);
 
    lp_fence_reference(&setup->last_fence, scene->fence);
@@ -181,7 +198,6 @@ begin_binning( struct lp_setup_context *setup )
    struct lp_scene *scene = setup->scene;
    boolean need_zsload = FALSE;
    boolean ok;
-   unsigned i;
 
    assert(scene);
    assert(scene->fence == NULL);
@@ -230,16 +246,6 @@ begin_binning( struct lp_setup_context *setup )
       }
    }
 
-   for (i = 0; i < PIPE_QUERY_TYPES; ++i) {
-      if (setup->active_query[i]) {
-         ok = lp_scene_bin_everywhere( scene,
-                                       LP_RAST_OP_BEGIN_QUERY,
-                                       lp_rast_arg_query(setup->active_query[i]) );
-         if (!ok)
-            return FALSE;
-      }
-   }
-
    setup->clear.flags = 0;
    setup->clear.zsmask = 0;
    setup->clear.zsvalue = 0;
@@ -1211,18 +1217,20 @@ void
 lp_setup_begin_query(struct lp_setup_context *setup,
                      struct llvmpipe_query *pq)
 {
-   /* init the query to its beginning state */
-   assert(setup->active_query[pq->type] == NULL);
 
    set_scene_state(setup, SETUP_ACTIVE, "begin_query");
 
-   setup->active_query[pq->type] = pq;
+   if (!(pq->type == PIPE_QUERY_OCCLUSION_COUNTER ||
+         pq->type == PIPE_QUERY_OCCLUSION_PREDICATE ||
+         pq->type == PIPE_QUERY_PIPELINE_STATISTICS))
+      return;
 
-   /* XXX: It is possible that a query is created before the scene
-    * has been created. This means that setup->scene == NULL resulting
-    * in the query not being binned and thus is ignored.
-    */
+   /* init the query to its beginning state */
+   assert(setup->active_query[pq->type] == NULL);
+
+   setup->active_query[pq->type] = pq;
 
+   assert(setup->scene);
    if (setup->scene) {
       if (!lp_scene_bin_everywhere(setup->scene,
                                    LP_RAST_OP_BEGIN_QUERY,
@@ -1249,31 +1257,46 @@ lp_setup_end_query(struct lp_setup_context *setup, struct llvmpipe_query *pq)
 {
    set_scene_state(setup, SETUP_ACTIVE, "end_query");
 
-   if (pq->type != PIPE_QUERY_TIMESTAMP && pq->type != PIPE_QUERY_GPU_FINISHED) {
+   if (pq->type == PIPE_QUERY_OCCLUSION_COUNTER ||
+       pq->type == PIPE_QUERY_OCCLUSION_PREDICATE ||
+       pq->type == PIPE_QUERY_PIPELINE_STATISTICS) {
       assert(setup->active_query[pq->type] == pq);
-      setup->active_query[pq->type] = NULL;
    }
 
-   /* Setup will automatically re-issue any query which carried over a
-    * scene boundary, and the rasterizer automatically "ends" queries
-    * which are active at the end of a scene, so there is no need to
-    * retry this commands on failure.
-    */
+   assert(setup->scene);
    if (setup->scene) {
       /* pq->fence should be the fence of the *last* scene which
        * contributed to the query result.
        */
       lp_fence_reference(&pq->fence, setup->scene->fence);
 
-      if (!lp_scene_bin_everywhere(setup->scene,
-                                   LP_RAST_OP_END_QUERY,
-                                   lp_rast_arg_query(pq))) {
-         lp_setup_flush(setup, NULL, __FUNCTION__);
+      if (pq->type == PIPE_QUERY_OCCLUSION_COUNTER ||
+          pq->type == PIPE_QUERY_OCCLUSION_PREDICATE ||
+          pq->type == PIPE_QUERY_PIPELINE_STATISTICS ||
+          pq->type == PIPE_QUERY_TIMESTAMP) {
+         if (!lp_scene_bin_everywhere(setup->scene,
+                                      LP_RAST_OP_END_QUERY,
+                                      lp_rast_arg_query(pq))) {
+            if (!lp_setup_flush_and_restart(setup))
+               goto fail;
+
+            if (!lp_scene_bin_everywhere(setup->scene,
+                                         LP_RAST_OP_END_QUERY,
+                                         lp_rast_arg_query(pq))) {
+               goto fail;
+            }
+         }
       }
    }
    else {
       lp_fence_reference(&pq->fence, setup->last_fence);
    }
+
+fail:
+   /* Need to do this now not earlier since it still needs to be marked as
+    * active when binning it would cause a flush.
+    */
+   setup->active_query[pq->type] = NULL;
 }
 
 
diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c
index bedd16b..62df5df 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c
@@ -215,6 +215,11 @@ lp_setup_whole_tile(struct lp_setup_context *setup,
       if (!scene->fb.zsbuf) {
          /*
           * All previous rendering will be overwritten so reset the bin.
+          * XXX This is wrong wrt to all queries arriving here (timestamp,
+          * occlusion, ps invocations). Not counting stuff might be ok but it
+          * will kill the begin/end query commands too which is definitely
+          * wrong (and at this point we don't even know if there were any
+          * such commands here).
           */
          lp_scene_bin_reset( scene, tx, ty );
       }