[Mesa-dev] [PATCH 48/51] i965: Use fences for tracking QueryCounters
Chris Wilson
chris at chris-wilson.co.uk
Tue Jan 10 21:24:11 UTC 2017
We can use our fence tracking mechanism for fine-grained waiting on
query results, instead of waiting on the whole buffer object.
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
---
src/mesa/drivers/dri/i965/brw_conditional_render.c | 4 +-
src/mesa/drivers/dri/i965/brw_context.c | 2 +
src/mesa/drivers/dri/i965/brw_context.h | 10 +-
src/mesa/drivers/dri/i965/brw_queryobj.c | 17 +--
src/mesa/drivers/dri/i965/gen6_queryobj.c | 132 +++++++++++++--------
src/mesa/drivers/dri/i965/hsw_queryobj.c | 16 +--
6 files changed, 115 insertions(+), 66 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_conditional_render.c b/src/mesa/drivers/dri/i965/brw_conditional_render.c
index 59f12d5df3..2c73697d5b 100644
--- a/src/mesa/drivers/dri/i965/brw_conditional_render.c
+++ b/src/mesa/drivers/dri/i965/brw_conditional_render.c
@@ -70,13 +70,13 @@ set_predicate_for_result(struct brw_context *brw,
query->bo,
I915_GEM_DOMAIN_INSTRUCTION,
0, /* write domain */
- 0 /* offset */);
+ 8*query->index /* offset */);
brw_load_register_mem64(brw,
MI_PREDICATE_SRC1,
query->bo,
I915_GEM_DOMAIN_INSTRUCTION,
0, /* write domain */
- 8 /* offset */);
+ 8*(query->index+1) /* offset */);
if (inverted)
load_op = MI_PREDICATE_LOADOP_LOAD;
diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c
index 541c2885fe..15e467b00a 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -998,6 +998,7 @@ brwCreateContext(gl_api api,
brw->has_swizzling = screen->hw_has_swizzling;
isl_device_init(&brw->isl_dev, devinfo, screen->hw_has_swizzling);
+ brw->query.last_index = 4096;
brw->vs.base.stage = MESA_SHADER_VERTEX;
brw->tcs.base.stage = MESA_SHADER_TESS_CTRL;
@@ -1153,6 +1154,7 @@ intelDestroyContext(__DRIcontext * driContextPriv)
brw_destroy_state(brw);
brw_draw_destroy(brw);
+ brw_bo_put(brw->query.bo);
brw_bo_put(brw->curbe.curbe_bo);
brw_bo_put(brw->vs.base.scratch_bo);
brw_bo_put(brw->tcs.base.scratch_bo);
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 0152f0a482..5e2df95508 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -542,11 +542,15 @@ struct brw_vertex_element {
struct brw_query_object {
struct gl_query_object Base;
+ struct brw_fence fence;
+
/** Last query BO associated with this query. */
brw_bo *bo;
/** Last index in bo with query data for this object. */
- int last_index;
+ unsigned index;
+
+ uint64_t *results;
};
#define MAX_GS_INPUT_VERTICES 6
@@ -1093,6 +1097,10 @@ struct brw_context
} cc;
struct {
+ brw_bo *bo;
+ uint64_t *map;
+ unsigned last_index;
+
struct brw_query_object *obj;
bool begin_emitted;
} query;
diff --git a/src/mesa/drivers/dri/i965/brw_queryobj.c b/src/mesa/drivers/dri/i965/brw_queryobj.c
index 34f55152ae..70c5d57f0a 100644
--- a/src/mesa/drivers/dri/i965/brw_queryobj.c
+++ b/src/mesa/drivers/dri/i965/brw_queryobj.c
@@ -125,7 +125,7 @@ brw_queryobj_get_results(struct gl_context *ctx,
* run out of space in the query's BO and allocated a new one. If so,
* this function was already called to accumulate the results so far.
*/
- for (i = 0; i < query->last_index; i++) {
+ for (i = 0; i < query->index; i++) {
query->Base.Result += results[i * 2 + 1] - results[i * 2];
}
break;
@@ -135,7 +135,7 @@ brw_queryobj_get_results(struct gl_context *ctx,
/* If the starting and ending PS_DEPTH_COUNT from any of the batches
* differ, then some fragments passed the depth test.
*/
- for (i = 0; i < query->last_index; i++) {
+ for (i = 0; i < query->index; i++) {
if (results[i * 2 + 1] != results[i * 2]) {
query->Base.Result = GL_TRUE;
break;
@@ -182,6 +182,7 @@ brw_delete_query(struct gl_context *ctx, struct gl_query_object *q)
{
struct brw_query_object *query = (struct brw_query_object *)q;
+ brw_fence_finish(&query->fence);
brw_bo_put(query->bo);
free(query);
}
@@ -242,7 +243,7 @@ brw_begin_query(struct gl_context *ctx, struct gl_query_object *q)
*/
brw_bo_put(query->bo);
query->bo = NULL;
- query->last_index = -1;
+ query->index = -1;
brw->query.obj = query;
@@ -379,7 +380,7 @@ ensure_bo_has_space(struct gl_context *ctx, struct brw_query_object *query)
assert(brw->gen < 6);
- if (!query->bo || query->last_index * 2 + 1 >= 4096 / sizeof(uint64_t)) {
+ if (!query->bo || query->index * 2 + 1 >= 4096 / sizeof(uint64_t)) {
if (query->bo != NULL) {
/* The old query BO did not have enough space, so we allocated a new
@@ -391,7 +392,7 @@ ensure_bo_has_space(struct gl_context *ctx, struct brw_query_object *query)
query->bo = brw_bo_create(&brw->batch, "query",
4096, 0, BO_ALLOC_FOR_RENDER);
- query->last_index = 0;
+ query->index = 0;
}
}
@@ -432,7 +433,7 @@ brw_emit_query_begin(struct brw_context *brw)
ensure_bo_has_space(ctx, query);
- brw_write_depth_count(brw, query->bo, query->last_index * 2);
+ brw_write_depth_count(brw, query->bo, query->index * 2);
brw->query.begin_emitted = true;
}
@@ -454,10 +455,10 @@ brw_emit_query_end(struct brw_context *brw)
if (!brw->query.begin_emitted)
return;
- brw_write_depth_count(brw, query->bo, query->last_index * 2 + 1);
+ brw_write_depth_count(brw, query->bo, query->index * 2 + 1);
brw->query.begin_emitted = false;
- query->last_index++;
+ query->index++;
}
/**
diff --git a/src/mesa/drivers/dri/i965/gen6_queryobj.c b/src/mesa/drivers/dri/i965/gen6_queryobj.c
index 508398c8a2..18030090be 100644
--- a/src/mesa/drivers/dri/i965/gen6_queryobj.c
+++ b/src/mesa/drivers/dri/i965/gen6_queryobj.c
@@ -70,7 +70,8 @@ set_query_availability(struct brw_context *brw, struct brw_query_object *query,
flags |= PIPE_CONTROL_CS_STALL;
brw_emit_pipe_control_write(brw, flags,
- query->bo, 2 * sizeof(uint64_t),
+ query->bo,
+ (query->index + 2) * sizeof(uint64_t),
available);
}
}
@@ -168,12 +169,8 @@ gen6_queryobj_get_results(struct gl_context *ctx,
struct brw_query_object *query)
{
struct brw_context *brw = brw_context(ctx);
+ uint64_t *results = query->results;
- if (query->bo == NULL)
- return;
-
- uint64_t *results =
- brw_bo_map(query->bo, MAP_READ, PERF_DEBUG(brw, "GetQuery"));
switch (query->Base.Target) {
case GL_TIME_ELAPSED:
/* The query BO contains the starting and ending timestamps.
@@ -256,12 +253,46 @@ gen6_queryobj_get_results(struct gl_context *ctx,
/* Now that we've processed the data stored in the query's buffer object,
* we can release it.
*/
+ brw_fence_finish(&query->fence);
brw_bo_put(query->bo);
query->bo = NULL;
query->Base.Ready = true;
}
+static int gen6_alloc_query(struct brw_context *brw,
+ struct brw_query_object *query)
+{
+ int idx;
+
+ brw_fence_finish(&query->fence);
+ brw_bo_put(query->bo);
+
+ if (brw->query.last_index > 4096/sizeof(uint64_t) - 4) {
+ brw_bo_put(brw->query.bo);
+ brw->query.bo = brw_bo_create(&brw->batch, "query results",
+ 4096, 0, BO_ALLOC_FOR_RENDER);
+ brw_bo_enable_snoop(brw->query.bo);
+ brw->query.map =
+ brw_bo_map(brw->query.bo,
+ MAP_READ | MAP_PERSISTENT | MAP_ASYNC,
+ NULL);
+ brw->query.last_index = 0;
+ }
+
+ idx = brw->query.last_index;
+ brw->query.last_index += 2;
+ if (brw->ctx.Extensions.ARB_query_buffer_object &&
+ brw_is_query_pipelined(query))
+ brw->query.last_index += 2;
+
+ query->bo = brw_bo_get(brw->query.bo);
+ query->index = idx;
+ query->results = brw->query.map + idx;
+
+ return idx;
+}
+
/**
* Driver hook for glBeginQuery().
*
@@ -273,11 +304,7 @@ gen6_begin_query(struct gl_context *ctx, struct gl_query_object *q)
{
struct brw_context *brw = brw_context(ctx);
struct brw_query_object *query = (struct brw_query_object *)q;
-
- /* Since we're starting a new query, we need to throw away old results. */
- brw_bo_put(query->bo);
- query->bo = brw_bo_create(&brw->batch, "query results",
- 4096, 4096, BO_ALLOC_FOR_RENDER);
+ int idx = gen6_alloc_query(brw, query);
if (brw_batch_begin(&brw->batch, 120, RENDER_RING) < 0)
return;
@@ -306,23 +333,23 @@ gen6_begin_query(struct gl_context *ctx, struct gl_query_object *q)
* obtain the time elapsed. Notably, this includes time elapsed while
* the system was doing other work, such as running other applications.
*/
- brw_write_timestamp(brw, query->bo, 0);
+ brw_write_timestamp(brw, query->bo, idx);
break;
case GL_ANY_SAMPLES_PASSED:
case GL_ANY_SAMPLES_PASSED_CONSERVATIVE:
case GL_SAMPLES_PASSED_ARB:
- brw_write_depth_count(brw, query->bo, 0);
+ brw_write_depth_count(brw, query->bo, idx);
break;
case GL_PRIMITIVES_GENERATED:
- write_primitives_generated(brw, query->bo, query->Base.Stream, 0);
+ write_primitives_generated(brw, query->bo, query->Base.Stream, idx);
if (query->Base.Stream == 0)
ctx->NewDriverState |= BRW_NEW_RASTERIZER_DISCARD;
break;
case GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN:
- write_xfb_primitives_written(brw, query->bo, query->Base.Stream, 0);
+ write_xfb_primitives_written(brw, query->bo, query->Base.Stream, idx);
break;
case GL_VERTICES_SUBMITTED_ARB:
@@ -336,7 +363,7 @@ gen6_begin_query(struct gl_context *ctx, struct gl_query_object *q)
case GL_COMPUTE_SHADER_INVOCATIONS_ARB:
case GL_TESS_CONTROL_SHADER_PATCHES_ARB:
case GL_TESS_EVALUATION_SHADER_INVOCATIONS_ARB:
- emit_pipeline_stat(brw, query->bo, query->Base.Stream, query->Base.Target, 0);
+ emit_pipeline_stat(brw, query->bo, query->Base.Stream, query->Base.Target, idx);
break;
default:
@@ -359,6 +386,7 @@ gen6_end_query(struct gl_context *ctx, struct gl_query_object *q)
{
struct brw_context *brw = brw_context(ctx);
struct brw_query_object *query = (struct brw_query_object *)q;
+ int idx = query->index + 1;
if (brw_batch_begin(&brw->batch, 120, RENDER_RING) < 0) {
query->Base.Ready = true; /* failed to submit query, return garbage */
@@ -369,23 +397,23 @@ gen6_end_query(struct gl_context *ctx, struct gl_query_object *q)
switch (query->Base.Target) {
case GL_TIME_ELAPSED:
- brw_write_timestamp(brw, query->bo, 1);
+ brw_write_timestamp(brw, query->bo, idx);
break;
case GL_ANY_SAMPLES_PASSED:
case GL_ANY_SAMPLES_PASSED_CONSERVATIVE:
case GL_SAMPLES_PASSED_ARB:
- brw_write_depth_count(brw, query->bo, 1);
+ brw_write_depth_count(brw, query->bo, idx);
break;
case GL_PRIMITIVES_GENERATED:
- write_primitives_generated(brw, query->bo, query->Base.Stream, 1);
+ write_primitives_generated(brw, query->bo, query->Base.Stream, idx);
if (query->Base.Stream == 0)
ctx->NewDriverState |= BRW_NEW_RASTERIZER_DISCARD;
break;
case GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN:
- write_xfb_primitives_written(brw, query->bo, query->Base.Stream, 1);
+ write_xfb_primitives_written(brw, query->bo, query->Base.Stream, idx);
break;
case GL_VERTICES_SUBMITTED_ARB:
@@ -400,19 +428,47 @@ gen6_end_query(struct gl_context *ctx, struct gl_query_object *q)
case GL_TESS_CONTROL_SHADER_PATCHES_ARB:
case GL_TESS_EVALUATION_SHADER_INVOCATIONS_ARB:
emit_pipeline_stat(brw, query->bo,
- query->Base.Stream, query->Base.Target, 1);
+ query->Base.Stream, query->Base.Target, idx);
break;
default:
unreachable("Unrecognized query target in brw_end_query()");
}
- /* The current batch contains the commands to handle EndQuery(),
- * but they won't actually execute until it is flushed.
- */
+ /* For ARB_query_buffer_object: The result is now available */
+ set_query_availability(brw, query, true);
+ brw_batch_insert_fence(&brw->batch, &query->fence, 0);
+
+ brw_batch_end(&brw->batch);
+}
+
+/**
+ * Driver hook for glQueryCounter().
+ *
+ * This handles GL_TIMESTAMP queries, which perform a pipelined read of the
+ * current GPU time. This is unlike GL_TIME_ELAPSED, which measures the
+ * time while the query is active.
+ */
+static void
+gen6_query_counter(struct gl_context *ctx, struct gl_query_object *q)
+{
+ struct brw_context *brw = brw_context(ctx);
+ struct brw_query_object *query = (struct brw_query_object *) q;
+ int idx = gen6_alloc_query(brw, query);
+
+ assert(q->Target == GL_TIMESTAMP);
+ if (brw_batch_begin(&brw->batch, 60, RENDER_RING) < 0)
+ return;
+
+ brw_write_timestamp(brw, query->bo, idx);
/* For ARB_query_buffer_object: The result is now available */
set_query_availability(brw, query, true);
+ brw_batch_insert_fence(&brw->batch, &query->fence, 0);
+
+ /* The current batch contains the commands to handle QueryCounter(),
+ * but they won't actually execute until it is flushed.
+ */
brw_batch_end(&brw->batch);
}
@@ -425,12 +481,14 @@ gen6_end_query(struct gl_context *ctx, struct gl_query_object *q)
*/
static void gen6_wait_query(struct gl_context *ctx, struct gl_query_object *q)
{
+ struct brw_context *brw = brw_context(ctx);
struct brw_query_object *query = (struct brw_query_object *)q;
/* If the application has requested the query result, but this batch is
* still contributing to it, flush it now to finish that work so the
* result will become available (eventually).
*/
+ brw_fence_wait(&query->fence, -1, PERF_DEBUG(brw, "GetQuery"));
gen6_queryobj_get_results(ctx, query);
}
@@ -442,14 +500,9 @@ static void gen6_wait_query(struct gl_context *ctx, struct gl_query_object *q)
*/
static void gen6_check_query(struct gl_context *ctx, struct gl_query_object *q)
{
+ struct brw_context *brw = brw_context(ctx);
struct brw_query_object *query = (struct brw_query_object *)q;
- /* If query->bo is NULL, we've already gathered the results - this is a
- * redundant CheckQuery call. Ignore it.
- */
- if (query->bo == NULL)
- return;
-
/* From the GL_ARB_occlusion_query spec:
*
* "Instead of allowing for an infinite loop, performing a
@@ -457,25 +510,10 @@ static void gen6_check_query(struct gl_context *ctx, struct gl_query_object *q)
* not ready yet on the first time it is queried. This ensures that
* the async query will return true in finite time.
*/
- if (!brw_bo_busy(query->bo, BUSY_READ | BUSY_FLUSH,
- PERF_DEBUG(brw_context(ctx), "CheckQuery"))) {
- gen6_queryobj_get_results(ctx, query);
- }
-}
-
-static void
-gen6_query_counter(struct gl_context *ctx, struct gl_query_object *q)
-{
- struct brw_context *brw = brw_context(ctx);
- struct brw_query_object *query = (struct brw_query_object *)q;
-
- brw_query_counter(ctx, q);
-
- if (brw_batch_begin(&brw->batch, 120, RENDER_RING) < 0)
+ if (brw_fence_busy(&query->fence, PERF_DEBUG(brw, "CheckQuery")))
return;
- set_query_availability(brw, query, true);
- brw_batch_end(&brw->batch);
+ gen6_queryobj_get_results(ctx, query);
}
/* Initialize Gen6+-specific query object functions. */
diff --git a/src/mesa/drivers/dri/i965/hsw_queryobj.c b/src/mesa/drivers/dri/i965/hsw_queryobj.c
index 47ffb881de..357e57bd9b 100644
--- a/src/mesa/drivers/dri/i965/hsw_queryobj.c
+++ b/src/mesa/drivers/dri/i965/hsw_queryobj.c
@@ -203,7 +203,7 @@ hsw_result_to_gpr0(struct gl_context *ctx, struct brw_query_object *query,
query->bo,
I915_GEM_DOMAIN_INSTRUCTION,
I915_GEM_DOMAIN_INSTRUCTION,
- 2 * sizeof(uint64_t));
+ (query->index + 2) * sizeof(uint64_t));
return;
}
@@ -222,20 +222,20 @@ hsw_result_to_gpr0(struct gl_context *ctx, struct brw_query_object *query,
query->bo,
I915_GEM_DOMAIN_INSTRUCTION,
I915_GEM_DOMAIN_INSTRUCTION,
- 0 * sizeof(uint64_t));
+ (query->index + 0) * sizeof(uint64_t));
} else {
brw_load_register_mem64(brw,
HSW_CS_GPR(1),
query->bo,
I915_GEM_DOMAIN_INSTRUCTION,
I915_GEM_DOMAIN_INSTRUCTION,
- 0 * sizeof(uint64_t));
+ (query->index + 0) * sizeof(uint64_t));
brw_load_register_mem64(brw,
HSW_CS_GPR(2),
query->bo,
I915_GEM_DOMAIN_INSTRUCTION,
I915_GEM_DOMAIN_INSTRUCTION,
- 1 * sizeof(uint64_t));
+ (query->index + 1) * sizeof(uint64_t));
BEGIN_BATCH(5);
OUT_BATCH(HSW_MI_MATH | (5 - 2));
@@ -299,14 +299,14 @@ store_query_result_imm(struct brw_context *brw, brw_bo *bo,
}
static void
-set_predicate(struct brw_context *brw, brw_bo *query_bo)
+set_predicate(struct brw_context *brw, struct brw_query_object *query)
{
brw_load_register_imm64(brw, MI_PREDICATE_SRC1, 0ull);
/* Load query availability into SRC0 */
- brw_load_register_mem64(brw, MI_PREDICATE_SRC0, query_bo,
+ brw_load_register_mem64(brw, MI_PREDICATE_SRC0, query->bo,
I915_GEM_DOMAIN_INSTRUCTION, 0,
- 2 * sizeof(uint64_t));
+ (query->index + 2) * sizeof(uint64_t));
/* predicate = !(query_availability == 0); */
BEGIN_BATCH(1);
@@ -376,7 +376,7 @@ hsw_store_query_result(struct gl_context *ctx, struct gl_query_object *q,
*/
hsw_result_to_gpr0(ctx, query, buf, offset, pname, ptype);
if (pipelined)
- set_predicate(brw, query->bo);
+ set_predicate(brw, query);
store_query_result_reg(brw, bo->buffer, offset, ptype, HSW_CS_GPR(0),
pipelined);
} else {
--
2.11.0
More information about the mesa-dev
mailing list