[Mesa-dev] [PATCH 22/25] radeonsi: move query suspend logic into the top-level si_query struct

Nicolai Hähnle nhaehnle at gmail.com
Thu Dec 6 14:00:43 UTC 2018


From: Nicolai Hähnle <nicolai.haehnle at amd.com>

---
 src/gallium/drivers/radeonsi/si_perfcounter.c | 13 ++--
 src/gallium/drivers/radeonsi/si_query.c       | 75 ++++++++++---------
 src/gallium/drivers/radeonsi/si_query.h       | 18 +++--
 3 files changed, 62 insertions(+), 44 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_perfcounter.c b/src/gallium/drivers/radeonsi/si_perfcounter.c
index 69e149c76b6..0b3d8f89273 100644
--- a/src/gallium/drivers/radeonsi/si_perfcounter.c
+++ b/src/gallium/drivers/radeonsi/si_perfcounter.c
@@ -861,21 +861,24 @@ static void si_pc_query_add_result(struct si_screen *screen,
 			uint32_t value = results[counter->base + j * counter->stride];
 			result->batch[i].u64 += value;
 		}
 	}
 }
 
 static struct si_query_ops batch_query_ops = {
 	.destroy = si_pc_query_destroy,
 	.begin = si_query_hw_begin,
 	.end = si_query_hw_end,
-	.get_result = si_query_hw_get_result
+	.get_result = si_query_hw_get_result,
+
+	.suspend = si_query_hw_suspend,
+	.resume = si_query_hw_resume,
 };
 
 static struct si_query_hw_ops batch_query_hw_ops = {
 	.prepare_buffer = si_pc_query_prepare_buffer,
 	.emit_start = si_pc_query_emit_start,
 	.emit_stop = si_pc_query_emit_stop,
 	.clear_result = si_pc_query_clear_result,
 	.add_result = si_pc_query_add_result,
 };
 
@@ -994,41 +997,41 @@ struct pipe_query *si_create_batch_query(struct pipe_context *ctx,
 			fprintf(stderr,
 				"perfcounter group %s: too many selected\n",
 				block->b->b->name);
 			goto error;
 		}
 		group->selectors[group->num_counters] = sub_index;
 		++group->num_counters;
 	}
 
 	/* Compute result bases and CS size per group */
-	query->b.num_cs_dw_end = pc->num_stop_cs_dwords;
-	query->b.num_cs_dw_end += pc->num_instance_cs_dwords;
+	query->b.b.num_cs_dw_suspend = pc->num_stop_cs_dwords;
+	query->b.b.num_cs_dw_suspend += pc->num_instance_cs_dwords;
 
 	i = 0;
 	for (group = query->groups; group; group = group->next) {
 		struct si_pc_block *block = group->block;
 		unsigned read_dw;
 		unsigned instances = 1;
 
 		if ((block->b->b->flags & SI_PC_BLOCK_SE) && group->se < 0)
 			instances = screen->info.max_se;
 		if (group->instance < 0)
 			instances *= block->num_instances;
 
 		group->result_base = i;
 		query->b.result_size += sizeof(uint64_t) * instances * group->num_counters;
 		i += instances * group->num_counters;
 
 		read_dw = 6 * group->num_counters;
-		query->b.num_cs_dw_end += instances * read_dw;
-		query->b.num_cs_dw_end += instances * pc->num_instance_cs_dwords;
+		query->b.b.num_cs_dw_suspend += instances * read_dw;
+		query->b.b.num_cs_dw_suspend += instances * pc->num_instance_cs_dwords;
 	}
 
 	if (query->shaders) {
 		if (query->shaders == SI_PC_SHADERS_WINDOWING)
 			query->shaders = 0xffffffff;
 	}
 
 	/* Map user-supplied query array to result indices */
 	query->counters = CALLOC(num_queries, sizeof(*query->counters));
 	for (i = 0; i < num_queries; ++i) {
diff --git a/src/gallium/drivers/radeonsi/si_query.c b/src/gallium/drivers/radeonsi/si_query.c
index aed3e1e80c1..479a1bbf2c4 100644
--- a/src/gallium/drivers/radeonsi/si_query.c
+++ b/src/gallium/drivers/radeonsi/si_query.c
@@ -27,20 +27,22 @@
 #include "si_pipe.h"
 #include "si_query.h"
 #include "util/u_memory.h"
 #include "util/u_upload_mgr.h"
 #include "util/os_time.h"
 #include "util/u_suballoc.h"
 #include "amd/common/sid.h"
 
 #define SI_MAX_STREAMS 4
 
+static struct si_query_ops query_hw_ops;
+
 struct si_hw_query_params {
 	unsigned start_offset;
 	unsigned end_offset;
 	unsigned fence_offset;
 	unsigned pair_stride;
 	unsigned pair_count;
 };
 
 /* Queries without buffer handling or suspend/resume. */
 struct si_query_sw {
@@ -600,28 +602,20 @@ static bool si_query_hw_prepare_buffer(struct si_screen *sscreen,
 }
 
 static void si_query_hw_get_result_resource(struct si_context *sctx,
 					    struct si_query *rquery,
 					    bool wait,
 					    enum pipe_query_value_type result_type,
 					    int index,
 					    struct pipe_resource *resource,
 					    unsigned offset);
 
-static struct si_query_ops query_hw_ops = {
-	.destroy = si_query_hw_destroy,
-	.begin = si_query_hw_begin,
-	.end = si_query_hw_end,
-	.get_result = si_query_hw_get_result,
-	.get_result_resource = si_query_hw_get_result_resource,
-};
-
 static void si_query_hw_do_emit_start(struct si_context *sctx,
 				      struct si_query_hw *query,
 				      struct r600_resource *buffer,
 				      uint64_t va);
 static void si_query_hw_do_emit_stop(struct si_context *sctx,
 				     struct si_query_hw *query,
 				     struct r600_resource *buffer,
 				     uint64_t va);
 static void si_query_hw_add_result(struct si_screen *sscreen,
 				   struct si_query_hw *, void *buffer,
@@ -658,55 +652,54 @@ static struct pipe_query *si_query_hw_create(struct si_screen *sscreen,
 	query->b.type = query_type;
 	query->b.ops = &query_hw_ops;
 	query->ops = &query_hw_default_hw_ops;
 
 	switch (query_type) {
 	case PIPE_QUERY_OCCLUSION_COUNTER:
 	case PIPE_QUERY_OCCLUSION_PREDICATE:
 	case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
 		query->result_size = 16 * sscreen->info.num_render_backends;
 		query->result_size += 16; /* for the fence + alignment */
-		query->num_cs_dw_end = 6 + si_cp_write_fence_dwords(sscreen);
+		query->b.num_cs_dw_suspend = 6 + si_cp_write_fence_dwords(sscreen);
 		break;
 	case SI_QUERY_TIME_ELAPSED_SDMA:
 		/* GET_GLOBAL_TIMESTAMP only works if the offset is a multiple of 32. */
 		query->result_size = 64;
-		query->num_cs_dw_end = 0;
 		break;
 	case PIPE_QUERY_TIME_ELAPSED:
 		query->result_size = 24;
-		query->num_cs_dw_end = 8 + si_cp_write_fence_dwords(sscreen);
+		query->b.num_cs_dw_suspend = 8 + si_cp_write_fence_dwords(sscreen);
 		break;
 	case PIPE_QUERY_TIMESTAMP:
 		query->result_size = 16;
-		query->num_cs_dw_end = 8 + si_cp_write_fence_dwords(sscreen);
+		query->b.num_cs_dw_suspend = 8 + si_cp_write_fence_dwords(sscreen);
 		query->flags = SI_QUERY_HW_FLAG_NO_START;
 		break;
 	case PIPE_QUERY_PRIMITIVES_EMITTED:
 	case PIPE_QUERY_PRIMITIVES_GENERATED:
 	case PIPE_QUERY_SO_STATISTICS:
 	case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
 		/* NumPrimitivesWritten, PrimitiveStorageNeeded. */
 		query->result_size = 32;
-		query->num_cs_dw_end = 6;
+		query->b.num_cs_dw_suspend = 6;
 		query->stream = index;
 		break;
 	case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
 		/* NumPrimitivesWritten, PrimitiveStorageNeeded. */
 		query->result_size = 32 * SI_MAX_STREAMS;
-		query->num_cs_dw_end = 6 * SI_MAX_STREAMS;
+		query->b.num_cs_dw_suspend = 6 * SI_MAX_STREAMS;
 		break;
 	case PIPE_QUERY_PIPELINE_STATISTICS:
 		/* 11 values on GCN. */
 		query->result_size = 11 * 16;
 		query->result_size += 8; /* for the fence + alignment */
-		query->num_cs_dw_end = 6 + si_cp_write_fence_dwords(sscreen);
+		query->b.num_cs_dw_suspend = 6 + si_cp_write_fence_dwords(sscreen);
 		break;
 	default:
 		assert(0);
 		FREE(query);
 		return NULL;
 	}
 
 	if (!si_query_hw_init(sscreen, query)) {
 		FREE(query);
 		return NULL;
@@ -833,22 +826,20 @@ static void si_query_hw_emit_start(struct si_context *sctx,
 		query->buffer.previous = qbuf;
 		query->buffer.buf = si_new_query_buffer(sctx->screen, query);
 		if (!query->buffer.buf)
 			return;
 	}
 
 	/* emit begin query */
 	va = query->buffer.buf->gpu_address + query->buffer.results_end;
 
 	query->ops->emit_start(sctx, query, query->buffer.buf, va);
-
-	sctx->num_cs_dw_queries_suspend += query->num_cs_dw_end;
 }
 
 static void si_query_hw_do_emit_stop(struct si_context *sctx,
 				       struct si_query_hw *query,
 				       struct r600_resource *buffer,
 				       uint64_t va)
 {
 	struct radeon_cmdbuf *cs = sctx->gfx_cs;
 	uint64_t fence_va = 0;
 
@@ -928,23 +919,20 @@ static void si_query_hw_emit_stop(struct si_context *sctx,
 	if (query->flags & SI_QUERY_HW_FLAG_NO_START)
 		si_need_gfx_cs_space(sctx);
 
 	/* emit end query */
 	va = query->buffer.buf->gpu_address + query->buffer.results_end;
 
 	query->ops->emit_stop(sctx, query, query->buffer.buf, va);
 
 	query->buffer.results_end += query->result_size;
 
-	if (!(query->flags & SI_QUERY_HW_FLAG_NO_START))
-		sctx->num_cs_dw_queries_suspend -= query->num_cs_dw_end;
-
 	si_update_occlusion_query_state(sctx, query->b.type, -1);
 	si_update_prims_generated_query_state(sctx, query->b.type, -1);
 }
 
 static void emit_set_predicate(struct si_context *ctx,
 			       struct r600_resource *buf, uint64_t va,
 			       uint32_t op)
 {
 	struct radeon_cmdbuf *cs = ctx->gfx_cs;
 
@@ -1112,21 +1100,22 @@ bool si_query_hw_begin(struct si_context *sctx,
 
 	if (!(query->flags & SI_QUERY_HW_FLAG_BEGIN_RESUMES))
 		si_query_hw_reset_buffers(sctx, query);
 
 	r600_resource_reference(&query->workaround_buf, NULL);
 
 	si_query_hw_emit_start(sctx, query);
 	if (!query->buffer.buf)
 		return false;
 
-	LIST_ADDTAIL(&query->list, &sctx->active_queries);
+	LIST_ADDTAIL(&query->b.active_list, &sctx->active_queries);
+	sctx->num_cs_dw_queries_suspend += query->b.num_cs_dw_suspend;
 	return true;
 }
 
 static bool si_end_query(struct pipe_context *ctx, struct pipe_query *query)
 {
 	struct si_context *sctx = (struct si_context *)ctx;
 	struct si_query *rquery = (struct si_query *)query;
 
 	return rquery->ops->end(sctx, rquery);
 }
@@ -1134,22 +1123,24 @@ static bool si_end_query(struct pipe_context *ctx, struct pipe_query *query)
 bool si_query_hw_end(struct si_context *sctx,
 		     struct si_query *rquery)
 {
 	struct si_query_hw *query = (struct si_query_hw *)rquery;
 
 	if (query->flags & SI_QUERY_HW_FLAG_NO_START)
 		si_query_hw_reset_buffers(sctx, query);
 
 	si_query_hw_emit_stop(sctx, query);
 
-	if (!(query->flags & SI_QUERY_HW_FLAG_NO_START))
-		LIST_DELINIT(&query->list);
+	if (!(query->flags & SI_QUERY_HW_FLAG_NO_START)) {
+		LIST_DELINIT(&query->b.active_list);
+		sctx->num_cs_dw_queries_suspend -= query->b.num_cs_dw_suspend;
+	}
 
 	if (!query->buffer.buf)
 		return false;
 
 	return true;
 }
 
 static void si_get_hw_query_params(struct si_context *sctx,
 				   struct si_query_hw *rquery, int index,
 				   struct si_hw_query_params *params)
@@ -1342,20 +1333,41 @@ static void si_query_hw_add_result(struct si_screen *sscreen,
 		       result->pipeline_statistics.c_primitives,
 		       result->pipeline_statistics.ps_invocations,
 		       result->pipeline_statistics.cs_invocations);
 #endif
 		break;
 	default:
 		assert(0);
 	}
 }
 
+void si_query_hw_suspend(struct si_context *sctx, struct si_query *query)
+{
+	si_query_hw_emit_stop(sctx, (struct si_query_hw *)query);
+}
+
+void si_query_hw_resume(struct si_context *sctx, struct si_query *query)
+{
+	si_query_hw_emit_start(sctx, (struct si_query_hw *)query);
+}
+
+static struct si_query_ops query_hw_ops = {
+	.destroy = si_query_hw_destroy,
+	.begin = si_query_hw_begin,
+	.end = si_query_hw_end,
+	.get_result = si_query_hw_get_result,
+	.get_result_resource = si_query_hw_get_result_resource,
+
+	.suspend = si_query_hw_suspend,
+	.resume = si_query_hw_resume,
+};
+
 static boolean si_get_query_result(struct pipe_context *ctx,
 				   struct pipe_query *query, boolean wait,
 				   union pipe_query_result *result)
 {
 	struct si_context *sctx = (struct si_context *)ctx;
 	struct si_query *rquery = (struct si_query *)query;
 
 	return rquery->ops->get_result(sctx, rquery, wait, result);
 }
 
@@ -1637,40 +1649,35 @@ static void si_render_condition(struct pipe_context *ctx,
 
 	sctx->render_cond = query;
 	sctx->render_cond_invert = condition;
 	sctx->render_cond_mode = mode;
 
 	si_set_atom_dirty(sctx, atom, query != NULL);
 }
 
 void si_suspend_queries(struct si_context *sctx)
 {
-	struct si_query_hw *query;
+	struct si_query *query;
 
-	LIST_FOR_EACH_ENTRY(query, &sctx->active_queries, list) {
-		si_query_hw_emit_stop(sctx, query);
-	}
-	assert(sctx->num_cs_dw_queries_suspend == 0);
+	LIST_FOR_EACH_ENTRY(query, &sctx->active_queries, active_list)
+		query->ops->suspend(sctx, query);
 }
 
 void si_resume_queries(struct si_context *sctx)
 {
-	struct si_query_hw *query;
-
-	assert(sctx->num_cs_dw_queries_suspend == 0);
+	struct si_query *query;
 
 	/* Check CS space here. Resuming must not be interrupted by flushes. */
 	si_need_gfx_cs_space(sctx);
 
-	LIST_FOR_EACH_ENTRY(query, &sctx->active_queries, list) {
-		si_query_hw_emit_start(sctx, query);
-	}
+	LIST_FOR_EACH_ENTRY(query, &sctx->active_queries, active_list)
+		query->ops->resume(sctx, query);
 }
 
 #define XFULL(name_, query_type_, type_, result_type_, group_id_) \
 	{ \
 		.name = name_, \
 		.query_type = SI_QUERY_##query_type_, \
 		.type = PIPE_DRIVER_QUERY_TYPE_##type_, \
 		.result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_##result_type_, \
 		.group_id = group_id_ \
 	}
diff --git a/src/gallium/drivers/radeonsi/si_query.h b/src/gallium/drivers/radeonsi/si_query.h
index 032946edf4d..ebd965a004f 100644
--- a/src/gallium/drivers/radeonsi/si_query.h
+++ b/src/gallium/drivers/radeonsi/si_query.h
@@ -126,28 +126,37 @@ struct si_query_ops {
 	bool (*end)(struct si_context *, struct si_query *);
 	bool (*get_result)(struct si_context *,
 			   struct si_query *, bool wait,
 			   union pipe_query_result *result);
 	void (*get_result_resource)(struct si_context *,
 				    struct si_query *, bool wait,
 				    enum pipe_query_value_type result_type,
 				    int index,
 				    struct pipe_resource *resource,
 				    unsigned offset);
+
+	void (*suspend)(struct si_context *, struct si_query *);
+	void (*resume)(struct si_context *, struct si_query *);
 };
 
 struct si_query {
 	struct threaded_query b;
 	struct si_query_ops *ops;
 
-	/* The type of query */
+	/* The PIPE_QUERY_xxx or driver-specific SI_QUERY_xxx type of query */
 	unsigned type;
+
+	/* The number of command stream dwords needed to suspend this query. */
+	unsigned num_cs_dw_suspend;
+
+	/* Linked list of queries that must be suspended at end of CS. */
+	struct list_head active_list;
 };
 
 enum {
 	SI_QUERY_HW_FLAG_NO_START = (1 << 0),
 	/* gap */
 	/* whether begin_query doesn't clear the result */
 	SI_QUERY_HW_FLAG_BEGIN_RESUMES = (1 << 2),
 };
 
 struct si_query_hw_ops {
@@ -180,44 +189,43 @@ struct si_query_buffer {
 struct si_query_hw {
 	struct si_query b;
 	struct si_query_hw_ops *ops;
 	unsigned flags;
 
 	/* The query buffer and how many results are in it. */
 	struct si_query_buffer buffer;
 	/* Size of the result in memory for both begin_query and end_query,
 	 * this can be one or two numbers, or it could even be a size of a structure. */
 	unsigned result_size;
-	/* The number of dwords for end_query. */
-	unsigned num_cs_dw_end;
-	/* Linked list of queries */
-	struct list_head list;
 	/* For transform feedback: which stream the query is for */
 	unsigned stream;
 
 	/* Workaround via compute shader */
 	struct r600_resource *workaround_buf;
 	unsigned workaround_offset;
 };
 
 bool si_query_hw_init(struct si_screen *sscreen,
 		      struct si_query_hw *query);
 void si_query_hw_destroy(struct si_screen *sscreen,
 			 struct si_query *rquery);
 bool si_query_hw_begin(struct si_context *sctx,
 		       struct si_query *rquery);
 bool si_query_hw_end(struct si_context *sctx,
 		     struct si_query *rquery);
 bool si_query_hw_get_result(struct si_context *sctx,
 			    struct si_query *rquery,
 			    bool wait,
 			    union pipe_query_result *result);
+void si_query_hw_suspend(struct si_context *sctx, struct si_query *query);
+void si_query_hw_resume(struct si_context *sctx, struct si_query *query);
+
 
 /* Performance counters */
 struct si_perfcounters {
 	unsigned num_groups;
 	unsigned num_blocks;
 	struct si_pc_block *blocks;
 
 	unsigned num_stop_cs_dwords;
 	unsigned num_instance_cs_dwords;
 
-- 
2.19.1



More information about the mesa-dev mailing list