[Mesa-dev] [PATCH 22/25] radeonsi: move query suspend logic into the top-level si_query struct
Nicolai Hähnle
nhaehnle at gmail.com
Thu Dec 6 14:00:43 UTC 2018
From: Nicolai Hähnle <nicolai.haehnle at amd.com>
---
src/gallium/drivers/radeonsi/si_perfcounter.c | 13 ++--
src/gallium/drivers/radeonsi/si_query.c | 75 ++++++++++---------
src/gallium/drivers/radeonsi/si_query.h | 18 +++--
3 files changed, 62 insertions(+), 44 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_perfcounter.c b/src/gallium/drivers/radeonsi/si_perfcounter.c
index 69e149c76b6..0b3d8f89273 100644
--- a/src/gallium/drivers/radeonsi/si_perfcounter.c
+++ b/src/gallium/drivers/radeonsi/si_perfcounter.c
@@ -861,21 +861,24 @@ static void si_pc_query_add_result(struct si_screen *screen,
uint32_t value = results[counter->base + j * counter->stride];
result->batch[i].u64 += value;
}
}
}
static struct si_query_ops batch_query_ops = {
.destroy = si_pc_query_destroy,
.begin = si_query_hw_begin,
.end = si_query_hw_end,
- .get_result = si_query_hw_get_result
+ .get_result = si_query_hw_get_result,
+
+ .suspend = si_query_hw_suspend,
+ .resume = si_query_hw_resume,
};
static struct si_query_hw_ops batch_query_hw_ops = {
.prepare_buffer = si_pc_query_prepare_buffer,
.emit_start = si_pc_query_emit_start,
.emit_stop = si_pc_query_emit_stop,
.clear_result = si_pc_query_clear_result,
.add_result = si_pc_query_add_result,
};
@@ -994,41 +997,41 @@ struct pipe_query *si_create_batch_query(struct pipe_context *ctx,
fprintf(stderr,
"perfcounter group %s: too many selected\n",
block->b->b->name);
goto error;
}
group->selectors[group->num_counters] = sub_index;
++group->num_counters;
}
/* Compute result bases and CS size per group */
- query->b.num_cs_dw_end = pc->num_stop_cs_dwords;
- query->b.num_cs_dw_end += pc->num_instance_cs_dwords;
+ query->b.b.num_cs_dw_suspend = pc->num_stop_cs_dwords;
+ query->b.b.num_cs_dw_suspend += pc->num_instance_cs_dwords;
i = 0;
for (group = query->groups; group; group = group->next) {
struct si_pc_block *block = group->block;
unsigned read_dw;
unsigned instances = 1;
if ((block->b->b->flags & SI_PC_BLOCK_SE) && group->se < 0)
instances = screen->info.max_se;
if (group->instance < 0)
instances *= block->num_instances;
group->result_base = i;
query->b.result_size += sizeof(uint64_t) * instances * group->num_counters;
i += instances * group->num_counters;
read_dw = 6 * group->num_counters;
- query->b.num_cs_dw_end += instances * read_dw;
- query->b.num_cs_dw_end += instances * pc->num_instance_cs_dwords;
+ query->b.b.num_cs_dw_suspend += instances * read_dw;
+ query->b.b.num_cs_dw_suspend += instances * pc->num_instance_cs_dwords;
}
if (query->shaders) {
if (query->shaders == SI_PC_SHADERS_WINDOWING)
query->shaders = 0xffffffff;
}
/* Map user-supplied query array to result indices */
query->counters = CALLOC(num_queries, sizeof(*query->counters));
for (i = 0; i < num_queries; ++i) {
diff --git a/src/gallium/drivers/radeonsi/si_query.c b/src/gallium/drivers/radeonsi/si_query.c
index aed3e1e80c1..479a1bbf2c4 100644
--- a/src/gallium/drivers/radeonsi/si_query.c
+++ b/src/gallium/drivers/radeonsi/si_query.c
@@ -27,20 +27,22 @@
#include "si_pipe.h"
#include "si_query.h"
#include "util/u_memory.h"
#include "util/u_upload_mgr.h"
#include "util/os_time.h"
#include "util/u_suballoc.h"
#include "amd/common/sid.h"
#define SI_MAX_STREAMS 4
+static struct si_query_ops query_hw_ops;
+
struct si_hw_query_params {
unsigned start_offset;
unsigned end_offset;
unsigned fence_offset;
unsigned pair_stride;
unsigned pair_count;
};
/* Queries without buffer handling or suspend/resume. */
struct si_query_sw {
@@ -600,28 +602,20 @@ static bool si_query_hw_prepare_buffer(struct si_screen *sscreen,
}
static void si_query_hw_get_result_resource(struct si_context *sctx,
struct si_query *rquery,
bool wait,
enum pipe_query_value_type result_type,
int index,
struct pipe_resource *resource,
unsigned offset);
-static struct si_query_ops query_hw_ops = {
- .destroy = si_query_hw_destroy,
- .begin = si_query_hw_begin,
- .end = si_query_hw_end,
- .get_result = si_query_hw_get_result,
- .get_result_resource = si_query_hw_get_result_resource,
-};
-
static void si_query_hw_do_emit_start(struct si_context *sctx,
struct si_query_hw *query,
struct r600_resource *buffer,
uint64_t va);
static void si_query_hw_do_emit_stop(struct si_context *sctx,
struct si_query_hw *query,
struct r600_resource *buffer,
uint64_t va);
static void si_query_hw_add_result(struct si_screen *sscreen,
struct si_query_hw *, void *buffer,
@@ -658,55 +652,54 @@ static struct pipe_query *si_query_hw_create(struct si_screen *sscreen,
query->b.type = query_type;
query->b.ops = &query_hw_ops;
query->ops = &query_hw_default_hw_ops;
switch (query_type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
case PIPE_QUERY_OCCLUSION_PREDICATE:
case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
query->result_size = 16 * sscreen->info.num_render_backends;
query->result_size += 16; /* for the fence + alignment */
- query->num_cs_dw_end = 6 + si_cp_write_fence_dwords(sscreen);
+ query->b.num_cs_dw_suspend = 6 + si_cp_write_fence_dwords(sscreen);
break;
case SI_QUERY_TIME_ELAPSED_SDMA:
/* GET_GLOBAL_TIMESTAMP only works if the offset is a multiple of 32. */
query->result_size = 64;
- query->num_cs_dw_end = 0;
break;
case PIPE_QUERY_TIME_ELAPSED:
query->result_size = 24;
- query->num_cs_dw_end = 8 + si_cp_write_fence_dwords(sscreen);
+ query->b.num_cs_dw_suspend = 8 + si_cp_write_fence_dwords(sscreen);
break;
case PIPE_QUERY_TIMESTAMP:
query->result_size = 16;
- query->num_cs_dw_end = 8 + si_cp_write_fence_dwords(sscreen);
+ query->b.num_cs_dw_suspend = 8 + si_cp_write_fence_dwords(sscreen);
query->flags = SI_QUERY_HW_FLAG_NO_START;
break;
case PIPE_QUERY_PRIMITIVES_EMITTED:
case PIPE_QUERY_PRIMITIVES_GENERATED:
case PIPE_QUERY_SO_STATISTICS:
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
/* NumPrimitivesWritten, PrimitiveStorageNeeded. */
query->result_size = 32;
- query->num_cs_dw_end = 6;
+ query->b.num_cs_dw_suspend = 6;
query->stream = index;
break;
case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
/* NumPrimitivesWritten, PrimitiveStorageNeeded. */
query->result_size = 32 * SI_MAX_STREAMS;
- query->num_cs_dw_end = 6 * SI_MAX_STREAMS;
+ query->b.num_cs_dw_suspend = 6 * SI_MAX_STREAMS;
break;
case PIPE_QUERY_PIPELINE_STATISTICS:
/* 11 values on GCN. */
query->result_size = 11 * 16;
query->result_size += 8; /* for the fence + alignment */
- query->num_cs_dw_end = 6 + si_cp_write_fence_dwords(sscreen);
+ query->b.num_cs_dw_suspend = 6 + si_cp_write_fence_dwords(sscreen);
break;
default:
assert(0);
FREE(query);
return NULL;
}
if (!si_query_hw_init(sscreen, query)) {
FREE(query);
return NULL;
@@ -833,22 +826,20 @@ static void si_query_hw_emit_start(struct si_context *sctx,
query->buffer.previous = qbuf;
query->buffer.buf = si_new_query_buffer(sctx->screen, query);
if (!query->buffer.buf)
return;
}
/* emit begin query */
va = query->buffer.buf->gpu_address + query->buffer.results_end;
query->ops->emit_start(sctx, query, query->buffer.buf, va);
-
- sctx->num_cs_dw_queries_suspend += query->num_cs_dw_end;
}
static void si_query_hw_do_emit_stop(struct si_context *sctx,
struct si_query_hw *query,
struct r600_resource *buffer,
uint64_t va)
{
struct radeon_cmdbuf *cs = sctx->gfx_cs;
uint64_t fence_va = 0;
@@ -928,23 +919,20 @@ static void si_query_hw_emit_stop(struct si_context *sctx,
if (query->flags & SI_QUERY_HW_FLAG_NO_START)
si_need_gfx_cs_space(sctx);
/* emit end query */
va = query->buffer.buf->gpu_address + query->buffer.results_end;
query->ops->emit_stop(sctx, query, query->buffer.buf, va);
query->buffer.results_end += query->result_size;
- if (!(query->flags & SI_QUERY_HW_FLAG_NO_START))
- sctx->num_cs_dw_queries_suspend -= query->num_cs_dw_end;
-
si_update_occlusion_query_state(sctx, query->b.type, -1);
si_update_prims_generated_query_state(sctx, query->b.type, -1);
}
static void emit_set_predicate(struct si_context *ctx,
struct r600_resource *buf, uint64_t va,
uint32_t op)
{
struct radeon_cmdbuf *cs = ctx->gfx_cs;
@@ -1112,21 +1100,22 @@ bool si_query_hw_begin(struct si_context *sctx,
if (!(query->flags & SI_QUERY_HW_FLAG_BEGIN_RESUMES))
si_query_hw_reset_buffers(sctx, query);
r600_resource_reference(&query->workaround_buf, NULL);
si_query_hw_emit_start(sctx, query);
if (!query->buffer.buf)
return false;
- LIST_ADDTAIL(&query->list, &sctx->active_queries);
+ LIST_ADDTAIL(&query->b.active_list, &sctx->active_queries);
+ sctx->num_cs_dw_queries_suspend += query->b.num_cs_dw_suspend;
return true;
}
static bool si_end_query(struct pipe_context *ctx, struct pipe_query *query)
{
struct si_context *sctx = (struct si_context *)ctx;
struct si_query *rquery = (struct si_query *)query;
return rquery->ops->end(sctx, rquery);
}
@@ -1134,22 +1123,24 @@ static bool si_end_query(struct pipe_context *ctx, struct pipe_query *query)
bool si_query_hw_end(struct si_context *sctx,
struct si_query *rquery)
{
struct si_query_hw *query = (struct si_query_hw *)rquery;
if (query->flags & SI_QUERY_HW_FLAG_NO_START)
si_query_hw_reset_buffers(sctx, query);
si_query_hw_emit_stop(sctx, query);
- if (!(query->flags & SI_QUERY_HW_FLAG_NO_START))
- LIST_DELINIT(&query->list);
+ if (!(query->flags & SI_QUERY_HW_FLAG_NO_START)) {
+ LIST_DELINIT(&query->b.active_list);
+ sctx->num_cs_dw_queries_suspend -= query->b.num_cs_dw_suspend;
+ }
if (!query->buffer.buf)
return false;
return true;
}
static void si_get_hw_query_params(struct si_context *sctx,
struct si_query_hw *rquery, int index,
struct si_hw_query_params *params)
@@ -1342,20 +1333,41 @@ static void si_query_hw_add_result(struct si_screen *sscreen,
result->pipeline_statistics.c_primitives,
result->pipeline_statistics.ps_invocations,
result->pipeline_statistics.cs_invocations);
#endif
break;
default:
assert(0);
}
}
+void si_query_hw_suspend(struct si_context *sctx, struct si_query *query)
+{
+ si_query_hw_emit_stop(sctx, (struct si_query_hw *)query);
+}
+
+void si_query_hw_resume(struct si_context *sctx, struct si_query *query)
+{
+ si_query_hw_emit_start(sctx, (struct si_query_hw *)query);
+}
+
+static struct si_query_ops query_hw_ops = {
+ .destroy = si_query_hw_destroy,
+ .begin = si_query_hw_begin,
+ .end = si_query_hw_end,
+ .get_result = si_query_hw_get_result,
+ .get_result_resource = si_query_hw_get_result_resource,
+
+ .suspend = si_query_hw_suspend,
+ .resume = si_query_hw_resume,
+};
+
static boolean si_get_query_result(struct pipe_context *ctx,
struct pipe_query *query, boolean wait,
union pipe_query_result *result)
{
struct si_context *sctx = (struct si_context *)ctx;
struct si_query *rquery = (struct si_query *)query;
return rquery->ops->get_result(sctx, rquery, wait, result);
}
@@ -1637,40 +1649,35 @@ static void si_render_condition(struct pipe_context *ctx,
sctx->render_cond = query;
sctx->render_cond_invert = condition;
sctx->render_cond_mode = mode;
si_set_atom_dirty(sctx, atom, query != NULL);
}
void si_suspend_queries(struct si_context *sctx)
{
- struct si_query_hw *query;
+ struct si_query *query;
- LIST_FOR_EACH_ENTRY(query, &sctx->active_queries, list) {
- si_query_hw_emit_stop(sctx, query);
- }
- assert(sctx->num_cs_dw_queries_suspend == 0);
+ LIST_FOR_EACH_ENTRY(query, &sctx->active_queries, active_list)
+ query->ops->suspend(sctx, query);
}
void si_resume_queries(struct si_context *sctx)
{
- struct si_query_hw *query;
-
- assert(sctx->num_cs_dw_queries_suspend == 0);
+ struct si_query *query;
/* Check CS space here. Resuming must not be interrupted by flushes. */
si_need_gfx_cs_space(sctx);
- LIST_FOR_EACH_ENTRY(query, &sctx->active_queries, list) {
- si_query_hw_emit_start(sctx, query);
- }
+ LIST_FOR_EACH_ENTRY(query, &sctx->active_queries, active_list)
+ query->ops->resume(sctx, query);
}
#define XFULL(name_, query_type_, type_, result_type_, group_id_) \
{ \
.name = name_, \
.query_type = SI_QUERY_##query_type_, \
.type = PIPE_DRIVER_QUERY_TYPE_##type_, \
.result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_##result_type_, \
.group_id = group_id_ \
}
diff --git a/src/gallium/drivers/radeonsi/si_query.h b/src/gallium/drivers/radeonsi/si_query.h
index 032946edf4d..ebd965a004f 100644
--- a/src/gallium/drivers/radeonsi/si_query.h
+++ b/src/gallium/drivers/radeonsi/si_query.h
@@ -126,28 +126,37 @@ struct si_query_ops {
bool (*end)(struct si_context *, struct si_query *);
bool (*get_result)(struct si_context *,
struct si_query *, bool wait,
union pipe_query_result *result);
void (*get_result_resource)(struct si_context *,
struct si_query *, bool wait,
enum pipe_query_value_type result_type,
int index,
struct pipe_resource *resource,
unsigned offset);
+
+ void (*suspend)(struct si_context *, struct si_query *);
+ void (*resume)(struct si_context *, struct si_query *);
};
struct si_query {
struct threaded_query b;
struct si_query_ops *ops;
- /* The type of query */
+ /* The PIPE_QUERY_xxx type of query */
unsigned type;
+
+ /* The number of dwords for suspend. */
+ unsigned num_cs_dw_suspend;
+
+ /* Linked list of queries that must be suspended at end of CS. */
+ struct list_head active_list;
};
enum {
SI_QUERY_HW_FLAG_NO_START = (1 << 0),
/* gap */
/* whether begin_query doesn't clear the result */
SI_QUERY_HW_FLAG_BEGIN_RESUMES = (1 << 2),
};
struct si_query_hw_ops {
@@ -180,44 +189,43 @@ struct si_query_buffer {
struct si_query_hw {
struct si_query b;
struct si_query_hw_ops *ops;
unsigned flags;
/* The query buffer and how many results are in it. */
struct si_query_buffer buffer;
/* Size of the result in memory for both begin_query and end_query,
* this can be one or two numbers, or it could even be a size of a structure. */
unsigned result_size;
- /* The number of dwords for end_query. */
- unsigned num_cs_dw_end;
- /* Linked list of queries */
- struct list_head list;
/* For transform feedback: which stream the query is for */
unsigned stream;
/* Workaround via compute shader */
struct r600_resource *workaround_buf;
unsigned workaround_offset;
};
bool si_query_hw_init(struct si_screen *sscreen,
struct si_query_hw *query);
void si_query_hw_destroy(struct si_screen *sscreen,
struct si_query *rquery);
bool si_query_hw_begin(struct si_context *sctx,
struct si_query *rquery);
bool si_query_hw_end(struct si_context *sctx,
struct si_query *rquery);
bool si_query_hw_get_result(struct si_context *sctx,
struct si_query *rquery,
bool wait,
union pipe_query_result *result);
+void si_query_hw_suspend(struct si_context *sctx, struct si_query *query);
+void si_query_hw_resume(struct si_context *sctx, struct si_query *query);
+
/* Performance counters */
struct si_perfcounters {
unsigned num_groups;
unsigned num_blocks;
struct si_pc_block *blocks;
unsigned num_stop_cs_dwords;
unsigned num_instance_cs_dwords;
--
2.19.1
More information about the mesa-dev mailing list