[Mesa-dev] [PATCH 23/25] radeonsi: factor si_query_buffer logic out of si_query_hw
Haehnle, Nicolai
Nicolai.Haehnle at amd.com
Wed Feb 6 08:07:34 UTC 2019
Thanks!
On 06.02.19 01:27, Timothy Arceri wrote:
> On 4/2/19 11:26 pm, Nicolai Hähnle wrote:
>> On 01.02.19 05:25, Timothy Arceri wrote:
>>> On 26/1/19 11:56 am, Marek Olšák wrote:
>>>> Timothy, can you please test the attached fix?
>>>
>>> I'm having trouble compiling 32-bit Mesa on my machine at the moment,
>>> so I haven't been able to test Batman. But this commit also causes No
>>> Man's Sky to lock up my machine, and the attached patch does not fix it.
>>
>> Is there a trace or something else to easily reproduce it?
>
> I've filed a bug report with all the info you should need to reproduce:
>
> https://bugs.freedesktop.org/show_bug.cgi?id=109561
>
>>
>> Cheers,
>> Nicolai
>>
>>
>>
>>>
>>>>
>>>> Thanks,
>>>> Marek
>>>>
>>>> On Wed, Jan 2, 2019 at 10:58 PM Timothy Arceri <tarceri at itsqueeze.com> wrote:
>>>>
>>>> This commit seems to cause bad stuttering in the Batman Arkham City
>>>> benchmark.
>>>>
>>>> On 7/12/18 1:00 am, Nicolai Hähnle wrote:
>>>> > From: Nicolai Hähnle <nicolai.haehnle at amd.com>
>>>> >
>>>> > This is a move towards using composition instead of inheritance for
>>>> > different query types.
>>>> >
>>>> > This change weakens out-of-memory error reporting somewhat, though this
>>>> > should be acceptable since we didn't consistently report such errors in
>>>> > the first place.
>>>> > ---
>>>> > src/gallium/drivers/radeonsi/si_perfcounter.c | 8 +-
>>>> > src/gallium/drivers/radeonsi/si_query.c | 177 +++++++++---------
>>>> > src/gallium/drivers/radeonsi/si_query.h | 17 +-
>>>> > src/gallium/drivers/radeonsi/si_texture.c | 7 +-
>>>> > 4 files changed, 99 insertions(+), 110 deletions(-)
>>>> >
>>>> > diff --git a/src/gallium/drivers/radeonsi/si_perfcounter.c b/src/gallium/drivers/radeonsi/si_perfcounter.c
>>>> > index 0b3d8f89273..f0d10c054c4 100644
>>>> > --- a/src/gallium/drivers/radeonsi/si_perfcounter.c
>>>> > +++ b/src/gallium/drivers/radeonsi/si_perfcounter.c
>>>> > @@ -761,23 +761,22 @@ static void si_pc_query_destroy(struct si_screen *sscreen,
>>>> > struct si_query_group *group = query->groups;
>>>> > query->groups = group->next;
>>>> > FREE(group);
>>>> > }
>>>> >
>>>> > FREE(query->counters);
>>>> >
>>>> > si_query_hw_destroy(sscreen, rquery);
>>>> > }
>>>> >
>>>> > -static bool si_pc_query_prepare_buffer(struct si_screen *screen,
>>>> > - struct si_query_hw *hwquery,
>>>> > - struct r600_resource *buffer)
>>>> > +static bool si_pc_query_prepare_buffer(struct si_context *ctx,
>>>> > + struct si_query_buffer *qbuf)
>>>> > {
>>>> > /* no-op */
>>>> > return true;
>>>> > }
>>>> >
>>>> > static void si_pc_query_emit_start(struct si_context *sctx,
>>>> > struct si_query_hw *hwquery,
>>>> > struct r600_resource *buffer, uint64_t va)
>>>> > {
>>>> > struct si_query_pc *query = (struct si_query_pc *)hwquery;
>>>> > @@ -1055,23 +1054,20 @@ struct pipe_query *si_create_batch_query(struct pipe_context *ctx,
>>>> > counter->base = group->result_base + j;
>>>> > counter->stride = group->num_counters;
>>>> >
>>>> > counter->qwords = 1;
>>>> > if ((block->b->b->flags & SI_PC_BLOCK_SE) && group->se < 0)
>>>> > counter->qwords = screen->info.max_se;
>>>> > if (group->instance < 0)
>>>> > counter->qwords *= block->num_instances;
>>>> > }
>>>> >
>>>> > - if (!si_query_hw_init(screen, &query->b))
>>>> > - goto error;
>>>> > -
>>>> > return (struct pipe_query *)query;
>>>> >
>>>> > error:
>>>> > si_pc_query_destroy(screen, &query->b.b);
>>>> > return NULL;
>>>> > }
>>>> >
>>>> > static bool si_init_block_names(struct si_screen *screen,
>>>> > struct si_pc_block *block)
>>>> > {
>>>> > diff --git a/src/gallium/drivers/radeonsi/si_query.c b/src/gallium/drivers/radeonsi/si_query.c
>>>> > index 479a1bbf2c4..5b0fba0ed92 100644
>>>> > --- a/src/gallium/drivers/radeonsi/si_query.c
>>>> > +++ b/src/gallium/drivers/radeonsi/si_query.c
>>>> > @@ -514,86 +514,129 @@ static struct pipe_query *si_query_sw_create(unsigned query_type)
>>>> > query = CALLOC_STRUCT(si_query_sw);
>>>> > if (!query)
>>>> > return NULL;
>>>> >
>>>> > query->b.type = query_type;
>>>> > query->b.ops = &sw_query_ops;
>>>> >
>>>> > return (struct pipe_query *)query;
>>>> > }
>>>> >
>>>> > -void si_query_hw_destroy(struct si_screen *sscreen,
>>>> > - struct si_query *rquery)
>>>> > +void si_query_buffer_destroy(struct si_screen *sscreen, struct si_query_buffer *buffer)
>>>> > {
>>>> > - struct si_query_hw *query = (struct si_query_hw *)rquery;
>>>> > - struct si_query_buffer *prev = query->buffer.previous;
>>>> > + struct si_query_buffer *prev = buffer->previous;
>>>> >
>>>> > /* Release all query buffers. */
>>>> > while (prev) {
>>>> > struct si_query_buffer *qbuf = prev;
>>>> > prev = prev->previous;
>>>> > r600_resource_reference(&qbuf->buf, NULL);
>>>> > FREE(qbuf);
>>>> > }
>>>> >
>>>> > - r600_resource_reference(&query->buffer.buf, NULL);
>>>> > - r600_resource_reference(&query->workaround_buf, NULL);
>>>> > - FREE(rquery);
>>>> > + r600_resource_reference(&buffer->buf, NULL);
>>>> > +}
>>>> > +
>>>> > +void si_query_buffer_reset(struct si_context *sctx, struct si_query_buffer *buffer)
>>>> > +{
>>>> > + /* Discard all query buffers except for the oldest. */
>>>> > + while (buffer->previous) {
>>>> > + struct si_query_buffer *qbuf = buffer->previous;
>>>> > + buffer->previous = qbuf->previous;
>>>> > +
>>>> > + r600_resource_reference(&buffer->buf, NULL);
>>>> > + buffer->buf = qbuf->buf; /* move ownership */
>>>> > + FREE(qbuf);
>>>> > + }
>>>> > + buffer->results_end = 0;
>>>> > +
>>>> > + /* Discard even the oldest buffer if it can't be mapped without a stall. */
>>>> > + if (buffer->buf &&
>>>> > + (si_rings_is_buffer_referenced(sctx, buffer->buf->buf, RADEON_USAGE_READWRITE) ||
>>>> > + !sctx->ws->buffer_wait(buffer->buf->buf, 0, RADEON_USAGE_READWRITE))) {
>>>> > + r600_resource_reference(&buffer->buf, NULL);
>>>> > + }
>>>> > }
>>>> >
>>>> > -static struct r600_resource *si_new_query_buffer(struct si_screen *sscreen,
>>>> > - struct si_query_hw *query)
>>>> > +bool si_query_buffer_alloc(struct si_context *sctx, struct si_query_buffer *buffer,
>>>> > + bool (*prepare_buffer)(struct si_context *, struct si_query_buffer*),
>>>> > + unsigned size)
>>>> > {
>>>> > - unsigned buf_size = MAX2(query->result_size,
>>>> > - sscreen->info.min_alloc_size);
>>>> > + if (buffer->buf && buffer->results_end + size >= buffer->buf->b.b.width0)
>>>> > + return true;
>>>> > +
>>>> > + if (buffer->buf) {
>>>> > + struct si_query_buffer *qbuf = MALLOC_STRUCT(si_query_buffer);
>>>> > + memcpy(qbuf, buffer, sizeof(*qbuf));
>>>> > + buffer->previous = qbuf;
>>>> > + }
>>>> > +
>>>> > + buffer->results_end = 0;
>>>> >
>>>> > /* Queries are normally read by the CPU after
>>>> > * being written by the gpu, hence staging is probably a good
>>>> > * usage pattern.
>>>> > */
>>>> > - struct r600_resource *buf = r600_resource(
>>>> > - pipe_buffer_create(&sscreen->b, 0,
>>>> > - PIPE_USAGE_STAGING, buf_size));
>>>> > - if (!buf)
>>>> > - return NULL;
>>>> > + struct si_screen *screen = sctx->screen;
>>>> > + unsigned buf_size = MAX2(size, screen->info.min_alloc_size);
>>>> > + buffer->buf = r600_resource(
>>>> > + pipe_buffer_create(&screen->b, 0, PIPE_USAGE_STAGING, buf_size));
>>>> > + if (unlikely(!buffer->buf))
>>>> > + return false;
>>>> >
>>>> > - if (!query->ops->prepare_buffer(sscreen, query, buf)) {
>>>> > - r600_resource_reference(&buf, NULL);
>>>> > - return NULL;
>>>> > + if (prepare_buffer) {
>>>> > + if (unlikely(!prepare_buffer(sctx, buffer))) {
>>>> > + r600_resource_reference(&buffer->buf, NULL);
>>>> > + return false;
>>>> > + }
>>>> > }
>>>> >
>>>> > - return buf;
>>>> > + return true;
>>>> > }
>>>> >
>>>> > -static bool si_query_hw_prepare_buffer(struct si_screen *sscreen,
>>>> > - struct si_query_hw *query,
>>>> > - struct r600_resource *buffer)
>>>> > +
>>>> > +void si_query_hw_destroy(struct si_screen *sscreen,
>>>> > + struct si_query *rquery)
>>>> > +{
>>>> > + struct si_query_hw *query = (struct si_query_hw *)rquery;
>>>> > +
>>>> > + si_query_buffer_destroy(sscreen, &query->buffer);
>>>> > + r600_resource_reference(&query->workaround_buf, NULL);
>>>> > + FREE(rquery);
>>>> > +}
>>>> > +
>>>> > +static bool si_query_hw_prepare_buffer(struct si_context *sctx,
>>>> > + struct si_query_buffer *qbuf)
>>>> > {
>>>> > - /* Callers ensure that the buffer is currently unused by the GPU. */
>>>> > - uint32_t *results = sscreen->ws->buffer_map(buffer->buf, NULL,
>>>> > + static const struct si_query_hw si_query_hw_s;
>>>> > + struct si_query_hw *query = container_of(qbuf, &si_query_hw_s, buffer);
>>>> > + struct si_screen *screen = sctx->screen;
>>>> > +
>>>> > + /* The caller ensures that the buffer is currently unused by the GPU. */
>>>> > + uint32_t *results = screen->ws->buffer_map(qbuf->buf->buf, NULL,
>>>> > PIPE_TRANSFER_WRITE |
>>>> > PIPE_TRANSFER_UNSYNCHRONIZED);
>>>> > if (!results)
>>>> > return false;
>>>> >
>>>> > - memset(results, 0, buffer->b.b.width0);
>>>> > + memset(results, 0, qbuf->buf->b.b.width0);
>>>> >
>>>> > if (query->b.type == PIPE_QUERY_OCCLUSION_COUNTER ||
>>>> > query->b.type == PIPE_QUERY_OCCLUSION_PREDICATE ||
>>>> > query->b.type == PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE) {
>>>> > - unsigned max_rbs = sscreen->info.num_render_backends;
>>>> > - unsigned enabled_rb_mask = sscreen->info.enabled_rb_mask;
>>>> > + unsigned max_rbs = screen->info.num_render_backends;
>>>> > + unsigned enabled_rb_mask = screen->info.enabled_rb_mask;
>>>> > unsigned num_results;
>>>> > unsigned i, j;
>>>> >
>>>> > /* Set top bits for unused backends. */
>>>> > - num_results = buffer->b.b.width0 / query->result_size;
>>>> > + num_results = qbuf->buf->b.b.width0 / query->result_size;
>>>> > for (j = 0; j < num_results; j++) {
>>>> > for (i = 0; i < max_rbs; i++) {
>>>> > if (!(enabled_rb_mask & (1<<i))) {
>>>> > results[(i * 4)+1] = 0x80000000;
>>>> > results[(i * 4)+3] = 0x80000000;
>>>> > }
>>>> > }
>>>> > results += 4 * max_rbs;
>>>> > }
>>>> > }
>>>> > @@ -624,30 +667,20 @@ static void si_query_hw_clear_result(struct si_query_hw *,
>>>> > union pipe_query_result *);
>>>> >
>>>> > static struct si_query_hw_ops query_hw_default_hw_ops = {
>>>> > .prepare_buffer = si_query_hw_prepare_buffer,
>>>> > .emit_start = si_query_hw_do_emit_start,
>>>> > .emit_stop = si_query_hw_do_emit_stop,
>>>> > .clear_result = si_query_hw_clear_result,
>>>> > .add_result = si_query_hw_add_result,
>>>> > };
>>>> >
>>>> > -bool si_query_hw_init(struct si_screen *sscreen,
>>>> > - struct si_query_hw *query)
>>>> > -{
>>>> > - query->buffer.buf = si_new_query_buffer(sscreen, query);
>>>> > - if (!query->buffer.buf)
>>>> > - return false;
>>>> > -
>>>> > - return true;
>>>> > -}
>>>> > -
>>>> > static struct pipe_query *si_query_hw_create(struct si_screen *sscreen,
>>>> > unsigned query_type,
>>>> > unsigned index)
>>>> > {
>>>> > struct si_query_hw *query = CALLOC_STRUCT(si_query_hw);
>>>> > if (!query)
>>>> > return NULL;
>>>> >
>>>> > query->b.type = query_type;
>>>> > query->b.ops = &query_hw_ops;
>>>> > @@ -693,25 +726,20 @@ static struct pipe_query *si_query_hw_create(struct si_screen *sscreen,
>>>> > query->result_size = 11 * 16;
>>>> > query->result_size += 8; /* for the fence + alignment */
>>>> > query->b.num_cs_dw_suspend = 6 + si_cp_write_fence_dwords(sscreen);
>>>> > break;
>>>> > default:
>>>> > assert(0);
>>>> > FREE(query);
>>>> > return NULL;
>>>> > }
>>>> >
>>>> > - if (!si_query_hw_init(sscreen, query)) {
>>>> > - FREE(query);
>>>> > - return NULL;
>>>> > - }
>>>> > -
>>>> > return (struct pipe_query *)query;
>>>> > }
>>>> >
>>>> > static void si_update_occlusion_query_state(struct si_context *sctx,
>>>> > unsigned type, int diff)
>>>> > {
>>>> > if (type == PIPE_QUERY_OCCLUSION_COUNTER ||
>>>> > type == PIPE_QUERY_OCCLUSION_PREDICATE ||
>>>> > type == PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE) {
>>>> > bool old_enable = sctx->num_occlusion_queries != 0;
>>>> > @@ -802,43 +830,31 @@ static void si_query_hw_do_emit_start(struct si_context *sctx,
>>>> > }
>>>> > radeon_add_to_buffer_list(sctx, sctx->gfx_cs, query->buffer.buf, RADEON_USAGE_WRITE,
>>>> > RADEON_PRIO_QUERY);
>>>> > }
>>>> >
>>>> > static void si_query_hw_emit_start(struct si_context *sctx,
>>>> > struct si_query_hw *query)
>>>> > {
>>>> > uint64_t va;
>>>> >
>>>> > - if (!query->buffer.buf)
>>>> > - return; // previous buffer allocation failure
>>>> > + if (!si_query_buffer_alloc(sctx, &query->buffer, query->ops->prepare_buffer,
>>>> > + query->result_size))
>>>> > + return;
>>>> >
>>>> > si_update_occlusion_query_state(sctx, query->b.type, 1);
>>>> > si_update_prims_generated_query_state(sctx, query->b.type, 1);
>>>> >
>>>> > if (query->b.type != SI_QUERY_TIME_ELAPSED_SDMA)
>>>> > si_need_gfx_cs_space(sctx);
>>>> >
>>>> > - /* Get a new query buffer if needed. */
>>>> > - if (query->buffer.results_end + query->result_size > query->buffer.buf->b.b.width0) {
>>>> > - struct si_query_buffer *qbuf = MALLOC_STRUCT(si_query_buffer);
>>>> > - *qbuf = query->buffer;
>>>> > - query->buffer.results_end = 0;
>>>> > - query->buffer.previous = qbuf;
>>>> > - query->buffer.buf = si_new_query_buffer(sctx->screen, query);
>>>> > - if (!query->buffer.buf)
>>>> > - return;
>>>> > - }
>>>> > -
>>>> > - /* emit begin query */
>>>> > va = query->buffer.buf->gpu_address + query->buffer.results_end;
>>>> > -
>>>> > query->ops->emit_start(sctx, query, query->buffer.buf, va);
>>>> > }
>>>> >
>>>> > static void si_query_hw_do_emit_stop(struct si_context *sctx,
>>>> > struct si_query_hw *query,
>>>> > struct r600_resource *buffer,
>>>> > uint64_t va)
>>>> > {
>>>> > struct radeon_cmdbuf *cs = sctx->gfx_cs;
>>>> > uint64_t fence_va = 0;
>>>> > @@ -905,26 +921,30 @@ static void si_query_hw_do_emit_stop(struct si_context *sctx,
>>>> > query->buffer.buf, fence_va, 0x80000000,
>>>> > query->b.type);
>>>> > }
>>>> > }
>>>> >
>>>> > static void si_query_hw_emit_stop(struct si_context *sctx,
>>>> > struct si_query_hw *query)
>>>> > {
>>>> > uint64_t va;
>>>> >
>>>> > - if (!query->buffer.buf)
>>>> > - return; // previous buffer allocation failure
>>>> > -
>>>> > /* The queries which need begin already called this in begin_query. */
>>>> > - if (query->flags & SI_QUERY_HW_FLAG_NO_START)
>>>> > + if (query->flags & SI_QUERY_HW_FLAG_NO_START) {
>>>> > si_need_gfx_cs_space(sctx);
>>>> > + if (!si_query_buffer_alloc(sctx, &query->buffer, query->ops->prepare_buffer,
>>>> > + query->result_size))
>>>> > + return;
>>>> > + }
>>>> > +
>>>> > + if (!query->buffer.buf)
>>>> > + return; // previous buffer allocation failure
>>>> >
>>>> > /* emit end query */
>>>> > va = query->buffer.buf->gpu_address + query->buffer.results_end;
>>>> >
>>>> > query->ops->emit_stop(sctx, query, query->buffer.buf, va);
>>>> >
>>>> > query->buffer.results_end += query->result_size;
>>>> >
>>>> > si_update_occlusion_query_state(sctx, query->b.type, -1);
>>>> > si_update_prims_generated_query_state(sctx, query->b.type, -1);
>>>> > @@ -1054,59 +1074,32 @@ static void si_destroy_query(struct pipe_context *ctx, struct pipe_query *query)
>>>> >
>>>> > static boolean si_begin_query(struct pipe_context *ctx,
>>>> > struct pipe_query *query)
>>>> > {
>>>> > struct si_context *sctx = (struct si_context *)ctx;
>>>> > struct si_query *rquery = (struct si_query *)query;
>>>> >
>>>> > return rquery->ops->begin(sctx, rquery);
>>>> > }
>>>> >
>>>> > -void si_query_hw_reset_buffers(struct si_context *sctx,
>>>> > - struct si_query_hw *query)
>>>> > -{
>>>> > - struct si_query_buffer *prev = query->buffer.previous;
>>>> > -
>>>> > - /* Discard the old query buffers. */
>>>> > - while (prev) {
>>>> > - struct si_query_buffer *qbuf = prev;
>>>> > - prev = prev->previous;
>>>> > - r600_resource_reference(&qbuf->buf, NULL);
>>>> > - FREE(qbuf);
>>>> > - }
>>>> > -
>>>> > - query->buffer.results_end = 0;
>>>> > - query->buffer.previous = NULL;
>>>> > -
>>>> > - /* Obtain a new buffer if the current one can't be mapped without a stall. */
>>>> > - if (si_rings_is_buffer_referenced(sctx, query->buffer.buf->buf, RADEON_USAGE_READWRITE) ||
>>>> > - !sctx->ws->buffer_wait(query->buffer.buf->buf, 0, RADEON_USAGE_READWRITE)) {
>>>> > - r600_resource_reference(&query->buffer.buf, NULL);
>>>> > - query->buffer.buf = si_new_query_buffer(sctx->screen, query);
>>>> > - } else {
>>>> > - if (!query->ops->prepare_buffer(sctx->screen, query, query->buffer.buf))
>>>> > - r600_resource_reference(&query->buffer.buf, NULL);
>>>> > - }
>>>> > -}
>>>> > -
>>>> > bool si_query_hw_begin(struct si_context *sctx,
>>>> > struct si_query *rquery)
>>>> > {
>>>> > struct si_query_hw *query = (struct si_query_hw *)rquery;
>>>> >
>>>> > if (query->flags & SI_QUERY_HW_FLAG_NO_START) {
>>>> > assert(0);
>>>> > return false;
>>>> > }
>>>> >
>>>> > if (!(query->flags & SI_QUERY_HW_FLAG_BEGIN_RESUMES))
>>>> > - si_query_hw_reset_buffers(sctx, query);
>>>> > + si_query_buffer_reset(sctx, &query->buffer);
>>>> >
>>>> > r600_resource_reference(&query->workaround_buf, NULL);
>>>> >
>>>> > si_query_hw_emit_start(sctx, query);
>>>> > if (!query->buffer.buf)
>>>> > return false;
>>>> >
>>>> > LIST_ADDTAIL(&query->b.active_list, &sctx->active_queries);
>>>> > sctx->num_cs_dw_queries_suspend += query->b.num_cs_dw_suspend;
>>>> > return true;
>>>> > @@ -1119,21 +1112,21 @@ static bool si_end_query(struct pipe_context *ctx, struct pipe_query *query)
>>>> >
>>>> > return rquery->ops->end(sctx, rquery);
>>>> > }
>>>> >
>>>> > bool si_query_hw_end(struct si_context *sctx,
>>>> > struct si_query *rquery)
>>>> > {
>>>> > struct si_query_hw *query = (struct si_query_hw *)rquery;
>>>> >
>>>> > if (query->flags & SI_QUERY_HW_FLAG_NO_START)
>>>> > - si_query_hw_reset_buffers(sctx, query);
>>>> > + si_query_buffer_reset(sctx, &query->buffer);
>>>> >
>>>> > si_query_hw_emit_stop(sctx, query);
>>>> >
>>>> > if (!(query->flags & SI_QUERY_HW_FLAG_NO_START)) {
>>>> > LIST_DELINIT(&query->b.active_list);
>>>> > sctx->num_cs_dw_queries_suspend -= query->b.num_cs_dw_suspend;
>>>> > }
>>>> >
>>>> > if (!query->buffer.buf)
>>>> > return false;
>>>> > diff --git a/src/gallium/drivers/radeonsi/si_query.h b/src/gallium/drivers/radeonsi/si_query.h
>>>> > index ebd965a004f..63af760a271 100644
>>>> > --- a/src/gallium/drivers/radeonsi/si_query.h
>>>> > +++ b/src/gallium/drivers/radeonsi/si_query.h
>>>> > @@ -27,20 +27,21 @@
>>>> >
>>>> > #include "util/u_threaded_context.h"
>>>> >
>>>> > struct pipe_context;
>>>> > struct pipe_query;
>>>> > struct pipe_resource;
>>>> >
>>>> > struct si_screen;
>>>> > struct si_context;
>>>> > struct si_query;
>>>> > +struct si_query_buffer;
>>>> > struct si_query_hw;
>>>> > struct r600_resource;
>>>> >
>>>> > enum {
>>>> > SI_QUERY_DRAW_CALLS = PIPE_QUERY_DRIVER_SPECIFIC,
>>>> > SI_QUERY_DECOMPRESS_CALLS,
>>>> > SI_QUERY_MRT_DRAW_CALLS,
>>>> > SI_QUERY_PRIM_RESTART_CALLS,
>>>> > SI_QUERY_SPILL_DRAW_CALLS,
>>>> > SI_QUERY_COMPUTE_CALLS,
>>>> > @@ -153,23 +154,21 @@ struct si_query {
>>>> > };
>>>> >
>>>> > enum {
>>>> > SI_QUERY_HW_FLAG_NO_START = (1 << 0),
>>>> > /* gap */
>>>> > /* whether begin_query doesn't clear the result */
>>>> > SI_QUERY_HW_FLAG_BEGIN_RESUMES = (1 << 2),
>>>> > };
>>>> >
>>>> > struct si_query_hw_ops {
>>>> > - bool (*prepare_buffer)(struct si_screen *,
>>>> > - struct si_query_hw *,
>>>> > - struct r600_resource *);
>>>> > + bool (*prepare_buffer)(struct si_context *, struct si_query_buffer *);
>>>> > void (*emit_start)(struct si_context *,
>>>> > struct si_query_hw *,
>>>> > struct r600_resource *buffer, uint64_t va);
>>>> > void (*emit_stop)(struct si_context *,
>>>> > struct si_query_hw *,
>>>> > struct r600_resource *buffer, uint64_t va);
>>>> > void (*clear_result)(struct si_query_hw *, union pipe_query_result *);
>>>> > void (*add_result)(struct si_screen *screen,
>>>> > struct si_query_hw *, void *buffer,
>>>> > union pipe_query_result *result);
>>>> > @@ -179,40 +178,45 @@ struct si_query_buffer {
>>>> > /* The buffer where query results are stored. */
>>>> > struct r600_resource *buf;
>>>> > /* Offset of the next free result after current query data */
>>>> > unsigned results_end;
>>>> > /* If a query buffer is full, a new buffer is created and the old one
>>>> > * is put in here. When we calculate the result, we sum up the samples
>>>> > * from all buffers. */
>>>> > struct si_query_buffer *previous;
>>>> > };
>>>> >
>>>> > +void si_query_buffer_destroy(struct si_screen *sctx, struct si_query_buffer *buffer);
>>>> > +void si_query_buffer_reset(struct si_context *sctx, struct si_query_buffer *buffer);
>>>> > +bool si_query_buffer_alloc(struct si_context *sctx, struct si_query_buffer *buffer,
>>>> > + bool (*prepare_buffer)(struct si_context *, struct si_query_buffer*),
>>>> > + unsigned size);
>>>> > +
>>>> > +
>>>> > struct si_query_hw {
>>>> > struct si_query b;
>>>> > struct si_query_hw_ops *ops;
>>>> > unsigned flags;
>>>> >
>>>> > /* The query buffer and how many results are in it. */
>>>> > struct si_query_buffer buffer;
>>>> > /* Size of the result in memory for both begin_query and end_query,
>>>> > * this can be one or two numbers, or it could even be a size of a structure. */
>>>> > unsigned result_size;
>>>> > /* For transform feedback: which stream the query is for */
>>>> > unsigned stream;
>>>> >
>>>> > /* Workaround via compute shader */
>>>> > struct r600_resource *workaround_buf;
>>>> > unsigned workaround_offset;
>>>> > };
>>>> >
>>>> > -bool si_query_hw_init(struct si_screen *sscreen,
>>>> > - struct si_query_hw *query);
>>>> > void si_query_hw_destroy(struct si_screen *sscreen,
>>>> > struct si_query *rquery);
>>>> > bool si_query_hw_begin(struct si_context *sctx,
>>>> > struct si_query *rquery);
>>>> > bool si_query_hw_end(struct si_context *sctx,
>>>> > struct si_query *rquery);
>>>> > bool si_query_hw_get_result(struct si_context *sctx,
>>>> > struct si_query *rquery,
>>>> > bool wait,
>>>> > union pipe_query_result *result);
>>>> > @@ -237,20 +241,17 @@ struct pipe_query *si_create_batch_query(struct pipe_context *ctx,
>>>> > unsigned num_queries,
>>>> > unsigned *query_types);
>>>> >
>>>> > int si_get_perfcounter_info(struct si_screen *,
>>>> > unsigned index,
>>>> > struct pipe_driver_query_info *info);
>>>> > int si_get_perfcounter_group_info(struct si_screen *,
>>>> > unsigned index,
>>>> > struct pipe_driver_query_group_info *info);
>>>> >
>>>> > -void si_query_hw_reset_buffers(struct si_context *sctx,
>>>> > - struct si_query_hw *query);
>>>> > -
>>>> > struct si_qbo_state {
>>>> > void *saved_compute;
>>>> > struct pipe_constant_buffer saved_const0;
>>>> > struct pipe_shader_buffer saved_ssbo[3];
>>>> > };
>>>> >
>>>> > #endif /* SI_QUERY_H */
>>>> > diff --git a/src/gallium/drivers/radeonsi/si_texture.c b/src/gallium/drivers/radeonsi/si_texture.c
>>>> > index ac1a0aa6097..9df12e0f5bd 100644
>>>> > --- a/src/gallium/drivers/radeonsi/si_texture.c
>>>> > +++ b/src/gallium/drivers/radeonsi/si_texture.c
>>>> > @@ -2276,25 +2276,24 @@ void vi_separate_dcc_process_and_reset_stats(struct pipe_context *ctx,
>>>> > struct si_context *sctx = (struct si_context*)ctx;
>>>> > struct pipe_query *tmp;
>>>> > unsigned i = vi_get_context_dcc_stats_index(sctx, tex);
>>>> > bool query_active = sctx->dcc_stats[i].query_active;
>>>> > bool disable = false;
>>>> >
>>>> > if (sctx->dcc_stats[i].ps_stats[2]) {
>>>> > union pipe_query_result result;
>>>> >
>>>> > /* Read the results. */
>>>> > - ctx->get_query_result(ctx, sctx->dcc_stats[i].ps_stats[2],
>>>> > + struct pipe_query *query = sctx->dcc_stats[i].ps_stats[2];
>>>> > + ctx->get_query_result(ctx, query,
>>>> > true, &result);
>>>> > - si_query_hw_reset_buffers(sctx,
>>>> > - (struct si_query_hw*)
>>>> > - sctx->dcc_stats[i].ps_stats[2]);
>>>> > + si_query_buffer_reset(sctx, &((struct si_query_hw*)query)->buffer);
>>>> >
>>>> > /* Compute the approximate number of fullscreen draws. */
>>>> > tex->ps_draw_ratio =
>>>> > result.pipeline_statistics.ps_invocations /
>>>> > (tex->buffer.b.b.width0 * tex->buffer.b.b.height0);
>>>> > sctx->last_tex_ps_draw_ratio = tex->ps_draw_ratio;
>>>> >
>>>> > disable = tex->dcc_separate_buffer &&
>>>> > !vi_should_enable_separate_dcc(tex);
>>>> > }
>>>> >
>>>> _______________________________________________
>>>> mesa-dev mailing list
>>>> mesa-dev at lists.freedesktop.org
>>>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>>>>
>>
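For readers who only want the shape of the new interface, here is a minimal, hypothetical sketch of how a query type would use the si_query_buffer helpers introduced by the patch above (si_query_buffer_alloc, si_query_buffer_reset, si_query_buffer_destroy). It follows the declarations quoted from si_query.h; the names my_query and my_prepare_buffer are invented for illustration and are not part of the patch.

    struct my_query {
            struct si_query_buffer buffer;  /* composed, not inherited */
            unsigned result_size;           /* bytes needed per result slot */
    };

    /* Optional callback to initialize a freshly allocated buffer.
     * Returning false makes si_query_buffer_alloc fail and drop the buffer. */
    static bool my_prepare_buffer(struct si_context *sctx,
                                  struct si_query_buffer *qbuf)
    {
            /* e.g. clear qbuf->buf here */
            return true;
    }

    static bool my_query_begin(struct si_context *sctx, struct my_query *q)
    {
            /* Keep at most the oldest buffer, and only if reusing it
             * would not stall. */
            si_query_buffer_reset(sctx, &q->buffer);

            /* Ask for room for one more result; the helper is meant to
             * chain in a new buffer when the current one is exhausted. */
            if (!si_query_buffer_alloc(sctx, &q->buffer, my_prepare_buffer,
                                       q->result_size))
                    return false;  /* allocation failure: record nothing */

            /* Results are then written at
             * q->buffer.buf->gpu_address + q->buffer.results_end. */
            return true;
    }

    static void my_query_destroy(struct si_screen *sscreen, struct my_query *q)
    {
            /* Releases the whole chain of result buffers. */
            si_query_buffer_destroy(sscreen, &q->buffer);
            FREE(q);
    }

The composition is the point: all buffer chaining and reuse lives in struct si_query_buffer and its three helpers, while the query type only decides when to reset, how large a result slot it needs, and how (if at all) to prepare a fresh buffer.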