Mesa (master): freedreno: add support for hw queries

Rob Clark robclark at kemper.freedesktop.org
Tue May 13 22:33:59 UTC 2014


Module: Mesa
Branch: master
Commit: b8f78e18907be379415c8c804b634808349fc1d9
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=b8f78e18907be379415c8c804b634808349fc1d9

Author: Rob Clark <robclark at freedesktop.org>
Date:   Sat May 10 13:45:54 2014 -0400

freedreno: add support for hw queries

Real GPU queries need some infrastructure to track samples per tile and
accumulate the results.  But fortunately this can be shared across GPU
generations.

See:
https://github.com/freedreno/freedreno/wiki/Queries#hardware-queries

Signed-off-by: Rob Clark <robclark at freedesktop.org>

---

 src/gallium/drivers/freedreno/Makefile.sources     |    1 +
 src/gallium/drivers/freedreno/freedreno_context.c  |    3 +
 src/gallium/drivers/freedreno/freedreno_context.h  |   69 ++-
 src/gallium/drivers/freedreno/freedreno_draw.c     |    4 +
 src/gallium/drivers/freedreno/freedreno_gmem.c     |   19 +-
 src/gallium/drivers/freedreno/freedreno_query.c    |    5 +-
 src/gallium/drivers/freedreno/freedreno_query_hw.c |  465 ++++++++++++++++++++
 src/gallium/drivers/freedreno/freedreno_query_hw.h |  164 +++++++
 src/gallium/drivers/freedreno/freedreno_resource.c |    3 +
 src/gallium/drivers/freedreno/freedreno_util.h     |    9 +-
 10 files changed, 734 insertions(+), 8 deletions(-)

diff --git a/src/gallium/drivers/freedreno/Makefile.sources b/src/gallium/drivers/freedreno/Makefile.sources
index 3621abd..afb4c35 100644
--- a/src/gallium/drivers/freedreno/Makefile.sources
+++ b/src/gallium/drivers/freedreno/Makefile.sources
@@ -3,6 +3,7 @@ C_SOURCES := \
 	freedreno_lowering.c \
 	freedreno_program.c \
 	freedreno_query.c \
+	freedreno_query_hw.c \
 	freedreno_query_sw.c \
 	freedreno_fence.c \
 	freedreno_resource.c \
diff --git a/src/gallium/drivers/freedreno/freedreno_context.c b/src/gallium/drivers/freedreno/freedreno_context.c
index a8fe311..496a422 100644
--- a/src/gallium/drivers/freedreno/freedreno_context.c
+++ b/src/gallium/drivers/freedreno/freedreno_context.c
@@ -34,6 +34,7 @@
 #include "freedreno_state.h"
 #include "freedreno_gmem.h"
 #include "freedreno_query.h"
+#include "freedreno_query_hw.h"
 #include "freedreno_util.h"
 
 static struct fd_ringbuffer *next_rb(struct fd_context *ctx)
@@ -145,6 +146,7 @@ fd_context_destroy(struct pipe_context *pctx)
 	DBG("");
 
 	fd_prog_fini(pctx);
+	fd_hw_query_fini(pctx);
 
 	util_slab_destroy(&ctx->transfer_pool);
 
@@ -221,6 +223,7 @@ fd_context_init(struct fd_context *ctx, struct pipe_screen *pscreen,
 	fd_query_context_init(pctx);
 	fd_texture_init(pctx);
 	fd_state_init(pctx);
+	fd_hw_query_init(pctx);
 
 	ctx->blitter = util_blitter_create(pctx);
 	if (!ctx->blitter)
diff --git a/src/gallium/drivers/freedreno/freedreno_context.h b/src/gallium/drivers/freedreno/freedreno_context.h
index 2e7fded..4698482 100644
--- a/src/gallium/drivers/freedreno/freedreno_context.h
+++ b/src/gallium/drivers/freedreno/freedreno_context.h
@@ -33,6 +33,7 @@
 #include "pipe/p_context.h"
 #include "indices/u_primconvert.h"
 #include "util/u_blitter.h"
+#include "util/u_double_list.h"
 #include "util/u_slab.h"
 #include "util/u_string.h"
 
@@ -82,16 +83,80 @@ struct fd_vertex_stateobj {
 	unsigned num_elements;
 };
 
+/* Bitmask of stages in rendering during which a particular query is
+ * active.  Queries will be automatically started/stopped (generating
+ * additional fd_hw_sample_period's) on entrance/exit from stages that
+ * are applicable to the query.
+ *
+ * NOTE: set the stage to NULL at end of IB to ensure no query is still
+ * active.  Things aren't going to work out the way you want if a query
+ * is active across IB's (or between tile IB and draw IB)
+ */
+enum fd_render_stage {
+	FD_STAGE_NULL     = 0x00,
+	FD_STAGE_DRAW     = 0x01,
+	FD_STAGE_CLEAR    = 0x02,
+	/* TODO before queries which include MEM2GMEM or GMEM2MEM will
+	 * work we will need to call fd_hw_query_prepare() from somewhere
+	 * appropriate so that queries in the tiling IB get backed with
+	 * memory to write results to.
+	 */
+	FD_STAGE_MEM2GMEM = 0x04,
+	FD_STAGE_GMEM2MEM = 0x08,
+	/* used for driver internal draws (ie. util_blitter_blit()): */
+	FD_STAGE_BLIT     = 0x10,
+};
+
+#define MAX_HW_SAMPLE_PROVIDERS 4
+struct fd_hw_sample_provider;
+struct fd_hw_sample;
+
 struct fd_context {
 	struct pipe_context base;
 
 	struct fd_device *dev;
 	struct fd_screen *screen;
+
 	struct blitter_context *blitter;
 	struct primconvert_context *primconvert;
 
+	/* slab for pipe_transfer allocations: */
 	struct util_slab_mempool transfer_pool;
 
+	/* slabs for fd_hw_sample and fd_hw_sample_period allocations: */
+	struct util_slab_mempool sample_pool;
+	struct util_slab_mempool sample_period_pool;
+
+	/* next sample offset.. incremented for each sample in the batch/
+	 * submit, reset to zero on next submit.
+	 */
+	uint32_t next_sample_offset;
+
+	/* sample-providers for hw queries: */
+	const struct fd_hw_sample_provider *sample_providers[MAX_HW_SAMPLE_PROVIDERS];
+
+	/* cached samples (in case multiple queries need to reference
+	 * the same sample snapshot)
+	 */
+	struct fd_hw_sample *sample_cache[MAX_HW_SAMPLE_PROVIDERS];
+
+	/* tracking for current stage, to know when to start/stop
+	 * any active queries:
+	 */
+	enum fd_render_stage stage;
+
+	/* list of active queries: */
+	struct list_head active_queries;
+
+	/* list of queries that are not active, but were active in the
+	 * current submit:
+	 */
+	struct list_head current_queries;
+
+	/* current query result bo and tile stride: */
+	struct fd_bo *query_bo;
+	uint32_t query_tile_stride;
+
 	/* table with PIPE_PRIM_MAX entries mapping PIPE_PRIM_x to
 	 * DI_PT_x value to use for draw initiator.  There are some
 	 * slight differences between generation:
@@ -258,10 +323,6 @@ struct fd_context {
 	void (*draw)(struct fd_context *pctx, const struct pipe_draw_info *info);
 	void (*clear)(struct fd_context *ctx, unsigned buffers,
 			const union pipe_color_union *color, double depth, unsigned stencil);
-
-	/* queries: */
-	struct fd_query * (*create_query)(struct fd_context *ctx,
-			unsigned query_type);
 };
 
 static INLINE struct fd_context *
diff --git a/src/gallium/drivers/freedreno/freedreno_draw.c b/src/gallium/drivers/freedreno/freedreno_draw.c
index 11bb8d8..1289bb4 100644
--- a/src/gallium/drivers/freedreno/freedreno_draw.c
+++ b/src/gallium/drivers/freedreno/freedreno_draw.c
@@ -36,6 +36,7 @@
 #include "freedreno_context.h"
 #include "freedreno_state.h"
 #include "freedreno_resource.h"
+#include "freedreno_query_hw.h"
 #include "freedreno_util.h"
 
 
@@ -156,6 +157,7 @@ fd_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
 	/* and any buffers used, need to be resolved: */
 	ctx->resolve |= buffers;
 
+	fd_hw_query_set_stage(ctx, ctx->ring, FD_STAGE_DRAW);
 	ctx->draw(ctx, info);
 }
 
@@ -188,6 +190,8 @@ fd_clear(struct pipe_context *pctx, unsigned buffers,
 		util_format_short_name(pipe_surface_format(pfb->cbufs[0])),
 		util_format_short_name(pipe_surface_format(pfb->zsbuf)));
 
+	fd_hw_query_set_stage(ctx, ctx->ring, FD_STAGE_CLEAR);
+
 	ctx->clear(ctx, buffers, color, depth, stencil);
 
 	ctx->dirty |= FD_DIRTY_ZSA |
diff --git a/src/gallium/drivers/freedreno/freedreno_gmem.c b/src/gallium/drivers/freedreno/freedreno_gmem.c
index 2d4de44..861ebf5 100644
--- a/src/gallium/drivers/freedreno/freedreno_gmem.c
+++ b/src/gallium/drivers/freedreno/freedreno_gmem.c
@@ -35,6 +35,7 @@
 #include "freedreno_gmem.h"
 #include "freedreno_context.h"
 #include "freedreno_resource.h"
+#include "freedreno_query_hw.h"
 #include "freedreno_util.h"
 
 /*
@@ -273,17 +274,24 @@ render_tiles(struct fd_context *ctx)
 
 		ctx->emit_tile_prep(ctx, tile);
 
-		if (ctx->restore)
+		if (ctx->restore) {
+			fd_hw_query_set_stage(ctx, ctx->ring, FD_STAGE_MEM2GMEM);
 			ctx->emit_tile_mem2gmem(ctx, tile);
+			fd_hw_query_set_stage(ctx, ctx->ring, FD_STAGE_NULL);
+		}
 
 		ctx->emit_tile_renderprep(ctx, tile);
 
+		fd_hw_query_prepare_tile(ctx, i, ctx->ring);
+
 		/* emit IB to drawcmds: */
 		OUT_IB(ctx->ring, ctx->draw_start, ctx->draw_end);
 		fd_reset_wfi(ctx);
 
 		/* emit gmem2mem to transfer tile back to system memory: */
+		fd_hw_query_set_stage(ctx, ctx->ring, FD_STAGE_GMEM2MEM);
 		ctx->emit_tile_gmem2mem(ctx, tile);
+		fd_hw_query_set_stage(ctx, ctx->ring, FD_STAGE_NULL);
 	}
 }
 
@@ -292,6 +300,8 @@ render_sysmem(struct fd_context *ctx)
 {
 	ctx->emit_sysmem_prep(ctx);
 
+	fd_hw_query_prepare_tile(ctx, 0, ctx->ring);
+
 	/* emit IB to drawcmds: */
 	OUT_IB(ctx->ring, ctx->draw_start, ctx->draw_end);
 	fd_reset_wfi(ctx);
@@ -314,6 +324,11 @@ fd_gmem_render_tiles(struct pipe_context *pctx)
 		}
 	}
 
+	/* close out the draw cmds by making sure any active queries are
+	 * paused:
+	 */
+	fd_hw_query_set_stage(ctx, ctx->ring, FD_STAGE_NULL);
+
 	/* mark the end of the clear/draw cmds before emitting per-tile cmds: */
 	fd_ringmarker_mark(ctx->draw_end);
 	fd_ringmarker_mark(ctx->binning_end);
@@ -326,6 +341,7 @@ fd_gmem_render_tiles(struct pipe_context *pctx)
 		DBG("rendering sysmem (%s/%s)",
 			util_format_short_name(pipe_surface_format(pfb->cbufs[0])),
 			util_format_short_name(pipe_surface_format(pfb->zsbuf)));
+		fd_hw_query_prepare(ctx, 1);
 		render_sysmem(ctx);
 		ctx->stats.batch_sysmem++;
 	} else {
@@ -334,6 +350,7 @@ fd_gmem_render_tiles(struct pipe_context *pctx)
 		DBG("rendering %dx%d tiles (%s/%s)", gmem->nbins_x, gmem->nbins_y,
 			util_format_short_name(pipe_surface_format(pfb->cbufs[0])),
 			util_format_short_name(pipe_surface_format(pfb->zsbuf)));
+		fd_hw_query_prepare(ctx, gmem->nbins_x * gmem->nbins_y);
 		render_tiles(ctx);
 		ctx->stats.batch_gmem++;
 	}
diff --git a/src/gallium/drivers/freedreno/freedreno_query.c b/src/gallium/drivers/freedreno/freedreno_query.c
index fb9908b..8753a4b 100644
--- a/src/gallium/drivers/freedreno/freedreno_query.c
+++ b/src/gallium/drivers/freedreno/freedreno_query.c
@@ -31,6 +31,7 @@
 
 #include "freedreno_query.h"
 #include "freedreno_query_sw.h"
+#include "freedreno_query_hw.h"
 #include "freedreno_context.h"
 #include "freedreno_util.h"
 
@@ -45,8 +46,8 @@ fd_create_query(struct pipe_context *pctx, unsigned query_type)
 	struct fd_query *q;
 
 	q = fd_sw_create_query(ctx, query_type);
-	if (ctx->create_query && !q)
-		q = ctx->create_query(ctx, query_type);
+	if (!q)
+		q = fd_hw_create_query(ctx, query_type);
 
 	return (struct pipe_query *) q;
 }
diff --git a/src/gallium/drivers/freedreno/freedreno_query_hw.c b/src/gallium/drivers/freedreno/freedreno_query_hw.c
new file mode 100644
index 0000000..38bd3de
--- /dev/null
+++ b/src/gallium/drivers/freedreno/freedreno_query_hw.c
@@ -0,0 +1,465 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2014 Rob Clark <robclark at freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark at freedesktop.org>
+ */
+
+#include "pipe/p_state.h"
+#include "util/u_memory.h"
+#include "util/u_inlines.h"
+
+#include "freedreno_query_hw.h"
+#include "freedreno_context.h"
+#include "freedreno_util.h"
+
+struct fd_hw_sample_period {  /* one start/end pair of samples; a query's result is summed over its periods */
+	struct fd_hw_sample *start, *end;  /* end stays NULL from resume_query() until pause_query() closes the period */
+	struct list_head list;  /* node in fd_hw_query::current_periods, later moved to fd_hw_query::periods */
+};
+
+/* Translate a query type into its slot in the per-context sample
+ * provider table; -1 means there is no hw provider slot for the type.
+ */
+static int pidx(unsigned query_type)
+{
+	if (query_type == PIPE_QUERY_OCCLUSION_COUNTER)
+		return 0;
+
+	if (query_type == PIPE_QUERY_OCCLUSION_PREDICATE)
+		return 1;
+
+	return -1;
+}
+
+/* Hand out a new reference to the cached sample of query_type's
+ * provider.  When nothing is cached yet the provider is asked for a
+ * fresh sample (emitted into ring) and that one becomes the cached
+ * sample.
+ */
+static struct fd_hw_sample *
+get_sample(struct fd_context *ctx, struct fd_ringbuffer *ring,
+		unsigned query_type)
+{
+	int slot = pidx(query_type);
+	struct fd_hw_sample **cached = &ctx->sample_cache[slot];
+	struct fd_hw_sample *ref = NULL;
+
+	if (*cached == NULL)
+		*cached = ctx->sample_providers[slot]->get_sample(ctx, ring);
+
+	/* the cache keeps its own reference; the caller gets another: */
+	fd_hw_sample_reference(ctx, &ref, *cached);
+
+	return ref;
+}
+
+/* Drop the context's reference on every cached sample, so that the
+ * next get_sample() for each provider produces a new one.
+ */
+static void
+clear_sample_cache(struct fd_context *ctx)
+{
+	struct fd_hw_sample **slot = ctx->sample_cache;
+	struct fd_hw_sample **end = slot + ARRAY_SIZE(ctx->sample_cache);
+
+	for (; slot != end; slot++)
+		fd_hw_sample_reference(ctx, slot, NULL);
+}
+
+/* Does the query's provider list 'stage' among its active stages? */
+static bool
+is_active(struct fd_hw_query *hq, enum fd_render_stage stage)
+{
+	return (hq->provider->active & stage) != 0;
+}
+
+
+/* Open a new sample period for the query: take it from the period
+ * slab, record the starting sample and leave it attached to
+ * hq->period until pause_query() closes it.
+ */
+static void
+resume_query(struct fd_context *ctx, struct fd_hw_query *hq,
+		struct fd_ringbuffer *ring)
+{
+	struct fd_hw_sample_period *period;
+
+	assert(!hq->period);
+
+	period = util_slab_alloc(&ctx->sample_period_pool);
+	hq->period = period;
+
+	/* util_slab_alloc() hands back uninitialized memory, so every
+	 * field is set explicitly:
+	 */
+	list_inithead(&period->list);
+	period->start = get_sample(ctx, ring, hq->base.type);
+	period->end = NULL;
+}
+
+/* Close the query's open sample period: record the ending sample,
+ * queue the period on current_periods and detach it from hq->period.
+ */
+static void
+pause_query(struct fd_context *ctx, struct fd_hw_query *hq,
+		struct fd_ringbuffer *ring)
+{
+	struct fd_hw_sample_period *period = hq->period;
+
+	assert(period && !period->end);
+
+	period->end = get_sample(ctx, ring, hq->base.type);
+	list_addtail(&period->list, &hq->current_periods);
+
+	hq->period = NULL;
+}
+
+/* Empty a list of sample periods: each period gives up its start and
+ * end sample references and goes back to the period slab.
+ */
+static void
+destroy_periods(struct fd_context *ctx, struct list_head *list)
+{
+	while (!LIST_IS_EMPTY(list)) {
+		struct fd_hw_sample_period *period =
+			LIST_ENTRY(struct fd_hw_sample_period, list->next, list);
+
+		fd_hw_sample_reference(ctx, &period->start, NULL);
+		fd_hw_sample_reference(ctx, &period->end, NULL);
+		list_del(&period->list);
+		util_slab_free(&ctx->sample_period_pool, period);
+	}
+}
+
+/* destroy_query entry of hw_query_funcs: release every sample period
+ * the query still owns, unlink it from whichever context list it is on
+ * and free it.
+ */
+static void
+fd_hw_destroy_query(struct fd_context *ctx, struct fd_query *q)
+{
+	struct fd_hw_query *hq = fd_hw_query(q);
+
+	/* a query destroyed while it is sampling owns an open period that
+	 * is on neither period list yet; release it as well so its start
+	 * sample reference and slab entry are not leaked:
+	 */
+	if (hq->period) {
+		fd_hw_sample_reference(ctx, &hq->period->start, NULL);
+		util_slab_free(&ctx->sample_period_pool, hq->period);
+		hq->period = NULL;
+	}
+
+	destroy_periods(ctx, &hq->periods);
+	destroy_periods(ctx, &hq->current_periods);
+	list_del(&hq->list);
+
+	/* allocated with CALLOC_STRUCT(), so release with the matching
+	 * FREE() rather than libc free():
+	 */
+	FREE(hq);
+}
+
+/* begin_query entry of hw_query_funcs.  Does nothing for a query that
+ * is already active; otherwise discards the periods kept from previous
+ * submits, starts sampling immediately if the current stage applies to
+ * the query, and puts the query on the context's active list.
+ */
+static void
+fd_hw_begin_query(struct fd_context *ctx, struct fd_query *q)
+{
+	struct fd_hw_query *hq = fd_hw_query(q);
+
+	if (!q->active) {
+		/* begin_query() should clear previous results: */
+		destroy_periods(ctx, &hq->periods);
+
+		if (is_active(hq, ctx->stage))
+			resume_query(ctx, hq, ctx->ring);
+
+		q->active = true;
+
+		/* unlink from wherever it was and append to the active list: */
+		list_del(&hq->list);
+		list_addtail(&hq->list, &ctx->active_queries);
+	}
+}
+
+/* end_query entry of hw_query_funcs.  Does nothing for a query that is
+ * not active; otherwise closes the open period (if the current stage
+ * applies to the query) and moves the query from the active list to
+ * the list of queries that were active in the current submit.
+ */
+static void
+fd_hw_end_query(struct fd_context *ctx, struct fd_query *q)
+{
+	struct fd_hw_query *hq = fd_hw_query(q);
+
+	if (q->active) {
+		if (is_active(hq, ctx->stage))
+			pause_query(ctx, hq, ctx->ring);
+
+		q->active = false;
+
+		/* move to current list: */
+		list_del(&hq->list);
+		list_addtail(&hq->list, &ctx->current_queries);
+	}
+}
+
+/* Address of a sample's data for tile n, given the CPU mapping 'ptr'
+ * of the bo that backs it.
+ */
+static void * sampptr(struct fd_hw_sample *samp, uint32_t n, void *ptr)
+{
+	uint32_t tile_off = samp->tile_stride * n;
+	char *base = ptr;
+
+	return base + tile_off + samp->offset;
+}
+
+/* get_query_result entry of hw_query_funcs: sums the provider's
+ * per-tile results over every completed sample period of the query.
+ *
+ * Returns false while the query is still active, or (when !wait) when
+ * the bo behind the most recent period is not ready to be read yet.
+ * Returns true once 'result' has been filled in; a query without any
+ * recorded period yields a cleared result.
+ */
+static boolean
+fd_hw_get_query_result(struct fd_context *ctx, struct fd_query *q,
+		boolean wait, union pipe_query_result *result)
+{
+	struct fd_hw_query *hq = fd_hw_query(q);
+	const struct fd_hw_sample_provider *p = hq->provider;
+	struct fd_hw_sample_period *period;
+
+	if (q->active)
+		return false;
+
+	/* if the app tries to read back the query result before the
+	 * batch is submitted, that forces us to flush so that there
+	 * are actually results to wait for:
+	 */
+	if (!LIST_IS_EMPTY(&hq->list)) {
+		DBG("reading query result forces flush!");
+		ctx->needs_flush = true;
+		fd_context_render(&ctx->base);
+	}
+
+	util_query_clear_result(result, q->type);
+
+	if (LIST_IS_EMPTY(&hq->periods))
+		return true;
+
+	assert(LIST_IS_EMPTY(&hq->list));
+	assert(LIST_IS_EMPTY(&hq->current_periods));
+	assert(!hq->period);
+
+	/* if !wait, then check the last sample (the one most likely to
+	 * not be ready yet) and bail if it is not ready:
+	 */
+	if (!wait) {
+		int ret;
+
+		period = LIST_ENTRY(struct fd_hw_sample_period,
+				hq->periods.prev, list);
+
+		ret = fd_bo_cpu_prep(period->end->bo, ctx->screen->pipe,
+				DRM_FREEDRENO_PREP_READ | DRM_FREEDRENO_PREP_NOSYNC);
+		if (ret)
+			return false;
+
+		fd_bo_cpu_fini(period->end->bo);
+	}
+
+	/* sum the result across all sample periods: */
+	LIST_FOR_EACH_ENTRY(period, &hq->periods, list) {
+		struct fd_hw_sample *start = period->start;
+		struct fd_hw_sample *end = period->end;
+		unsigned i;
+		void *ptr;
+
+		/* start and end samples should be from same batch: */
+		assert(start->bo == end->bo);
+		assert(start->num_tiles == end->num_tiles);
+
+		/* every tile of the period lives in the same bo, so wait for
+		 * it and map it once per period rather than once per tile:
+		 */
+		fd_bo_cpu_prep(start->bo, ctx->screen->pipe,
+				DRM_FREEDRENO_PREP_READ);
+
+		ptr = fd_bo_map(start->bo);
+
+		for (i = 0; i < start->num_tiles; i++) {
+			p->accumulate_result(ctx, sampptr(start, i, ptr),
+					sampptr(end, i, ptr), result);
+		}
+
+		fd_bo_cpu_fini(start->bo);
+	}
+
+	return true;
+}
+
+/* Function table that fd_hw_create_query() installs on each hw query: */
+static const struct fd_query_funcs hw_query_funcs = {
+	.destroy_query = fd_hw_destroy_query,
+	.begin_query = fd_hw_begin_query,
+	.end_query = fd_hw_end_query,
+	.get_query_result = fd_hw_get_query_result,
+};
+
+/* Create a hw query of the given type.  Returns NULL when the type has
+ * no provider slot, when no provider is registered in that slot on
+ * this context, or when the allocation fails.
+ */
+struct fd_query *
+fd_hw_create_query(struct fd_context *ctx, unsigned query_type)
+{
+	const struct fd_hw_sample_provider *provider;
+	struct fd_hw_query *hq;
+	int idx = pidx(query_type);
+
+	if (idx < 0)
+		return NULL;
+
+	provider = ctx->sample_providers[idx];
+	if (!provider)
+		return NULL;
+
+	hq = CALLOC_STRUCT(fd_hw_query);
+	if (!hq)
+		return NULL;
+
+	hq->provider = provider;
+	hq->base.funcs = &hw_query_funcs;
+	hq->base.type = query_type;
+
+	/* all three start out as empty, self-linked lists/nodes: */
+	list_inithead(&hq->periods);
+	list_inithead(&hq->current_periods);
+	list_inithead(&hq->list);
+
+	return &hq->base;
+}
+
+/* Helper for sample providers: allocate a sample holding a single
+ * reference and reserve 'size' bytes for it at the context's next free
+ * sample offset.  No backing bo is attached yet.
+ */
+struct fd_hw_sample *
+fd_hw_sample_init(struct fd_context *ctx, uint32_t size)
+{
+	struct fd_hw_sample *samp = util_slab_alloc(&ctx->sample_pool);
+	uint32_t offset = ctx->next_sample_offset;
+
+	/* util_slab_alloc() does not zero the buffer, so every field is
+	 * set explicitly:
+	 */
+	pipe_reference_init(&samp->reference, 1);
+	samp->size = size;
+	samp->offset = offset;
+	samp->bo = NULL;
+	samp->num_tiles = 0;
+	samp->tile_stride = 0;
+
+	ctx->next_sample_offset = offset + size;
+
+	return samp;
+}
+
+/* Destroy a sample (reached through fd_hw_sample_reference(), not
+ * meant to be called directly): delete the backing bo if one was
+ * attached, then return the sample to its slab.
+ */
+void
+__fd_hw_sample_destroy(struct fd_context *ctx, struct fd_hw_sample *samp)
+{
+	struct fd_bo *bo = samp->bo;
+
+	if (bo != NULL)
+		fd_bo_del(bo);
+
+	util_slab_free(&ctx->sample_pool, samp);
+}
+
+/* Attach the batch's result bo and tile layout to a sample.  A sample
+ * that was already prepared (it can be shared by several periods) is
+ * left alone; in debug builds the arguments are checked against the
+ * values stored the first time.
+ *
+ * The sample takes its own reference on the bo.  That is the reference
+ * __fd_hw_sample_destroy() later drops with fd_bo_del(); without it
+ * every destroyed sample would release a reference it never owned.
+ */
+static void
+prepare_sample(struct fd_hw_sample *samp, struct fd_bo *bo,
+		uint32_t num_tiles, uint32_t tile_stride)
+{
+	if (samp->bo) {
+		assert(samp->bo == bo);
+		assert(samp->num_tiles == num_tiles);
+		assert(samp->tile_stride == tile_stride);
+		return;
+	}
+	/* fd_hw_query_prepare() passes NULL when no bo was allocated, so
+	 * only take a reference when there is a bo to reference:
+	 */
+	samp->bo = bo ? fd_bo_ref(bo) : NULL;
+	samp->num_tiles = num_tiles;
+	samp->tile_stride = tile_stride;
+}
+
+/* Attach the result bo to every period the query recorded during the
+ * current submit and move those periods, in order, from the
+ * current_periods list to the periods list.
+ */
+static void
+prepare_query(struct fd_hw_query *hq, struct fd_bo *bo,
+		uint32_t num_tiles, uint32_t tile_stride)
+{
+	struct list_head *pending = &hq->current_periods;
+
+	while (!LIST_IS_EMPTY(pending)) {
+		struct fd_hw_sample_period *period =
+			LIST_ENTRY(struct fd_hw_sample_period, pending->next, list);
+
+		prepare_sample(period->start, bo, num_tiles, tile_stride);
+		prepare_sample(period->end, bo, num_tiles, tile_stride);
+
+		/* taken from the head, appended at the tail: order is kept */
+		list_del(&period->list);
+		list_addtail(&period->list, &hq->periods);
+	}
+}
+
+/* Run prepare_query() on every query of 'list'.  With 'remove' set,
+ * each query is also unlinked from the list and its node is
+ * re-initialized.
+ */
+static void
+prepare_queries(struct fd_context *ctx, struct fd_bo *bo,
+		uint32_t num_tiles, uint32_t tile_stride,
+		struct list_head *list, bool remove)
+{
+	struct fd_hw_query *query, *next;
+
+	LIST_FOR_EACH_ENTRY_SAFE(query, next, list, list) {
+		prepare_query(query, bo, num_tiles, tile_stride);
+
+		if (!remove)
+			continue;
+
+		list_delinit(&query->list);
+	}
+}
+
+/* Called from the gmem code once the total storage requirement is
+ * known (ie. number of samples times number of tiles).  Replaces the
+ * context's query result bo with one sized for this batch, hands it to
+ * all queries that recorded sample periods, and resets the sample
+ * offset for the next batch.
+ */
+void
+fd_hw_query_prepare(struct fd_context *ctx, uint32_t num_tiles)
+{
+	const uint32_t tile_stride = ctx->next_sample_offset;
+	struct fd_bo *bo = NULL;
+
+	/* let go of the bo kept from the previous call: */
+	if (ctx->query_bo)
+		fd_bo_del(ctx->query_bo);
+
+	/* a zero stride means no sample space was reserved in this batch,
+	 * in which case no bo is allocated:
+	 */
+	if (tile_stride > 0)
+		bo = fd_bo_new(ctx->dev, tile_stride * num_tiles,
+				DRM_FREEDRENO_GEM_CACHE_WCOMBINE |
+				DRM_FREEDRENO_GEM_TYPE_KMEM);
+
+	ctx->query_bo = bo;
+	ctx->query_tile_stride = tile_stride;
+
+	/* still-active queries stay on their list, while queries that
+	 * ended during this submit are unlinked:
+	 */
+	prepare_queries(ctx, bo, num_tiles, tile_stride,
+			&ctx->active_queries, false);
+	prepare_queries(ctx, bo, num_tiles, tile_stride,
+			&ctx->current_queries, true);
+
+	/* reset things for next batch: */
+	ctx->next_sample_offset = 0;
+}
+
+/* Emit into ring a write of HW_QUERY_BASE_REG with a relocation to
+ * tile n's slice of the query result bo.  Nothing is emitted when the
+ * current tile stride is zero, ie. there are no queries.
+ */
+void
+fd_hw_query_prepare_tile(struct fd_context *ctx, uint32_t n,
+		struct fd_ringbuffer *ring)
+{
+	uint32_t tile_stride = ctx->query_tile_stride;
+
+	if (tile_stride != 0) {
+		uint32_t offset = tile_stride * n;
+
+		fd_wfi(ctx, ring);
+		OUT_PKT0 (ring, HW_QUERY_BASE_REG, 1);
+		OUT_RELOCW(ring, ctx->query_bo, offset, 0, 0);
+	}
+}
+
+/* Record that the cmdstream written to ring enters a new render
+ * stage.  Every active query whose provider applies to exactly one of
+ * the old and new stages is paused or resumed, the cached samples are
+ * dropped, and the new stage is stored in the context.
+ */
+void
+fd_hw_query_set_stage(struct fd_context *ctx, struct fd_ringbuffer *ring,
+		enum fd_render_stage stage)
+{
+	const enum fd_render_stage prev = ctx->stage;
+
+	/* Internal blits (like mipmap level generation) run through the
+	 * normal draw path via util_blitter_blit(), which asks for
+	 * FD_STAGE_DRAW.  While in FD_STAGE_BLIT only the switch back to
+	 * FD_STAGE_NULL is honoured, so that queries which should be
+	 * paused during internal blits are not re-enabled:
+	 */
+	if ((prev == FD_STAGE_BLIT) && (stage != FD_STAGE_NULL))
+		return;
+
+	if (prev != stage) {
+		struct fd_hw_query *hq;
+
+		LIST_FOR_EACH_ENTRY(hq, &ctx->active_queries, list) {
+			bool before = is_active(hq, prev);
+			bool after = is_active(hq, stage);
+
+			if (before == after)
+				continue;
+
+			if (after)
+				resume_query(ctx, hq, ring);
+			else
+				pause_query(ctx, hq, ring);
+		}
+	}
+
+	clear_sample_cache(ctx);
+	ctx->stage = stage;
+}
+
+/* Install a sample provider in the context slot that belongs to its
+ * query type.  The slot index is expected to be valid and the slot
+ * still empty; both are only checked by assert().
+ */
+void
+fd_hw_query_register_provider(struct pipe_context *pctx,
+		const struct fd_hw_sample_provider *provider)
+{
+	int idx = pidx(provider->query_type);
+	struct fd_context *ctx = fd_context(pctx);
+
+	assert((idx >= 0) && (idx < MAX_HW_SAMPLE_PROVIDERS));
+	assert(ctx->sample_providers[idx] == NULL);
+
+	ctx->sample_providers[idx] = provider;
+}
+
+/* Set up the per-context hw query state: two empty query lists and the
+ * slabs that samples and sample periods are allocated from.
+ */
+void
+fd_hw_query_init(struct pipe_context *pctx)
+{
+	struct fd_context *ctx = fd_context(pctx);
+
+	list_inithead(&ctx->active_queries);
+	list_inithead(&ctx->current_queries);
+
+	util_slab_create(&ctx->sample_pool,
+			sizeof(struct fd_hw_sample),
+			16, UTIL_SLAB_SINGLETHREADED);
+	util_slab_create(&ctx->sample_period_pool,
+			sizeof(struct fd_hw_sample_period),
+			16, UTIL_SLAB_SINGLETHREADED);
+}
+
+/* Tear down the per-context hw query state created by
+ * fd_hw_query_init(), plus what accumulated since: cached samples and
+ * the context's reference on the last query result bo.
+ */
+void
+fd_hw_query_fini(struct pipe_context *pctx)
+{
+	struct fd_context *ctx = fd_context(pctx);
+
+	/* cached samples live in sample_pool, so release them before the
+	 * pool itself goes away:
+	 */
+	clear_sample_cache(ctx);
+
+	/* the bo allocated by the last fd_hw_query_prepare() is otherwise
+	 * only released by the next prepare, which never comes:
+	 */
+	if (ctx->query_bo) {
+		fd_bo_del(ctx->query_bo);
+		ctx->query_bo = NULL;
+	}
+
+	util_slab_destroy(&ctx->sample_pool);
+	util_slab_destroy(&ctx->sample_period_pool);
+}
diff --git a/src/gallium/drivers/freedreno/freedreno_query_hw.h b/src/gallium/drivers/freedreno/freedreno_query_hw.h
new file mode 100644
index 0000000..62baa3a
--- /dev/null
+++ b/src/gallium/drivers/freedreno/freedreno_query_hw.h
@@ -0,0 +1,164 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2014 Rob Clark <robclark at freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark at freedesktop.org>
+ */
+
+#ifndef FREEDRENO_QUERY_HW_H_
+#define FREEDRENO_QUERY_HW_H_
+
+#include "util/u_double_list.h"
+
+#include "freedreno_query.h"
+#include "freedreno_context.h"
+
+
+/*
+ * HW Queries:
+ *
+ * See: https://github.com/freedreno/freedreno/wiki/Queries#hardware-queries
+ *
+ * Hardware queries will be specific to gpu generation, but they need
+ * some common infrastructure for triggering start/stop samples at
+ * various points (for example, to exclude mem2gmem/gmem2mem or clear)
+ * as well as per tile tracking.
+ *
+ * NOTE: in at least some cases hw writes sample values to memory addr
+ * specified in some register.  So we don't really have the option to
+ * just sample the same counter multiple times for multiple different
+ * queries with the same query_type.  So we cache per sample provider
+ * the most recent sample since the last draw.  This way multiple
+ * sample periods for multiple queries can reference the same sample.
+ *
+ * fd_hw_sample_provider:
+ *   - one per query type, registered/implemented by gpu generation
+ *     specific code
+ *   - can construct fd_hw_samples on demand
+ *   - most recent sample (since last draw) cached so multiple
+ *     different queries can ref the same sample
+ *
+ * fd_hw_sample:
+ *   - abstracts one snapshot of counter value(s) across N tiles
+ *   - backing object not allocated until submit time when number
+ *     of samples and number of tiles is known
+ *
+ * fd_hw_sample_period:
+ *   - consists of start and stop sample
+ *   - a query accumulates a list of sample periods
+ *   - the query result is the sum of the sample periods
+ */
+
+struct fd_hw_sample_provider {
+	unsigned query_type;  /* query type served; pidx() maps it to the provider's slot */
+
+	/* bitmask of the render stages during which queries of this
+	 * type sample (tested by is_active()):
+	 */
+	enum fd_render_stage active;
+
+	/* when a new sample is required, emit appropriate cmdstream
+	 * into ring and return a sample object:
+	 */
+	struct fd_hw_sample *(*get_sample)(struct fd_context *ctx,
+			struct fd_ringbuffer *ring);
+
+	/* accumulate into result the values of one tile for one sample
+	 * period; start and end point at that tile's start and end
+	 * sample data:
+	 */
+	void (*accumulate_result)(struct fd_context *ctx,
+			const void *start, const void *end,
+			union pipe_query_result *result);
+};
+
+struct fd_hw_sample {
+	struct pipe_reference reference;  /* keep this first */
+
+	/* offset and size of the sample are known at the time the
+	 * sample is constructed.  The offset is relative to the start
+	 * of one tile's slice of the backing object.
+	 */
+	uint32_t size;
+	uint32_t offset;
+
+	/* backing object, offset/stride/etc are determined not when
+	 * the sample is constructed, but when the batch is submitted.
+	 * This way we can defer allocation until total # of requested
+	 * samples, and total # of tiles, is known.
+	 */
+	struct fd_bo *bo;
+	uint32_t num_tiles;
+	uint32_t tile_stride;
+};
+
+struct fd_hw_sample_period;
+
+struct fd_hw_query {
+	struct fd_query base;
+
+	const struct fd_hw_sample_provider *provider;
+
+	/* list of fd_hw_sample_period in previous submits: */
+	struct list_head periods;
+
+	/* list of fd_hw_sample_period's in current submit: */
+	struct list_head current_periods;
+
+	/* if active and not paused, the current sample period (not
+	 * yet added to current_periods):
+	 */
+	struct fd_hw_sample_period *period;
+
+	struct list_head list;  /* list-node in ctx->active_queries or ctx->current_queries */
+};
+
+/* Downcast a generic query to the hw query that embeds it as 'base'. */
+static inline struct fd_hw_query *
+fd_hw_query(struct fd_query *q)
+{
+	struct fd_hw_query *hq = (struct fd_hw_query *)q;
+
+	return hq;
+}
+
+struct fd_query * fd_hw_create_query(struct fd_context *ctx, unsigned query_type);
+/* helper for sample providers: */
+struct fd_hw_sample * fd_hw_sample_init(struct fd_context *ctx, uint32_t size);
+/* don't call directly, use fd_hw_sample_reference() */
+void __fd_hw_sample_destroy(struct fd_context *ctx, struct fd_hw_sample *samp);
+void fd_hw_query_prepare(struct fd_context *ctx, uint32_t num_tiles);
+void fd_hw_query_prepare_tile(struct fd_context *ctx, uint32_t n,
+		struct fd_ringbuffer *ring);
+void fd_hw_query_set_stage(struct fd_context *ctx,
+		struct fd_ringbuffer *ring, enum fd_render_stage stage);
+void fd_hw_query_register_provider(struct pipe_context *pctx,
+		const struct fd_hw_sample_provider *provider);
+void fd_hw_query_init(struct pipe_context *pctx);
+void fd_hw_query_fini(struct pipe_context *pctx);
+
+/* Point *ptr at samp: a reference is taken on samp, the reference
+ * *ptr held before is dropped, and the previous sample is destroyed
+ * if that was its last reference.  The old and the new sample may each
+ * be NULL; ptr itself must be a valid pointer.
+ */
+static inline void
+fd_hw_sample_reference(struct fd_context *ctx,
+		struct fd_hw_sample **ptr, struct fd_hw_sample *samp)
+{
+	struct fd_hw_sample *old_samp = *ptr;
+
+	/* hand NULL to pipe_reference() explicitly instead of forming a
+	 * member address through a NULL sample pointer:
+	 */
+	if (pipe_reference(old_samp ? &old_samp->reference : NULL,
+			samp ? &samp->reference : NULL))
+		__fd_hw_sample_destroy(ctx, old_samp);
+
+	/* ptr was already dereferenced above, so testing it for NULL
+	 * here would be dead code:
+	 */
+	*ptr = samp;
+}
+
+#endif /* FREEDRENO_QUERY_HW_H_ */
diff --git a/src/gallium/drivers/freedreno/freedreno_resource.c b/src/gallium/drivers/freedreno/freedreno_resource.c
index bd8c6cb..289f365 100644
--- a/src/gallium/drivers/freedreno/freedreno_resource.c
+++ b/src/gallium/drivers/freedreno/freedreno_resource.c
@@ -36,6 +36,7 @@
 #include "freedreno_screen.h"
 #include "freedreno_surface.h"
 #include "freedreno_context.h"
+#include "freedreno_query_hw.h"
 #include "freedreno_util.h"
 
 #include <errno.h>
@@ -401,7 +402,9 @@ render_blit(struct pipe_context *pctx, struct pipe_blit_info *info)
 	util_blitter_save_fragment_sampler_views(ctx->blitter,
 			ctx->fragtex.num_textures, ctx->fragtex.textures);
 
+	fd_hw_query_set_stage(ctx, ctx->ring, FD_STAGE_BLIT);
 	util_blitter_blit(ctx->blitter, info);
+	fd_hw_query_set_stage(ctx, ctx->ring, FD_STAGE_NULL);
 
 	return true;
 }
diff --git a/src/gallium/drivers/freedreno/freedreno_util.h b/src/gallium/drivers/freedreno/freedreno_util.h
index 356f416..b57702c 100644
--- a/src/gallium/drivers/freedreno/freedreno_util.h
+++ b/src/gallium/drivers/freedreno/freedreno_util.h
@@ -223,11 +223,18 @@ OUT_IB(struct fd_ringbuffer *ring, struct fd_ringmarker *start,
 	emit_marker(ring, 6);
 }
 
+/* CP_SCRATCH_REG4 is used to hold base address for query results: */
+#define HW_QUERY_BASE_REG REG_AXXX_CP_SCRATCH_REG4
+
 static inline void
 emit_marker(struct fd_ringbuffer *ring, int scratch_idx)
 {
 	extern unsigned marker_cnt;
-	OUT_PKT0(ring, REG_AXXX_CP_SCRATCH_REG0 + scratch_idx, 1);
+	unsigned reg = REG_AXXX_CP_SCRATCH_REG0 + scratch_idx;
+	assert(reg != HW_QUERY_BASE_REG);
+	if (reg == HW_QUERY_BASE_REG)
+		return;
+	OUT_PKT0(ring, reg, 1);
 	OUT_RING(ring, ++marker_cnt);
 }
 




More information about the mesa-commit mailing list