Mesa (master): freedreno/a6xx: pre-calculate expected vsc stream sizes

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Tue Apr 28 23:50:36 UTC 2020


Module: Mesa
Branch: master
Commit: f561e516c8a01993ea83f5d48e0126d0b7b6528b
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=f561e516c8a01993ea83f5d48e0126d0b7b6528b

Author: Rob Clark <robdclark at chromium.org>
Date:   Sat Apr 25 12:16:35 2020 -0700

freedreno/a6xx: pre-calculate expected vsc stream sizes

We should only rely on overflow detection for indirect draws, where we
have no other option.

This doesn't use quite the worst-possible-case sizes, which in practice
seem to be ~20x larger than what is required.  But instead uses roughly
half of that.

Signed-off-by: Rob Clark <robdclark at chromium.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4750>

---

 src/gallium/drivers/freedreno/Makefile.sources  |   2 +
 src/gallium/drivers/freedreno/a6xx/fd6_draw.c   |   4 +
 src/gallium/drivers/freedreno/a6xx/fd6_gmem.c   |  21 ++++
 src/gallium/drivers/freedreno/a6xx/fd6_vsc.c    | 160 ++++++++++++++++++++++++
 src/gallium/drivers/freedreno/a6xx/fd6_vsc.h    |  29 +++++
 src/gallium/drivers/freedreno/freedreno_batch.c |   3 +
 src/gallium/drivers/freedreno/freedreno_batch.h |   9 +-
 src/gallium/drivers/freedreno/meson.build       |   2 +
 8 files changed, 229 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/freedreno/Makefile.sources b/src/gallium/drivers/freedreno/Makefile.sources
index 0fd7bcf5ddd..0268bb11a55 100644
--- a/src/gallium/drivers/freedreno/Makefile.sources
+++ b/src/gallium/drivers/freedreno/Makefile.sources
@@ -195,6 +195,8 @@ a6xx_SOURCES := \
 	a6xx/fd6_screen.h \
 	a6xx/fd6_texture.c \
 	a6xx/fd6_texture.h \
+	a6xx/fd6_vsc.c \
+	a6xx/fd6_vsc.h \
 	a6xx/fd6_zsa.c \
 	a6xx/fd6_zsa.h
 
diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_draw.c b/src/gallium/drivers/freedreno/a6xx/fd6_draw.c
index 25d81018ccc..f8ad3be75c8 100644
--- a/src/gallium/drivers/freedreno/a6xx/fd6_draw.c
+++ b/src/gallium/drivers/freedreno/a6xx/fd6_draw.c
@@ -38,6 +38,7 @@
 #include "fd6_emit.h"
 #include "fd6_program.h"
 #include "fd6_format.h"
+#include "fd6_vsc.h"
 #include "fd6_zsa.h"
 
 static void
@@ -192,6 +193,9 @@ fd6_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info,
 	if (emit.key.gs)
 		emit.key.key.has_gs = true;
 
+	if (!(emit.key.hs || emit.key.ds || emit.key.gs || info->indirect))
+		fd6_vsc_update_sizes(ctx->batch, info);
+
 	fixup_shader_state(ctx, &emit.key.key);
 
 	if (!(ctx->dirty & FD_DIRTY_PROG)) {
diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_gmem.c b/src/gallium/drivers/freedreno/a6xx/fd6_gmem.c
index 3ee55f98adf..befe19aadfa 100644
--- a/src/gallium/drivers/freedreno/a6xx/fd6_gmem.c
+++ b/src/gallium/drivers/freedreno/a6xx/fd6_gmem.c
@@ -326,6 +326,27 @@ update_vsc_pipe(struct fd_batch *batch)
 	struct fd_ringbuffer *ring = batch->gmem;
 	int i;
 
+	if (batch->draw_strm_bits/8 > fd6_ctx->vsc_draw_strm_pitch) {
+		if (fd6_ctx->vsc_draw_strm)
+			fd_bo_del(fd6_ctx->vsc_draw_strm);
+		fd6_ctx->vsc_draw_strm = NULL;
+		/* Note: probably only need to align to 0x40, but aligning stronger
+		 * reduces the odds that we will have to realloc again on the next
+		 * frame:
+		 */
+		fd6_ctx->vsc_draw_strm_pitch = align(batch->draw_strm_bits/8, 0x4000);
+		debug_printf("pre-resize VSC_DRAW_STRM_PITCH to: 0x%x\n",
+				fd6_ctx->vsc_draw_strm_pitch);
+	}
+
+	if (batch->prim_strm_bits/8 > fd6_ctx->vsc_prim_strm_pitch) {
+		if (fd6_ctx->vsc_prim_strm)
+			fd_bo_del(fd6_ctx->vsc_prim_strm);
+		fd6_ctx->vsc_prim_strm = NULL;
+		fd6_ctx->vsc_prim_strm_pitch = align(batch->prim_strm_bits/8, 0x4000);
+		debug_printf("pre-resize VSC_PRIM_STRM_PITCH to: 0x%x\n",
+				fd6_ctx->vsc_prim_strm_pitch);
+	}
 
 	if (!fd6_ctx->vsc_draw_strm) {
 		fd6_ctx->vsc_draw_strm = fd_bo_new(ctx->screen->dev,
diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_vsc.c b/src/gallium/drivers/freedreno/a6xx/fd6_vsc.c
new file mode 100644
index 00000000000..daf97fe48ac
--- /dev/null
+++ b/src/gallium/drivers/freedreno/a6xx/fd6_vsc.c
@@ -0,0 +1,160 @@
+/*
+ * Copyright © 2020 Google, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+
+#include "pipe/p_state.h"
+#include "util/u_prim.h"
+
+#include "freedreno_batch.h"
+#include "freedreno_gmem.h"
+
+#include "fd6_vsc.h"
+
+/*
+ * Helper util to update expected vsc draw and primitive stream sizes, see
+ * https://github.com/freedreno/freedreno/wiki/Visibility-Stream-Format
+ */
+
+enum {
+	byte = 8,
+	dword = 4 * byte,
+} bits_per;
+
+/**
+ * Determine # of bits required to store a given number, see
+ * https://github.com/freedreno/freedreno/wiki/Visibility-Stream-Format#numbers
+ */
+static unsigned
+number_size_bits(unsigned nr)
+{
+	unsigned n = util_last_bit(nr);
+	assert(n);  /* encoding 0 is not possible */
+	return n + (n - 1);
+}
+
+/**
+ * Determine # of bits required to store a given bitfield, see
+ * https://github.com/freedreno/freedreno/wiki/Visibility-Stream-Format#bitfields
+ */
+static unsigned
+bitfield_size_bits(unsigned n)
+{
+	return n + 1;  /* worst case is always 1 + nr of bits */
+}
+
+static unsigned
+prim_count(const struct pipe_draw_info *info)
+{
+	/* PIPE_PRIM_MAX used internally for RECTLIST blits on 3d pipe: */
+	unsigned vtx_per_prim = (info->mode == PIPE_PRIM_MAX) ? 2 :
+			u_vertices_per_prim(info->mode);
+	return (info->count * info->instance_count) / vtx_per_prim;
+}
+
+/**
+ * The primitive stream uses a run-length encoding, where each packet contains a
+ * bitfield of bins covered and then the number of primitives which have the same
+ * bitfield. Each packet consists of the following, in order:
+ *
+ *  - The (compressed) bitfield of bins covered
+ *  - The number of primitives with this bitset
+ *  - Checksum
+ *
+ * The worst case would be that each primitive has a different bitmask.  In practice,
+ * assuming every other primitive has a different bitmask still gets us conservatively
+ * large primitive stream sizes.  (Ie. 10x what is needed, vs. 20x)
+ *
+ * https://github.com/freedreno/freedreno/wiki/Visibility-Stream-Format#primitive-streams
+ */
+static unsigned
+primitive_stream_size_bits(const struct pipe_draw_info *info, unsigned num_bins)
+{
+	unsigned num_prims = prim_count(info);
+	unsigned nbits =
+			(bitfield_size_bits(num_bins)   /* bitfield of bins covered */
+			+ number_size_bits(1)           /* number of primitives with this bitset */
+			+ 1                             /* checksum */
+			) * DIV_ROUND_UP(num_prims, 2);
+	return align(nbits, dword);
+}
+
+/**
+ * Each draw stream packet contains the following:
+ *
+ *  - Bin bitfield
+ *  - Last instance bit
+ *  - If bitfield is empty, the number of draws it is empty for, otherwise
+ *    the size of the corresponding primitive stream in DWORD's.
+ *  - Checksum
+ *
+ * https://github.com/freedreno/freedreno/wiki/Visibility-Stream-Format#draw-streams
+ */
+static unsigned
+draw_stream_size_bits(const struct pipe_draw_info *info, unsigned num_bins,
+		unsigned prim_strm_bits)
+{
+	unsigned ndwords = prim_strm_bits / dword;
+	assert(info->instance_count > 0);
+	return (bitfield_size_bits(num_bins)    /* bitfield of bins */
+			+ 1                             /* last-instance-bit */
+			+ number_size_bits(ndwords)     /* size of corresponding prim strm */
+			+ 1                             /* checksum */
+			) * info->instance_count;
+}
+
+void
+fd6_vsc_update_sizes(struct fd_batch *batch, const struct pipe_draw_info *info)
+{
+	if (!batch->num_bins_per_pipe) {
+		batch->num_bins_per_pipe = fd_gmem_estimate_bins_per_pipe(batch);
+
+		/* This is a convenient spot to add the size of the final draw-
+		 * stream packet:
+		 *
+		 * If there are N bins, the final packet, after all the draws are
+		 * done, consists of a 1 followed by N + 17 0's, plus a final 1.
+		 * This uses the otherwise-unused pattern of a non-empty bitfield
+		 * (initial 1) that is nonetheless empty (has all 0's)
+		 */
+		unsigned final_pkt_sz = 1 + batch->num_bins_per_pipe + 17 + 1;
+		batch->prim_strm_bits = align(final_pkt_sz, dword);
+	}
+
+	unsigned prim_strm_bits =
+		primitive_stream_size_bits(info, batch->num_bins_per_pipe);
+	unsigned draw_strm_bits =
+		draw_stream_size_bits(info, batch->num_bins_per_pipe, prim_strm_bits);
+
+#if 0
+	printf("vsc: prim_strm_bits=%d, draw_strm_bits=%d, nb=%u, ic=%u, c=%u, pc=%u (%s)\n",
+			prim_strm_bits, draw_strm_bits, batch->num_bins_per_pipe,
+			info->instance_count, info->count,
+			(info->count * info->instance_count) /
+			u_vertices_per_prim(info->mode),
+			u_prim_name(info->mode));
+#endif
+
+	batch->prim_strm_bits += prim_strm_bits;
+	batch->draw_strm_bits += draw_strm_bits;
+}
+
diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_vsc.h b/src/gallium/drivers/freedreno/a6xx/fd6_vsc.h
new file mode 100644
index 00000000000..50470f683e0
--- /dev/null
+++ b/src/gallium/drivers/freedreno/a6xx/fd6_vsc.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright © 2020 Google, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef FD6_VSC_H_
+#define FD6_VSC_H_
+
+void fd6_vsc_update_sizes(struct fd_batch *batch, const struct pipe_draw_info *info);
+
+#endif /* FD6_VSC_H_ */
diff --git a/src/gallium/drivers/freedreno/freedreno_batch.c b/src/gallium/drivers/freedreno/freedreno_batch.c
index 106959b4fed..082329eb59c 100644
--- a/src/gallium/drivers/freedreno/freedreno_batch.c
+++ b/src/gallium/drivers/freedreno/freedreno_batch.c
@@ -84,6 +84,9 @@ batch_init(struct fd_batch *batch)
 	batch->gmem_reason = 0;
 	batch->num_draws = 0;
 	batch->num_vertices = 0;
+	batch->num_bins_per_pipe = 0;
+	batch->prim_strm_bits = 0;
+	batch->draw_strm_bits = 0;
 	batch->stage = FD_STAGE_NULL;
 
 	fd_reset_wfi(batch);
diff --git a/src/gallium/drivers/freedreno/freedreno_batch.h b/src/gallium/drivers/freedreno/freedreno_batch.h
index 9266790bb80..479d78d5eca 100644
--- a/src/gallium/drivers/freedreno/freedreno_batch.h
+++ b/src/gallium/drivers/freedreno/freedreno_batch.h
@@ -129,9 +129,16 @@ struct fd_batch {
 	 */
 	const struct fd_gmem_stateobj *gmem_state;
 
-	unsigned num_draws;   /* number of draws in current batch */
+	unsigned num_draws;      /* number of draws in current batch */
 	unsigned num_vertices;   /* number of vertices in current batch */
 
+	/* Currently only used on a6xx, to calculate vsc prim/draw stream
+	 * sizes:
+	 */
+	unsigned num_bins_per_pipe;
+	unsigned prim_strm_bits;
+	unsigned draw_strm_bits;
+
 	/* Track the maximal bounds of the scissor of all the draws within a
 	 * batch.  Used at the tile rendering step (fd_gmem_render_tiles(),
 	 * mem2gmem/gmem2mem) to avoid needlessly moving data in/out of gmem.
diff --git a/src/gallium/drivers/freedreno/meson.build b/src/gallium/drivers/freedreno/meson.build
index a5c0935e07e..919baf070de 100644
--- a/src/gallium/drivers/freedreno/meson.build
+++ b/src/gallium/drivers/freedreno/meson.build
@@ -205,6 +205,8 @@ files_libfreedreno = files(
   'a6xx/fd6_screen.h',
   'a6xx/fd6_texture.c',
   'a6xx/fd6_texture.h',
+  'a6xx/fd6_vsc.c',
+  'a6xx/fd6_vsc.h',
   'a6xx/fd6_zsa.c',
   'a6xx/fd6_zsa.h',
   'ir3/ir3_cache.c',



More information about the mesa-commit mailing list