[Mesa-dev] [PATCH] freedreno: pack texture buffer objects in 2d logical space

Ilia Mirkin imirkin at alum.mit.edu
Sun Sep 4 23:22:48 UTC 2016


This artificially converts a buffer into an 8K x N (a3xx) or 16K x N (a4xx)
2D texture to fetch texels from. As a result we can access up to 8K x 8K
texels on a3xx, and 16K x 16K on a4xx. This could be further expanded into
3D space if necessary, but 64M should be enough.

We have to check out-of-bounds conditions in the shader since otherwise
we wouldn't be able to prevent a situation where the last line of the
texture covers unallocated pages.

Signed-off-by: Ilia Mirkin <imirkin at alum.mit.edu>
---

The limits we were previously allowing are too small. The spec requires at least 64K.

 src/gallium/drivers/freedreno/a3xx/fd3_texture.c   | 15 +++++---
 src/gallium/drivers/freedreno/a4xx/fd4_texture.c   | 13 +++----
 src/gallium/drivers/freedreno/freedreno_screen.c   |  7 ++--
 .../drivers/freedreno/ir3/ir3_compiler_nir.c       | 40 ++++++++++++++++++++--
 src/gallium/drivers/freedreno/ir3/ir3_shader.c     | 32 +++++++++++++++++
 src/gallium/drivers/freedreno/ir3/ir3_shader.h     |  7 +++-
 6 files changed, 94 insertions(+), 20 deletions(-)

diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_texture.c b/src/gallium/drivers/freedreno/a3xx/fd3_texture.c
index 94caaed..875bd49 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_texture.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_texture.c
@@ -194,10 +194,10 @@ tex_type(unsigned target)
 	switch (target) {
 	default:
 		assert(0);
-	case PIPE_BUFFER:
 	case PIPE_TEXTURE_1D:
 	case PIPE_TEXTURE_1D_ARRAY:
 		return A3XX_TEX_1D;
+	case PIPE_BUFFER:
 	case PIPE_TEXTURE_RECT:
 	case PIPE_TEXTURE_2D:
 	case PIPE_TEXTURE_2D_ARRAY:
@@ -238,11 +238,16 @@ fd3_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc,
 		so->texconst0 |= A3XX_TEX_CONST_0_SRGB;
 
 	if (prsc->target == PIPE_BUFFER) {
+		unsigned elements =
+			cso->u.buf.size / util_format_get_blocksize(cso->format);
 		lvl = 0;
 		so->texconst1 =
 			A3XX_TEX_CONST_1_FETCHSIZE(fd3_pipe2fetchsize(cso->format)) |
-			A3XX_TEX_CONST_1_WIDTH(cso->u.buf.size / util_format_get_blocksize(cso->format)) |
-			A3XX_TEX_CONST_1_HEIGHT(1);
+			A3XX_TEX_CONST_1_WIDTH(MIN2(elements, 8192)) |
+			A3XX_TEX_CONST_1_HEIGHT(DIV_ROUND_UP(elements, 8192));
+		so->texconst2 =
+			A3XX_TEX_CONST_2_PITCH(MIN2(elements, 8192) *
+								   util_format_get_blocksize(cso->format));
 	} else {
 		unsigned miplevels;
 
@@ -254,10 +259,10 @@ fd3_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc,
 			A3XX_TEX_CONST_1_FETCHSIZE(fd3_pipe2fetchsize(cso->format)) |
 			A3XX_TEX_CONST_1_WIDTH(u_minify(prsc->width0, lvl)) |
 			A3XX_TEX_CONST_1_HEIGHT(u_minify(prsc->height0, lvl));
+		so->texconst2 =
+			A3XX_TEX_CONST_2_PITCH(fd3_pipe2nblocksx(cso->format, rsc->slices[lvl].pitch) * rsc->cpp);
 	}
 	/* when emitted, A3XX_TEX_CONST_2_INDX() must be OR'd in: */
-	so->texconst2 =
-			A3XX_TEX_CONST_2_PITCH(fd3_pipe2nblocksx(cso->format, rsc->slices[lvl].pitch) * rsc->cpp);
 	switch (prsc->target) {
 	case PIPE_TEXTURE_1D_ARRAY:
 	case PIPE_TEXTURE_2D_ARRAY:
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_texture.c b/src/gallium/drivers/freedreno/a4xx/fd4_texture.c
index 4faecee..06645ca 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_texture.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_texture.c
@@ -195,10 +195,10 @@ tex_type(unsigned target)
 	switch (target) {
 	default:
 		assert(0);
-	case PIPE_BUFFER:
 	case PIPE_TEXTURE_1D:
 	case PIPE_TEXTURE_1D_ARRAY:
 		return A4XX_TEX_1D;
+	case PIPE_BUFFER:
 	case PIPE_TEXTURE_RECT:
 	case PIPE_TEXTURE_2D:
 	case PIPE_TEXTURE_2D_ARRAY:
@@ -249,15 +249,16 @@ fd4_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc,
 	}
 
 	if (cso->target == PIPE_BUFFER) {
-		unsigned elements = cso->u.buf.size / util_format_get_blocksize(cso->format);
-
+		unsigned elements =
+			cso->u.buf.size / util_format_get_blocksize(cso->format);
 		lvl = 0;
 		so->texconst1 =
-			A4XX_TEX_CONST_1_WIDTH(elements) |
-			A4XX_TEX_CONST_1_HEIGHT(1);
+			A4XX_TEX_CONST_1_WIDTH(MIN2(elements, 16384)) |
+			A4XX_TEX_CONST_1_HEIGHT(DIV_ROUND_UP(elements, 16384));
 		so->texconst2 =
 			A4XX_TEX_CONST_2_FETCHSIZE(fd4_pipe2fetchsize(cso->format)) |
-			A4XX_TEX_CONST_2_PITCH(elements * rsc->cpp);
+			A4XX_TEX_CONST_2_PITCH(MIN2(elements, 16384) *
+								   util_format_get_blocksize(cso->format));
 		so->offset = cso->u.buf.offset;
 	} else {
 		unsigned miplevels;
diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c
index fbdd1e2..c67e2c0 100644
--- a/src/gallium/drivers/freedreno/freedreno_screen.c
+++ b/src/gallium/drivers/freedreno/freedreno_screen.c
@@ -218,11 +218,8 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
 		if (is_a4xx(screen)) return 32;
 		return 0;
 	case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE:
-		/* We could possibly emulate more by pretending 2d/rect textures and
-		 * splitting high bits of index into 2nd dimension..
-		 */
-		if (is_a3xx(screen)) return 8192;
-		if (is_a4xx(screen)) return 16384;
+		if (is_a3xx(screen)) return 8192 * 8192;
+		if (is_a4xx(screen)) return 16384 * 16384;
 		return 0;
 
 	case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
index b1b9d6b..4a5a5f6 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
@@ -113,6 +113,9 @@ struct ir3_compile {
 
 	unsigned max_texture_index;
 
+	/* a3xx and a4xx have different max texture sizes */
+	uint8_t max_texture_size_log2;
+
 	/* set if we encounter something we can't handle yet, so we
 	 * can bail cleanly and fallback to TGSI compiler f/e
 	 */
@@ -136,6 +139,7 @@ compile_init(struct ir3_compiler *compiler,
 		ctx->levels_add_one = false;
 		ctx->unminify_coords = false;
 		ctx->array_index_add_half = true;
+		ctx->max_texture_size_log2 = 14;
 
 		if (so->type == SHADER_VERTEX)
 			ctx->astc_srgb = so->key.vastc_srgb;
@@ -148,6 +152,7 @@ compile_init(struct ir3_compiler *compiler,
 		ctx->levels_add_one = true;
 		ctx->unminify_coords = true;
 		ctx->array_index_add_half = false;
+		ctx->max_texture_size_log2 = 13;
 	}
 
 	ctx->compiler = compiler;
@@ -187,6 +192,7 @@ compile_init(struct ir3_compiler *compiler,
 	 *
 	 *    num_uniform * vec4  -  user consts
 	 *    4 * vec4            -  UBO addresses
+	 *    4 * vec4            -  TBO lengths
 	 *    if (vertex shader) {
 	 *        N * vec4        -  driver params (IR3_DP_*)
 	 *        1 * vec4        -  stream-out addresses
@@ -199,6 +205,9 @@ compile_init(struct ir3_compiler *compiler,
 	/* reserve 4 (vec4) slots for ubo base addresses: */
 	so->first_immediate += 4;
 
+	/* reserve 4 (vec4) slots for tbo lengths: */
+	so->first_immediate += 4;
+
 	if (so->type == SHADER_VERTEX) {
 		/* driver params (see ir3_driver_param): */
 		so->first_immediate += IR3_DP_COUNT/4;  /* convert to vec4 */
@@ -1340,6 +1349,14 @@ tex_info(nir_tex_instr *tex, unsigned *flagsp, unsigned *coordsp)
 	*coordsp = coords;
 }
 
+/* fetch the number of elements in the TBO: */
+static struct ir3_instruction *
+tex_tbo_length(struct ir3_compile *ctx, unsigned tex_idx)
+{
+	unsigned tbo = regid(ctx->so->first_driver_param + IR3_TBOS_OFF, 0);
+	return create_uniform(ctx, tbo + tex_idx);
+}
+
 static void
 emit_tex(struct ir3_compile *ctx, nir_tex_instr *tex)
 {
@@ -1414,6 +1431,8 @@ emit_tex(struct ir3_compile *ctx, nir_tex_instr *tex)
 		return;
 	}
 
+	unsigned tex_idx = tex->texture_index;
+
 	tex_info(tex, &flags, &coords);
 
 	/*
@@ -1440,7 +1459,24 @@ emit_tex(struct ir3_compile *ctx, nir_tex_instr *tex)
 			src0[i] = ir3_SHL_B(b, src0[i], 0, lod, 0);
 	}
 
-	if (coords == 1) {
+	if (tex->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
+		/* need to clamp the coordinate to the number of elements manually */
+		struct ir3_instruction *elements = tex_tbo_length(ctx, tex_idx);
+		struct ir3_instruction *cond =
+			ir3_CMPS_U(ctx->block, coord[0], 0, elements, 0);
+		cond->cat2.condition = IR3_COND_LT;
+
+		src0[0] = ir3_AND_B(b, coord[0], 0,
+							create_immed(b, (1 << ctx->max_texture_size_log2) - 1), 0);
+		src0[nsrc0++] = ir3_SHR_B(b, coord[0], 0,
+								  create_immed(b, ctx->max_texture_size_log2), 0);
+
+		/* If the coordinate is out of range, set it to -1 */
+		src0[0] = ir3_SEL_B32(b, src0[0], 0, cond, 0, create_immed(b, ~0U), 0);
+		src0[1] = ir3_SEL_B32(b, src0[1], 0, cond, 0, create_immed(b, ~0U), 0);
+
+		ctx->so->has_tbo = true;
+	} else if (coords == 1) {
 		/* hw doesn't do 1d, so we treat it as 2d with
 		 * height of 1, and patch up the y coord.
 		 * TODO: y coord should be (int)0 in some cases..
@@ -1518,8 +1554,6 @@ emit_tex(struct ir3_compile *ctx, nir_tex_instr *tex)
 	if (opc == OPC_GETLOD)
 		type = TYPE_U32;
 
-	unsigned tex_idx = tex->texture_index;
-
 	ctx->max_texture_index = MAX2(ctx->max_texture_index, tex_idx);
 
 	struct ir3_instruction *col0 = create_collect(b, src0, nsrc0);
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.c b/src/gallium/drivers/freedreno/ir3/ir3_shader.c
index 76460d9..f654bef 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_shader.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.c
@@ -552,6 +552,33 @@ emit_ubos(struct fd_context *ctx, const struct ir3_shader_variant *v,
 	}
 }
 
+/* emit tbo element lengths: */
+static void
+emit_tbos(struct fd_context *ctx, const struct ir3_shader_variant *v,
+		struct fd_ringbuffer *ring, struct fd_texture_stateobj *textures)
+{
+	uint32_t offset = v->first_driver_param + IR3_TBOS_OFF;
+	if (v->constlen > offset) {
+		uint32_t params = MIN2(4, v->constlen - offset) * 4;
+		uint32_t lengths[params];
+
+		for (uint32_t i = 0; i < params; i++) {
+			struct pipe_sampler_view *tex = textures->textures[i];
+
+			if (tex && tex->texture && tex->target == PIPE_BUFFER) {
+				lengths[i] =
+					tex->u.buf.size / util_format_get_blocksize(tex->format);
+			} else {
+				lengths[i] = 0;
+			}
+		}
+
+		fd_wfi(ctx->batch, ring);
+		ctx->emit_const(ring, v->type, offset * 4, 0,
+						params, lengths, NULL);
+	}
+}
+
 static void
 emit_immediates(struct fd_context *ctx, const struct ir3_shader_variant *v,
 		struct fd_ringbuffer *ring)
@@ -658,13 +685,16 @@ ir3_emit_consts(const struct ir3_shader_variant *v, struct fd_ringbuffer *ring,
 {
 	if (dirty & (FD_DIRTY_PROG | FD_DIRTY_CONSTBUF)) {
 		struct fd_constbuf_stateobj *constbuf;
+		struct fd_texture_stateobj *textures;
 		bool shader_dirty;
 
 		if (v->type == SHADER_VERTEX) {
 			constbuf = &ctx->constbuf[PIPE_SHADER_VERTEX];
+			textures = &ctx->verttex;
 			shader_dirty = !!(dirty & FD_SHADER_DIRTY_VP);
 		} else if (v->type == SHADER_FRAGMENT) {
 			constbuf = &ctx->constbuf[PIPE_SHADER_FRAGMENT];
+			textures = &ctx->fragtex;
 			shader_dirty = !!(dirty & FD_SHADER_DIRTY_FP);
 		} else {
 			unreachable("bad shader type");
@@ -673,6 +703,8 @@ ir3_emit_consts(const struct ir3_shader_variant *v, struct fd_ringbuffer *ring,
 
 		emit_user_consts(ctx, v, ring, constbuf);
 		emit_ubos(ctx, v, ring, constbuf);
+		if (v->has_tbo)
+			emit_tbos(ctx, v, ring, textures);
 		if (shader_dirty)
 			emit_immediates(ctx, v, ring);
 	}
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.h b/src/gallium/drivers/freedreno/ir3/ir3_shader.h
index 8c9483e..da8996c 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_shader.h
+++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.h
@@ -50,6 +50,7 @@ enum ir3_driver_param {
  *
  *    num_uniform * vec4  -  user consts
  *    4 * vec4            -  UBO addresses
+ *    4 * vec4            -  TBO lengths
  *    if (vertex shader) {
  *        N * vec4        -  driver params (IR3_DP_*)
  *        1 * vec4        -  stream-out addresses
@@ -59,7 +60,8 @@ enum ir3_driver_param {
  * that we don't need..
  */
 #define IR3_UBOS_OFF         0  /* UBOs after user consts */
-#define IR3_DRIVER_PARAM_OFF 4  /* driver params after UBOs */
+#define IR3_TBOS_OFF         4  /* TBO lengths after UBOs */
+#define IR3_DRIVER_PARAM_OFF 8  /* driver params after TBO lengths */
 #define IR3_TFBOS_OFF       (IR3_DRIVER_PARAM_OFF + IR3_DP_COUNT/4)
 
 /* Configuration key used to identify a shader variant.. different
@@ -213,6 +215,9 @@ struct ir3_shader_variant {
 	/* do we have one or more texture sample instructions: */
 	bool has_samp;
 
+	/* do we have texture buffer accesses: */
+	bool has_tbo;
+
 	/* do we have kill instructions: */
 	bool has_kill;
 
-- 
2.7.3



More information about the mesa-dev mailing list