[Freedreno] [PATCH] freedreno: pack texture buffer objects in 2d logical space
Ilia Mirkin
imirkin at alum.mit.edu
Sun Sep 4 23:22:48 UTC 2016
This artificially converts a buffer into an 8K x N 2D texture to fetch
texels from. As a result we can access up to 8K x 8K texels on a3xx, and
16K x 16K on a4xx. This could be further expanded into 3D space if
necessary, but 64M should be enough.
We have to check out-of-bounds conditions in the shader since otherwise
we wouldn't be able to prevent a situation where the last line of the
texture covers unallocated pages.
Signed-off-by: Ilia Mirkin <imirkin at alum.mit.edu>
---
The limits we were previously allowing are too small. The spec requires at least 64K.
src/gallium/drivers/freedreno/a3xx/fd3_texture.c | 15 +++++---
src/gallium/drivers/freedreno/a4xx/fd4_texture.c | 13 +++----
src/gallium/drivers/freedreno/freedreno_screen.c | 7 ++--
.../drivers/freedreno/ir3/ir3_compiler_nir.c | 40 ++++++++++++++++++++--
src/gallium/drivers/freedreno/ir3/ir3_shader.c | 32 +++++++++++++++++
src/gallium/drivers/freedreno/ir3/ir3_shader.h | 7 +++-
6 files changed, 94 insertions(+), 20 deletions(-)
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_texture.c b/src/gallium/drivers/freedreno/a3xx/fd3_texture.c
index 94caaed..875bd49 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_texture.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_texture.c
@@ -194,10 +194,10 @@ tex_type(unsigned target)
switch (target) {
default:
assert(0);
- case PIPE_BUFFER:
case PIPE_TEXTURE_1D:
case PIPE_TEXTURE_1D_ARRAY:
return A3XX_TEX_1D;
+ case PIPE_BUFFER:
case PIPE_TEXTURE_RECT:
case PIPE_TEXTURE_2D:
case PIPE_TEXTURE_2D_ARRAY:
@@ -238,11 +238,16 @@ fd3_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc,
so->texconst0 |= A3XX_TEX_CONST_0_SRGB;
if (prsc->target == PIPE_BUFFER) {
+ unsigned elements =
+ cso->u.buf.size / util_format_get_blocksize(cso->format);
lvl = 0;
so->texconst1 =
A3XX_TEX_CONST_1_FETCHSIZE(fd3_pipe2fetchsize(cso->format)) |
- A3XX_TEX_CONST_1_WIDTH(cso->u.buf.size / util_format_get_blocksize(cso->format)) |
- A3XX_TEX_CONST_1_HEIGHT(1);
+ A3XX_TEX_CONST_1_WIDTH(MIN2(elements, 8192)) |
+ A3XX_TEX_CONST_1_HEIGHT(DIV_ROUND_UP(elements, 8192));
+ so->texconst2 =
+ A3XX_TEX_CONST_2_PITCH(MIN2(elements, 8192) *
+ util_format_get_blocksize(cso->format));
} else {
unsigned miplevels;
@@ -254,10 +259,10 @@ fd3_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc,
A3XX_TEX_CONST_1_FETCHSIZE(fd3_pipe2fetchsize(cso->format)) |
A3XX_TEX_CONST_1_WIDTH(u_minify(prsc->width0, lvl)) |
A3XX_TEX_CONST_1_HEIGHT(u_minify(prsc->height0, lvl));
+ so->texconst2 =
+ A3XX_TEX_CONST_2_PITCH(fd3_pipe2nblocksx(cso->format, rsc->slices[lvl].pitch) * rsc->cpp);
}
/* when emitted, A3XX_TEX_CONST_2_INDX() must be OR'd in: */
- so->texconst2 =
- A3XX_TEX_CONST_2_PITCH(fd3_pipe2nblocksx(cso->format, rsc->slices[lvl].pitch) * rsc->cpp);
switch (prsc->target) {
case PIPE_TEXTURE_1D_ARRAY:
case PIPE_TEXTURE_2D_ARRAY:
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_texture.c b/src/gallium/drivers/freedreno/a4xx/fd4_texture.c
index 4faecee..06645ca 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_texture.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_texture.c
@@ -195,10 +195,10 @@ tex_type(unsigned target)
switch (target) {
default:
assert(0);
- case PIPE_BUFFER:
case PIPE_TEXTURE_1D:
case PIPE_TEXTURE_1D_ARRAY:
return A4XX_TEX_1D;
+ case PIPE_BUFFER:
case PIPE_TEXTURE_RECT:
case PIPE_TEXTURE_2D:
case PIPE_TEXTURE_2D_ARRAY:
@@ -249,15 +249,16 @@ fd4_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc,
}
if (cso->target == PIPE_BUFFER) {
- unsigned elements = cso->u.buf.size / util_format_get_blocksize(cso->format);
-
+ unsigned elements =
+ cso->u.buf.size / util_format_get_blocksize(cso->format);
lvl = 0;
so->texconst1 =
- A4XX_TEX_CONST_1_WIDTH(elements) |
- A4XX_TEX_CONST_1_HEIGHT(1);
+ A4XX_TEX_CONST_1_WIDTH(MIN2(elements, 16384)) |
+ A4XX_TEX_CONST_1_HEIGHT(DIV_ROUND_UP(elements, 16384));
so->texconst2 =
A4XX_TEX_CONST_2_FETCHSIZE(fd4_pipe2fetchsize(cso->format)) |
- A4XX_TEX_CONST_2_PITCH(elements * rsc->cpp);
+ A4XX_TEX_CONST_2_PITCH(MIN2(elements, 16384) *
+ util_format_get_blocksize(cso->format));
so->offset = cso->u.buf.offset;
} else {
unsigned miplevels;
diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c
index fbdd1e2..c67e2c0 100644
--- a/src/gallium/drivers/freedreno/freedreno_screen.c
+++ b/src/gallium/drivers/freedreno/freedreno_screen.c
@@ -218,11 +218,8 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
if (is_a4xx(screen)) return 32;
return 0;
case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE:
- /* We could possibly emulate more by pretending 2d/rect textures and
- * splitting high bits of index into 2nd dimension..
- */
- if (is_a3xx(screen)) return 8192;
- if (is_a4xx(screen)) return 16384;
+ if (is_a3xx(screen)) return 8192 * 8192;
+ if (is_a4xx(screen)) return 16384 * 16384;
return 0;
case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
index b1b9d6b..4a5a5f6 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
@@ -113,6 +113,9 @@ struct ir3_compile {
unsigned max_texture_index;
+ /* a3xx and a4xx have different max texture sizes */
+ uint8_t max_texture_size_log2;
+
/* set if we encounter something we can't handle yet, so we
* can bail cleanly and fallback to TGSI compiler f/e
*/
@@ -136,6 +139,7 @@ compile_init(struct ir3_compiler *compiler,
ctx->levels_add_one = false;
ctx->unminify_coords = false;
ctx->array_index_add_half = true;
+ ctx->max_texture_size_log2 = 14;
if (so->type == SHADER_VERTEX)
ctx->astc_srgb = so->key.vastc_srgb;
@@ -148,6 +152,7 @@ compile_init(struct ir3_compiler *compiler,
ctx->levels_add_one = true;
ctx->unminify_coords = true;
ctx->array_index_add_half = false;
+ ctx->max_texture_size_log2 = 13;
}
ctx->compiler = compiler;
@@ -187,6 +192,7 @@ compile_init(struct ir3_compiler *compiler,
*
* num_uniform * vec4 - user consts
* 4 * vec4 - UBO addresses
+ * 4 * vec4 - TBO lengths
* if (vertex shader) {
* N * vec4 - driver params (IR3_DP_*)
* 1 * vec4 - stream-out addresses
@@ -199,6 +205,9 @@ compile_init(struct ir3_compiler *compiler,
/* reserve 4 (vec4) slots for ubo base addresses: */
so->first_immediate += 4;
+ /* reserve 4 (vec4) slots for tbo lengths: */
+ so->first_immediate += 4;
+
if (so->type == SHADER_VERTEX) {
/* driver params (see ir3_driver_param): */
so->first_immediate += IR3_DP_COUNT/4; /* convert to vec4 */
@@ -1340,6 +1349,14 @@ tex_info(nir_tex_instr *tex, unsigned *flagsp, unsigned *coordsp)
*coordsp = coords;
}
+/* fetch the number of elements in the TBO: */
+static struct ir3_instruction *
+tex_tbo_length(struct ir3_compile *ctx, unsigned tex_idx)
+{
+ unsigned tbo = regid(ctx->so->first_driver_param + IR3_TBOS_OFF, 0);
+ return create_uniform(ctx, tbo + tex_idx);
+}
+
static void
emit_tex(struct ir3_compile *ctx, nir_tex_instr *tex)
{
@@ -1414,6 +1431,8 @@ emit_tex(struct ir3_compile *ctx, nir_tex_instr *tex)
return;
}
+ unsigned tex_idx = tex->texture_index;
+
tex_info(tex, &flags, &coords);
/*
@@ -1440,7 +1459,24 @@ emit_tex(struct ir3_compile *ctx, nir_tex_instr *tex)
src0[i] = ir3_SHL_B(b, src0[i], 0, lod, 0);
}
- if (coords == 1) {
+ if (tex->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
+ /* need to clamp the coordinate to the number of elements manually */
+ struct ir3_instruction *elements = tex_tbo_length(ctx, tex_idx);
+ struct ir3_instruction *cond =
+ ir3_CMPS_U(ctx->block, coord[0], 0, elements, 0);
+ cond->cat2.condition = IR3_COND_LT;
+
+ src0[0] = ir3_AND_B(b, coord[0], 0,
+ create_immed(b, (1 << ctx->max_texture_size_log2) - 1), 0);
+ src0[nsrc0++] = ir3_SHR_B(b, coord[0], 0,
+ create_immed(b, ctx->max_texture_size_log2), 0);
+
+ /* If the coordinate is out of range, set it to -1 */
+ src0[0] = ir3_SEL_B32(b, src0[0], 0, cond, 0, create_immed(b, ~0U), 0);
+ src0[1] = ir3_SEL_B32(b, src0[1], 0, cond, 0, create_immed(b, ~0U), 0);
+
+ ctx->so->has_tbo = true;
+ } else if (coords == 1) {
/* hw doesn't do 1d, so we treat it as 2d with
* height of 1, and patch up the y coord.
* TODO: y coord should be (int)0 in some cases..
@@ -1518,8 +1554,6 @@ emit_tex(struct ir3_compile *ctx, nir_tex_instr *tex)
if (opc == OPC_GETLOD)
type = TYPE_U32;
- unsigned tex_idx = tex->texture_index;
-
ctx->max_texture_index = MAX2(ctx->max_texture_index, tex_idx);
struct ir3_instruction *col0 = create_collect(b, src0, nsrc0);
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.c b/src/gallium/drivers/freedreno/ir3/ir3_shader.c
index 76460d9..f654bef 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_shader.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.c
@@ -552,6 +552,33 @@ emit_ubos(struct fd_context *ctx, const struct ir3_shader_variant *v,
}
}
+/* emit tbo element lengths: */
+static void
+emit_tbos(struct fd_context *ctx, const struct ir3_shader_variant *v,
+ struct fd_ringbuffer *ring, struct fd_texture_stateobj *textures)
+{
+ uint32_t offset = v->first_driver_param + IR3_TBOS_OFF;
+ if (v->constlen > offset) {
+ uint32_t params = MIN2(4, v->constlen - offset) * 4;
+ uint32_t lengths[params];
+
+ for (uint32_t i = 0; i < params; i++) {
+ struct pipe_sampler_view *tex = textures->textures[i];
+
+ if (tex && tex->texture && tex->target == PIPE_BUFFER) {
+ lengths[i] =
+ tex->u.buf.size / util_format_get_blocksize(tex->format);
+ } else {
+ lengths[i] = 0;
+ }
+ }
+
+ fd_wfi(ctx->batch, ring);
+ ctx->emit_const(ring, v->type, offset * 4, 0,
+ params, lengths, NULL);
+ }
+}
+
static void
emit_immediates(struct fd_context *ctx, const struct ir3_shader_variant *v,
struct fd_ringbuffer *ring)
@@ -658,13 +685,16 @@ ir3_emit_consts(const struct ir3_shader_variant *v, struct fd_ringbuffer *ring,
{
if (dirty & (FD_DIRTY_PROG | FD_DIRTY_CONSTBUF)) {
struct fd_constbuf_stateobj *constbuf;
+ struct fd_texture_stateobj *textures;
bool shader_dirty;
if (v->type == SHADER_VERTEX) {
constbuf = &ctx->constbuf[PIPE_SHADER_VERTEX];
+ textures = &ctx->verttex;
shader_dirty = !!(dirty & FD_SHADER_DIRTY_VP);
} else if (v->type == SHADER_FRAGMENT) {
constbuf = &ctx->constbuf[PIPE_SHADER_FRAGMENT];
+ textures = &ctx->fragtex;
shader_dirty = !!(dirty & FD_SHADER_DIRTY_FP);
} else {
unreachable("bad shader type");
@@ -673,6 +703,8 @@ ir3_emit_consts(const struct ir3_shader_variant *v, struct fd_ringbuffer *ring,
emit_user_consts(ctx, v, ring, constbuf);
emit_ubos(ctx, v, ring, constbuf);
+ if (v->has_tbo)
+ emit_tbos(ctx, v, ring, textures);
if (shader_dirty)
emit_immediates(ctx, v, ring);
}
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.h b/src/gallium/drivers/freedreno/ir3/ir3_shader.h
index 8c9483e..da8996c 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_shader.h
+++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.h
@@ -50,6 +50,7 @@ enum ir3_driver_param {
*
* num_uniform * vec4 - user consts
* 4 * vec4 - UBO addresses
+ * 4 * vec4 - TBO lengths
* if (vertex shader) {
* N * vec4 - driver params (IR3_DP_*)
* 1 * vec4 - stream-out addresses
@@ -59,7 +60,8 @@ enum ir3_driver_param {
* that we don't need..
*/
#define IR3_UBOS_OFF 0 /* UBOs after user consts */
-#define IR3_DRIVER_PARAM_OFF 4 /* driver params after UBOs */
+#define IR3_TBOS_OFF 4 /* TBO lengths after UBOs */
+#define IR3_DRIVER_PARAM_OFF 8 /* driver params after TBO lengths */
#define IR3_TFBOS_OFF (IR3_DRIVER_PARAM_OFF + IR3_DP_COUNT/4)
/* Configuration key used to identify a shader variant.. different
@@ -213,6 +215,9 @@ struct ir3_shader_variant {
/* do we have one or more texture sample instructions: */
bool has_samp;
+ /* do we have texture buffer accesses: */
+ bool has_tbo;
+
/* do we have kill instructions: */
bool has_kill;
--
2.7.3
More information about the Freedreno
mailing list