[Mesa-dev] [PATCH] panfrost: Backport driver to Mali T600/T700
Alyssa Rosenzweig
alyssa at rosenzweig.io
Thu Feb 14 01:58:21 UTC 2019
There are a few differences between Mali T860 (Panfrost's primary
reference target) and the older Midgard generations (T600/T700):
- Miscellaneous different magic numbers. It's not clear what these
numbers mean on either the old or new configurations yet.
- Errata fixes. T800 is the final Midgard generation and presumably the
least buggy. Older Midgard has some extra hardware errata we have to
work around.
- SFBD vs MFBD split. Essentially, older Midgard uses a Single
FrameBuffer Descriptor (SFBD), which corresponds to single
render-target rendering. Newer Midgard (T760+) uses a Multiple
FrameBuffer Descriptor (MFBD), allowing multiple RTs. On ES 2.0, these
descriptors serve the same function, but we implement both, depending on
the version of the hardware.
- CPU bitness. 32-bit systems generally use 32-bit GPU descriptors, and
vice versa for 64-bit. Our target T760 systems are 32-bit whereas our
target T860 systems are 64-bit. More work is needed in this area.
This patch addresses each of these areas in order to support older
Midgard hardware. It is tested on Mali T760 and Mali T860.
Signed-off-by: Alyssa Rosenzweig <alyssa at rosenzweig.io>
---
.../drivers/panfrost/include/panfrost-job.h | 21 +-
src/gallium/drivers/panfrost/meson.build | 1 +
src/gallium/drivers/panfrost/pan_assemble.c | 4 +-
src/gallium/drivers/panfrost/pan_blending.c | 4 +-
src/gallium/drivers/panfrost/pan_context.c | 541 ++++++++++--------
src/gallium/drivers/panfrost/pan_context.h | 31 +-
6 files changed, 340 insertions(+), 262 deletions(-)
diff --git a/src/gallium/drivers/panfrost/include/panfrost-job.h b/src/gallium/drivers/panfrost/include/panfrost-job.h
index dbb5486bfa4..3b51fcfddef 100644
--- a/src/gallium/drivers/panfrost/include/panfrost-job.h
+++ b/src/gallium/drivers/panfrost/include/panfrost-job.h
@@ -30,8 +30,6 @@
#include <stdint.h>
#include <panfrost-misc.h>
-#define T8XX
-
#define MALI_SHORT_PTR_BITS (sizeof(uintptr_t)*8)
#define MALI_FBD_HIERARCHY_WEIGHTS 8
@@ -120,7 +118,7 @@ enum mali_alt_func {
#define MALI_HAS_MSAA (1 << 0)
#define MALI_CAN_DISCARD (1 << 5)
-/* Applies on T6XX, specifying that programmable blending is in use */
+/* Applies on SFBD systems, specifying that programmable blending is in use */
#define MALI_HAS_BLEND_SHADER (1 << 6)
/* func is mali_func */
@@ -404,7 +402,7 @@ enum mali_format {
#define MALI_NO_ALPHA_TO_COVERAGE (1 << 10)
struct mali_blend_meta {
-#ifdef T8XX
+#ifndef BIFROST
/* Base value of 0x200.
* OR with 0x1 for blending (anything other than REPLACE).
* OR with 0x2 for programmable blending
@@ -995,7 +993,7 @@ struct mali_vertex_tiler_postfix {
mali_ptr framebuffer;
#ifdef __LP64__
-#ifndef T8XX
+#ifdef BIFROST
/* most likely padding to make this a multiple of 64 bytes */
u64 zero7;
#endif
@@ -1003,29 +1001,26 @@ struct mali_vertex_tiler_postfix {
} __attribute__((packed));
struct midgard_payload_vertex_tiler {
-#ifdef T6XX
+#ifndef __LP64__
union midgard_primitive_size primitive_size;
#endif
struct mali_vertex_tiler_prefix prefix;
-#ifdef T6XX
+#ifndef __LP64__
u32 zero3;
#endif
+
u32 gl_enables; // 0x5
/* Offset for first vertex in buffer */
u32 draw_start;
-#ifdef T6XX
- u32 zero5;
-#else
- u64 zero5;
-#endif
+ uintptr_t zero5;
struct mali_vertex_tiler_postfix postfix;
-#ifdef T8XX
+#ifdef __LP64__
union midgard_primitive_size primitive_size;
#endif
} __attribute__((packed));
diff --git a/src/gallium/drivers/panfrost/meson.build b/src/gallium/drivers/panfrost/meson.build
index 5e799eae119..79c1639a3e1 100644
--- a/src/gallium/drivers/panfrost/meson.build
+++ b/src/gallium/drivers/panfrost/meson.build
@@ -63,6 +63,7 @@ nondrm_overlay_check = run_command('ls', overlay)
has_nondrm_overlay = nondrm_overlay_check.returncode() == 0
if has_nondrm_overlay
+ subdir('nondrm/include')
files_panfrost += files('nondrm/pan_nondrm.c')
inc_panfrost += include_directories('nondrm/include')
compile_args_panfrost += '-DPAN_NONDRM_OVERLAY'
diff --git a/src/gallium/drivers/panfrost/pan_assemble.c b/src/gallium/drivers/panfrost/pan_assemble.c
index 44136acc18a..4cbbecce0f7 100644
--- a/src/gallium/drivers/panfrost/pan_assemble.c
+++ b/src/gallium/drivers/panfrost/pan_assemble.c
@@ -96,8 +96,8 @@ panfrost_shader_compile(struct panfrost_context *ctx, struct mali_shader_meta *m
if (type == JOB_TYPE_VERTEX)
meta->varying_count += 1;
- /* gl_FragCoord does -not- eat an extra spot; it will be included in our count if we need it */
-
+ /* Note: gl_FragCoord does -not- eat an extra spot; it will be included
+ * in our count if we need it */
meta->midgard1.unknown2 = 8; /* XXX */
diff --git a/src/gallium/drivers/panfrost/pan_blending.c b/src/gallium/drivers/panfrost/pan_blending.c
index 058fb6bda84..cecdd780ce1 100644
--- a/src/gallium/drivers/panfrost/pan_blending.c
+++ b/src/gallium/drivers/panfrost/pan_blending.c
@@ -296,7 +296,7 @@ panfrost_make_fixed_blend_part(unsigned func, unsigned src_factor, unsigned dst_
* fixed-function operation breaks down. */
static bool
-panfrost_make_constant(unsigned *factors, unsigned num_factors, const struct pipe_blend_color *blend_color, float *out)
+panfrost_make_constant(unsigned *factors, unsigned num_factors, const struct pipe_blend_color *blend_color, void *out)
{
/* Color components used */
bool cc[4] = { false };
@@ -335,7 +335,7 @@ panfrost_make_constant(unsigned *factors, unsigned num_factors, const struct pip
/* We have the constant -- success! */
- *out = constant;
+ memcpy(out, &constant, sizeof(float));
return true;
}
diff --git a/src/gallium/drivers/panfrost/pan_context.c b/src/gallium/drivers/panfrost/pan_context.c
index 565e6541b6c..44c0ea3e8f5 100644
--- a/src/gallium/drivers/panfrost/pan_context.c
+++ b/src/gallium/drivers/panfrost/pan_context.c
@@ -58,22 +58,35 @@ static int performance_counter_number = 0;
/* TODO: Sample size, etc */
+/* True for t6XX, false for t8xx. TODO: Run-time settable for automatic
+ * hardware configuration. */
+
+static bool is_t6xx = false;
+
+/* If set, we'll require the use of single render-target framebuffer
+ * descriptors (SFBD), for older hardware -- specifically, <T760 hardware. If
+ * false, we'll use the MFBD no matter what. New hardware -does- retain support
+ * for SFBD, and in theory we could flip between them on a per-RT basis, but
+ * there's no real advantage to doing so */
+
+static bool require_sfbd = false;
+
static void
panfrost_set_framebuffer_msaa(struct panfrost_context *ctx, bool enabled)
{
SET_BIT(ctx->fragment_shader_core.unknown2_3, MALI_HAS_MSAA, enabled);
SET_BIT(ctx->fragment_shader_core.unknown2_4, MALI_NO_MSAA, !enabled);
-#ifdef SFBD
- SET_BIT(ctx->fragment_fbd.format, MALI_FRAMEBUFFER_MSAA_A | MALI_FRAMEBUFFER_MSAA_B, enabled);
-#else
- SET_BIT(ctx->fragment_rts[0].format, MALI_MFBD_FORMAT_MSAA, enabled);
+ if (require_sfbd) {
+ SET_BIT(ctx->fragment_sfbd.format, MALI_FRAMEBUFFER_MSAA_A | MALI_FRAMEBUFFER_MSAA_B, enabled);
+ } else {
+ SET_BIT(ctx->fragment_rts[0].format, MALI_MFBD_FORMAT_MSAA, enabled);
- SET_BIT(ctx->fragment_fbd.unk1, (1 << 4) | (1 << 1), enabled);
+ SET_BIT(ctx->fragment_mfbd.unk1, (1 << 4) | (1 << 1), enabled);
- /* XXX */
- ctx->fragment_fbd.rt_count_2 = enabled ? 4 : 1;
-#endif
+ /* XXX */
+ ctx->fragment_mfbd.rt_count_2 = enabled ? 4 : 1;
+ }
}
/* AFBC is enabled on a per-resource basis (AFBC enabling is theoretically
@@ -85,7 +98,11 @@ panfrost_set_framebuffer_msaa(struct panfrost_context *ctx, bool enabled)
static void
panfrost_enable_afbc(struct panfrost_context *ctx, struct panfrost_resource *rsrc, bool ds)
{
-#ifdef MFBD
+ if (require_sfbd) {
+ printf("AFBC not supported yet on SFBD\n");
+ assert(0);
+ }
+
struct pipe_context *gallium = (struct pipe_context *) ctx;
struct panfrost_screen *screen = pan_screen(gallium->screen);
/* AFBC metadata is 16 bytes per tile */
@@ -109,10 +126,6 @@ panfrost_enable_afbc(struct panfrost_context *ctx, struct panfrost_resource *rsr
rsrc->bo->gpu[0] = rsrc->bo->afbc_slab.gpu | (ds ? 0 : 1);
rsrc->bo->cpu[0] = rsrc->bo->afbc_slab.cpu;
-#else
- printf("AFBC not supported yet on SFBD\n");
- assert(0);
-#endif
}
static void
@@ -144,6 +157,11 @@ panfrost_set_fragment_afbc(struct panfrost_context *ctx)
if (!rsrc->bo->has_afbc)
continue;
+ if (require_sfbd) {
+ fprintf(stderr, "Color AFBC not supported on SFBD\n");
+ assert(0);
+ }
+
/* Enable AFBC for the render target */
ctx->fragment_rts[0].afbc.metadata = rsrc->bo->afbc_slab.gpu;
ctx->fragment_rts[0].afbc.stride = 0;
@@ -163,7 +181,12 @@ panfrost_set_fragment_afbc(struct panfrost_context *ctx)
struct panfrost_resource *rsrc = (struct panfrost_resource *) ctx->pipe_framebuffer.zsbuf->texture;
if (rsrc->bo->has_afbc) {
- ctx->fragment_fbd.unk3 |= MALI_MFBD_EXTRA;
+ if (require_sfbd) {
+ fprintf(stderr, "Depth AFBC not supported on SFBD\n");
+ assert(0);
+ }
+
+ ctx->fragment_mfbd.unk3 |= MALI_MFBD_EXTRA;
ctx->fragment_extra.ds_afbc.depth_stencil_afbc_metadata = rsrc->bo->afbc_slab.gpu;
ctx->fragment_extra.ds_afbc.depth_stencil_afbc_stride = 0;
@@ -175,13 +198,18 @@ panfrost_set_fragment_afbc(struct panfrost_context *ctx)
ctx->fragment_extra.unk = 0x435; /* General 0x400 in all unks. 0x5 for depth/stencil. 0x10 for AFBC encoded depth stencil. Unclear where the 0x20 is from */
- ctx->fragment_fbd.unk3 |= 0x400;
+ ctx->fragment_mfbd.unk3 |= 0x400;
}
}
/* For the special case of a depth-only FBO, we need to attach a dummy render target */
if (ctx->pipe_framebuffer.nr_cbufs == 0) {
+ if (require_sfbd) {
+ fprintf(stderr, "Depth-only FBO not supported on SFBD\n");
+ assert(0);
+ }
+
ctx->fragment_rts[0].format = 0x80008000;
ctx->fragment_rts[0].framebuffer = 0;
ctx->fragment_rts[0].framebuffer_stride = 0;
@@ -190,7 +218,6 @@ panfrost_set_fragment_afbc(struct panfrost_context *ctx)
/* Framebuffer descriptor */
-#ifdef SFBD
static void
panfrost_set_framebuffer_resolution(struct mali_single_framebuffer *fb, int w, int h)
{
@@ -204,26 +231,30 @@ panfrost_set_framebuffer_resolution(struct mali_single_framebuffer *fb, int w, i
fb->resolution_check = ((w + h) / 3) << 4;
}
-#endif
-static PANFROST_FRAMEBUFFER
-panfrost_emit_fbd(struct panfrost_context *ctx)
+static struct mali_single_framebuffer
+panfrost_emit_sfbd(struct panfrost_context *ctx)
{
-#ifdef SFBD
struct mali_single_framebuffer framebuffer = {
.unknown2 = 0x1f,
.format = 0x30000000,
.clear_flags = 0x1000,
.unknown_address_0 = ctx->scratchpad.gpu,
- .unknown_address_1 = ctx->scratchpad.gpu + 0x6000,
- .unknown_address_2 = ctx->scratchpad.gpu + 0x6200,
+ .unknown_address_1 = ctx->misc_0.gpu,
+ .unknown_address_2 = ctx->misc_0.gpu + 40960,
.tiler_flags = 0xf0,
.tiler_heap_free = ctx->tiler_heap.gpu,
.tiler_heap_end = ctx->tiler_heap.gpu + ctx->tiler_heap.size,
};
panfrost_set_framebuffer_resolution(&framebuffer, ctx->pipe_framebuffer.width, ctx->pipe_framebuffer.height);
-#else
+
+ return framebuffer;
+}
+
+static struct bifrost_framebuffer
+panfrost_emit_mfbd(struct panfrost_context *ctx)
+{
struct bifrost_framebuffer framebuffer = {
.tiler_meta = 0xf00000c600,
@@ -249,8 +280,6 @@ panfrost_emit_fbd(struct panfrost_context *ctx)
.tiler_heap_end = ctx->tiler_heap.gpu + ctx->tiler_heap.size,
};
-#endif
-
return framebuffer;
}
@@ -297,33 +326,33 @@ panfrost_new_frag_framebuffer(struct panfrost_context *ctx)
stride = -stride;
}
-#ifdef SFBD
- struct mali_single_framebuffer fb = panfrost_emit_fbd(ctx);
-
- fb.framebuffer = framebuffer;
- fb.stride = stride;
+ if (require_sfbd) {
+ struct mali_single_framebuffer fb = panfrost_emit_sfbd(ctx);
- fb.format = 0xb84e0281; /* RGB32, no MSAA */
-#else
- struct bifrost_framebuffer fb = panfrost_emit_fbd(ctx);
+ fb.framebuffer = framebuffer;
+ fb.stride = stride;
- /* XXX: MRT case */
- fb.rt_count_2 = 1;
- fb.unk3 = 0x100;
+ fb.format = 0xb84e0281; /* RGB32, no MSAA */
+ memcpy(&ctx->fragment_sfbd, &fb, sizeof(fb));
+ } else {
+ struct bifrost_framebuffer fb = panfrost_emit_mfbd(ctx);
- struct bifrost_render_target rt = {
- .unk1 = 0x4000000,
- .format = 0x860a8899, /* RGBA32, no MSAA */
- .framebuffer = framebuffer,
- .framebuffer_stride = (stride / 16) & 0xfffffff,
- };
+ /* XXX: MRT case */
+ fb.rt_count_2 = 1;
+ fb.unk3 = 0x100;
- memcpy(&ctx->fragment_rts[0], &rt, sizeof(rt));
+ struct bifrost_render_target rt = {
+ .unk1 = 0x4000000,
+ .format = 0x860a8899, /* RGBA32, no MSAA */
+ .framebuffer = framebuffer,
+ .framebuffer_stride = (stride / 16) & 0xfffffff,
+ };
- memset(&ctx->fragment_extra, 0, sizeof(ctx->fragment_extra));
-#endif
+ memcpy(&ctx->fragment_rts[0], &rt, sizeof(rt));
- memcpy(&ctx->fragment_fbd, &fb, sizeof(fb));
+ memset(&ctx->fragment_extra, 0, sizeof(ctx->fragment_extra));
+ memcpy(&ctx->fragment_mfbd, &fb, sizeof(fb));
+ }
}
/* Maps float 0.0-1.0 to int 0x00-0xFF */
@@ -334,58 +363,77 @@ normalised_float_to_u8(float f)
}
static void
-panfrost_clear(
- struct pipe_context *pipe,
- unsigned buffers,
- const union pipe_color_union *color,
- double depth, unsigned stencil)
+panfrost_clear_sfbd(struct panfrost_context *ctx,
+ bool clear_color,
+ bool clear_depth,
+ bool clear_stencil,
+ uint32_t packed_color,
+ double depth, unsigned stencil
+ )
{
- struct panfrost_context *ctx = pan_context(pipe);
+ struct mali_single_framebuffer *sfbd = &ctx->fragment_sfbd;
- if (!color) {
- printf("Warning: clear color null?\n");
- return;
+ if (clear_color) {
+ sfbd->clear_color_1 = packed_color;
+ sfbd->clear_color_2 = packed_color;
+ sfbd->clear_color_3 = packed_color;
+ sfbd->clear_color_4 = packed_color;
}
- /* Save settings for FBO switch */
- ctx->last_clear.buffers = buffers;
- ctx->last_clear.color = color;
- ctx->last_clear.depth = depth;
- ctx->last_clear.depth = depth;
+ if (clear_depth) {
+ sfbd->clear_depth_1 = depth;
+ sfbd->clear_depth_2 = depth;
+ sfbd->clear_depth_3 = depth;
+ sfbd->clear_depth_4 = depth;
+ }
- bool clear_color = buffers & PIPE_CLEAR_COLOR;
- bool clear_depth = buffers & PIPE_CLEAR_DEPTH;
- bool clear_stencil = buffers & PIPE_CLEAR_STENCIL;
+ if (clear_stencil) {
+ sfbd->clear_stencil = stencil;
+ }
- /* Remember that we've done something */
- ctx->frame_cleared = true;
+ /* Setup buffers */
- /* Alpha clear only meaningful without alpha channel */
- bool has_alpha = ctx->pipe_framebuffer.nr_cbufs && util_format_has_alpha(ctx->pipe_framebuffer.cbufs[0]->format);
- float clear_alpha = has_alpha ? color->f[3] : 1.0f;
+ if (clear_depth) {
+ sfbd->depth_buffer = ctx->depth_stencil_buffer.gpu;
+ sfbd->depth_buffer_enable = MALI_DEPTH_STENCIL_ENABLE;
+ }
- uint32_t packed_color =
- (normalised_float_to_u8(clear_alpha) << 24) |
- (normalised_float_to_u8(color->f[2]) << 16) |
- (normalised_float_to_u8(color->f[1]) << 8) |
- (normalised_float_to_u8(color->f[0]) << 0);
+ if (clear_stencil) {
+ sfbd->stencil_buffer = ctx->depth_stencil_buffer.gpu;
+ sfbd->stencil_buffer_enable = MALI_DEPTH_STENCIL_ENABLE;
+ }
-#ifdef MFBD
- struct bifrost_render_target *buffer_color = &ctx->fragment_rts[0];
-#else
- struct mali_single_framebuffer *buffer_color = &ctx->fragment_fbd;
-#endif
+ /* Set flags based on what has been cleared, for the SFBD case */
+ /* XXX: What do these flags mean? */
+ int clear_flags = 0x101100;
-#ifdef MFBD
- struct bifrost_framebuffer *buffer_ds = &ctx->fragment_fbd;
-#else
- struct mali_single_framebuffer *buffer_ds = buffer_color;
-#endif
+ if (clear_color && clear_depth && clear_stencil) {
+ /* On a tiler like this, it's fastest to clear all three buffers at once */
- if (clear_color) {
- /* Fields duplicated 4x for unknown reasons. Same in Utgard,
- * too, which is doubly weird. */
+ clear_flags |= MALI_CLEAR_FAST;
+ } else {
+ clear_flags |= MALI_CLEAR_SLOW;
+
+ if (clear_stencil)
+ clear_flags |= MALI_CLEAR_SLOW_STENCIL;
+ }
+
+ sfbd->clear_flags = clear_flags;
+}
+static void
+panfrost_clear_mfbd(struct panfrost_context *ctx,
+ bool clear_color,
+ bool clear_depth,
+ bool clear_stencil,
+ uint32_t packed_color,
+ double depth, unsigned stencil
+ )
+{
+ struct bifrost_render_target *buffer_color = &ctx->fragment_rts[0];
+ struct bifrost_framebuffer *buffer_ds = &ctx->fragment_mfbd;
+
+ if (clear_color) {
buffer_color->clear_color_1 = packed_color;
buffer_color->clear_color_2 = packed_color;
buffer_color->clear_color_3 = packed_color;
@@ -393,72 +441,71 @@ panfrost_clear(
}
if (clear_depth) {
-#ifdef SFBD
- buffer_ds->clear_depth_1 = depth;
- buffer_ds->clear_depth_2 = depth;
- buffer_ds->clear_depth_3 = depth;
- buffer_ds->clear_depth_4 = depth;
-#else
buffer_ds->clear_depth = depth;
-#endif
}
if (clear_stencil) {
buffer_ds->clear_stencil = stencil;
}
- /* Setup buffers depending on MFBD/SFBD */
-
-#ifdef MFBD
-
if (clear_depth || clear_stencil) {
/* Setup combined 24/8 depth/stencil */
- ctx->fragment_fbd.unk3 |= MALI_MFBD_EXTRA;
+ ctx->fragment_mfbd.unk3 |= MALI_MFBD_EXTRA;
//ctx->fragment_extra.unk = /*0x405*/0x404;
ctx->fragment_extra.unk = 0x405;
ctx->fragment_extra.ds_linear.depth = ctx->depth_stencil_buffer.gpu;
ctx->fragment_extra.ds_linear.depth_stride = ctx->pipe_framebuffer.width * 4;
}
+}
-#else
+static void
+panfrost_clear(
+ struct pipe_context *pipe,
+ unsigned buffers,
+ const union pipe_color_union *color,
+ double depth, unsigned stencil)
+{
+ struct panfrost_context *ctx = pan_context(pipe);
- if (clear_depth) {
- buffer_ds->depth_buffer = ctx->depth_stencil_buffer.gpu;
- buffer_ds->depth_buffer_enable = MALI_DEPTH_STENCIL_ENABLE;
+ if (!color) {
+ printf("Warning: clear color null?\n");
+ return;
}
- if (clear_stencil) {
- buffer_ds->stencil_buffer = ctx->depth_stencil_buffer.gpu;
- buffer_ds->stencil_buffer_enable = MALI_DEPTH_STENCIL_ENABLE;
- }
+ /* Save settings for FBO switch */
+ ctx->last_clear.buffers = buffers;
+ ctx->last_clear.color = color;
+ ctx->last_clear.depth = depth;
+ ctx->last_clear.depth = depth;
-#endif
+ bool clear_color = buffers & PIPE_CLEAR_COLOR;
+ bool clear_depth = buffers & PIPE_CLEAR_DEPTH;
+ bool clear_stencil = buffers & PIPE_CLEAR_STENCIL;
-#ifdef SFBD
- /* Set flags based on what has been cleared, for the SFBD case */
- /* XXX: What do these flags mean? */
- int clear_flags = 0x101100;
+ /* Remember that we've done something */
+ ctx->frame_cleared = true;
- if (clear_color && clear_depth && clear_stencil) {
- /* On a tiler like this, it's fastest to clear all three buffers at once */
+ /* Alpha clear is only meaningful with an alpha channel; otherwise clear to opaque (1.0) */
+ bool has_alpha = ctx->pipe_framebuffer.nr_cbufs && util_format_has_alpha(ctx->pipe_framebuffer.cbufs[0]->format);
+ float clear_alpha = has_alpha ? color->f[3] : 1.0f;
- clear_flags |= MALI_CLEAR_FAST;
- } else {
- clear_flags |= MALI_CLEAR_SLOW;
+ uint32_t packed_color =
+ (normalised_float_to_u8(clear_alpha) << 24) |
+ (normalised_float_to_u8(color->f[2]) << 16) |
+ (normalised_float_to_u8(color->f[1]) << 8) |
+ (normalised_float_to_u8(color->f[0]) << 0);
- if (clear_stencil)
- clear_flags |= MALI_CLEAR_SLOW_STENCIL;
+ if (require_sfbd) {
+ panfrost_clear_sfbd(ctx, clear_color, clear_depth, clear_stencil, packed_color, depth, stencil);
+ } else {
+ panfrost_clear_mfbd(ctx, clear_color, clear_depth, clear_stencil, packed_color, depth, stencil);
}
-
- fbd->clear_flags = clear_flags;
-#endif
}
-static void
-panfrost_attach_vt_framebuffer(struct panfrost_context *ctx)
+static mali_ptr
+panfrost_attach_vt_mfbd(struct panfrost_context *ctx)
{
-#ifdef MFBD
- /* MFBD needs a sequential semi-render target upload, but this is, is beyond me for now */
+ /* MFBD needs a sequential semi-render target upload, but what exactly this is, is beyond me for now */
struct bifrost_render_target rts_list[] = {
{
.chunknown = {
@@ -470,18 +517,31 @@ panfrost_attach_vt_framebuffer(struct panfrost_context *ctx)
};
/* Allocate memory for the three components */
- int size = 1024 + sizeof(ctx->vt_framebuffer) + sizeof(rts_list);
+ int size = 1024 + sizeof(ctx->vt_framebuffer_mfbd) + sizeof(rts_list);
struct panfrost_transfer transfer = panfrost_allocate_transient(ctx, size);
/* Opaque 1024-block */
rts_list[0].chunknown.pointer = transfer.gpu;
- mali_ptr framebuffer = (transfer.gpu + 1024) | PANFROST_DEFAULT_FBD;
- memcpy(transfer.cpu + 1024, &ctx->vt_framebuffer, sizeof(ctx->vt_framebuffer));
- memcpy(transfer.cpu + 1024 + sizeof(ctx->vt_framebuffer), rts_list, sizeof(rts_list));
-#else
- mali_ptr framebuffer = panfrost_upload_transient(ctx, &ctx->vt_framebuffer, sizeof(ctx->vt_framebuffer)) | PANFROST_DEFAULT_FBD;
-#endif
+ memcpy(transfer.cpu + 1024, &ctx->vt_framebuffer_mfbd, sizeof(ctx->vt_framebuffer_mfbd));
+ memcpy(transfer.cpu + 1024 + sizeof(ctx->vt_framebuffer_mfbd), rts_list, sizeof(rts_list));
+
+ return (transfer.gpu + 1024) | MALI_MFBD;
+}
+
+static mali_ptr
+panfrost_attach_vt_sfbd(struct panfrost_context *ctx)
+{
+ return panfrost_upload_transient(ctx, &ctx->vt_framebuffer_sfbd, sizeof(ctx->vt_framebuffer_sfbd)) | MALI_SFBD;
+}
+
+static void
+panfrost_attach_vt_framebuffer(struct panfrost_context *ctx)
+{
+ mali_ptr framebuffer = require_sfbd ?
+ panfrost_attach_vt_sfbd(ctx) :
+ panfrost_attach_vt_mfbd(ctx);
+
ctx->payload_vertex.postfix.framebuffer = framebuffer;
ctx->payload_tiler.postfix.framebuffer = framebuffer;
}
@@ -528,7 +588,11 @@ panfrost_invalidate_frame(struct panfrost_context *ctx)
if ((++ctx->cmdstream_i) == (sizeof(ctx->transient_pools) / sizeof(ctx->transient_pools[0])))
ctx->cmdstream_i = 0;
- ctx->vt_framebuffer = panfrost_emit_fbd(ctx);
+ if (require_sfbd)
+ ctx->vt_framebuffer_sfbd = panfrost_emit_sfbd(ctx);
+ else
+ ctx->vt_framebuffer_mfbd = panfrost_emit_mfbd(ctx);
+
panfrost_new_frag_framebuffer(ctx);
/* Reset varyings allocated */
@@ -563,7 +627,7 @@ panfrost_emit_vertex_payload(struct panfrost_context *ctx)
.workgroups_x_shift_2 = 0x2,
.workgroups_x_shift_3 = 0x5,
},
- .gl_enables = 0x6
+ .gl_enables = 0x4 | (is_t6xx ? 0 : 0x2),
};
memcpy(&ctx->payload_vertex, &payload, sizeof(payload));
@@ -751,14 +815,14 @@ panfrost_default_shader_backend(struct panfrost_context *ctx)
struct mali_shader_meta shader = {
.alpha_coverage = ~MALI_ALPHA_COVERAGE(0.000000),
- .unknown2_3 = MALI_DEPTH_FUNC(MALI_FUNC_ALWAYS) | 0x3010 /*| MALI_CAN_DISCARD*/,
-#ifdef T8XX
+ .unknown2_3 = MALI_DEPTH_FUNC(MALI_FUNC_ALWAYS) | 0x3010,
.unknown2_4 = MALI_NO_MSAA | 0x4e0,
-#else
- .unknown2_4 = MALI_NO_MSAA | 0x4f0,
-#endif
};
+ if (is_t6xx) {
+ shader.unknown2_4 |= 0x10;
+ }
+
struct pipe_stencil_state default_stencil = {
.enabled = 0,
.func = PIPE_FUNC_ALWAYS,
@@ -801,14 +865,6 @@ panfrost_vertex_tiler_job(struct panfrost_context *ctx, bool is_tiler, bool is_e
#endif
};
- /* XXX: What is this? */
-#ifdef T6XX
-
- if (is_tiler)
- job.unknown_flags = ctx->draw_count ? 64 : 1;
-
-#endif
-
/* Only non-elided tiler jobs have dependencies which are known at this point */
if (is_tiler && !is_elided_tiler) {
@@ -873,12 +929,16 @@ panfrost_fragment_job(struct panfrost_context *ctx)
if (ctx->pipe_framebuffer.nr_cbufs == 1) {
struct panfrost_resource *rsrc = (struct panfrost_resource *) ctx->pipe_framebuffer.cbufs[0]->texture;
- int stride = util_format_get_stride(rsrc->base.format, rsrc->base.width0);
if (rsrc->bo->has_checksum) {
- //ctx->fragment_fbd.unk3 |= 0xa00000;
- //ctx->fragment_fbd.unk3 = 0xa02100;
- ctx->fragment_fbd.unk3 |= MALI_MFBD_EXTRA;
+ if (require_sfbd) {
+ fprintf(stderr, "Checksumming not supported on SFBD\n");
+ assert(0);
+ }
+
+ int stride = util_format_get_stride(rsrc->base.format, rsrc->base.width0);
+
+ ctx->fragment_mfbd.unk3 |= MALI_MFBD_EXTRA;
ctx->fragment_extra.unk |= 0x420;
ctx->fragment_extra.checksum_stride = rsrc->bo->checksum_stride;
ctx->fragment_extra.checksum = rsrc->bo->gpu[0] + stride * rsrc->base.height0;
@@ -888,22 +948,29 @@ panfrost_fragment_job(struct panfrost_context *ctx)
/* The frame is complete and therefore the framebuffer descriptor is
* ready for linkage and upload */
- size_t sz = sizeof(ctx->fragment_fbd) + sizeof(struct bifrost_fb_extra) + sizeof(struct bifrost_render_target) * 1;
+ size_t sz = require_sfbd ? sizeof(struct mali_single_framebuffer) : (sizeof(struct bifrost_framebuffer) + sizeof(struct bifrost_fb_extra) + sizeof(struct bifrost_render_target) * 1);
struct panfrost_transfer fbd_t = panfrost_allocate_transient(ctx, sz);
off_t offset = 0;
- memcpy(fbd_t.cpu, &ctx->fragment_fbd, sizeof(ctx->fragment_fbd));
- offset += sizeof(ctx->fragment_fbd);
+ if (require_sfbd) {
+ /* Upload just the SFBD all at once */
+ memcpy(fbd_t.cpu, &ctx->fragment_sfbd, sizeof(ctx->fragment_sfbd));
+ offset += sizeof(ctx->fragment_sfbd);
+ } else {
+ /* Upload the MFBD header */
+ memcpy(fbd_t.cpu, &ctx->fragment_mfbd, sizeof(ctx->fragment_mfbd));
+ offset += sizeof(ctx->fragment_mfbd);
+
+ /* Upload extra framebuffer info if necessary */
+ if (ctx->fragment_mfbd.unk3 & MALI_MFBD_EXTRA) {
+ memcpy(fbd_t.cpu + offset, &ctx->fragment_extra, sizeof(struct bifrost_fb_extra));
+ offset += sizeof(struct bifrost_fb_extra);
+ }
- /* Upload extra framebuffer info if necessary */
- if (ctx->fragment_fbd.unk3 & MALI_MFBD_EXTRA) {
- memcpy(fbd_t.cpu + offset, &ctx->fragment_extra, sizeof(struct bifrost_fb_extra));
- offset += sizeof(struct bifrost_fb_extra);
+ /* Upload (single) render target */
+ memcpy(fbd_t.cpu + offset, &ctx->fragment_rts[0], sizeof(struct bifrost_render_target) * 1);
}
- /* Upload (single) render target */
- memcpy(fbd_t.cpu + offset, &ctx->fragment_rts[0], sizeof(struct bifrost_render_target) * 1);
-
/* Generate the fragment (frame) job */
struct mali_job_descriptor_header header = {
@@ -917,9 +984,16 @@ panfrost_fragment_job(struct panfrost_context *ctx)
struct mali_payload_fragment payload = {
.min_tile_coord = MALI_COORDINATE_TO_TILE_MIN(0, 0),
.max_tile_coord = MALI_COORDINATE_TO_TILE_MAX(ctx->pipe_framebuffer.width, ctx->pipe_framebuffer.height),
- .framebuffer = fbd_t.gpu | PANFROST_DEFAULT_FBD | (ctx->fragment_fbd.unk3 & MALI_MFBD_EXTRA ? 2 : 0),
+ .framebuffer = fbd_t.gpu | (require_sfbd ? MALI_SFBD : MALI_MFBD),
};
+ if (!require_sfbd && ctx->fragment_mfbd.unk3 & MALI_MFBD_EXTRA) {
+ /* Signal that there is an extra portion of the framebuffer
+ * descriptor */
+
+ payload.framebuffer |= 2;
+ }
+
/* Normally, there should be no padding. However, fragment jobs are
* shared with 64-bit Bifrost systems, and accordingly there is 4-bytes
* of zero padding in between. */
@@ -1114,10 +1188,10 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data)
ctx->fragment_shader_core.stencil_back.ref = ctx->stencil_ref.ref_value[1];
/* CAN_DISCARD should be set if the fragment shader possibly
- * contains a 'discard' instruction, or maybe other
- * circumstances. It is likely this is related to optimizations
- * related to forward-pixel kill, as per "Mali Performance 3:
- * Is EGL_BUFFER_PRESERVED a good thing?" by Peter Harris
+ * contains a 'discard' instruction. It is likely this is
+ * related to optimizations related to forward-pixel kill, as
+ * per "Mali Performance 3: Is EGL_BUFFER_PRESERVED a good
+ * thing?" by Peter Harris
*/
if (variant->can_discard) {
@@ -1127,8 +1201,30 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data)
ctx->fragment_shader_core.midgard1.unknown1 = 0x4200;
}
- if (ctx->blend->has_blend_shader)
- ctx->fragment_shader_core.blend_shader = ctx->blend->blend_shader;
+ /* Check if we're using the default blend descriptor (fast path) */
+
+ bool no_blending =
+ !ctx->blend->has_blend_shader &&
+ (ctx->blend->equation.rgb_mode == 0x122) &&
+ (ctx->blend->equation.alpha_mode == 0x122) &&
+ (ctx->blend->equation.color_mask == 0xf);
+
+ if (require_sfbd) {
+ /* When only a single render target platform is used, the blend
+ * information is inside the shader meta itself. We
+ * additionally need to signal CAN_DISCARD for nontrivial blend
+ * modes (so we're able to read back the destination buffer) */
+
+ if (ctx->blend->has_blend_shader) {
+ ctx->fragment_shader_core.blend_shader = ctx->blend->blend_shader;
+ } else {
+ memcpy(&ctx->fragment_shader_core.blend_equation, &ctx->blend->equation, sizeof(ctx->blend->equation));
+ }
+
+ if (!no_blending) {
+ ctx->fragment_shader_core.unknown2_3 |= MALI_CAN_DISCARD;
+ }
+ }
size_t size = sizeof(struct mali_shader_meta) + sizeof(struct mali_blend_meta);
struct panfrost_transfer transfer = panfrost_allocate_transient(ctx, size);
@@ -1136,51 +1232,46 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data)
ctx->payload_tiler.postfix._shader_upper = (transfer.gpu) >> 4;
-#ifdef T8XX
- /* Additional blend descriptor tacked on for newer systems */
+ if (!require_sfbd) {
+ /* Additional blend descriptor tacked on for jobs using MFBD */
- unsigned blend_count = 0;
+ unsigned blend_count = 0;
- if (ctx->blend->has_blend_shader) {
- /* For a blend shader, the bottom nibble corresponds to
- * the number of work registers used, which signals the
- * -existence- of a blend shader */
+ if (ctx->blend->has_blend_shader) {
+ /* For a blend shader, the bottom nibble corresponds to
+ * the number of work registers used, which signals the
+ * -existence- of a blend shader */
- assert(ctx->blend->blend_work_count >= 2);
- blend_count |= MIN2(ctx->blend->blend_work_count, 3);
- } else {
- /* Otherwise, the bottom bit simply specifies if
- * blending (anything other than REPLACE) is enabled */
+ assert(ctx->blend->blend_work_count >= 2);
+ blend_count |= MIN2(ctx->blend->blend_work_count, 3);
+ } else {
+ /* Otherwise, the bottom bit simply specifies if
+ * blending (anything other than REPLACE) is enabled */
- /* XXX: Less ugly way to do this? */
- bool no_blending =
- (ctx->blend->equation.rgb_mode == 0x122) &&
- (ctx->blend->equation.alpha_mode == 0x122) &&
- (ctx->blend->equation.color_mask == 0xf);
- if (!no_blending)
- blend_count |= 0x1;
- }
+ if (!no_blending)
+ blend_count |= 0x1;
+ }
- /* Second blend equation is always a simple replace */
+ /* Second blend equation is always a simple replace */
- uint64_t replace_magic = 0xf0122122;
- struct mali_blend_equation replace_mode;
- memcpy(&replace_mode, &replace_magic, sizeof(replace_mode));
+ uint64_t replace_magic = 0xf0122122;
+ struct mali_blend_equation replace_mode;
+ memcpy(&replace_mode, &replace_magic, sizeof(replace_mode));
- struct mali_blend_meta blend_meta[] = {
- {
- .unk1 = 0x200 | blend_count,
- .blend_equation_1 = ctx->blend->equation,
- .blend_equation_2 = replace_mode
- },
- };
+ struct mali_blend_meta blend_meta[] = {
+ {
+ .unk1 = 0x200 | blend_count,
+ .blend_equation_1 = ctx->blend->equation,
+ .blend_equation_2 = replace_mode
+ },
+ };
- if (ctx->blend->has_blend_shader)
- memcpy(&blend_meta[0].blend_equation_1, &ctx->blend->blend_shader, sizeof(ctx->blend->blend_shader));
+ if (ctx->blend->has_blend_shader)
+ memcpy(&blend_meta[0].blend_equation_1, &ctx->blend->blend_shader, sizeof(ctx->blend->blend_shader));
- memcpy(transfer.cpu + sizeof(struct mali_shader_meta), blend_meta, sizeof(blend_meta));
-#endif
+ memcpy(transfer.cpu + sizeof(struct mali_shader_meta), blend_meta, sizeof(blend_meta));
+ }
}
if (ctx->dirty & PAN_DIRTY_VERTEX) {
@@ -1231,12 +1322,13 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data)
int s = ctx->sampler_views[t][i]->hw.nr_mipmap_levels;
if (!rsrc->bo->is_mipmap) {
-#ifdef T6XX
- /* HW ERRATA, not needed after T6XX */
- ctx->sampler_views[t][i]->hw.swizzled_bitmaps[1] = rsrc->bo->gpu[0];
+ if (is_t6xx) {
+ /* HW ERRATA, not needed after t6XX */
+ ctx->sampler_views[t][i]->hw.swizzled_bitmaps[1] = rsrc->bo->gpu[0];
+
+ ctx->sampler_views[t][i]->hw.unknown3A = 1;
+ }
- ctx->sampler_views[t][i]->hw.unknown3A = 1;
-#endif
ctx->sampler_views[t][i]->hw.nr_mipmap_levels = 0;
}
@@ -1245,9 +1337,9 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data)
/* Restore */
ctx->sampler_views[t][i]->hw.nr_mipmap_levels = s;
-#ifdef T6XX
- ctx->sampler_views[t][i]->hw.unknown3A = 0;
-#endif
+ if (is_t6xx) {
+ ctx->sampler_views[t][i]->hw.unknown3A = 0;
+ }
}
mali_ptr trampoline = panfrost_upload_transient(ctx, trampolines, sizeof(uint64_t) * ctx->sampler_view_count[t]);
@@ -1391,7 +1483,7 @@ panfrost_link_jobs(struct panfrost_context *ctx)
for (int i = 0; i < ctx->vertex_job_count; ++i) {
bool isLast = (i + 1) == ctx->vertex_job_count;
- panfrost_link_job_pair(ctx->u_vertex_jobs[i], isLast ? ctx->tiler_jobs[0]: ctx->vertex_jobs[i + 1]);
+ panfrost_link_job_pair(ctx->u_vertex_jobs[i], isLast ? ctx->tiler_jobs[0] : ctx->vertex_jobs[i + 1]);
}
/* T -> T/null */
@@ -1715,11 +1807,7 @@ panfrost_create_rasterizer_state(
so->base = *cso;
/* Bitmask, unknown meaning of the start value */
-#ifdef T8XX
- so->tiler_gl_enables = 0x7;
-#else
- so->tiler_gl_enables = 0x105;
-#endif
+ so->tiler_gl_enables = is_t6xx ? 0x105 : 0x7;
so->tiler_gl_enables |= MALI_FRONT_FACE(
cso->front_ccw ? MALI_CCW : MALI_CW);
@@ -2198,8 +2286,8 @@ panfrost_set_framebuffer_state(struct pipe_context *pctx,
ctx->pipe_framebuffer.nr_cbufs = fb->nr_cbufs;
ctx->pipe_framebuffer.samples = fb->samples;
ctx->pipe_framebuffer.layers = fb->layers;
- ctx->pipe_framebuffer.width = fb->width;
- ctx->pipe_framebuffer.height = fb->height;
+ ctx->pipe_framebuffer.width = ALIGN(fb->width, 16);
+ ctx->pipe_framebuffer.height = ALIGN(fb->height, 16);
for (int i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
struct pipe_surface *cb = i < fb->nr_cbufs ? fb->cbufs[i] : NULL;
@@ -2218,7 +2306,11 @@ panfrost_set_framebuffer_state(struct pipe_context *pctx,
if (!cb)
continue;
- ctx->vt_framebuffer = panfrost_emit_fbd(ctx);
+ if (require_sfbd)
+ ctx->vt_framebuffer_sfbd = panfrost_emit_sfbd(ctx);
+ else
+ ctx->vt_framebuffer_mfbd = panfrost_emit_mfbd(ctx);
+
panfrost_attach_vt_framebuffer(ctx);
panfrost_new_frag_framebuffer(ctx);
panfrost_set_scissor(ctx);
@@ -2249,7 +2341,11 @@ panfrost_set_framebuffer_state(struct pipe_context *pctx,
if (zb) {
/* FBO has depth */
- ctx->vt_framebuffer = panfrost_emit_fbd(ctx);
+ if (require_sfbd)
+ ctx->vt_framebuffer_sfbd = panfrost_emit_sfbd(ctx);
+ else
+ ctx->vt_framebuffer_mfbd = panfrost_emit_mfbd(ctx);
+
panfrost_attach_vt_framebuffer(ctx);
panfrost_new_frag_framebuffer(ctx);
panfrost_set_scissor(ctx);
@@ -2670,9 +2766,6 @@ panfrost_create_context(struct pipe_screen *screen, void *priv, unsigned flags)
/* Prepare for render! */
- /* TODO: XXX */
- ctx->vt_framebuffer = panfrost_emit_fbd(ctx);
-
panfrost_emit_vertex_payload(ctx);
panfrost_emit_tiler_payload(ctx);
panfrost_invalidate_frame(ctx);
diff --git a/src/gallium/drivers/panfrost/pan_context.h b/src/gallium/drivers/panfrost/pan_context.h
index 89f821318e1..48cce72a303 100644
--- a/src/gallium/drivers/panfrost/pan_context.h
+++ b/src/gallium/drivers/panfrost/pan_context.h
@@ -25,8 +25,6 @@
#ifndef __BUILDER_H__
#define __BUILDER_H__
-#define MFBD
-
#define _LARGEFILE64_SOURCE 1
#define CACHE_LINE_SIZE 1024 /* TODO */
#include <sys/mman.h>
@@ -45,15 +43,6 @@
/* Forward declare to avoid extra header dep */
struct prim_convert_context;
-/* TODO: Handle on newer hardware */
-#ifdef MFBD
-#define PANFROST_DEFAULT_FBD (MALI_MFBD)
-#define PANFROST_FRAMEBUFFER struct bifrost_framebuffer
-#else
-#define PANFROST_DEFAULT_FBD (MALI_SFBD)
-#define PANFROST_FRAMEBUFFER struct mali_single_framebuffer
-#endif
-
#define MAX_DRAW_CALLS 4096
#define MAX_VARYINGS 4096
@@ -140,15 +129,14 @@ struct panfrost_context {
* most obvious is the fragment framebuffer descriptor, which carries
* e.g. clearing information */
-#ifdef SFBD
- struct mali_single_framebuffer fragment_fbd;
-#else
- struct bifrost_framebuffer fragment_fbd;
-
- struct bifrost_fb_extra fragment_extra;
-
- struct bifrost_render_target fragment_rts[4];
-#endif
+ union {
+ struct mali_single_framebuffer fragment_sfbd;
+ struct {
+ struct bifrost_framebuffer fragment_mfbd;
+ struct bifrost_fb_extra fragment_extra;
+ struct bifrost_render_target fragment_rts[4];
+ };
+ };
/* Each draw has corresponding vertex and tiler payloads */
struct midgard_payload_vertex_tiler payload_vertex;
@@ -190,7 +178,8 @@ struct panfrost_context {
unsigned varying_height;
struct mali_viewport *viewport;
- PANFROST_FRAMEBUFFER vt_framebuffer;
+ struct mali_single_framebuffer vt_framebuffer_sfbd;
+ struct bifrost_framebuffer vt_framebuffer_mfbd;
/* TODO: Multiple uniform buffers (index =/= 0), finer updates? */
--
2.20.1
More information about the mesa-dev
mailing list