Mesa (master): panfrost: Use 64-bit descriptors globally

Tue Jul 16 15:42:15 UTC 2019

Module: Mesa
Branch: master
Commit: 5a7688fdecd76c7d9cd87f6f6c93eb32870a2146
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=5a7688fdecd76c7d9cd87f6f6c93eb32870a2146

Author: Tomeu Vizoso <tomeu.vizoso at collabora.com>
Date:   Thu Jul 11 08:06:41 2019 +0200

panfrost: Use 64-bit descriptors globally

Midgard supports two modes of operation, 32-bit mode and 64-bit mode.
The GPU is natively 64-bit, but job descriptors can be submitted in
32-bit mode. Among other changes, 32-bit mode shortens pointer sizes to
use 32-bit pointers rather than the full 64-bit range.

The blob decides which mode to use based on the CPU bitness, so an armhf
system uses 32-bit descriptors and an aarch64 system uses 64-bit
descriptors. For a while, we mimicked this, but inevitably this caused
the 32-bit support to lag behind as our reference platform is 64-bit.

To combat the code staleness, we traced an older GPU paired with a 64-bit
CPU (the Midgard T720 on-board the sunxi H64). From there, we could tell
which fields were really about hardware and which fields were simply
reflections of the descriptor bitness.

From there, we decided to remove support for 32-bit descriptors
entirely, using 64-bit descriptors unconditionally. There is minimal
performance penalty for this in practice, and it allows us to unify
these disparate code paths. This fixes:

   - T860 + armhf
   - T820 + armhf
   - T760 + aarch64

And will help bringup of 1st/2nd generation Midgard regardless of CPU.

[Work done by Tomeu. Commit message written by Alyssa.]

v2: Add comments preserving information about the old behaviour for
future reference. Fix a compiler warning. (Alyssa)

Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig at collabora.com>

---

 src/gallium/drivers/panfrost/pan_context.c  | 27 ++++++-------------
 src/gallium/drivers/panfrost/pan_fragment.c |  2 --
 src/gallium/drivers/panfrost/pan_screen.c   | 13 ++-------
 src/panfrost/include/panfrost-job.h         | 42 +++++++++++------------------
 src/panfrost/pandecode/decode.c             |  9 -------
 5 files changed, 26 insertions(+), 67 deletions(-)

diff --git a/src/gallium/drivers/panfrost/pan_context.c b/src/gallium/drivers/panfrost/pan_context.c
index d720f086361..49ca1f570f1 100644
--- a/src/gallium/drivers/panfrost/pan_context.c
+++ b/src/gallium/drivers/panfrost/pan_context.c
@@ -272,8 +272,10 @@ panfrost_invalidate_frame(struct panfrost_context *ctx)
 static void
 panfrost_emit_vertex_payload(struct panfrost_context *ctx)
 {
+        /* 0x2 bit clear on 32-bit T6XX */
+
         struct midgard_payload_vertex_tiler payload = {
-                .gl_enables = 0x4 | (ctx->is_t6xx ? 0 : 0x2),
+                .gl_enables = 0x4 | 0x2,
         };
 
         memcpy(&ctx->payload_vertex, &payload, sizeof(payload));
@@ -454,9 +456,7 @@ panfrost_default_shader_backend(struct panfrost_context *ctx)
                 .unknown2_4 = MALI_NO_MSAA | 0x4e0,
         };
 
-        if (ctx->is_t6xx) {
-                shader.unknown2_4 |= 0x10;
-        }
+        /* unknown2_4 has 0x10 bit set on 32-bit T6XX */
 
         struct pipe_stencil_state default_stencil = {
                 .enabled = 0,
@@ -491,24 +491,14 @@ panfrost_vertex_tiler_job(struct panfrost_context *ctx, bool is_tiler)
 {
         struct mali_job_descriptor_header job = {
                 .job_type = is_tiler ? JOB_TYPE_TILER : JOB_TYPE_VERTEX,
-#ifdef __LP64__
                 .job_descriptor_size = 1,
-#endif
         };
 
         struct midgard_payload_vertex_tiler *payload = is_tiler ? &ctx->payload_tiler : &ctx->payload_vertex;
 
-        /* There's some padding hacks on 32-bit */
-
-#ifdef __LP64__
-        int offset = 0;
-#else
-        int offset = 4;
-#endif
         struct panfrost_transfer transfer = panfrost_allocate_transient(ctx, sizeof(job) + sizeof(*payload));
-
         memcpy(transfer.cpu, &job, sizeof(job));
-        memcpy(transfer.cpu + sizeof(job) - offset, payload, sizeof(*payload));
+        memcpy(transfer.cpu + sizeof(job), payload, sizeof(*payload));
         return transfer;
 }
 
@@ -1753,7 +1743,7 @@ panfrost_draw_vbo(
                 ctx->payload_tiler.prefix.index_count = MALI_POSITIVE(ctx->vertex_count);
 
                 /* Reverse index state */
-                ctx->payload_tiler.prefix.indices = (uintptr_t) NULL;
+                ctx->payload_tiler.prefix.indices = (u64) NULL;
         }
 
         /* Dispatch "compute jobs" for the vertex/tiler pair as (1,
@@ -1815,13 +1805,12 @@ panfrost_create_rasterizer_state(
         struct pipe_context *pctx,
         const struct pipe_rasterizer_state *cso)
 {
-        struct panfrost_context *ctx = pan_context(pctx);
         struct panfrost_rasterizer *so = CALLOC_STRUCT(panfrost_rasterizer);
 
         so->base = *cso;
 
-        /* Bitmask, unknown meaning of the start value */
-        so->tiler_gl_enables = ctx->is_t6xx ? 0x105 : 0x7;
+        /* Bitmask, unknown meaning of the start value. 0x105 on 32-bit T6XX */
+        so->tiler_gl_enables = 0x7;
 
         if (cso->front_ccw)
                 so->tiler_gl_enables |= MALI_FRONT_CCW_TOP;
diff --git a/src/gallium/drivers/panfrost/pan_fragment.c b/src/gallium/drivers/panfrost/pan_fragment.c
index ab42e763be7..7ffb9db0a05 100644
--- a/src/gallium/drivers/panfrost/pan_fragment.c
+++ b/src/gallium/drivers/panfrost/pan_fragment.c
@@ -70,9 +70,7 @@ panfrost_fragment_job(struct panfrost_context *ctx, bool has_draws)
         struct mali_job_descriptor_header header = {
                 .job_type = JOB_TYPE_FRAGMENT,
                 .job_index = 1,
-#ifdef __LP64__
                 .job_descriptor_size = 1
-#endif
         };
 
         struct panfrost_job *job = panfrost_get_job_for_fbo(ctx);
diff --git a/src/gallium/drivers/panfrost/pan_screen.c b/src/gallium/drivers/panfrost/pan_screen.c
index ffd2de4734e..6fe9e60f69a 100644
--- a/src/gallium/drivers/panfrost/pan_screen.c
+++ b/src/gallium/drivers/panfrost/pan_screen.c
@@ -511,22 +511,13 @@ panfrost_create_screen(int fd, struct renderonly *ro)
 
         screen->gpu_id = panfrost_drm_query_gpu_version(screen);
 
-        /* Check if we're loading against a supported GPU model
-         * paired with a supported CPU (differences from
-         * armhf/aarch64 break models on incompatible CPUs at the
-         * moment -- this is a TODO). In other words, we whitelist
-         * RK3288, RK3399, and S912, which are verified to work. */
+        /* Check if we're loading against a supported GPU model. */
 
         switch (screen->gpu_id) {
-#ifdef __LP64__
+        case 0x750: /* T760 */
         case 0x820: /* T820 */
         case 0x860: /* T860 */
                 break;
-#else
-        case 0x750: /* T760 */
-                break;
-#endif
-
         default:
                 /* Fail to load against untested models */
                 debug_printf("panfrost: Unsupported model %X",
diff --git a/src/panfrost/include/panfrost-job.h b/src/panfrost/include/panfrost-job.h
index 0fbe0a2fa10..297d0806adc 100644
--- a/src/panfrost/include/panfrost-job.h
+++ b/src/panfrost/include/panfrost-job.h
@@ -31,7 +31,7 @@
 #include <stdint.h>
 #include <panfrost-misc.h>
 
-#define MALI_SHORT_PTR_BITS (sizeof(uintptr_t)*8)
+#define MALI_SHORT_PTR_BITS (sizeof(u64)*8)
 
 #define MALI_FBD_HIERARCHY_WEIGHTS 8
 
@@ -935,7 +935,7 @@ struct mali_vertex_tiler_prefix {
          * indices (width depends on flags). Thanks, guys, for not making my
          * life insane for once! NULL for non-indexed draws. */
 
-        uintptr_t indices;
+        u64 indices;
 } __attribute__((packed));
 
 /* Point size / line width can either be specified as a 32-bit float (for
@@ -947,7 +947,7 @@ struct mali_vertex_tiler_prefix {
 
 union midgard_primitive_size {
         float constant;
-        uintptr_t pointer;
+        u64 pointer;
 };
 
 struct bifrost_vertex_only {
@@ -1011,34 +1011,34 @@ struct mali_vertex_tiler_postfix {
          * output from the vertex shader for tiler jobs.
          */
 
-        uintptr_t position_varying;
+        u64 position_varying;
 
         /* An array of mali_uniform_buffer_meta's. The size is given by the
          * shader_meta.
          */
-        uintptr_t uniform_buffers;
+        u64 uniform_buffers;
 
         /* This is a pointer to an array of pointers to the texture
          * descriptors, number of pointers bounded by number of textures. The
          * indirection is needed to accomodate varying numbers and sizes of
          * texture descriptors */
-        uintptr_t texture_trampoline;
+        u64 texture_trampoline;
 
         /* For OpenGL, from what I've seen, this is intimately connected to
          * texture_meta. cwabbott says this is not the case under Vulkan, hence
          * why this field is seperate (Midgard is Vulkan capable). Pointer to
          * array of sampler descriptors (which are uniform in size) */
-        uintptr_t sampler_descriptor;
+        u64 sampler_descriptor;
 
-        uintptr_t uniforms;
+        u64 uniforms;
         u8 flags : 4;
-        uintptr_t _shader_upper : MALI_SHORT_PTR_BITS - 4; /* struct shader_meta */
-        uintptr_t attributes; /* struct attribute_buffer[] */
-        uintptr_t attribute_meta; /* attribute_meta[] */
-        uintptr_t varyings; /* struct attr */
-        uintptr_t varying_meta; /* pointer */
-        uintptr_t viewport;
-        uintptr_t occlusion_counter; /* A single bit as far as I can tell */
+        u64 _shader_upper : MALI_SHORT_PTR_BITS - 4; /* struct shader_meta */
+        u64 attributes; /* struct attribute_buffer[] */
+        u64 attribute_meta; /* attribute_meta[] */
+        u64 varyings; /* struct attr */
+        u64 varying_meta; /* pointer */
+        u64 viewport;
+        u64 occlusion_counter; /* A single bit as far as I can tell */
 
         /* Note: on Bifrost, this isn't actually the FBD. It points to
          * bifrost_scratchpad instead. However, it does point to the same thing
@@ -1048,16 +1048,8 @@ struct mali_vertex_tiler_postfix {
 } __attribute__((packed));
 
 struct midgard_payload_vertex_tiler {
-#ifndef __LP64__
-        union midgard_primitive_size primitive_size;
-#endif
-
         struct mali_vertex_tiler_prefix prefix;
 
-#ifndef __LP64__
-        u32 zero3;
-#endif
-
         u16 gl_enables; // 0x5
 
         /* Both zero for non-instanced draws. For instanced draws, a
@@ -1072,13 +1064,11 @@ struct midgard_payload_vertex_tiler {
         /* Offset for first vertex in buffer */
         u32 draw_start;
 
-	uintptr_t zero5;
+	u64 zero5;
 
         struct mali_vertex_tiler_postfix postfix;
 
-#ifdef __LP64__
         union midgard_primitive_size primitive_size;
-#endif
 } __attribute__((packed));
 
 struct bifrost_payload_vertex {
diff --git a/src/panfrost/pandecode/decode.c b/src/panfrost/pandecode/decode.c
index 5d38ec22b19..7e7fed81f99 100644
--- a/src/panfrost/pandecode/decode.c
+++ b/src/panfrost/pandecode/decode.c
@@ -2163,15 +2163,6 @@ pandecode_vertex_or_tiler_job_mdg(const struct mali_job_descriptor_header *h,
         if (v->draw_start)
                 pandecode_prop("draw_start = %d", v->draw_start);
 
-#ifndef __LP64__
-
-        if (v->zero3) {
-                pandecode_msg("Zero tripped\n");
-                pandecode_prop("zero3 = 0x%" PRIx32, v->zero3);
-        }
-
-#endif
-
         if (v->zero5) {
                 pandecode_msg("Zero tripped\n");
                 pandecode_prop("zero5 = 0x%" PRIx64, v->zero5);