[Mesa-dev] [PATCH 1/3] panfrost: Move ra_regs to the screen context

Thu Jun 20 09:20:22 UTC 2019

Signed-off-by: Tomeu Vizoso <tomeu.vizoso at collabora.com>
---
 .../drivers/panfrost/midgard/compiler.h       |   2 +
 .../panfrost/midgard/midgard_compile.c        |  13 +-
 .../panfrost/midgard/midgard_compile.h        |  17 ++-
 .../drivers/panfrost/midgard/midgard_ra.c     | 116 ++++++++++--------
 .../panfrost/midgard/midgard_schedule.c       |   1 +
 src/gallium/drivers/panfrost/pan_assemble.c   |   6 +-
 .../drivers/panfrost/pan_blend_shaders.c      |   5 +-
 src/gallium/drivers/panfrost/pan_screen.c     |   1 +
 src/gallium/drivers/panfrost/pan_screen.h     |   2 +
 9 files changed, 104 insertions(+), 59 deletions(-)

diff --git a/src/gallium/drivers/panfrost/midgard/compiler.h b/src/gallium/drivers/panfrost/midgard/compiler.h
index 4c2202711b1a..7299bc5c1826 100644
--- a/src/gallium/drivers/panfrost/midgard/compiler.h
+++ b/src/gallium/drivers/panfrost/midgard/compiler.h
@@ -180,6 +180,7 @@ typedef struct midgard_bundle {
 } midgard_bundle;
 
 typedef struct compiler_context {
+        struct midgard_compiler *compiler;
         nir_shader *nir;
         gl_shader_stage stage;
 
@@ -425,6 +426,7 @@ void schedule_program(compiler_context *ctx);
 
 struct ra_graph;
 
+void init_regsets(struct midgard_compiler *compiler);
 struct ra_graph* allocate_registers(compiler_context *ctx);
 void install_registers(compiler_context *ctx, struct ra_graph *g);
 bool mir_is_live_after(compiler_context *ctx, midgard_block *block, midgard_instruction *start, int src);
diff --git a/src/gallium/drivers/panfrost/midgard/midgard_compile.c b/src/gallium/drivers/panfrost/midgard/midgard_compile.c
index 0c8032cc80bf..ce290a11cb4c 100644
--- a/src/gallium/drivers/panfrost/midgard/midgard_compile.c
+++ b/src/gallium/drivers/panfrost/midgard/midgard_compile.c
@@ -2442,13 +2442,15 @@ midgard_get_first_tag_from_block(compiler_context *ctx, unsigned block_idx)
 }
 
 int
-midgard_compile_shader_nir(nir_shader *nir, midgard_program *program, bool is_blend)
+midgard_compile_shader_nir(struct midgard_compiler *compiler, nir_shader *nir,
+                           midgard_program *program, bool is_blend)
 {
         struct util_dynarray *compiled = &program->compiled;
 
 	midgard_debug = debug_get_option_midgard_debug();
 
         compiler_context ictx = {
+                .compiler = compiler,
                 .nir = nir,
                 .stage = nir->info.stage,
 
@@ -2758,3 +2760,12 @@ midgard_compile_shader_nir(nir_shader *nir, midgard_program *program, bool is_bl
 
         return 0;
 }
+
+/* Allocate a compiler under ralloc parent ctx; returns NULL on OOM. */
+struct midgard_compiler *midgard_compiler_create(const void *ctx)
+{
+        struct midgard_compiler *compiler = rzalloc(ctx, struct midgard_compiler);
+        if (compiler)
+                init_regsets(compiler);
+        return compiler;
+}
diff --git a/src/gallium/drivers/panfrost/midgard/midgard_compile.h b/src/gallium/drivers/panfrost/midgard/midgard_compile.h
index 44c3533b6de3..f3b23381ad51 100644
--- a/src/gallium/drivers/panfrost/midgard/midgard_compile.h
+++ b/src/gallium/drivers/panfrost/midgard/midgard_compile.h
@@ -54,6 +54,16 @@ enum {
 #define PAN_SYSVAL_ID_TO_TXS_DIM(id)            (((id) >> 7) & 0x3)
 #define PAN_SYSVAL_ID_TO_TXS_IS_ARRAY(id)       !!((id) & (1 << 9))
 
+#define REGS_VARIANTS_COUNT 8
+
+struct midgard_compiler {
+        struct ra_regs *regs[REGS_VARIANTS_COUNT];
+        int work_vec1;
+        int work_vec2;
+        int work_vec3;
+        int work_vec4;
+};
+
 typedef struct {
         int work_register_count;
         int uniform_count;
@@ -87,8 +97,11 @@ typedef struct {
         float alpha_ref;
 } midgard_program;
 
-int
-midgard_compile_shader_nir(nir_shader *nir, midgard_program *program, bool is_blend);
+struct midgard_compiler *midgard_compiler_create(const void *ctx);
+
+int midgard_compile_shader_nir(struct midgard_compiler *compiler,
+                               nir_shader *nir, midgard_program *program,
+                               bool is_blend);
 
 /* NIR options are shared between the standalone compiler and the online
  * compiler. Defining it here is the simplest, though maybe not the Right
diff --git a/src/gallium/drivers/panfrost/midgard/midgard_ra.c b/src/gallium/drivers/panfrost/midgard/midgard_ra.c
index 7fd5e5b49e3f..60b17c832d3c 100644
--- a/src/gallium/drivers/panfrost/midgard/midgard_ra.c
+++ b/src/gallium/drivers/panfrost/midgard/midgard_ra.c
@@ -212,60 +212,6 @@ index_to_reg(compiler_context *ctx, struct ra_graph *g, int reg)
 struct ra_graph *
 allocate_registers(compiler_context *ctx)
 {
-        /* The number of vec4 work registers available depends on when the
-         * uniforms start, so compute that first */
-
-        int work_count = 16 - MAX2((ctx->uniform_cutoff - 8), 0);
-
-        int virtual_count = work_count * WORK_STRIDE;
-
-        /* First, initialize the RA */
-        struct ra_regs *regs = ra_alloc_reg_set(NULL, virtual_count, true);
-
-        int work_vec4 = ra_alloc_reg_class(regs);
-        int work_vec3 = ra_alloc_reg_class(regs);
-        int work_vec2 = ra_alloc_reg_class(regs);
-        int work_vec1 = ra_alloc_reg_class(regs);
-
-        unsigned classes[4] = {
-                work_vec1,
-                work_vec2,
-                work_vec3,
-                work_vec4
-        };
-
-        /* Add the full set of work registers */
-        for (unsigned i = 0; i < work_count; ++i) {
-                int base = WORK_STRIDE * i;
-
-                /* Build a full set of subdivisions */
-                ra_class_add_reg(regs, work_vec4, base);
-                ra_class_add_reg(regs, work_vec3, base + 1);
-                ra_class_add_reg(regs, work_vec3, base + 2);
-                ra_class_add_reg(regs, work_vec2, base + 3);
-                ra_class_add_reg(regs, work_vec2, base + 4);
-                ra_class_add_reg(regs, work_vec2, base + 5);
-                ra_class_add_reg(regs, work_vec1, base + 6);
-                ra_class_add_reg(regs, work_vec1, base + 7);
-                ra_class_add_reg(regs, work_vec1, base + 8);
-                ra_class_add_reg(regs, work_vec1, base + 9);
-
-                for (unsigned a = 0; a < 10; ++a) {
-                        unsigned mask1 = reg_type_to_mask[a];
-
-                        for (unsigned b = 0; b < 10; ++b) {
-                                unsigned mask2 = reg_type_to_mask[b];
-
-                                if (mask1 & mask2)
-                                        ra_add_reg_conflict(regs,
-                                                        base + a, base + b);
-                        }
-                }
-        }
-
-        /* We're done setting up */
-        ra_set_finalize(regs, NULL);
-
         /* Transform the MIR into squeezed index form */
         mir_foreach_block(ctx, block) {
                 mir_foreach_instr_in_block(block, ins) {
@@ -286,12 +232,21 @@ allocate_registers(compiler_context *ctx)
                 return NULL;
 
         /* Let's actually do register allocation */
+        unsigned regset_variant = MAX2((ctx->uniform_cutoff - 8), 0);
+        assert(regset_variant < REGS_VARIANTS_COUNT); /* indexes regs[] */
+        struct ra_regs *regs = ctx->compiler->regs[regset_variant];
         int nodes = ctx->temp_count;
         struct ra_graph *g = ra_alloc_interference_graph(regs, nodes);
 
         /* Determine minimum size needed to hold values, to indirectly
          * determine class */
 
+        unsigned classes[4] = {
+                ctx->compiler->work_vec1,
+                ctx->compiler->work_vec2,
+                ctx->compiler->work_vec3,
+                ctx->compiler->work_vec4
+        };
         unsigned *found_class = calloc(sizeof(unsigned), ctx->temp_count);
 
         mir_foreach_block(ctx, block) {
@@ -520,3 +475,56 @@ install_registers(compiler_context *ctx, struct ra_graph *g)
         }
 
 }
+
+/* Build one finalized register set per variant, up front: regs[variant]
+ * holds 16 - variant vec4 work registers and their sub-vector aliases. */
+void init_regsets(struct midgard_compiler *compiler)
+{
+        for (int variant = 0; variant < REGS_VARIANTS_COUNT; variant++) {
+                /* The number of vec4 work registers available depends on how
+                 * many uniforms there are, so compute that first */
+                int work_count = 16 - variant;
+                int virtual_count = work_count * WORK_STRIDE;
+
+                struct ra_regs *regs = ra_alloc_reg_set(compiler, virtual_count, true);
+
+                compiler->work_vec4 = ra_alloc_reg_class(regs);
+                compiler->work_vec3 = ra_alloc_reg_class(regs);
+                compiler->work_vec2 = ra_alloc_reg_class(regs);
+                compiler->work_vec1 = ra_alloc_reg_class(regs);
+
+                /* Add the full set of work registers */
+                for (int i = 0; i < work_count; ++i) {
+                        int base = WORK_STRIDE * i;
+
+                        /* Build a full set of subdivisions */
+                        ra_class_add_reg(regs, compiler->work_vec4, base);
+                        ra_class_add_reg(regs, compiler->work_vec3, base + 1);
+                        ra_class_add_reg(regs, compiler->work_vec3, base + 2);
+                        ra_class_add_reg(regs, compiler->work_vec2, base + 3);
+                        ra_class_add_reg(regs, compiler->work_vec2, base + 4);
+                        ra_class_add_reg(regs, compiler->work_vec2, base + 5);
+                        ra_class_add_reg(regs, compiler->work_vec1, base + 6);
+                        ra_class_add_reg(regs, compiler->work_vec1, base + 7);
+                        ra_class_add_reg(regs, compiler->work_vec1, base + 8);
+                        ra_class_add_reg(regs, compiler->work_vec1, base + 9);
+
+                        for (unsigned a = 0; a < 10; ++a) {
+                                unsigned mask1 = reg_type_to_mask[a];
+
+                                for (unsigned b = 0; b < 10; ++b) {
+                                        unsigned mask2 = reg_type_to_mask[b];
+
+                                        if (mask1 & mask2)
+                                                ra_add_reg_conflict(regs,
+                                                                base + a, base + b);
+                                }
+                        }
+                }
+
+                /* We're done setting up */
+                ra_set_finalize(regs, NULL);
+
+                compiler->regs[variant] = regs;
+        }
+}
diff --git a/src/gallium/drivers/panfrost/midgard/midgard_schedule.c b/src/gallium/drivers/panfrost/midgard/midgard_schedule.c
index 0bf3502f41c4..4c5a7e1d3045 100644
--- a/src/gallium/drivers/panfrost/midgard/midgard_schedule.c
+++ b/src/gallium/drivers/panfrost/midgard/midgard_schedule.c
@@ -440,4 +440,5 @@ schedule_program(compiler_context *ctx)
 
         struct ra_graph *g = allocate_registers(ctx);
         install_registers(ctx, g);
+        ralloc_free(g);
 }
diff --git a/src/gallium/drivers/panfrost/pan_assemble.c b/src/gallium/drivers/panfrost/pan_assemble.c
index de8a53ce05d7..e8c0ee8d2ffc 100644
--- a/src/gallium/drivers/panfrost/pan_assemble.c
+++ b/src/gallium/drivers/panfrost/pan_assemble.c
@@ -37,6 +37,8 @@
 void
 panfrost_shader_compile(struct panfrost_context *ctx, struct mali_shader_meta *meta, const char *src, int type, struct panfrost_shader_state *state)
 {
+        struct pipe_context *pctx = (struct pipe_context *) ctx;
+        struct panfrost_screen *screen = pan_screen(pctx->screen);
         uint8_t *dst;
 
         nir_shader *s;
@@ -67,7 +69,7 @@ panfrost_shader_compile(struct panfrost_context *ctx, struct mali_shader_meta *m
                 .alpha_ref = state->alpha_state.ref_value
         };
 
-        midgard_compile_shader_nir(s, &program, false);
+        midgard_compile_shader_nir(screen->compiler, s, &program, false);
 
         /* Prepare the compiled binary for upload */
         int size = program.compiled.size;
@@ -139,4 +141,6 @@ panfrost_shader_compile(struct panfrost_context *ctx, struct mali_shader_meta *m
                 state->varyings[i] = v;
                 state->varyings_loc[i] = location;
         }
+
+        ralloc_free(s);
 }
diff --git a/src/gallium/drivers/panfrost/pan_blend_shaders.c b/src/gallium/drivers/panfrost/pan_blend_shaders.c
index b458f42d720c..f3e0e961e1a5 100644
--- a/src/gallium/drivers/panfrost/pan_blend_shaders.c
+++ b/src/gallium/drivers/panfrost/pan_blend_shaders.c
@@ -117,6 +117,9 @@ nir_make_options(const struct pipe_blend_state *blend, unsigned nr_cbufs)
 void
 panfrost_make_blend_shader(struct panfrost_context *ctx, struct panfrost_blend_state *cso, const struct pipe_blend_color *blend_color)
 {
+        struct pipe_context *pctx = (struct pipe_context *) ctx;
+        struct panfrost_screen *screen = pan_screen(pctx->screen);
+
         /* Build the shader */
 
         nir_shader *shader = nir_shader_create(NULL, MESA_SHADER_FRAGMENT, &midgard_nir_options, NULL);
@@ -152,7 +155,7 @@ panfrost_make_blend_shader(struct panfrost_context *ctx, struct panfrost_blend_s
         /* Compile the built shader */
 
         midgard_program program;
-        midgard_compile_shader_nir(shader, &program, true);
+        midgard_compile_shader_nir(screen->compiler, shader, &program, true);
 
         /* Upload the shader */
 
diff --git a/src/gallium/drivers/panfrost/pan_screen.c b/src/gallium/drivers/panfrost/pan_screen.c
index 5d3acc0a0dd5..cf1842ae1892 100644
--- a/src/gallium/drivers/panfrost/pan_screen.c
+++ b/src/gallium/drivers/panfrost/pan_screen.c
@@ -612,6 +612,7 @@ panfrost_create_screen(int fd, struct renderonly *ro)
         screen->base.fence_reference = panfrost_fence_reference;
         screen->base.fence_finish = panfrost_fence_finish;
 
+	screen->compiler = midgard_compiler_create(screen);
 	screen->last_fragment_flushed = true;
         screen->last_job = NULL;
 
diff --git a/src/gallium/drivers/panfrost/pan_screen.h b/src/gallium/drivers/panfrost/pan_screen.h
index 0660be5128c7..c1e4c90eb7fd 100644
--- a/src/gallium/drivers/panfrost/pan_screen.h
+++ b/src/gallium/drivers/panfrost/pan_screen.h
@@ -96,6 +96,8 @@ struct panfrost_screen {
          * yesterjob */
 	int last_fragment_flushed;
         struct panfrost_job *last_job;
+
+        struct midgard_compiler *compiler;
 };
 
 static inline struct panfrost_screen *
-- 
2.20.1