Mesa (master): ir3: make it possible to specify branchstack up to 64

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Wed Apr 21 12:21:32 UTC 2021


Module: Mesa
Branch: master
Commit: 9402d5a6b5755035cd28f2a1754ef50fadca3007
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=9402d5a6b5755035cd28f2a1754ef50fadca3007

Author: Danylo Piliaiev <dpiliaiev at igalia.com>
Date:   Fri Mar 26 16:39:12 2021 +0200

ir3: make it possible to specify branchstack up to 64

On a6xx/a5xx, the blob driver shows the following dependency between
the branchstack bitfield and the number of nested ifs:

IFs   BRANCHSTACK
0	0
1	1
2	2
3	2
4	3
5	3
6	4
...
59	30
60	31
61	31
62	32
63	32
64	32

Remove the hard-coded branchstack value (0x3) for a5xx compute along the way.

Fixes tests:
 dEQP-VK.spirv_assembly.instruction.compute.float16.opvectorshuffle.344
 dEQP-VK.spirv_assembly.instruction.graphics.float16.opvectorshuffle.344_vert
 dEQP-VK.spirv_assembly.instruction.graphics.float16.opvectorshuffle.444_geom
 dEQP-VK.spirv_assembly.instruction.graphics.float16.opvectorshuffle.244_tessc
 dEQP-VK.spirv_assembly.instruction.graphics.float16.opvectorshuffle.344_frag

Signed-off-by: Danylo Piliaiev <dpiliaiev at igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9859>

---

 src/freedreno/ci/deqp-freedreno-a630-fails.txt   |  5 -----
 src/freedreno/computerator/a6xx.c                |  2 +-
 src/freedreno/ir3/ir3_shader.h                   | 18 ++++++++++++++++++
 src/freedreno/vulkan/tu_clear_blit.c             |  4 +++-
 src/freedreno/vulkan/tu_pipeline.c               | 12 ++++++------
 src/gallium/drivers/freedreno/a5xx/fd5_compute.c |  3 +--
 src/gallium/drivers/freedreno/a5xx/fd5_program.c | 15 ++++++++-------
 src/gallium/drivers/freedreno/a6xx/fd6_compute.c |  2 +-
 src/gallium/drivers/freedreno/a6xx/fd6_program.c | 19 ++++++++++---------
 9 files changed, 48 insertions(+), 32 deletions(-)

diff --git a/src/freedreno/ci/deqp-freedreno-a630-fails.txt b/src/freedreno/ci/deqp-freedreno-a630-fails.txt
index 4a1e2fe698a..4ac5c109d29 100644
--- a/src/freedreno/ci/deqp-freedreno-a630-fails.txt
+++ b/src/freedreno/ci/deqp-freedreno-a630-fails.txt
@@ -141,7 +141,6 @@ dEQP-VK.renderpass.suballocation.attachment_allocation.input_output.7,Fail
 dEQP-VK.renderpass.suballocation.subpass_dependencies.implicit_dependencies.render_passes_5,Fail
 dEQP-VK.renderpass2.dedicated_allocation.attachment_allocation.input_output.7,Fail
 dEQP-VK.renderpass2.suballocation.attachment_allocation.input_output.7,Fail
-dEQP-VK.spirv_assembly.instruction.compute.float16.opvectorshuffle.344,Crash
 dEQP-VK.spirv_assembly.instruction.compute.float_controls.fp32.input_args.denorm_clamp_denorm_preserve,Fail
 dEQP-VK.spirv_assembly.instruction.compute.float_controls.fp32.input_args.rounding_rtz_dot,Fail
 dEQP-VK.spirv_assembly.instruction.compute.float_controls.fp32.input_args.rounding_rtz_mmulm,Fail
@@ -151,10 +150,6 @@ dEQP-VK.spirv_assembly.instruction.compute.float_controls.fp32.input_args.roundi
 dEQP-VK.spirv_assembly.instruction.compute.float_controls.fp32.input_args.sign_denorm_preserve,Fail
 dEQP-VK.spirv_assembly.instruction.compute.opcopymemory.array,Fail
 dEQP-VK.spirv_assembly.instruction.compute.opquantize.infinities,Fail
-dEQP-VK.spirv_assembly.instruction.graphics.float16.opvectorshuffle.244_tessc,Crash
-dEQP-VK.spirv_assembly.instruction.graphics.float16.opvectorshuffle.344_frag,Crash
-dEQP-VK.spirv_assembly.instruction.graphics.float16.opvectorshuffle.344_vert,Crash
-dEQP-VK.spirv_assembly.instruction.graphics.float16.opvectorshuffle.444_geom,Crash
 dEQP-VK.spirv_assembly.instruction.graphics.float_controls.fp32.input_args.abs_denorm_preserve_frag,Fail
 dEQP-VK.spirv_assembly.instruction.graphics.float_controls.fp32.input_args.abs_denorm_preserve_vert,Fail
 dEQP-VK.spirv_assembly.instruction.graphics.float_controls.fp32.input_args.denorm_nclamp_denorm_preserve_frag,Fail
diff --git a/src/freedreno/computerator/a6xx.c b/src/freedreno/computerator/a6xx.c
index ffd7f74917b..e31745ac427 100644
--- a/src/freedreno/computerator/a6xx.c
+++ b/src/freedreno/computerator/a6xx.c
@@ -148,7 +148,7 @@ cs_program_emit(struct fd_ringbuffer *ring, struct kernel *kernel)
                A6XX_SP_CS_CTRL_REG0_FULLREGFOOTPRINT(i->max_reg + 1) |
                A6XX_SP_CS_CTRL_REG0_HALFREGFOOTPRINT(i->max_half_reg + 1) |
                COND(v->mergedregs, A6XX_SP_CS_CTRL_REG0_MERGEDREGS) |
-               A6XX_SP_CS_CTRL_REG0_BRANCHSTACK(v->branchstack));
+               A6XX_SP_CS_CTRL_REG0_BRANCHSTACK(ir3_shader_branchstack_hw(v)));
 
    OUT_PKT4(ring, REG_A6XX_SP_CS_UNKNOWN_A9B1, 1);
    OUT_RING(ring, 0x41);
diff --git a/src/freedreno/ir3/ir3_shader.h b/src/freedreno/ir3/ir3_shader.h
index a92917f4a36..0708b41ad7e 100644
--- a/src/freedreno/ir3/ir3_shader.h
+++ b/src/freedreno/ir3/ir3_shader.h
@@ -1004,4 +1004,22 @@ ir3_shader_nibo(const struct ir3_shader_variant *v)
 	return v->shader->nir->info.num_ssbos + v->shader->nir->info.num_images;
 }
 
+static inline uint32_t
+ir3_shader_branchstack_hw(const struct ir3_shader_variant *v)
+{
+	/* Dummy shader */
+	if (!v->shader)
+		return 0;
+
+	if (v->shader->compiler->gpu_id < 500)
+		return v->branchstack;
+
+	if (v->branchstack > 0) {
+		uint32_t branchstack = v->branchstack / 2 + 1;
+		return MIN2(branchstack, v->shader->compiler->branchstack_size / 2);
+	} else {
+		return 0;
+	}
+}
+
 #endif /* IR3_SHADER_H_ */
diff --git a/src/freedreno/vulkan/tu_clear_blit.c b/src/freedreno/vulkan/tu_clear_blit.c
index fb97f36d65a..d1593cbd236 100644
--- a/src/freedreno/vulkan/tu_clear_blit.c
+++ b/src/freedreno/vulkan/tu_clear_blit.c
@@ -380,7 +380,9 @@ r3d_common(struct tu_cmd_buffer *cmd, struct tu_cs *cs, bool blit, uint32_t num_
            bool layered_clear, bool z_scale)
 {
    struct ir3_const_state dummy_const_state = {};
-   struct ir3_shader dummy_shader = {};
+   struct ir3_shader dummy_shader = {
+      .compiler = cmd->device->compiler,
+   };
 
    struct ir3_shader_variant vs = {
       .type = MESA_SHADER_VERTEX,
diff --git a/src/freedreno/vulkan/tu_pipeline.c b/src/freedreno/vulkan/tu_pipeline.c
index e866f8d7e00..17f54ca3fa8 100644
--- a/src/freedreno/vulkan/tu_pipeline.c
+++ b/src/freedreno/vulkan/tu_pipeline.c
@@ -399,7 +399,7 @@ tu6_emit_xs_config(struct tu_cs *cs,
       tu_cs_emit_regs(cs, A6XX_SP_VS_CTRL_REG0(
                .fullregfootprint = xs->info.max_reg + 1,
                .halfregfootprint = xs->info.max_half_reg + 1,
-               .branchstack = xs->branchstack,
+               .branchstack = ir3_shader_branchstack_hw(xs),
                .mergedregs = xs->mergedregs,
       ));
       break;
@@ -407,14 +407,14 @@ tu6_emit_xs_config(struct tu_cs *cs,
       tu_cs_emit_regs(cs, A6XX_SP_HS_CTRL_REG0(
                .fullregfootprint = xs->info.max_reg + 1,
                .halfregfootprint = xs->info.max_half_reg + 1,
-               .branchstack = xs->branchstack,
+               .branchstack = ir3_shader_branchstack_hw(xs),
       ));
       break;
    case MESA_SHADER_TESS_EVAL:
       tu_cs_emit_regs(cs, A6XX_SP_DS_CTRL_REG0(
                .fullregfootprint = xs->info.max_reg + 1,
                .halfregfootprint = xs->info.max_half_reg + 1,
-               .branchstack = xs->branchstack,
+               .branchstack = ir3_shader_branchstack_hw(xs),
                .mergedregs = xs->mergedregs,
       ));
       break;
@@ -422,14 +422,14 @@ tu6_emit_xs_config(struct tu_cs *cs,
       tu_cs_emit_regs(cs, A6XX_SP_GS_CTRL_REG0(
                .fullregfootprint = xs->info.max_reg + 1,
                .halfregfootprint = xs->info.max_half_reg + 1,
-               .branchstack = xs->branchstack,
+               .branchstack = ir3_shader_branchstack_hw(xs),
       ));
       break;
    case MESA_SHADER_FRAGMENT:
       tu_cs_emit_regs(cs, A6XX_SP_FS_CTRL_REG0(
                .fullregfootprint = xs->info.max_reg + 1,
                .halfregfootprint = xs->info.max_half_reg + 1,
-               .branchstack = xs->branchstack,
+               .branchstack = ir3_shader_branchstack_hw(xs),
                .mergedregs = xs->mergedregs,
                .threadsize = thrsz,
                .pixlodenable = xs->need_pixlod,
@@ -443,7 +443,7 @@ tu6_emit_xs_config(struct tu_cs *cs,
       tu_cs_emit_regs(cs, A6XX_SP_CS_CTRL_REG0(
                .fullregfootprint = xs->info.max_reg + 1,
                .halfregfootprint = xs->info.max_half_reg + 1,
-               .branchstack = xs->branchstack,
+               .branchstack = ir3_shader_branchstack_hw(xs),
                .mergedregs = xs->mergedregs,
                .threadsize = thrsz,
       ));
diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_compute.c b/src/gallium/drivers/freedreno/a5xx/fd5_compute.c
index 668da2e320c..c08c3b67723 100644
--- a/src/gallium/drivers/freedreno/a5xx/fd5_compute.c
+++ b/src/gallium/drivers/freedreno/a5xx/fd5_compute.c
@@ -59,8 +59,7 @@ cs_program_emit(struct fd_ringbuffer *ring, struct ir3_shader_variant *v)
             A5XX_SP_CS_CTRL_REG0_THREADSIZE(thrsz) |
                A5XX_SP_CS_CTRL_REG0_HALFREGFOOTPRINT(i->max_half_reg + 1) |
                A5XX_SP_CS_CTRL_REG0_FULLREGFOOTPRINT(i->max_reg + 1) |
-               A5XX_SP_CS_CTRL_REG0_BRANCHSTACK(
-                  0x3) | // XXX need to figure this out somehow..
+               A5XX_SP_CS_CTRL_REG0_BRANCHSTACK(ir3_shader_branchstack_hw(v)) |
                0x6 /* XXX */);
 
    OUT_PKT4(ring, REG_A5XX_HLSQ_CS_CONFIG, 1);
diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_program.c b/src/gallium/drivers/freedreno/a5xx/fd5_program.c
index 06b41bae097..072d432db6b 100644
--- a/src/gallium/drivers/freedreno/a5xx/fd5_program.c
+++ b/src/gallium/drivers/freedreno/a5xx/fd5_program.c
@@ -374,12 +374,13 @@ fd5_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring,
    OUT_RING(ring, 0x00000000); /* HLSQ_CS_INSTRLEN */
 
    OUT_PKT4(ring, REG_A5XX_SP_VS_CTRL_REG0, 1);
-   OUT_RING(ring,
-            A5XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(s[VS].i->max_half_reg + 1) |
-               A5XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(s[VS].i->max_reg + 1) |
-               0x6 | /* XXX seems to be always set? */
-               A5XX_SP_VS_CTRL_REG0_BRANCHSTACK(s[VS].v->branchstack) |
-               COND(s[VS].v->need_pixlod, A5XX_SP_VS_CTRL_REG0_PIXLODENABLE));
+   OUT_RING(
+      ring,
+      A5XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(s[VS].i->max_half_reg + 1) |
+         A5XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(s[VS].i->max_reg + 1) |
+         0x6 | /* XXX seems to be always set? */
+         A5XX_SP_VS_CTRL_REG0_BRANCHSTACK(ir3_shader_branchstack_hw(s[VS].v)) |
+         COND(s[VS].v->need_pixlod, A5XX_SP_VS_CTRL_REG0_PIXLODENABLE));
 
    /* If we have streamout, link against the real FS in the binning program,
     * rather than the dummy FS used for binning pass state, to ensure the
@@ -529,7 +530,7 @@ fd5_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring,
          A5XX_SP_FS_CTRL_REG0_THREADSIZE(fssz) |
          A5XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(s[FS].i->max_half_reg + 1) |
          A5XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(s[FS].i->max_reg + 1) |
-         A5XX_SP_FS_CTRL_REG0_BRANCHSTACK(s[FS].v->branchstack) |
+         A5XX_SP_FS_CTRL_REG0_BRANCHSTACK(ir3_shader_branchstack_hw(s[FS].v)) |
          COND(s[FS].v->need_pixlod, A5XX_SP_FS_CTRL_REG0_PIXLODENABLE));
 
    OUT_PKT4(ring, REG_A5XX_HLSQ_UPDATE_CNTL, 1);
diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_compute.c b/src/gallium/drivers/freedreno/a6xx/fd6_compute.c
index 688902e13f0..2cfe891def8 100644
--- a/src/gallium/drivers/freedreno/a6xx/fd6_compute.c
+++ b/src/gallium/drivers/freedreno/a6xx/fd6_compute.c
@@ -68,7 +68,7 @@ cs_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring,
                A6XX_SP_CS_CTRL_REG0_FULLREGFOOTPRINT(i->max_reg + 1) |
                A6XX_SP_CS_CTRL_REG0_HALFREGFOOTPRINT(i->max_half_reg + 1) |
                COND(v->mergedregs, A6XX_SP_CS_CTRL_REG0_MERGEDREGS) |
-               A6XX_SP_CS_CTRL_REG0_BRANCHSTACK(v->branchstack));
+               A6XX_SP_CS_CTRL_REG0_BRANCHSTACK(ir3_shader_branchstack_hw(v)));
 
    uint32_t shared_size = MAX2(((int)v->shared_size - 1) / 1024, 1);
    OUT_PKT4(ring, REG_A6XX_SP_CS_UNKNOWN_A9B1, 1);
diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_program.c b/src/gallium/drivers/freedreno/a6xx/fd6_program.c
index d1b4e313169..97a000be9b0 100644
--- a/src/gallium/drivers/freedreno/a6xx/fd6_program.c
+++ b/src/gallium/drivers/freedreno/a6xx/fd6_program.c
@@ -451,11 +451,12 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_context *ctx,
                     A6XX_SP_FS_OUTPUT_CNTL0_DUAL_COLOR_IN_ENABLE));
 
    OUT_PKT4(ring, REG_A6XX_SP_VS_CTRL_REG0, 1);
-   OUT_RING(ring, A6XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(vs->info.max_reg + 1) |
-                     A6XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(
-                        vs->info.max_half_reg + 1) |
-                     COND(vs->mergedregs, A6XX_SP_VS_CTRL_REG0_MERGEDREGS) |
-                     A6XX_SP_VS_CTRL_REG0_BRANCHSTACK(vs->branchstack));
+   OUT_RING(
+      ring,
+      A6XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(vs->info.max_reg + 1) |
+         A6XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(vs->info.max_half_reg + 1) |
+         COND(vs->mergedregs, A6XX_SP_VS_CTRL_REG0_MERGEDREGS) |
+         A6XX_SP_VS_CTRL_REG0_BRANCHSTACK(ir3_shader_branchstack_hw(vs)));
 
    fd6_emit_shader(ctx, ring, vs);
    fd6_emit_immediates(ctx->screen, vs, ring);
@@ -578,7 +579,7 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_context *ctx,
          ring,
          A6XX_SP_HS_CTRL_REG0_FULLREGFOOTPRINT(hs->info.max_reg + 1) |
             A6XX_SP_HS_CTRL_REG0_HALFREGFOOTPRINT(hs->info.max_half_reg + 1) |
-            A6XX_SP_HS_CTRL_REG0_BRANCHSTACK(hs->branchstack));
+            A6XX_SP_HS_CTRL_REG0_BRANCHSTACK(ir3_shader_branchstack_hw(hs)));
 
       fd6_emit_shader(ctx, ring, hs);
       fd6_emit_immediates(ctx->screen, hs, ring);
@@ -590,7 +591,7 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_context *ctx,
          A6XX_SP_DS_CTRL_REG0_FULLREGFOOTPRINT(ds->info.max_reg + 1) |
             A6XX_SP_DS_CTRL_REG0_HALFREGFOOTPRINT(ds->info.max_half_reg + 1) |
             COND(ds->mergedregs, A6XX_SP_DS_CTRL_REG0_MERGEDREGS) |
-            A6XX_SP_DS_CTRL_REG0_BRANCHSTACK(ds->branchstack));
+            A6XX_SP_DS_CTRL_REG0_BRANCHSTACK(ir3_shader_branchstack_hw(ds)));
 
       fd6_emit_shader(ctx, ring, ds);
       fd6_emit_immediates(ctx->screen, ds, ring);
@@ -732,7 +733,7 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_context *ctx,
          A6XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(fs->info.max_reg + 1) |
          A6XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(fs->info.max_half_reg + 1) |
          COND(fs->mergedregs, A6XX_SP_FS_CTRL_REG0_MERGEDREGS) |
-         A6XX_SP_FS_CTRL_REG0_BRANCHSTACK(fs->branchstack) |
+         A6XX_SP_FS_CTRL_REG0_BRANCHSTACK(ir3_shader_branchstack_hw(fs)) |
          COND(fs->need_pixlod, A6XX_SP_FS_CTRL_REG0_PIXLODENABLE));
 
    OUT_PKT4(ring, REG_A6XX_VPC_VS_LAYER_CNTL, 1);
@@ -821,7 +822,7 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_context *ctx,
          ring,
          A6XX_SP_GS_CTRL_REG0_FULLREGFOOTPRINT(gs->info.max_reg + 1) |
             A6XX_SP_GS_CTRL_REG0_HALFREGFOOTPRINT(gs->info.max_half_reg + 1) |
-            A6XX_SP_GS_CTRL_REG0_BRANCHSTACK(gs->branchstack));
+            A6XX_SP_GS_CTRL_REG0_BRANCHSTACK(ir3_shader_branchstack_hw(gs)));
 
       fd6_emit_shader(ctx, ring, gs);
       fd6_emit_immediates(ctx->screen, gs, ring);



More information about the mesa-commit mailing list