Mesa (master): turnip,freedreno/a6xx: tell hw the size of shared mem used by CS
GitLab Mirror
gitlab-mirror at kemper.freedesktop.org
Fri Feb 19 18:48:06 UTC 2021
Module: Mesa
Branch: master
Commit: 0fa7ec14734a640858e7f4047ffab78f71272ece
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=0fa7ec14734a640858e7f4047ffab78f71272ece
Author: Danylo Piliaiev <dpiliaiev at igalia.com>
Date: Mon Feb 15 13:14:56 2021 +0200
turnip,freedreno/a6xx: tell hw the size of shared mem used by CS
Before this change, compute shaders could only use 2k of shared memory.
It was found that the 5 lower bits of SP_CS_UNKNOWN_A9B1 control
the available size of shared memory for compute shaders, with
AVAILABLE_SIZE = (SP_CS_UNKNOWN_A9B1_SHARED_SIZE + 1) * 1k,
up to 32k. Setting SP_CS_UNKNOWN_A9B1_SHARED_SIZE to zero enables
all 32k of shared memory.
Fixes tests:
dEQP-VK.rasterization.line_continuity.line-strip
dEQP-VK.memory_model.message_passing.core11.u32.coherent.fence_fence.atomicwrite.workgroup.payload_local.buffer.guard_nonlocal.workgroup.comp
dEQP-VK.memory_model.message_passing.core11.u32.coherent.fence_fence.atomicwrite.workgroup.payload_nonlocal.workgroup.guard_local.buffer.comp
dEQP-VK.memory_model.write_after_read.core11.u32.coherent.fence_fence.atomicwrite.workgroup.payload_local.image.guard_nonlocal.workgroup.comp
Signed-off-by: Danylo Piliaiev <dpiliaiev at igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9157>
---
ci-expects/freedreno/deqp-freedreno-a630-fails.txt | 4 ----
src/freedreno/.gitlab-ci/reference/crash.log | 4 ++--
src/freedreno/ir3/ir3_compiler_nir.c | 1 +
src/freedreno/ir3/ir3_shader.h | 3 +++
src/freedreno/registers/adreno/a6xx.xml | 25 +++++++++++++---------
src/freedreno/vulkan/tu_pipeline.c | 4 +++-
src/gallium/drivers/freedreno/a6xx/fd6_compute.c | 4 +++-
7 files changed, 27 insertions(+), 18 deletions(-)
diff --git a/ci-expects/freedreno/deqp-freedreno-a630-fails.txt b/ci-expects/freedreno/deqp-freedreno-a630-fails.txt
index eb6f905a3ca..f4156ff565c 100644
--- a/ci-expects/freedreno/deqp-freedreno-a630-fails.txt
+++ b/ci-expects/freedreno/deqp-freedreno-a630-fails.txt
@@ -49,9 +49,6 @@ dEQP-VK.image.subresource_layout.3d.all_levels.a8b8g8r8_snorm_pack32,Fail
dEQP-VK.image.subresource_layout.3d.all_levels.r16g16b16a16_snorm,Fail
dEQP-VK.memory_model.message_passing.core11.u32.coherent.fence_fence.atomicwrite.device.payload_local.image.guard_nonlocal.workgroup.comp,Fail
dEQP-VK.memory_model.message_passing.core11.u32.coherent.fence_fence.atomicwrite.device.payload_nonlocal.workgroup.guard_local.image.comp,Fail
-dEQP-VK.memory_model.message_passing.core11.u32.coherent.fence_fence.atomicwrite.workgroup.payload_local.buffer.guard_nonlocal.workgroup.comp,Fail
-dEQP-VK.memory_model.message_passing.core11.u32.coherent.fence_fence.atomicwrite.workgroup.payload_nonlocal.workgroup.guard_local.buffer.comp,Fail
-dEQP-VK.memory_model.write_after_read.core11.u32.coherent.fence_fence.atomicwrite.workgroup.payload_local.image.guard_nonlocal.workgroup.comp,Fail
dEQP-VK.memory.requirements.dedicated_allocation.buffer.regular,Fail
dEQP-VK.memory.requirements.dedicated_allocation.image.transient_tiling_optimal,Fail
dEQP-VK.pipeline.extended_dynamic_state.after_pipelines.depth_compare_greater_equal_greater,Fail
@@ -66,7 +63,6 @@ dEQP-VK.pipeline.push_descriptor.compute.binding3_numcalls2_combined_image_sampl
dEQP-VK.pipeline.push_descriptor.compute.binding3_numcalls2_sampled_image,Crash
dEQP-VK.pipeline.push_descriptor.compute.binding3_numcalls2_sampler,Crash
dEQP-VK.pipeline.push_descriptor.compute.binding3_numcalls2_storage_image,Crash
-dEQP-VK.rasterization.line_continuity.line-strip,Fail
dEQP-VK.renderpass2.suballocation.attachment_allocation.input_output.7,Fail
dEQP-VK.spirv_assembly.instruction.compute.opquantize.infinities,Fail
dEQP-VK.spirv_assembly.instruction.graphics.opquantize.carry_bit_geom,Fail
diff --git a/src/freedreno/.gitlab-ci/reference/crash.log b/src/freedreno/.gitlab-ci/reference/crash.log
index 3ca89220612..8389724b5bd 100644
--- a/src/freedreno/.gitlab-ci/reference/crash.log
+++ b/src/freedreno/.gitlab-ci/reference/crash.log
@@ -7412,7 +7412,7 @@ clusters:
00000080 SP_FS_TEX_COUNT: 128
0000f000 SP_UNKNOWN_A9A8: 0xf000
00421800 SP_CS_CTRL_REG0: { THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 48 | BRANCHSTACK = 8 | THREADSIZE = THREAD64 | VARYING }
- 0000001f SP_CS_UNKNOWN_A9B1: { SHARED_SIZE_2K | UNK1 = 0xf }
+ 0000001f SP_CS_UNKNOWN_A9B1: { SHARED_SIZE = 31 }
00000000 SP_CS_BRANCH_COND: 0
00000000 SP_CS_OBJ_FIRST_EXEC_OFFSET: 0
8c415420 SP_CS_OBJ_START: 0x8c415420
@@ -7494,7 +7494,7 @@ clusters:
00000080 SP_FS_TEX_COUNT: 128
0000f000 SP_UNKNOWN_A9A8: 0xf000
00421800 SP_CS_CTRL_REG0: { THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 48 | BRANCHSTACK = 8 | THREADSIZE = THREAD64 | VARYING }
- 0000001f SP_CS_UNKNOWN_A9B1: { SHARED_SIZE_2K | UNK1 = 0xf }
+ 0000001f SP_CS_UNKNOWN_A9B1: { SHARED_SIZE = 31 }
00000000 SP_CS_BRANCH_COND: 0
00000000 SP_CS_OBJ_FIRST_EXEC_OFFSET: 0
8c415420 SP_CS_OBJ_START: 0x8c415420
diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c
index 0694bd86018..7c06bf34553 100644
--- a/src/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/freedreno/ir3/ir3_compiler_nir.c
@@ -3413,6 +3413,7 @@ emit_instructions(struct ir3_context *ctx)
ctx->s->info.clip_distance_array_size;
ctx->so->pvtmem_size = ctx->s->scratch_size;
+ ctx->so->shared_size = ctx->s->shared_size;
/* NOTE: need to do something more clever when we support >1 fxn */
nir_foreach_register (reg, &fxn->registers) {
diff --git a/src/freedreno/ir3/ir3_shader.h b/src/freedreno/ir3/ir3_shader.h
index d28567b0a60..5d2aaab1550 100644
--- a/src/freedreno/ir3/ir3_shader.h
+++ b/src/freedreno/ir3/ir3_shader.h
@@ -559,6 +559,9 @@ struct ir3_shader_variant {
/* Whether we should use the new per-wave layout rather than per-fiber. */
bool pvtmem_per_wave;
+ /* Size in bytes of required shared memory */
+ unsigned shared_size;
+
/* About Linkage:
* + Let the frag shader determine the position/compmask for the
* varyings, since it is the place where we know if the varying
diff --git a/src/freedreno/registers/adreno/a6xx.xml b/src/freedreno/registers/adreno/a6xx.xml
index 6a50378ad2b..f1bac710944 100644
--- a/src/freedreno/registers/adreno/a6xx.xml
+++ b/src/freedreno/registers/adreno/a6xx.xml
@@ -3059,17 +3059,22 @@ to upconvert to 32b float internally?
<reg32 offset="0xa9b0" name="SP_CS_CTRL_REG0" type="a6xx_sp_xs_ctrl_reg0"/>
- <!-- set for compute shaders, always 0x41 -->
+
+ <!-- set for compute shaders -->
<reg32 offset="0xa9b1" name="SP_CS_UNKNOWN_A9B1">
- <doc>
- bit 0 seems to toggle between 2k and 32k of shared storage
- the ldl/stl offset seems to be rewritten to 0 when it is beyond
- this limit. This is different from ldlw/stlw, which wraps at
- 64k (and has 36k of storage on A640 - reads between 36k-64k
- always return 0)
- </doc>
- <bitfield name="SHARED_SIZE_2K" pos="0" type="boolean"/>
- <bitfield name="UNK1" low="1" high="6" type="uint"/>
+ <bitfield name="SHARED_SIZE" low="0" high="4" type="uint">
+ <doc>
+ If 0 - all 32k of shared storage is enabled, otherwise
+ (SHARED_SIZE + 1) * 1k is enabled.
+ The ldl/stl offset seems to be rewritten to 0 when it is beyond
+ this limit. This is different from ldlw/stlw, which wraps at
+ 64k (and has 36k of storage on A640 - reads between 36k-64k
+ always return 0)
+ </doc>
+ </bitfield>
+ <bitfield name="UNK5" pos="5" type="boolean"/>
+ <!-- always 1 ? -->
+ <bitfield name="UNK6" pos="6" type="boolean"/>
</reg32>
<reg32 offset="0xa9b2" name="SP_CS_BRANCH_COND" type="hex"/>
<reg32 offset="0xa9b3" name="SP_CS_OBJ_FIRST_EXEC_OFFSET" type="uint"/>
diff --git a/src/freedreno/vulkan/tu_pipeline.c b/src/freedreno/vulkan/tu_pipeline.c
index 3135311a108..1389f8ac134 100644
--- a/src/freedreno/vulkan/tu_pipeline.c
+++ b/src/freedreno/vulkan/tu_pipeline.c
@@ -532,8 +532,10 @@ tu6_emit_cs_config(struct tu_cs *cs, const struct tu_shader *shader,
tu6_emit_xs_config(cs, MESA_SHADER_COMPUTE, v, pvtmem, binary_iova);
+ uint32_t shared_size = MAX2(((int)v->shared_size - 1) / 1024, 1);
tu_cs_emit_pkt4(cs, REG_A6XX_SP_CS_UNKNOWN_A9B1, 1);
- tu_cs_emit(cs, 0x41);
+ tu_cs_emit(cs, A6XX_SP_CS_UNKNOWN_A9B1_SHARED_SIZE(shared_size) |
+ A6XX_SP_CS_UNKNOWN_A9B1_UNK6);
uint32_t local_invocation_id =
ir3_find_sysval_regid(v, SYSTEM_VALUE_LOCAL_INVOCATION_ID);
diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_compute.c b/src/gallium/drivers/freedreno/a6xx/fd6_compute.c
index 909b6780ebc..e6b42a84cc4 100644
--- a/src/gallium/drivers/freedreno/a6xx/fd6_compute.c
+++ b/src/gallium/drivers/freedreno/a6xx/fd6_compute.c
@@ -77,8 +77,10 @@ cs_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring,
A6XX_SP_CS_CTRL_REG0_BRANCHSTACK(v->branchstack) |
COND(v->need_pixlod, A6XX_SP_CS_CTRL_REG0_PIXLODENABLE));
+ uint32_t shared_size = MAX2(((int)v->shared_size - 1) / 1024, 1);
OUT_PKT4(ring, REG_A6XX_SP_CS_UNKNOWN_A9B1, 1);
- OUT_RING(ring, 0x41);
+ OUT_RING(ring, A6XX_SP_CS_UNKNOWN_A9B1_SHARED_SIZE(shared_size) |
+ A6XX_SP_CS_UNKNOWN_A9B1_UNK6);
uint32_t local_invocation_id, work_group_id;
local_invocation_id = ir3_find_sysval_regid(v, SYSTEM_VALUE_LOCAL_INVOCATION_ID);
More information about the mesa-commit
mailing list