Mesa (main): zink: move all 64-32bit shader load rewriting to nir pass
GitLab Mirror
gitlab-mirror at kemper.freedesktop.org
Wed Oct 27 17:23:38 UTC 2021
Module: Mesa
Branch: main
Commit: 150d6ee97e374b5f520fc1ec3817a8a09c4b80fc
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=150d6ee97e374b5f520fc1ec3817a8a09c4b80fc
Author: Mike Blumenkrantz <michael.blumenkrantz at gmail.com>
Date: Wed Oct 20 10:02:08 2021 -0400
zink: move all 64-32bit shader load rewriting to nir pass
This also enables natural 64-bit loads on drivers that support them.
Reviewed-by: Dave Airlie <airlied at redhat.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13484>
---
.../drivers/zink/nir_to_spirv/nir_to_spirv.c | 61 ++++------------------
src/gallium/drivers/zink/zink_compiler.c | 54 ++++++++++++++++---
2 files changed, 57 insertions(+), 58 deletions(-)
diff --git a/src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.c b/src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.c
index e3b8a27a42b..30f4b6bd267 100644
--- a/src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.c
+++ b/src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.c
@@ -49,10 +49,10 @@ struct ntv_context {
gl_shader_stage stage;
const struct zink_so_info *so_info;
- SpvId ubos[PIPE_MAX_CONSTANT_BUFFERS][3]; //8, 16, 32
+ SpvId ubos[PIPE_MAX_CONSTANT_BUFFERS][5]; //8, 16, 32, unused, 64
nir_variable *ubo_vars[PIPE_MAX_CONSTANT_BUFFERS];
- SpvId ssbos[PIPE_MAX_SHADER_BUFFERS][3]; //8, 16, 32
+ SpvId ssbos[PIPE_MAX_SHADER_BUFFERS][5]; //8, 16, 32, unused, 64
nir_variable *ssbo_vars[PIPE_MAX_SHADER_BUFFERS];
SpvId image_types[PIPE_MAX_SAMPLERS];
SpvId images[PIPE_MAX_SAMPLERS];
@@ -1915,9 +1915,9 @@ emit_load_bo(struct ntv_context *ctx, nir_intrinsic_instr *intr)
bool ssbo = intr->intrinsic == nir_intrinsic_load_ssbo;
assert(const_block_index); // no dynamic indexing for now
- unsigned idx = 0;
unsigned bit_size = nir_dest_bit_size(intr->dest);
- idx = MIN2(bit_size, 32) >> 4;
+ assert(bit_size <= 64);
+ unsigned idx = bit_size >> 4;
if (ssbo) {
assert(idx < ARRAY_SIZE(ctx->ssbos[0]));
if (!ctx->ssbos[const_block_index->u32][idx])
@@ -1928,15 +1928,12 @@ emit_load_bo(struct ntv_context *ctx, nir_intrinsic_instr *intr)
emit_bo(ctx, ctx->ubo_vars[const_block_index->u32], nir_dest_bit_size(intr->dest));
}
SpvId bo = ssbo ? ctx->ssbos[const_block_index->u32][idx] : ctx->ubos[const_block_index->u32][idx];
- SpvId uint_type = get_uvec_type(ctx, MIN2(bit_size, 32), 1);
+ SpvId uint_type = get_uvec_type(ctx, bit_size, 1);
SpvId one = emit_uint_const(ctx, 32, 1);
/* number of components being loaded */
unsigned num_components = nir_dest_num_components(intr->dest);
- /* we need to grab 2x32 to fill the 64bit value */
- if (bit_size == 64)
- num_components *= 2;
- SpvId constituents[NIR_MAX_VEC_COMPONENTS * 2];
+ SpvId constituents[NIR_MAX_VEC_COMPONENTS];
SpvId result;
/* destination type for the load */
@@ -1950,7 +1947,7 @@ emit_load_bo(struct ntv_context *ctx, nir_intrinsic_instr *intr)
/* our generated uniform has a memory layout like
*
* struct {
- * uint base[array_size];
+ * uintN base[array_size];
* };
*
* first, access 'base'
@@ -1983,18 +1980,6 @@ emit_load_bo(struct ntv_context *ctx, nir_intrinsic_instr *intr)
offset = emit_binop(ctx, SpvOpIAdd, uint_type, offset, one);
}
- /* if we're loading a 64bit value, we have to reassemble all the u32 values we've loaded into u64 values
- * by creating uvec2 composites and bitcasting them to u64 values
- */
- if (bit_size == 64) {
- num_components /= 2;
- type = get_uvec_type(ctx, 64, num_components);
- SpvId u64_type = get_uvec_type(ctx, 64, 1);
- for (unsigned i = 0; i < num_components; i++) {
- constituents[i] = spirv_builder_emit_composite_construct(&ctx->builder, get_uvec_type(ctx, 32, 2), constituents + i * 2, 2);
- constituents[i] = emit_bitcast(ctx, u64_type, constituents[i]);
- }
- }
/* if loading more than 1 value, reassemble the results into the desired type,
* otherwise just use the loaded result
*/
@@ -2194,7 +2179,6 @@ emit_load_shared(struct ntv_context *ctx, nir_intrinsic_instr *intr)
SpvId dest_type = get_dest_type(ctx, &intr->dest, nir_type_uint);
unsigned num_components = nir_dest_num_components(intr->dest);
unsigned bit_size = nir_dest_bit_size(intr->dest);
- bool qword = bit_size == 64;
SpvId uint_type = get_uvec_type(ctx, 32, 1);
SpvId ptr_type = spirv_builder_type_pointer(&ctx->builder,
SpvStorageClassWorkgroup,
@@ -2203,17 +2187,10 @@ emit_load_shared(struct ntv_context *ctx, nir_intrinsic_instr *intr)
SpvId constituents[NIR_MAX_VEC_COMPONENTS];
/* need to convert array -> vec */
for (unsigned i = 0; i < num_components; i++) {
- SpvId parts[2];
- for (unsigned j = 0; j < 1 + !!qword; j++) {
- SpvId member = spirv_builder_emit_access_chain(&ctx->builder, ptr_type,
- ctx->shared_block_var, &offset, 1);
- parts[j] = spirv_builder_emit_load(&ctx->builder, uint_type, member);
- offset = emit_binop(ctx, SpvOpIAdd, uint_type, offset, emit_uint_const(ctx, 32, 1));
- }
- if (qword)
- constituents[i] = spirv_builder_emit_composite_construct(&ctx->builder, get_uvec_type(ctx, 64, 1), parts, 2);
- else
- constituents[i] = parts[0];
+ SpvId member = spirv_builder_emit_access_chain(&ctx->builder, ptr_type,
+ ctx->shared_block_var, &offset, 1);
+ constituents[i] = spirv_builder_emit_load(&ctx->builder, uint_type, member);
+ offset = emit_binop(ctx, SpvOpIAdd, uint_type, offset, emit_uint_const(ctx, 32, 1));
}
SpvId result;
if (num_components > 1)
@@ -2258,15 +2235,11 @@ emit_store_shared(struct ntv_context *ctx, nir_intrinsic_instr *intr)
static void
emit_load_push_const(struct ntv_context *ctx, nir_intrinsic_instr *intr)
{
- unsigned bit_size = nir_dest_bit_size(intr->dest);
SpvId uint_type = get_uvec_type(ctx, 32, 1);
SpvId load_type = get_uvec_type(ctx, 32, 1);
/* number of components being loaded */
unsigned num_components = nir_dest_num_components(intr->dest);
- /* we need to grab 2x32 to fill the 64bit value */
- if (bit_size == 64)
- num_components *= 2;
SpvId constituents[NIR_MAX_VEC_COMPONENTS * 2];
SpvId result;
@@ -2298,18 +2271,6 @@ emit_load_push_const(struct ntv_context *ctx, nir_intrinsic_instr *intr)
offset = emit_binop(ctx, SpvOpIAdd, uint_type, offset, one);
}
- /* if we're loading a 64bit value, we have to reassemble all the u32 values we've loaded into u64 values
- * by creating uvec2 composites and bitcasting them to u64 values
- */
- if (bit_size == 64) {
- num_components /= 2;
- type = get_uvec_type(ctx, 64, num_components);
- SpvId u64_type = get_uvec_type(ctx, 64, 1);
- for (unsigned i = 0; i < num_components; i++) {
- constituents[i] = spirv_builder_emit_composite_construct(&ctx->builder, get_uvec_type(ctx, 32, 2), constituents + i * 2, 2);
- constituents[i] = emit_bitcast(ctx, u64_type, constituents[i]);
- }
- }
/* if loading more than 1 value, reassemble the results into the desired type,
* otherwise just use the loaded result
*/
diff --git a/src/gallium/drivers/zink/zink_compiler.c b/src/gallium/drivers/zink/zink_compiler.c
index be08bf811aa..73443e27933 100644
--- a/src/gallium/drivers/zink/zink_compiler.c
+++ b/src/gallium/drivers/zink/zink_compiler.c
@@ -640,16 +640,54 @@ decompose_attribs(nir_shader *nir, uint32_t decomposed_attrs, uint32_t decompose
static bool
rewrite_bo_access_instr(nir_builder *b, nir_instr *instr, void *data)
{
+ struct zink_screen *screen = data;
+ const bool has_int64 = screen->info.feats.features.shaderInt64;
if (instr->type != nir_instr_type_intrinsic)
return false;
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+ b->cursor = nir_before_instr(instr);
switch (intr->intrinsic) {
case nir_intrinsic_load_ssbo:
- case nir_intrinsic_load_ubo:
- case nir_intrinsic_load_ubo_vec4:
- b->cursor = nir_before_instr(instr);
- nir_instr_rewrite_src_ssa(instr, &intr->src[1], nir_udiv_imm(b, intr->src[1].ssa, MIN2(nir_dest_bit_size(intr->dest), 32) / 8));
+ case nir_intrinsic_load_ubo: {
+ /* ubo0 can have unaligned 64bit loads, particularly for bindless texture ids */
+ bool force_2x32 = intr->intrinsic == nir_intrinsic_load_ubo &&
+ nir_src_as_uint(intr->src[0]) == 0 &&
+ nir_dest_bit_size(intr->dest) == 64 &&
+ nir_intrinsic_align_offset(intr) % 8 != 0;
+ nir_instr_rewrite_src_ssa(instr, &intr->src[1], nir_udiv_imm(b, intr->src[1].ssa,
+ (force_2x32 ? 32 : nir_dest_bit_size(intr->dest)) / 8));
+ /* if 64bit isn't supported, 64bit loads definitely aren't supported, so rewrite as 2x32 with cast and pray */
+ if (force_2x32 || (nir_dest_bit_size(intr->dest) == 64 && !has_int64)) {
+ /* this is always scalarized */
+ assert(intr->dest.ssa.num_components == 1);
+ /* rewrite as 2x32 */
+ nir_ssa_def *load;
+ if (intr->intrinsic == nir_intrinsic_load_ssbo)
+ load = nir_load_ssbo(b, 2, 32, intr->src[0].ssa, intr->src[1].ssa, .align_mul = 4, .align_offset = 0);
+ else
+ load = nir_load_ubo(b, 2, 32, intr->src[0].ssa, intr->src[1].ssa, .align_mul = 4, .align_offset = 0, .range = 4);
+ nir_intrinsic_set_access(nir_instr_as_intrinsic(load->parent_instr), nir_intrinsic_access(intr));
+ /* cast back to 64bit */
+ nir_ssa_def *casted = nir_pack_64_2x32(b, load);
+ nir_ssa_def_rewrite_uses(&intr->dest.ssa, casted);
+ nir_instr_remove(instr);
+ }
return true;
+ }
+ case nir_intrinsic_load_shared:
+ /* if 64bit isn't supported, 64bit loads definitely aren't supported, so rewrite as 2x32 with cast and pray */
+ if (nir_dest_bit_size(intr->dest) == 64 && !has_int64) {
+ /* this is always scalarized */
+ assert(intr->dest.ssa.num_components == 1);
+ /* rewrite as 2x32 */
+ nir_ssa_def *load = nir_load_shared(b, 2, 32, intr->src[0].ssa, .align_mul = 4, .align_offset = 0);
+ /* cast back to 64bit */
+ nir_ssa_def *casted = nir_pack_64_2x32(b, load);
+ nir_ssa_def_rewrite_uses(&intr->dest.ssa, casted);
+ nir_instr_remove(instr);
+ return true;
+ }
+ break;
case nir_intrinsic_store_ssbo:
default:
break;
@@ -658,9 +696,9 @@ rewrite_bo_access_instr(nir_builder *b, nir_instr *instr, void *data)
}
static bool
-rewrite_bo_access(nir_shader *shader)
+rewrite_bo_access(nir_shader *shader, struct zink_screen *screen)
{
- return nir_shader_instructions_pass(shader, rewrite_bo_access_instr, nir_metadata_dominance, NULL);
+ return nir_shader_instructions_pass(shader, rewrite_bo_access_instr, nir_metadata_dominance, screen);
}
static void
@@ -900,7 +938,7 @@ zink_shader_compile(struct zink_screen *screen, struct zink_shader *zs, nir_shad
}
if (screen->driconf.inline_uniforms) {
NIR_PASS_V(nir, nir_lower_io_to_scalar, nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_mem_shared);
- NIR_PASS_V(nir, rewrite_bo_access);
+ NIR_PASS_V(nir, rewrite_bo_access, screen);
}
if (inlined_uniforms) {
optimize_nir(nir);
@@ -1417,7 +1455,7 @@ zink_shader_create(struct zink_screen *screen, struct nir_shader *nir,
/* run in compile if there could be inlined uniforms */
if (!screen->driconf.inline_uniforms) {
NIR_PASS_V(nir, nir_lower_io_to_scalar, nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_mem_shared);
- NIR_PASS_V(nir, rewrite_bo_access);
+ NIR_PASS_V(nir, rewrite_bo_access, screen);
}
if (zink_debug & ZINK_DEBUG_NIR) {
More information about the mesa-commit mailing list