Mesa (main): radv: allow holes in inline push constants
GitLab Mirror
gitlab-mirror at kemper.freedesktop.org
Tue Apr 12 12:09:12 UTC 2022
Module: Mesa
Branch: main
Commit: 7f6262bb85cbe39472f2b26d812629368a0eae3b
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=7f6262bb85cbe39472f2b26d812629368a0eae3b
Author: Rhys Perry <pendingchaos02 at gmail.com>
Date: Thu Jul 29 16:47:44 2021 +0100
radv: allow holes in inline push constants
Use a dword mask instead of a range to track which push constants to
inline, so that unused dwords lying between the used ones no longer
occupy user SGPRs.
fossil-db (Sienna Cichlid):
Totals from 5724 (4.25% of 134621) affected shaders:
CodeSize: 20894044 -> 20815748 (-0.37%); split: -0.39%, +0.02%
Instrs: 4002568 -> 3988385 (-0.35%); split: -0.38%, +0.02%
Latency: 29285060 -> 29224414 (-0.21%); split: -0.22%, +0.01%
InvThroughput: 5529700 -> 5526893 (-0.05%); split: -0.05%, +0.00%
VClause: 78093 -> 78240 (+0.19%); split: -0.23%, +0.41%
SClause: 135495 -> 131027 (-3.30%); split: -3.30%, +0.00%
Copies: 330856 -> 324552 (-1.91%); split: -2.37%, +0.46%
PreSGPRs: 226031 -> 224778 (-0.55%); split: -0.61%, +0.05%
Signed-off-by: Rhys Perry <pendingchaos02 at gmail.com>
Reviewed-by: Daniel Schürmann <daniel at schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12145>
---
src/amd/.clang-format | 3 +-
src/amd/common/ac_shader_args.h | 2 +-
src/amd/compiler/aco_instruction_selection.cpp | 13 +++---
src/amd/llvm/ac_nir_to_llvm.c | 22 ++++------
src/amd/vulkan/radv_cmd_buffer.c | 21 ++++++++--
src/amd/vulkan/radv_pipeline.c | 6 +++
src/amd/vulkan/radv_shader.c | 1 +
src/amd/vulkan/radv_shader.h | 6 +--
src/amd/vulkan/radv_shader_args.c | 57 +++++++++++---------------
src/amd/vulkan/radv_shader_info.c | 26 +++++-------
10 files changed, 80 insertions(+), 77 deletions(-)
diff --git a/src/amd/.clang-format b/src/amd/.clang-format
index 6cf07d558fc..db9ecef76ce 100644
--- a/src/amd/.clang-format
+++ b/src/amd/.clang-format
@@ -52,7 +52,8 @@ ForEachMacros:
- nir_foreach_variable_in_list
- nir_foreach_src
- foreach_two_lists
- - foreach_bit
+ - u_foreach_bit
+ - u_foreach_bit64
- foreach_sched_node
- foreach_src
- foreach_src_n
diff --git a/src/amd/common/ac_shader_args.h b/src/amd/common/ac_shader_args.h
index e0430efe015..e8d743cd961 100644
--- a/src/amd/common/ac_shader_args.h
+++ b/src/amd/common/ac_shader_args.h
@@ -139,7 +139,7 @@ struct ac_shader_args {
/* Vulkan only */
struct ac_arg push_constants;
struct ac_arg inline_push_consts[AC_MAX_INLINE_PUSH_CONSTS];
- unsigned base_inline_push_consts;
+ uint64_t inline_push_const_mask;
struct ac_arg view_index;
struct ac_arg sbt_descriptors;
struct ac_arg ray_launch_size;
diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index cc30bfbdcc6..e29817b8450 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -5501,18 +5501,17 @@ visit_load_push_constant(isel_context* ctx, nir_intrinsic_instr* instr)
nir_const_value* index_cv = nir_src_as_const_value(instr->src[0]);
if (index_cv && instr->dest.ssa.bit_size == 32) {
- const struct radv_userdata_info *loc =
- &ctx->program->info->user_sgprs_locs.shader_data[AC_UD_INLINE_PUSH_CONSTANTS];
unsigned start = (offset + index_cv->u32) / 4u;
- unsigned num_inline_push_consts = loc->sgpr_idx != -1 ? loc->num_sgprs : 0;
-
- start -= ctx->program->info->min_push_constant_used / 4;
- if (start + count <= num_inline_push_consts) {
+ uint64_t mask = BITFIELD64_MASK(count) << start;
+ if ((ctx->args->ac.inline_push_const_mask | mask) == ctx->args->ac.inline_push_const_mask &&
+ start + count <= (sizeof(ctx->args->ac.inline_push_const_mask) * 8u)) {
std::array<Temp, NIR_MAX_VEC_COMPONENTS> elems;
aco_ptr<Pseudo_instruction> vec{create_instruction<Pseudo_instruction>(
aco_opcode::p_create_vector, Format::PSEUDO, count, 1)};
+ unsigned arg_index =
+ util_bitcount64(ctx->args->ac.inline_push_const_mask & BITFIELD64_MASK(start));
for (unsigned i = 0; i < count; ++i) {
- elems[i] = get_arg(ctx, ctx->args->ac.inline_push_consts[start + i]);
+ elems[i] = get_arg(ctx, ctx->args->ac.inline_push_consts[arg_index++]);
vec->operands[i] = Operand{elems[i]};
}
vec->definitions[0] = Definition(dst);
diff --git a/src/amd/llvm/ac_nir_to_llvm.c b/src/amd/llvm/ac_nir_to_llvm.c
index 1789073b172..1b3caabb6ad 100644
--- a/src/amd/llvm/ac_nir_to_llvm.c
+++ b/src/amd/llvm/ac_nir_to_llvm.c
@@ -1662,19 +1662,15 @@ static LLVMValueRef visit_load_push_constant(struct ac_nir_context *ctx, nir_int
offset += LLVMConstIntGetZExtValue(src0);
offset /= 4;
- offset -= ctx->args->base_inline_push_consts;
-
- unsigned num_inline_push_consts = 0;
- for (unsigned i = 0; i < ARRAY_SIZE(ctx->args->inline_push_consts); i++) {
- if (ctx->args->inline_push_consts[i].used)
- num_inline_push_consts++;
- }
-
- if (offset + count <= num_inline_push_consts) {
- LLVMValueRef *const push_constants = alloca(num_inline_push_consts * sizeof(LLVMValueRef));
- for (unsigned i = 0; i < num_inline_push_consts; i++)
- push_constants[i] = ac_get_arg(&ctx->ac, ctx->args->inline_push_consts[i]);
- return ac_build_gather_values(&ctx->ac, push_constants + offset, count);
+ uint64_t mask = BITFIELD64_MASK(count) << offset;
+ if ((ctx->args->inline_push_const_mask | mask) == ctx->args->inline_push_const_mask &&
+ offset + count <= (sizeof(ctx->args->inline_push_const_mask) * 8u)) {
+ LLVMValueRef *const push_constants = alloca(count * sizeof(LLVMValueRef));
+ unsigned arg_index =
+ util_bitcount64(ctx->args->inline_push_const_mask & BITFIELD64_MASK(offset));
+ for (unsigned i = 0; i < count; i++)
+ push_constants[i] = ac_get_arg(&ctx->ac, ctx->args->inline_push_consts[arg_index++]);
+ return ac_build_gather_values(&ctx->ac, push_constants, count);
}
}
diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index bedc13ce164..8e0f1493fc7 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -38,6 +38,7 @@
#include "vk_common_entrypoints.h"
#include "ac_debug.h"
+#include "ac_shader_args.h"
#include "util/fast_idiv_by_const.h"
@@ -3277,10 +3278,24 @@ radv_flush_constants(struct radv_cmd_buffer *cmd_buffer, VkShaderStageFlags stag
need_push_constants |= radv_shader_loads_push_constants(pipeline, stage);
- uint8_t base = shader->info.min_push_constant_used / 4;
+ uint64_t mask = shader->info.inline_push_constant_mask;
+ if (!mask)
+ continue;
- radv_emit_inline_push_consts(cmd_buffer, pipeline, stage, AC_UD_INLINE_PUSH_CONSTANTS,
- (uint32_t *)cmd_buffer->push_constants + base);
+ uint8_t base = ffs(mask) - 1;
+ if (mask == u_bit_consecutive64(base, util_last_bit64(mask) - base)) {
+ /* consecutive inline push constants */
+ radv_emit_inline_push_consts(cmd_buffer, pipeline, stage, AC_UD_INLINE_PUSH_CONSTANTS,
+ (uint32_t *)cmd_buffer->push_constants + base);
+ } else {
+ /* sparse inline push constants */
+ uint32_t consts[AC_MAX_INLINE_PUSH_CONSTS];
+ unsigned num_consts = 0;
+ u_foreach_bit64 (idx, mask)
+ consts[num_consts++] = ((uint32_t *)cmd_buffer->push_constants)[idx];
+ radv_emit_inline_push_consts(cmd_buffer, pipeline, stage, AC_UD_INLINE_PUSH_CONSTANTS,
+ consts);
+ }
}
if (need_push_constants) {
diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
index 69efd890996..c93123d7ea2 100644
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -3408,6 +3408,8 @@ radv_declare_pipeline_args(struct radv_device *device, struct radv_pipeline_stag
MESA_SHADER_TESS_CTRL, true, MESA_SHADER_VERTEX,
&stages[MESA_SHADER_TESS_CTRL].args);
stages[MESA_SHADER_TESS_CTRL].info.user_sgprs_locs = stages[MESA_SHADER_TESS_CTRL].args.user_sgprs_locs;
+ stages[MESA_SHADER_TESS_CTRL].info.inline_push_constant_mask =
+ stages[MESA_SHADER_TESS_CTRL].args.ac.inline_push_const_mask;
stages[MESA_SHADER_VERTEX].args = stages[MESA_SHADER_TESS_CTRL].args;
active_stages &= ~(1 << MESA_SHADER_VERTEX);
@@ -3420,6 +3422,8 @@ radv_declare_pipeline_args(struct radv_device *device, struct radv_pipeline_stag
radv_declare_shader_args(chip_class, pipeline_key, &stages[MESA_SHADER_GEOMETRY].info,
MESA_SHADER_GEOMETRY, true, pre_stage, &stages[MESA_SHADER_GEOMETRY].args);
stages[MESA_SHADER_GEOMETRY].info.user_sgprs_locs = stages[MESA_SHADER_GEOMETRY].args.user_sgprs_locs;
+ stages[MESA_SHADER_GEOMETRY].info.inline_push_constant_mask =
+ stages[MESA_SHADER_GEOMETRY].args.ac.inline_push_const_mask;
stages[pre_stage].args = stages[MESA_SHADER_GEOMETRY].args;
active_stages &= ~(1 << pre_stage);
@@ -3430,6 +3434,7 @@ radv_declare_pipeline_args(struct radv_device *device, struct radv_pipeline_stag
radv_declare_shader_args(chip_class, pipeline_key, &stages[i].info, i, false, MESA_SHADER_VERTEX,
&stages[i].args);
stages[i].info.user_sgprs_locs = stages[i].args.user_sgprs_locs;
+ stages[i].info.inline_push_constant_mask = stages[i].args.ac.inline_push_const_mask;
}
}
@@ -4474,6 +4479,7 @@ radv_create_shaders(struct radv_pipeline *pipeline, struct radv_pipeline_layout
radv_declare_shader_args(device->physical_device->rad_info.chip_class, pipeline_key, &info,
MESA_SHADER_VERTEX, false, MESA_SHADER_VERTEX, &gs_copy_args);
info.user_sgprs_locs = gs_copy_args.user_sgprs_locs;
+ info.inline_push_constant_mask = gs_copy_args.ac.inline_push_const_mask;
pipeline->gs_copy_shader = radv_create_gs_copy_shader(
device, stages[MESA_SHADER_GEOMETRY].nir, &info, &gs_copy_args, &gs_copy_binary,
diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index c7e2c1bde08..b1cdbb3d194 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -2167,6 +2167,7 @@ radv_create_vs_prolog(struct radv_device *device, const struct radv_vs_prolog_ke
key->next_stage != MESA_SHADER_VERTEX, MESA_SHADER_VERTEX, &args);
info.user_sgprs_locs = args.user_sgprs_locs;
+ info.inline_push_constant_mask = args.ac.inline_push_const_mask;
#ifdef LLVM_AVAILABLE
if (options.dump_shader || options.record_ir)
diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h
index 6ceff1e0757..6cccc10e34d 100644
--- a/src/amd/vulkan/radv_shader.h
+++ b/src/amd/vulkan/radv_shader.h
@@ -229,12 +229,10 @@ struct gfx10_ngg_info {
};
struct radv_shader_info {
+ uint64_t inline_push_constant_mask;
+ bool can_inline_all_push_constants;
bool loads_push_constants;
bool loads_dynamic_offsets;
- uint16_t min_push_constant_used;
- uint16_t max_push_constant_used;
- bool has_only_32bit_push_constants;
- bool has_indirect_push_constants;
uint32_t desc_set_used_mask;
bool uses_view_index;
bool uses_invocation_id;
diff --git a/src/amd/vulkan/radv_shader_args.c b/src/amd/vulkan/radv_shader_args.c
index e9b3d8e9a0c..6e3c56558c5 100644
--- a/src/amd/vulkan/radv_shader_args.c
+++ b/src/amd/vulkan/radv_shader_args.c
@@ -68,10 +68,10 @@ set_loc_desc(struct radv_shader_args *args, int idx, uint8_t *sgpr_idx)
}
struct user_sgpr_info {
+ uint64_t inline_push_constant_mask;
+ bool inlined_all_push_consts;
bool indirect_all_descriptor_sets;
uint8_t remaining_sgprs;
- unsigned num_inline_push_consts;
- bool inlined_all_push_consts;
};
static uint8_t
@@ -119,32 +119,29 @@ allocate_inline_push_consts(const struct radv_shader_info *info,
{
uint8_t remaining_sgprs = user_sgpr_info->remaining_sgprs;
- /* Only supported if shaders use push constants. */
- if (info->min_push_constant_used == UINT16_MAX)
+ if (!info->inline_push_constant_mask)
return;
- uint8_t num_push_consts =
- (info->max_push_constant_used - info->min_push_constant_used) / 4;
+ uint64_t mask = info->inline_push_constant_mask;
+ uint8_t num_push_consts = util_bitcount64(mask);
- /* Check if the number of user SGPRs is large enough. */
- if (num_push_consts < remaining_sgprs) {
- user_sgpr_info->num_inline_push_consts = num_push_consts;
+ /* Disable the default push constants path if all constants can be inlined and if shaders don't
+ * use dynamic descriptors.
+ */
+ if (num_push_consts <= MIN2(remaining_sgprs + 1, AC_MAX_INLINE_PUSH_CONSTS) &&
+ info->can_inline_all_push_constants && !info->loads_dynamic_offsets) {
+ user_sgpr_info->inlined_all_push_consts = true;
+ remaining_sgprs++;
} else {
- user_sgpr_info->num_inline_push_consts = remaining_sgprs;
+ /* Clamp to the maximum number of allowed inlined push constants. */
+ while (num_push_consts > MIN2(remaining_sgprs, AC_MAX_INLINE_PUSH_CONSTS)) {
+ num_push_consts--;
+ mask &= ~BITFIELD64_BIT(util_last_bit64(mask) - 1);
+ }
}
- /* Clamp to the maximum number of allowed inlined push constants. */
- if (user_sgpr_info->num_inline_push_consts > AC_MAX_INLINE_PUSH_CONSTS)
- user_sgpr_info->num_inline_push_consts = AC_MAX_INLINE_PUSH_CONSTS;
-
- if (user_sgpr_info->num_inline_push_consts == num_push_consts &&
- info->has_only_32bit_push_constants && !info->has_indirect_push_constants &&
- !info->loads_dynamic_offsets) {
- /* Disable the default push constants path if all constants are
- * inlined and if shaders don't use dynamic descriptors.
- */
- user_sgpr_info->inlined_all_push_consts = true;
- }
+ user_sgpr_info->remaining_sgprs = remaining_sgprs - util_bitcount64(mask);
+ user_sgpr_info->inline_push_constant_mask = mask;
}
static void
@@ -252,10 +249,10 @@ declare_global_input_sgprs(const struct radv_shader_info *info,
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_PTR, &args->ac.push_constants);
}
- for (unsigned i = 0; i < user_sgpr_info->num_inline_push_consts; i++) {
+ for (unsigned i = 0; i < util_bitcount64(user_sgpr_info->inline_push_constant_mask); i++) {
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.inline_push_consts[i]);
}
- args->ac.base_inline_push_consts = info->min_push_constant_used / 4;
+ args->ac.inline_push_const_mask = user_sgpr_info->inline_push_constant_mask;
if (info->so.num_outputs) {
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_DESC_PTR, &args->streamout_buffers);
@@ -451,8 +448,6 @@ static void
set_global_input_locs(struct radv_shader_args *args, const struct user_sgpr_info *user_sgpr_info,
uint8_t *user_sgpr_idx)
{
- unsigned num_inline_push_consts = 0;
-
if (!user_sgpr_info->indirect_all_descriptor_sets) {
for (unsigned i = 0; i < ARRAY_SIZE(args->descriptor_sets); i++) {
if (args->descriptor_sets[i].used)
@@ -466,13 +461,9 @@ set_global_input_locs(struct radv_shader_args *args, const struct user_sgpr_info
set_loc_shader_ptr(args, AC_UD_PUSH_CONSTANTS, user_sgpr_idx);
}
- for (unsigned i = 0; i < ARRAY_SIZE(args->ac.inline_push_consts); i++) {
- if (args->ac.inline_push_consts[i].used)
- num_inline_push_consts++;
- }
-
- if (num_inline_push_consts) {
- set_loc_shader(args, AC_UD_INLINE_PUSH_CONSTANTS, user_sgpr_idx, num_inline_push_consts);
+ if (user_sgpr_info->inline_push_constant_mask) {
+ set_loc_shader(args, AC_UD_INLINE_PUSH_CONSTANTS, user_sgpr_idx,
+ util_bitcount64(user_sgpr_info->inline_push_constant_mask));
}
if (args->streamout_buffers.used) {
diff --git a/src/amd/vulkan/radv_shader_info.c b/src/amd/vulkan/radv_shader_info.c
index 4196d97fa37..baef1dc6f4a 100644
--- a/src/amd/vulkan/radv_shader_info.c
+++ b/src/amd/vulkan/radv_shader_info.c
@@ -96,22 +96,19 @@ static void
gather_push_constant_info(const nir_shader *nir, const nir_intrinsic_instr *instr,
struct radv_shader_info *info)
{
- int base = nir_intrinsic_base(instr);
+ info->loads_push_constants = true;
- if (!nir_src_is_const(instr->src[0])) {
- info->has_indirect_push_constants = true;
- } else {
- uint32_t min = base + nir_src_as_uint(instr->src[0]);
- uint32_t max = min + instr->num_components * 4;
+ if (nir_src_is_const(instr->src[0]) && instr->dest.ssa.bit_size == 32) {
+ uint32_t start = (nir_intrinsic_base(instr) + nir_src_as_uint(instr->src[0])) / 4u;
+ uint32_t size = instr->num_components * (instr->dest.ssa.bit_size / 32u);
- info->max_push_constant_used = MAX2(max, info->max_push_constant_used);
- info->min_push_constant_used = MIN2(min, info->min_push_constant_used);
+ if (start + size <= (MAX_PUSH_CONSTANTS_SIZE / 4u)) {
+ info->inline_push_constant_mask |= u_bit_consecutive64(start, size);
+ return;
+ }
}
- if (instr->dest.ssa.bit_size != 32)
- info->has_only_32bit_push_constants = false;
-
- info->loads_push_constants = true;
+ info->can_inline_all_push_constants = false;
}
static void
@@ -621,9 +618,8 @@ assign_outinfo_params(struct radv_vs_output_info *outinfo, uint64_t mask,
void
radv_nir_shader_info_init(struct radv_shader_info *info)
{
- /* Assume that shaders only have 32-bit push constants by default. */
- info->min_push_constant_used = UINT16_MAX;
- info->has_only_32bit_push_constants = true;
+ /* Assume that shaders can inline all push constants by default. */
+ info->can_inline_all_push_constants = true;
}
void
More information about the mesa-commit
mailing list