Mesa (main): radv/llvm: rework VS input loads and implement the callback
GitLab Mirror
gitlab-mirror at kemper.freedesktop.org
Fri Sep 3 08:32:47 UTC 2021
Module: Mesa
Branch: main
Commit: ad878856e6b06289a42c6eb2505153a24110e812
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=ad878856e6b06289a42c6eb2505153a24110e812
Author: Samuel Pitoiset <samuel.pitoiset at gmail.com>
Date: Thu Sep 2 09:05:07 2021 +0200
radv/llvm: rework VS input loads and implement the callback
Signed-off-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas at basnieuwenhuizen.nl>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12693>
---
src/amd/vulkan/radv_nir_to_llvm.c | 260 +++++++++++++++++++++-----------------
1 file changed, 146 insertions(+), 114 deletions(-)
diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c
index 13e677a2a6f..c1736c028b3 100644
--- a/src/amd/vulkan/radv_nir_to_llvm.c
+++ b/src/amd/vulkan/radv_nir_to_llvm.c
@@ -693,157 +693,188 @@ radv_fixup_vertex_input_fetches(struct radv_shader_context *ctx, LLVMValueRef va
}
static void
-handle_vs_input_decl(struct radv_shader_context *ctx, struct nir_variable *variable)
+load_vs_input(struct radv_shader_context *ctx, unsigned driver_location, LLVMTypeRef dest_type,
+ LLVMValueRef out[4])
{
LLVMValueRef t_list_ptr = ac_get_arg(&ctx->ac, ctx->args->ac.vertex_buffers);
LLVMValueRef t_offset;
LLVMValueRef t_list;
LLVMValueRef input;
LLVMValueRef buffer_index;
- unsigned attrib_count = glsl_count_attribute_slots(variable->type, true);
-
- enum glsl_base_type type = glsl_get_base_type(variable->type);
- for (unsigned i = 0; i < attrib_count; ++i) {
- LLVMValueRef output[4];
- unsigned attrib_index = variable->data.location + i - VERT_ATTRIB_GENERIC0;
- unsigned attrib_format = ctx->args->options->key.vs.vertex_attribute_formats[attrib_index];
- unsigned data_format = attrib_format & 0x0f;
- unsigned num_format = (attrib_format >> 4) & 0x07;
- bool is_float =
- num_format != V_008F0C_BUF_NUM_FORMAT_UINT && num_format != V_008F0C_BUF_NUM_FORMAT_SINT;
- uint8_t input_usage_mask =
- ctx->args->shader_info->vs.input_usage_mask[variable->data.location + i];
- unsigned num_input_channels = util_last_bit(input_usage_mask);
-
- if (num_input_channels == 0)
- continue;
-
- if (ctx->args->options->key.vs.instance_rate_inputs & (1u << attrib_index)) {
- uint32_t divisor = ctx->args->options->key.vs.instance_rate_divisors[attrib_index];
-
- if (divisor) {
- buffer_index = ctx->abi.instance_id;
-
- if (divisor != 1) {
- buffer_index = LLVMBuildUDiv(ctx->ac.builder, buffer_index,
- LLVMConstInt(ctx->ac.i32, divisor, 0), "");
- }
- } else {
- buffer_index = ctx->ac.i32_0;
+ unsigned attrib_index = driver_location - VERT_ATTRIB_GENERIC0;
+ unsigned attrib_format = ctx->args->options->key.vs.vertex_attribute_formats[attrib_index];
+ unsigned data_format = attrib_format & 0x0f;
+ unsigned num_format = (attrib_format >> 4) & 0x07;
+ bool is_float =
+ num_format != V_008F0C_BUF_NUM_FORMAT_UINT && num_format != V_008F0C_BUF_NUM_FORMAT_SINT;
+ uint8_t input_usage_mask =
+ ctx->args->shader_info->vs.input_usage_mask[driver_location];
+ unsigned num_input_channels = util_last_bit(input_usage_mask);
+
+ if (ctx->args->options->key.vs.instance_rate_inputs & (1u << attrib_index)) {
+ uint32_t divisor = ctx->args->options->key.vs.instance_rate_divisors[attrib_index];
+
+ if (divisor) {
+ buffer_index = ctx->abi.instance_id;
+
+ if (divisor != 1) {
+ buffer_index = LLVMBuildUDiv(ctx->ac.builder, buffer_index,
+ LLVMConstInt(ctx->ac.i32, divisor, 0), "");
}
-
- buffer_index = LLVMBuildAdd(
- ctx->ac.builder, ac_get_arg(&ctx->ac, ctx->args->ac.start_instance), buffer_index, "");
} else {
- buffer_index = LLVMBuildAdd(ctx->ac.builder, ctx->abi.vertex_id,
- ac_get_arg(&ctx->ac, ctx->args->ac.base_vertex), "");
+ buffer_index = ctx->ac.i32_0;
}
- const struct ac_data_format_info *vtx_info = ac_get_data_format_info(data_format);
-
- /* Adjust the number of channels to load based on the vertex
- * attribute format.
- */
- unsigned num_channels = MIN2(num_input_channels, vtx_info->num_channels);
- unsigned attrib_binding = ctx->args->options->key.vs.vertex_attribute_bindings[attrib_index];
- unsigned attrib_offset = ctx->args->options->key.vs.vertex_attribute_offsets[attrib_index];
- unsigned attrib_stride = ctx->args->options->key.vs.vertex_attribute_strides[attrib_index];
- unsigned alpha_adjust = ctx->args->options->key.vs.alpha_adjust[attrib_index];
-
- if (ctx->args->options->key.vs.post_shuffle & (1 << attrib_index)) {
- /* Always load, at least, 3 channels for formats that
- * need to be shuffled because X<->Z.
- */
- num_channels = MAX2(num_channels, 3);
- }
+ buffer_index = LLVMBuildAdd(
+ ctx->ac.builder, ac_get_arg(&ctx->ac, ctx->args->ac.start_instance), buffer_index, "");
+ } else {
+ buffer_index = LLVMBuildAdd(ctx->ac.builder, ctx->abi.vertex_id,
+ ac_get_arg(&ctx->ac, ctx->args->ac.base_vertex), "");
+ }
- unsigned desc_index =
- ctx->args->shader_info->vs.use_per_attribute_vb_descs ? attrib_index : attrib_binding;
- desc_index = util_bitcount(ctx->args->shader_info->vs.vb_desc_usage_mask &
- u_bit_consecutive(0, desc_index));
- t_offset = LLVMConstInt(ctx->ac.i32, desc_index, false);
- t_list = ac_build_load_to_sgpr(&ctx->ac, t_list_ptr, t_offset);
-
- /* Always split typed vertex buffer loads on GFX6 and GFX10+
- * to avoid any alignment issues that triggers memory
- * violations and eventually a GPU hang. This can happen if
- * the stride (static or dynamic) is unaligned and also if the
- * VBO offset is aligned to a scalar (eg. stride is 8 and VBO
- * offset is 2 for R16G16B16A16_SNORM).
- */
- if (ctx->ac.chip_class == GFX6 || ctx->ac.chip_class >= GFX10) {
- unsigned chan_format = vtx_info->chan_format;
- LLVMValueRef values[4];
+ const struct ac_data_format_info *vtx_info = ac_get_data_format_info(data_format);
- assert(ctx->ac.chip_class == GFX6 || ctx->ac.chip_class >= GFX10);
+ /* Adjust the number of channels to load based on the vertex attribute format. */
+ unsigned num_channels = MIN2(num_input_channels, vtx_info->num_channels);
+ unsigned attrib_binding = ctx->args->options->key.vs.vertex_attribute_bindings[attrib_index];
+ unsigned attrib_offset = ctx->args->options->key.vs.vertex_attribute_offsets[attrib_index];
+ unsigned attrib_stride = ctx->args->options->key.vs.vertex_attribute_strides[attrib_index];
+ unsigned alpha_adjust = ctx->args->options->key.vs.alpha_adjust[attrib_index];
- for (unsigned chan = 0; chan < num_channels; chan++) {
- unsigned chan_offset = attrib_offset + chan * vtx_info->chan_byte_size;
- LLVMValueRef chan_index = buffer_index;
+ if (ctx->args->options->key.vs.post_shuffle & (1 << attrib_index)) {
+ /* Always load, at least, 3 channels for formats that need to be shuffled because X<->Z. */
+ num_channels = MAX2(num_channels, 3);
+ }
- if (attrib_stride != 0 && chan_offset > attrib_stride) {
- LLVMValueRef buffer_offset =
- LLVMConstInt(ctx->ac.i32, chan_offset / attrib_stride, false);
+ unsigned desc_index =
+ ctx->args->shader_info->vs.use_per_attribute_vb_descs ? attrib_index : attrib_binding;
+ desc_index = util_bitcount(ctx->args->shader_info->vs.vb_desc_usage_mask &
+ u_bit_consecutive(0, desc_index));
+ t_offset = LLVMConstInt(ctx->ac.i32, desc_index, false);
+ t_list = ac_build_load_to_sgpr(&ctx->ac, t_list_ptr, t_offset);
- chan_index = LLVMBuildAdd(ctx->ac.builder, buffer_index, buffer_offset, "");
+ /* Always split typed vertex buffer loads on GFX6 and GFX10+ to avoid any alignment issues that
+ * triggers memory violations and eventually a GPU hang. This can happen if the stride (static or
+ * dynamic) is unaligned and also if the VBO offset is aligned to a scalar (eg. stride is 8 and
+ * VBO offset is 2 for R16G16B16A16_SNORM).
+ */
+ if (ctx->ac.chip_class == GFX6 || ctx->ac.chip_class >= GFX10) {
+ unsigned chan_format = vtx_info->chan_format;
+ LLVMValueRef values[4];
- chan_offset = chan_offset % attrib_stride;
- }
+ assert(ctx->ac.chip_class == GFX6 || ctx->ac.chip_class >= GFX10);
- values[chan] = ac_build_struct_tbuffer_load(
- &ctx->ac, t_list, chan_index, LLVMConstInt(ctx->ac.i32, chan_offset, false),
- ctx->ac.i32_0, ctx->ac.i32_0, 1, chan_format, num_format, 0, true);
- }
+ for (unsigned chan = 0; chan < num_channels; chan++) {
+ unsigned chan_offset = attrib_offset + chan * vtx_info->chan_byte_size;
+ LLVMValueRef chan_index = buffer_index;
- input = ac_build_gather_values(&ctx->ac, values, num_channels);
- } else {
- if (attrib_stride != 0 && attrib_offset > attrib_stride) {
+ if (attrib_stride != 0 && chan_offset > attrib_stride) {
LLVMValueRef buffer_offset =
- LLVMConstInt(ctx->ac.i32, attrib_offset / attrib_stride, false);
+ LLVMConstInt(ctx->ac.i32, chan_offset / attrib_stride, false);
- buffer_index = LLVMBuildAdd(ctx->ac.builder, buffer_index, buffer_offset, "");
+ chan_index = LLVMBuildAdd(ctx->ac.builder, buffer_index, buffer_offset, "");
- attrib_offset = attrib_offset % attrib_stride;
+ chan_offset = chan_offset % attrib_stride;
}
- input = ac_build_struct_tbuffer_load(
- &ctx->ac, t_list, buffer_index, LLVMConstInt(ctx->ac.i32, attrib_offset, false),
- ctx->ac.i32_0, ctx->ac.i32_0, num_channels, data_format, num_format, 0, true);
+ values[chan] = ac_build_struct_tbuffer_load(
+ &ctx->ac, t_list, chan_index, LLVMConstInt(ctx->ac.i32, chan_offset, false),
+ ctx->ac.i32_0, ctx->ac.i32_0, 1, chan_format, num_format, 0, true);
}
- if (ctx->args->options->key.vs.post_shuffle & (1 << attrib_index)) {
- LLVMValueRef c[4];
- c[0] = ac_llvm_extract_elem(&ctx->ac, input, 2);
- c[1] = ac_llvm_extract_elem(&ctx->ac, input, 1);
- c[2] = ac_llvm_extract_elem(&ctx->ac, input, 0);
- c[3] = ac_llvm_extract_elem(&ctx->ac, input, 3);
+ input = ac_build_gather_values(&ctx->ac, values, num_channels);
+ } else {
+ if (attrib_stride != 0 && attrib_offset > attrib_stride) {
+ LLVMValueRef buffer_offset =
+ LLVMConstInt(ctx->ac.i32, attrib_offset / attrib_stride, false);
+
+ buffer_index = LLVMBuildAdd(ctx->ac.builder, buffer_index, buffer_offset, "");
- input = ac_build_gather_values(&ctx->ac, c, 4);
+ attrib_offset = attrib_offset % attrib_stride;
}
- input = radv_fixup_vertex_input_fetches(ctx, input, num_channels, is_float);
+ input = ac_build_struct_tbuffer_load(
+ &ctx->ac, t_list, buffer_index, LLVMConstInt(ctx->ac.i32, attrib_offset, false),
+ ctx->ac.i32_0, ctx->ac.i32_0, num_channels, data_format, num_format, 0, true);
+ }
+
+ if (ctx->args->options->key.vs.post_shuffle & (1 << attrib_index)) {
+ LLVMValueRef c[4];
+ c[0] = ac_llvm_extract_elem(&ctx->ac, input, 2);
+ c[1] = ac_llvm_extract_elem(&ctx->ac, input, 1);
+ c[2] = ac_llvm_extract_elem(&ctx->ac, input, 0);
+ c[3] = ac_llvm_extract_elem(&ctx->ac, input, 3);
- for (unsigned chan = 0; chan < 4; chan++) {
- LLVMValueRef llvm_chan = LLVMConstInt(ctx->ac.i32, chan, false);
- output[chan] = LLVMBuildExtractElement(ctx->ac.builder, input, llvm_chan, "");
- if (type == GLSL_TYPE_FLOAT16) {
- output[chan] = LLVMBuildBitCast(ctx->ac.builder, output[chan], ctx->ac.f32, "");
- output[chan] = LLVMBuildFPTrunc(ctx->ac.builder, output[chan], ctx->ac.f16, "");
- }
+ input = ac_build_gather_values(&ctx->ac, c, 4);
+ }
+
+ input = radv_fixup_vertex_input_fetches(ctx, input, num_channels, is_float);
+
+ for (unsigned chan = 0; chan < 4; chan++) {
+ LLVMValueRef llvm_chan = LLVMConstInt(ctx->ac.i32, chan, false);
+ out[chan] = LLVMBuildExtractElement(ctx->ac.builder, input, llvm_chan, "");
+ if (dest_type == ctx->ac.f16) {
+ out[chan] = LLVMBuildBitCast(ctx->ac.builder, out[chan], ctx->ac.f32, "");
+ out[chan] = LLVMBuildFPTrunc(ctx->ac.builder, out[chan], ctx->ac.f16, "");
}
+ }
- output[3] = adjust_vertex_fetch_alpha(ctx, alpha_adjust, output[3]);
+ out[3] = adjust_vertex_fetch_alpha(ctx, alpha_adjust, out[3]);
- for (unsigned chan = 0; chan < 4; chan++) {
- output[chan] = ac_to_integer(&ctx->ac, output[chan]);
- if (type == GLSL_TYPE_UINT16 || type == GLSL_TYPE_INT16)
- output[chan] = LLVMBuildTrunc(ctx->ac.builder, output[chan], ctx->ac.i16, "");
+ for (unsigned chan = 0; chan < 4; chan++) {
+ out[chan] = ac_to_integer(&ctx->ac, out[chan]);
+ if (dest_type == ctx->ac.i16)
+ out[chan] = LLVMBuildTrunc(ctx->ac.builder, out[chan], ctx->ac.i16, "");
+ }
+}
+
+static void
+handle_vs_input_decl(struct radv_shader_context *ctx, struct nir_variable *variable)
+{
+ unsigned attrib_count = glsl_count_attribute_slots(variable->type, true);
+ enum glsl_base_type var_type = glsl_get_base_type(variable->type);
+ LLVMTypeRef type;
+
+ switch (var_type) {
+ case GLSL_TYPE_FLOAT16:
+ type = ctx->ac.f16;
+ break;
+ case GLSL_TYPE_UINT16:
+ case GLSL_TYPE_INT16:
+ type = ctx->ac.i16;
+ break;
+ default:
+ type = ctx->ac.i32;
+ break;
+ }
+
+ for (unsigned i = 0; i < attrib_count; ++i) {
+ unsigned driver_location = variable->data.location + i;
+ LLVMValueRef output[4];
+
+ load_vs_input(ctx, driver_location, type, output);
- ctx->inputs[ac_llvm_reg_index_soa(variable->data.location + i, chan)] = output[chan];
+ for (unsigned chan = 0; chan < 4; chan++) {
+ ctx->inputs[ac_llvm_reg_index_soa(driver_location, chan)] = output[chan];
}
}
}
+static LLVMValueRef
+radv_load_vs_inputs(struct ac_shader_abi *abi, unsigned driver_location, unsigned component,
+ unsigned num_components, unsigned vertex_index, LLVMTypeRef type)
+{
+ struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
+ LLVMValueRef values[4];
+
+ load_vs_input(ctx, driver_location, type, values);
+
+ for (unsigned i = 0; i < 4; i++)
+ values[i] = LLVMBuildBitCast(ctx->ac.builder, values[i], type, "");
+
+ return ac_build_varying_gather_values(&ctx->ac, values, num_components, component);
+}
+
static void
handle_vs_inputs(struct radv_shader_context *ctx, struct nir_shader *nir)
{
@@ -2539,6 +2570,7 @@ ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm, struct nir_shader *co
ctx.abi.load_tess_coord = load_tess_coord;
} else if (shaders[shader_idx]->info.stage == MESA_SHADER_VERTEX) {
ctx.abi.load_base_vertex = radv_load_base_vertex;
+ ctx.abi.load_inputs = radv_load_vs_inputs;
} else if (shaders[shader_idx]->info.stage == MESA_SHADER_FRAGMENT) {
ctx.abi.load_sample_position = load_sample_position;
ctx.abi.load_sample_mask_in = load_sample_mask_in;
More information about the mesa-commit mailing list