Mesa (main): radv: Implement device-side BVH building.
GitLab Mirror
gitlab-mirror at kemper.freedesktop.org
Fri Jun 18 22:42:31 UTC 2021
Module: Mesa
Branch: main
Commit: 0dad88b4694cf82664f633187442ec65c14f3812
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=0dad88b4694cf82664f633187442ec65c14f3812
Author: Bas Nieuwenhuizen <bas at basnieuwenhuizen.nl>
Date: Tue May 18 13:25:00 2021 +0200
radv: Implement device-side BVH building.
Same naive algorithm as the host build.
Reviewed-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11078>
---
src/amd/vulkan/radv_acceleration_structure.c | 830 +++++++++++++++++++++++++++
src/amd/vulkan/radv_meta.c | 7 +
src/amd/vulkan/radv_meta.h | 3 +
src/amd/vulkan/radv_private.h | 7 +
4 files changed, 847 insertions(+)
diff --git a/src/amd/vulkan/radv_acceleration_structure.c b/src/amd/vulkan/radv_acceleration_structure.c
index 98ef979378d..15d1a62f085 100644
--- a/src/amd/vulkan/radv_acceleration_structure.c
+++ b/src/amd/vulkan/radv_acceleration_structure.c
@@ -23,6 +23,9 @@
#include "radv_private.h"
#include "util/half_float.h"
+#include "nir_builder.h"
+#include "radv_cs.h"
+#include "radv_meta.h"
struct radv_accel_struct_header {
uint32_t root_node_offset;
@@ -589,3 +592,830 @@ radv_BuildAccelerationStructuresKHR(
}
return result;
}
+
+/* Emit NIR that fetches the three vertex indices of triangle `id` from the
+ * index buffer at `addr`, branching at runtime on the VkIndexType in `type`.
+ * Returns a 32-bit uvec3.  Handled cases: UINT16, UINT32, NONE_KHR
+ * (sequential indices 3*id .. 3*id+2) and, in the final else, 8-bit indices.
+ * The outer `type < 2` test relies on UINT16/UINT32 being enum values 0/1. */
+static nir_ssa_def *
+get_indices(nir_builder *b, nir_ssa_def *addr, nir_ssa_def *type, nir_ssa_def *id)
+{
+ const struct glsl_type *uvec3_type = glsl_vector_type(GLSL_TYPE_UINT, 3);
+ nir_variable *result =
+ nir_variable_create(b->shader, nir_var_shader_temp, uvec3_type, "indices");
+
+ nir_push_if(b, nir_ult(b, type, nir_imm_int(b, 2)));
+ nir_push_if(b, nir_ieq(b, type, nir_imm_int(b, VK_INDEX_TYPE_UINT16)));
+ {
+ /* 16-bit indices: 6 bytes per triangle, three scalar 16-bit loads. */
+ nir_ssa_def *index_id = nir_umul24(b, id, nir_imm_int(b, 6));
+ nir_ssa_def *indices[3];
+ for (unsigned i = 0; i < 3; ++i) {
+ indices[i] = nir_build_load_global(
+ b, 1, 16, nir_iadd(b, addr, nir_u2u64(b, nir_iadd(b, index_id, nir_imm_int(b, 2 * i)))),
+ .align_mul = 2, .align_offset = 0);
+ }
+ nir_store_var(b, result, nir_u2u32(b, nir_vec(b, indices, 3)), 7);
+ }
+ nir_push_else(b, NULL);
+ {
+ /* 32-bit indices: 12 bytes per triangle, one vec3 load. */
+ nir_ssa_def *index_id = nir_umul24(b, id, nir_imm_int(b, 12));
+ nir_ssa_def *indices = nir_build_load_global(
+ b, 3, 32, nir_iadd(b, addr, nir_u2u64(b, index_id)), .align_mul = 4, .align_offset = 0);
+ nir_store_var(b, result, indices, 7);
+ }
+ nir_pop_if(b, NULL);
+ nir_push_else(b, NULL);
+ {
+ /* Sequential index triple used directly for NONE_KHR, or as byte
+ * offsets for the 8-bit index case below. */
+ nir_ssa_def *index_id = nir_umul24(b, id, nir_imm_int(b, 3));
+ nir_ssa_def *indices[] = {
+ index_id,
+ nir_iadd(b, index_id, nir_imm_int(b, 1)),
+ nir_iadd(b, index_id, nir_imm_int(b, 2)),
+ };
+
+ nir_push_if(b, nir_ieq(b, type, nir_imm_int(b, VK_INDEX_TYPE_NONE_KHR)));
+ {
+ nir_store_var(b, result, nir_vec(b, indices, 3), 7);
+ }
+ nir_push_else(b, NULL);
+ {
+ for (unsigned i = 0; i < 3; ++i) {
+ indices[i] = nir_build_load_global(b, 1, 8, nir_iadd(b, addr, nir_u2u64(b, indices[i])),
+ .align_mul = 1, .align_offset = 0);
+ }
+ nir_store_var(b, result, nir_u2u32(b, nir_vec(b, indices, 3)), 7);
+ }
+ nir_pop_if(b, NULL);
+ }
+ nir_pop_if(b, NULL);
+ return nir_load_var(b, result);
+}
+
+/* Emit NIR that loads the three triangle vertex positions (as vec3 floats)
+ * from the per-vertex addresses in `addresses` (a uvec3 of 64-bit pointers),
+ * branching at runtime on the VkFormat in `format`.  Only the four formats
+ * in `formats[]` are handled; the last one is the fall-through else branch.
+ * Results are written to positions[0..2]. */
+static void
+get_vertices(nir_builder *b, nir_ssa_def *addresses, nir_ssa_def *format, nir_ssa_def *positions[3])
+{
+ const struct glsl_type *vec3_type = glsl_vector_type(GLSL_TYPE_FLOAT, 3);
+ nir_variable *results[3] = {
+ nir_variable_create(b->shader, nir_var_shader_temp, vec3_type, "vertex0"),
+ nir_variable_create(b->shader, nir_var_shader_temp, vec3_type, "vertex1"),
+ nir_variable_create(b->shader, nir_var_shader_temp, vec3_type, "vertex2")};
+
+ VkFormat formats[] = {
+ VK_FORMAT_R32G32B32_SFLOAT,
+ VK_FORMAT_R32G32B32A32_SFLOAT,
+ VK_FORMAT_R16G16B16_SFLOAT,
+ VK_FORMAT_R16G16B16A16_SFLOAT,
+ };
+
+ /* Build an if/else-if chain; the final format needs no compare since it
+ * lands in the last else. */
+ for (unsigned f = 0; f < ARRAY_SIZE(formats); ++f) {
+ if (f + 1 < ARRAY_SIZE(formats))
+ nir_push_if(b, nir_ieq(b, format, nir_imm_int(b, formats[f])));
+
+ for (unsigned i = 0; i < 3; ++i) {
+ switch (formats[f]) {
+ case VK_FORMAT_R32G32B32_SFLOAT:
+ case VK_FORMAT_R32G32B32A32_SFLOAT:
+ /* 32-bit float components: single vec3 load (4th component of
+ * RGBA is ignored). */
+ nir_store_var(b, results[i],
+ nir_build_load_global(b, 3, 32, nir_channel(b, addresses, i),
+ .align_mul = 4, .align_offset = 0),
+ 7);
+ break;
+ case VK_FORMAT_R16G16B16_SFLOAT:
+ case VK_FORMAT_R16G16B16A16_SFLOAT: {
+ /* Half floats: three scalar 16-bit loads, widened to f32. */
+ nir_ssa_def *values[3];
+ nir_ssa_def *addr = nir_channel(b, addresses, i);
+ for (unsigned j = 0; j < 3; ++j)
+ values[j] =
+ nir_build_load_global(b, 1, 16, nir_iadd(b, addr, nir_imm_int64(b, j * 2)),
+ .align_mul = 2, .align_offset = 0);
+ nir_store_var(b, results[i], nir_f2f32(b, nir_vec(b, values, 3)), 7);
+ break;
+ }
+ default:
+ unreachable("Unhandled format");
+ }
+ }
+ if (f + 1 < ARRAY_SIZE(formats))
+ nir_push_else(b, NULL);
+ }
+ /* Close the ARRAY_SIZE-1 if/else levels opened above. */
+ for (unsigned f = 1; f < ARRAY_SIZE(formats); ++f) {
+ nir_pop_if(b, NULL);
+ }
+
+ for (unsigned i = 0; i < 3; ++i)
+ positions[i] = nir_load_var(b, results[i]);
+}
+
+/* Push constants for the leaf-node build shader.  Field order must match the
+ * pconst0..pconst4 loads in build_leaf_shader() (bases 0/16/32/48/64). */
+struct build_primitive_constants {
+ uint64_t node_dst_addr;
+ uint64_t scratch_addr;
+ uint32_t dst_offset;
+ uint32_t dst_scratch_offset;
+ uint32_t geometry_type;
+ uint32_t geometry_id;
+
+ /* Geometry-type specific payload, selected by geometry_type. */
+ union {
+ /* VK_GEOMETRY_TYPE_TRIANGLES_KHR */
+ struct {
+ uint64_t vertex_addr;
+ uint64_t index_addr;
+ uint64_t transform_addr;
+ uint32_t vertex_stride;
+ uint32_t vertex_format;
+ uint32_t index_format;
+ };
+ /* VK_GEOMETRY_TYPE_INSTANCES_KHR */
+ struct {
+ uint64_t instance_data;
+ };
+ /* VK_GEOMETRY_TYPE_AABBS_KHR */
+ struct {
+ uint64_t aabb_addr;
+ uint32_t aabb_stride;
+ };
+ };
+};
+
+/* Push constants for the internal-node build shader; layout matches the
+ * pconst0/pconst1 loads in build_internal_shader().  fill_header packs
+ * src_node_count in the low 31 bits and the "write the BVH header" flag in
+ * bit 31. */
+struct build_internal_constants {
+ uint64_t node_dst_addr;
+ uint64_t scratch_addr;
+ uint32_t dst_offset;
+ uint32_t dst_scratch_offset;
+ uint32_t src_scratch_offset;
+ uint32_t fill_header;
+};
+
+/* This inverts a 3x3 matrix using cofactors, as in e.g.
+ * https://www.mathsisfun.com/algebra/matrix-inverse-minors-cofactors-adjugate.html
+ * in[i][j] are the input matrix elements; out receives the inverse.
+ * No singularity check: a zero determinant yields non-finite results via
+ * nir_frcp. */
+static void
+nir_invert_3x3(nir_builder *b, nir_ssa_def *in[3][3], nir_ssa_def *out[3][3])
+{
+ /* Cofactor of element (i,j): 2x2 minor determinant with cyclic indexing. */
+ nir_ssa_def *cofactors[3][3];
+ for (unsigned i = 0; i < 3; ++i) {
+ for (unsigned j = 0; j < 3; ++j) {
+ cofactors[i][j] =
+ nir_fsub(b, nir_fmul(b, in[(i + 1) % 3][(j + 1) % 3], in[(i + 2) % 3][(j + 2) % 3]),
+ nir_fmul(b, in[(i + 1) % 3][(j + 2) % 3], in[(i + 2) % 3][(j + 1) % 3]));
+ }
+ }
+
+ /* Determinant: expansion along the first row. */
+ nir_ssa_def *det = NULL;
+ for (unsigned i = 0; i < 3; ++i) {
+ nir_ssa_def *det_part = nir_fmul(b, in[0][i], cofactors[0][i]);
+ det = det ? nir_fadd(b, det, det_part) : det_part;
+ }
+
+ /* Inverse = adjugate (transposed cofactors) / determinant. */
+ nir_ssa_def *det_inv = nir_frcp(b, det);
+ for (unsigned i = 0; i < 3; ++i) {
+ for (unsigned j = 0; j < 3; ++j) {
+ out[i][j] = nir_fmul(b, cofactors[j][i], det_inv);
+ }
+ }
+}
+
+/* Build the compute shader that writes one BVH leaf node per invocation
+ * (triangle, AABB, or instance node, selected by the geometry_type push
+ * constant) and records each node's id in the scratch buffer for the
+ * internal-node pass.  Push constant layout: struct build_primitive_constants.
+ * One thread per primitive; workgroup size 64x1x1. */
+static nir_shader *
+build_leaf_shader(struct radv_device *dev)
+{
+ const struct glsl_type *vec3_type = glsl_vector_type(GLSL_TYPE_FLOAT, 3);
+ nir_builder b =
+ nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, "accel_build_leaf_shader");
+
+ b.shader->info.workgroup_size[0] = 64;
+ b.shader->info.workgroup_size[1] = 1;
+ b.shader->info.workgroup_size[2] = 1;
+
+ /* Load build_primitive_constants as five 16/4-byte chunks. */
+ nir_ssa_def *pconst0 =
+ nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0), .base = 0, .range = 16);
+ nir_ssa_def *pconst1 =
+ nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0), .base = 16, .range = 16);
+ nir_ssa_def *pconst2 =
+ nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0), .base = 32, .range = 16);
+ nir_ssa_def *pconst3 =
+ nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0), .base = 48, .range = 16);
+ nir_ssa_def *pconst4 =
+ nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base = 64, .range = 4);
+
+ nir_ssa_def *geom_type = nir_channel(&b, pconst1, 2);
+ nir_ssa_def *node_dst_addr = nir_pack_64_2x32(&b, nir_channels(&b, pconst0, 3));
+ nir_ssa_def *scratch_addr = nir_pack_64_2x32(&b, nir_channels(&b, pconst0, 12));
+ nir_ssa_def *node_dst_offset = nir_channel(&b, pconst1, 0);
+ nir_ssa_def *scratch_offset = nir_channel(&b, pconst1, 1);
+ nir_ssa_def *geometry_id = nir_channel(&b, pconst1, 3);
+
+ /* Flat 1D invocation index = workgroup_id.x * 64 + local_id.x. */
+ nir_ssa_def *global_id =
+ nir_iadd(&b,
+ nir_umul24(&b, nir_channels(&b, nir_load_workgroup_id(&b, 32), 1),
+ nir_imm_int(&b, b.shader->info.workgroup_size[0])),
+ nir_channels(&b, nir_load_local_invocation_id(&b), 1));
+ /* Each invocation writes one 4-byte node id into the scratch buffer. */
+ scratch_addr = nir_iadd(
+ &b, scratch_addr,
+ nir_u2u64(&b, nir_iadd(&b, scratch_offset, nir_umul24(&b, global_id, nir_imm_int(&b, 4)))));
+
+ nir_push_if(&b, nir_ieq(&b, geom_type, nir_imm_int(&b, VK_GEOMETRY_TYPE_TRIANGLES_KHR)));
+ { /* Triangles */
+ nir_ssa_def *vertex_addr = nir_pack_64_2x32(&b, nir_channels(&b, pconst2, 3));
+ nir_ssa_def *index_addr = nir_pack_64_2x32(&b, nir_channels(&b, pconst2, 12));
+ nir_ssa_def *transform_addr = nir_pack_64_2x32(&b, nir_channels(&b, pconst3, 3));
+ nir_ssa_def *vertex_stride = nir_channel(&b, pconst3, 2);
+ nir_ssa_def *vertex_format = nir_channel(&b, pconst3, 3);
+ nir_ssa_def *index_format = nir_channel(&b, pconst4, 0);
+ /* Broadcast-swizzle: replicate component 0 across a vec3. */
+ unsigned repl_swizzle[4] = {0, 0, 0, 0};
+
+ /* Triangle nodes are 64 bytes each. */
+ nir_ssa_def *node_offset =
+ nir_iadd(&b, node_dst_offset, nir_umul24(&b, global_id, nir_imm_int(&b, 64)));
+ nir_ssa_def *triangle_node_dst_addr = nir_iadd(&b, node_dst_addr, nir_u2u64(&b, node_offset));
+
+ /* Per-vertex address = vertex_addr + index * stride (vectorized). */
+ nir_ssa_def *indices = get_indices(&b, index_addr, index_format, global_id);
+ nir_ssa_def *vertex_addresses = nir_iadd(
+ &b, nir_u2u64(&b, nir_imul(&b, indices, nir_swizzle(&b, vertex_stride, repl_swizzle, 3))),
+ nir_swizzle(&b, vertex_addr, repl_swizzle, 3));
+ nir_ssa_def *positions[3];
+ get_vertices(&b, vertex_addresses, vertex_format, positions);
+
+ /* 64-byte node image, assembled as 16 dwords. */
+ nir_ssa_def *node_data[16];
+ memset(node_data, 0, sizeof(node_data));
+
+ /* Optional 3x4 transform; defaults to identity when transform_addr == 0. */
+ nir_variable *transform[] = {
+ nir_variable_create(b.shader, nir_var_shader_temp, glsl_vec4_type(), "transform0"),
+ nir_variable_create(b.shader, nir_var_shader_temp, glsl_vec4_type(), "transform1"),
+ nir_variable_create(b.shader, nir_var_shader_temp, glsl_vec4_type(), "transform2"),
+ };
+ nir_store_var(&b, transform[0], nir_imm_vec4(&b, 1.0, 0.0, 0.0, 0.0), 0xf);
+ nir_store_var(&b, transform[1], nir_imm_vec4(&b, 0.0, 1.0, 0.0, 0.0), 0xf);
+ nir_store_var(&b, transform[2], nir_imm_vec4(&b, 0.0, 0.0, 1.0, 0.0), 0xf);
+
+ nir_push_if(&b, nir_ine(&b, transform_addr, nir_imm_int64(&b, 0)));
+ nir_store_var(
+ &b, transform[0],
+ nir_build_load_global(&b, 4, 32, nir_iadd(&b, transform_addr, nir_imm_int64(&b, 0)),
+ .align_mul = 4, .align_offset = 0),
+ 0xf);
+ nir_store_var(
+ &b, transform[1],
+ nir_build_load_global(&b, 4, 32, nir_iadd(&b, transform_addr, nir_imm_int64(&b, 16)),
+ .align_mul = 4, .align_offset = 0),
+ 0xf);
+ nir_store_var(
+ &b, transform[2],
+ nir_build_load_global(&b, 4, 32, nir_iadd(&b, transform_addr, nir_imm_int64(&b, 32)),
+ .align_mul = 4, .align_offset = 0),
+ 0xf);
+ nir_pop_if(&b, NULL);
+
+ /* Transformed positions: dwords 0..8 hold the three vertices (fdph =
+ * dot(vec3, vec4) with implicit w=1, i.e. the affine transform). */
+ for (unsigned i = 0; i < 3; ++i)
+ for (unsigned j = 0; j < 3; ++j)
+ node_data[i * 3 + j] = nir_fdph(&b, positions[i], nir_load_var(&b, transform[j]));
+
+ node_data[12] = global_id;
+ node_data[13] = geometry_id;
+ node_data[15] = nir_imm_int(&b, 9);
+ for (unsigned i = 0; i < ARRAY_SIZE(node_data); ++i)
+ if (!node_data[i])
+ node_data[i] = nir_imm_int(&b, 0);
+
+ for (unsigned i = 0; i < 4; ++i) {
+ nir_build_store_global(&b, nir_vec(&b, node_data + i * 4, 4),
+ nir_iadd(&b, triangle_node_dst_addr, nir_imm_int64(&b, i * 16)),
+ .write_mask = 15, .align_mul = 16, .align_offset = 0);
+ }
+
+ /* Node id = byte offset / 8; triangle type bits are 0. */
+ nir_ssa_def *node_id = nir_ushr(&b, node_offset, nir_imm_int(&b, 3));
+ nir_build_store_global(&b, node_id, scratch_addr, .write_mask = 1, .align_mul = 4,
+ .align_offset = 0);
+ }
+ nir_push_else(&b, NULL);
+ nir_push_if(&b, nir_ieq(&b, geom_type, nir_imm_int(&b, VK_GEOMETRY_TYPE_AABBS_KHR)));
+ { /* AABBs */
+ nir_ssa_def *aabb_addr = nir_pack_64_2x32(&b, nir_channels(&b, pconst2, 3));
+ nir_ssa_def *aabb_stride = nir_channel(&b, pconst2, 2);
+
+ nir_ssa_def *node_offset =
+ nir_iadd(&b, node_dst_offset, nir_umul24(&b, global_id, nir_imm_int(&b, 64)));
+ nir_ssa_def *aabb_node_dst_addr = nir_iadd(&b, node_dst_addr, nir_u2u64(&b, node_offset));
+ /* Node id: (offset / 8) | 7 — low bits 7 tag an AABB node. */
+ nir_ssa_def *node_id =
+ nir_iadd(&b, nir_ushr(&b, node_offset, nir_imm_int(&b, 3)), nir_imm_int(&b, 7));
+ nir_build_store_global(&b, node_id, scratch_addr, .write_mask = 1, .align_mul = 4,
+ .align_offset = 0);
+
+ aabb_addr = nir_iadd(&b, aabb_addr, nir_u2u64(&b, nir_imul(&b, aabb_stride, global_id)));
+
+ nir_ssa_def *min_bound =
+ nir_build_load_global(&b, 3, 32, nir_iadd(&b, aabb_addr, nir_imm_int64(&b, 0)),
+ .align_mul = 4, .align_offset = 0);
+ nir_ssa_def *max_bound =
+ nir_build_load_global(&b, 3, 32, nir_iadd(&b, aabb_addr, nir_imm_int64(&b, 12)),
+ .align_mul = 4, .align_offset = 0);
+
+ nir_ssa_def *values[] = {nir_channel(&b, min_bound, 0),
+ nir_channel(&b, min_bound, 1),
+ nir_channel(&b, min_bound, 2),
+ nir_channel(&b, max_bound, 0),
+ nir_channel(&b, max_bound, 1),
+ nir_channel(&b, max_bound, 2),
+ global_id,
+ geometry_id};
+
+ nir_build_store_global(&b, nir_vec(&b, values + 0, 4),
+ nir_iadd(&b, aabb_node_dst_addr, nir_imm_int64(&b, 0)),
+ .write_mask = 15, .align_mul = 16, .align_offset = 0);
+ nir_build_store_global(&b, nir_vec(&b, values + 4, 4),
+ nir_iadd(&b, aabb_node_dst_addr, nir_imm_int64(&b, 16)),
+ .write_mask = 15, .align_mul = 16, .align_offset = 0);
+ }
+ nir_push_else(&b, NULL);
+ { /* Instances */
+
+ /* VkAccelerationStructureInstanceKHR entries are 64 bytes. */
+ nir_ssa_def *instance_addr =
+ nir_iadd(&b, nir_pack_64_2x32(&b, nir_channels(&b, pconst2, 3)),
+ nir_u2u64(&b, nir_imul(&b, global_id, nir_imm_int(&b, 64))));
+ nir_ssa_def *inst_transform[] = {
+ nir_build_load_global(&b, 4, 32, nir_iadd(&b, instance_addr, nir_imm_int64(&b, 0)),
+ .align_mul = 4, .align_offset = 0),
+ nir_build_load_global(&b, 4, 32, nir_iadd(&b, instance_addr, nir_imm_int64(&b, 16)),
+ .align_mul = 4, .align_offset = 0),
+ nir_build_load_global(&b, 4, 32, nir_iadd(&b, instance_addr, nir_imm_int64(&b, 32)),
+ .align_mul = 4, .align_offset = 0)};
+ /* inst3 = custom index/mask, sbt offset/flags, BLAS address (2x32). */
+ nir_ssa_def *inst3 =
+ nir_build_load_global(&b, 4, 32, nir_iadd(&b, instance_addr, nir_imm_int64(&b, 48)),
+ .align_mul = 4, .align_offset = 0);
+
+ /* Instance nodes are 128 bytes; type tag 6 in the node id. */
+ nir_ssa_def *node_offset =
+ nir_iadd(&b, node_dst_offset, nir_umul24(&b, global_id, nir_imm_int(&b, 128)));
+ node_dst_addr = nir_iadd(&b, node_dst_addr, nir_u2u64(&b, node_offset));
+ nir_ssa_def *node_id =
+ nir_iadd(&b, nir_ushr(&b, node_offset, nir_imm_int(&b, 3)), nir_imm_int(&b, 6));
+ nir_build_store_global(&b, node_id, scratch_addr, .write_mask = 1, .align_mul = 4,
+ .align_offset = 0);
+
+ nir_variable *bounds[2] = {
+ nir_variable_create(b.shader, nir_var_shader_temp, vec3_type, "min_bound"),
+ nir_variable_create(b.shader, nir_var_shader_temp, vec3_type, "max_bound"),
+ };
+
+ /* NaN bounds mark an inactive instance (null BLAS). */
+ nir_store_var(&b, bounds[0], nir_channels(&b, nir_imm_vec4(&b, NAN, NAN, NAN, NAN), 7), 7);
+ nir_store_var(&b, bounds[1], nir_channels(&b, nir_imm_vec4(&b, NAN, NAN, NAN, NAN), 7), 7);
+
+ nir_ssa_def *header_addr = nir_pack_64_2x32(&b, nir_channels(&b, inst3, 12));
+ nir_push_if(&b, nir_ine(&b, header_addr, nir_imm_int64(&b, 0)));
+ /* Read root offset and object-space bounds from the BLAS header. */
+ nir_ssa_def *header_root_offset =
+ nir_build_load_global(&b, 1, 32, nir_iadd(&b, header_addr, nir_imm_int64(&b, 0)),
+ .align_mul = 4, .align_offset = 0);
+ nir_ssa_def *header_min =
+ nir_build_load_global(&b, 3, 32, nir_iadd(&b, header_addr, nir_imm_int64(&b, 8)),
+ .align_mul = 4, .align_offset = 0);
+ nir_ssa_def *header_max =
+ nir_build_load_global(&b, 3, 32, nir_iadd(&b, header_addr, nir_imm_int64(&b, 20)),
+ .align_mul = 4, .align_offset = 0);
+
+ /* Transform the BLAS AABB into world space: per output axis, start at the
+ * translation column and add min/max of each scaled corner contribution. */
+ nir_ssa_def *bound_defs[2][3];
+ for (unsigned i = 0; i < 3; ++i) {
+ bound_defs[0][i] = bound_defs[1][i] = nir_channel(&b, inst_transform[i], 3);
+
+ nir_ssa_def *mul_a = nir_fmul(&b, nir_channels(&b, inst_transform[i], 7), header_min);
+ nir_ssa_def *mul_b = nir_fmul(&b, nir_channels(&b, inst_transform[i], 7), header_max);
+ nir_ssa_def *mi = nir_fmin(&b, mul_a, mul_b);
+ nir_ssa_def *ma = nir_fmax(&b, mul_a, mul_b);
+ for (unsigned j = 0; j < 3; ++j) {
+ bound_defs[0][i] = nir_fadd(&b, bound_defs[0][i], nir_channel(&b, mi, j));
+ bound_defs[1][i] = nir_fadd(&b, bound_defs[1][i], nir_channel(&b, ma, j));
+ }
+ }
+
+ nir_store_var(&b, bounds[0], nir_vec(&b, bound_defs[0], 3), 7);
+ nir_store_var(&b, bounds[1], nir_vec(&b, bound_defs[1], 3), 7);
+
+ /* The node stores the inverse (world-to-object) 3x3 plus the original
+ * translation column. */
+ nir_ssa_def *m_in[3][3], *m_out[3][3], *m_vec[3][4];
+ for (unsigned i = 0; i < 3; ++i)
+ for (unsigned j = 0; j < 3; ++j)
+ m_in[i][j] = nir_channel(&b, inst_transform[i], j);
+ nir_invert_3x3(&b, m_in, m_out);
+ for (unsigned i = 0; i < 3; ++i) {
+ for (unsigned j = 0; j < 3; ++j)
+ m_vec[i][j] = m_out[i][j];
+ m_vec[i][3] = nir_channel(&b, inst_transform[i], 3);
+ }
+
+ for (unsigned i = 0; i < 3; ++i) {
+ nir_build_store_global(&b, nir_vec(&b, m_vec[i], 4),
+ nir_iadd(&b, node_dst_addr, nir_imm_int64(&b, 16 + 16 * i)),
+ .write_mask = 0xf, .align_mul = 4, .align_offset = 0);
+ }
+
+ /* Dword 0: BLAS address low bits OR'd with the root node offset. */
+ nir_ssa_def *out0[4] = {
+ nir_ior(&b, nir_channel(&b, nir_unpack_64_2x32(&b, header_addr), 0), header_root_offset),
+ nir_channel(&b, nir_unpack_64_2x32(&b, header_addr), 1), nir_channel(&b, inst3, 0),
+ nir_channel(&b, inst3, 1)};
+ nir_build_store_global(&b, nir_vec(&b, out0, 4),
+ nir_iadd(&b, node_dst_addr, nir_imm_int64(&b, 0)), .write_mask = 0xf,
+ .align_mul = 4, .align_offset = 0);
+ nir_build_store_global(&b, global_id, nir_iadd(&b, node_dst_addr, nir_imm_int64(&b, 88)),
+ .write_mask = 0x1, .align_mul = 4, .align_offset = 0);
+ nir_pop_if(&b, NULL);
+ /* Bounds (possibly NaN for null BLAS) at byte offsets 64/76. */
+ nir_build_store_global(&b, nir_load_var(&b, bounds[0]),
+ nir_iadd(&b, node_dst_addr, nir_imm_int64(&b, 64)), .write_mask = 0x7,
+ .align_mul = 4, .align_offset = 0);
+ nir_build_store_global(&b, nir_load_var(&b, bounds[1]),
+ nir_iadd(&b, node_dst_addr, nir_imm_int64(&b, 76)), .write_mask = 0x7,
+ .align_mul = 4, .align_offset = 0);
+ }
+ nir_pop_if(&b, NULL);
+ nir_pop_if(&b, NULL);
+
+ return b.shader;
+}
+
+/* Emit NIR that computes the AABB of the node identified by `node_id`
+ * (type tag in the low 3 bits, byte offset = (id & ~7) << 3 from `node_addr`)
+ * and stores min/max into bounds_vars[0]/[1].  Type 0 = triangle node,
+ * 5 = internal box node, 6 = instance node, anything else = AABB node. */
+static void
+determine_bounds(nir_builder *b, nir_ssa_def *node_addr, nir_ssa_def *node_id,
+ nir_variable *bounds_vars[2])
+{
+ nir_ssa_def *node_type = nir_iand(b, node_id, nir_imm_int(b, 7));
+ node_addr = nir_iadd(
+ b, node_addr,
+ nir_u2u64(b, nir_ishl(b, nir_iand(b, node_id, nir_imm_int(b, ~7u)), nir_imm_int(b, 3))));
+
+ nir_push_if(b, nir_ieq(b, node_type, nir_imm_int(b, 0)));
+ {
+ /* Triangle node: bounds = min/max of the three stored vertices. */
+ nir_ssa_def *positions[3];
+ for (unsigned i = 0; i < 3; ++i)
+ positions[i] =
+ nir_build_load_global(b, 3, 32, nir_iadd(b, node_addr, nir_imm_int64(b, i * 12)),
+ .align_mul = 4, .align_offset = 0);
+ nir_ssa_def *bounds[] = {positions[0], positions[0]};
+ for (unsigned i = 1; i < 3; ++i) {
+ bounds[0] = nir_fmin(b, bounds[0], positions[i]);
+ bounds[1] = nir_fmax(b, bounds[1], positions[i]);
+ }
+ nir_store_var(b, bounds_vars[0], bounds[0], 7);
+ nir_store_var(b, bounds_vars[1], bounds[1], 7);
+ }
+ nir_push_else(b, NULL);
+ nir_push_if(b, nir_ieq(b, node_type, nir_imm_int(b, 5)));
+ {
+ /* Internal node: union of the four child AABBs stored at offset 16,
+ * 24 bytes (min vec3 + max vec3) per child. */
+ nir_ssa_def *input_bounds[4][2];
+ for (unsigned i = 0; i < 4; ++i)
+ for (unsigned j = 0; j < 2; ++j)
+ input_bounds[i][j] = nir_build_load_global(
+ b, 3, 32, nir_iadd(b, node_addr, nir_imm_int64(b, 16 + i * 24 + j * 12)),
+ .align_mul = 4, .align_offset = 0);
+ nir_ssa_def *bounds[] = {input_bounds[0][0], input_bounds[0][1]};
+ for (unsigned i = 1; i < 4; ++i) {
+ bounds[0] = nir_fmin(b, bounds[0], input_bounds[i][0]);
+ bounds[1] = nir_fmax(b, bounds[1], input_bounds[i][1]);
+ }
+
+ nir_store_var(b, bounds_vars[0], bounds[0], 7);
+ nir_store_var(b, bounds_vars[1], bounds[1], 7);
+ }
+ nir_push_else(b, NULL);
+ nir_push_if(b, nir_ieq(b, node_type, nir_imm_int(b, 6)));
+ { /* Instances */
+ /* Bounds were precomputed by the leaf shader at byte offset 64. */
+ nir_ssa_def *bounds[2];
+ for (unsigned i = 0; i < 2; ++i)
+ bounds[i] =
+ nir_build_load_global(b, 3, 32, nir_iadd(b, node_addr, nir_imm_int64(b, 64 + i * 12)),
+ .align_mul = 4, .align_offset = 0);
+ nir_store_var(b, bounds_vars[0], bounds[0], 7);
+ nir_store_var(b, bounds_vars[1], bounds[1], 7);
+ }
+ nir_push_else(b, NULL);
+ { /* AABBs */
+ /* AABB node stores min/max directly at offset 0. */
+ nir_ssa_def *bounds[2];
+ for (unsigned i = 0; i < 2; ++i)
+ bounds[i] =
+ nir_build_load_global(b, 3, 32, nir_iadd(b, node_addr, nir_imm_int64(b, i * 12)),
+ .align_mul = 4, .align_offset = 0);
+ nir_store_var(b, bounds_vars[0], bounds[0], 7);
+ nir_store_var(b, bounds_vars[1], bounds[1], 7);
+ }
+ nir_pop_if(b, NULL);
+ nir_pop_if(b, NULL);
+ nir_pop_if(b, NULL);
+}
+
+/* Build the compute shader for one reduction level of the BVH: each
+ * invocation gathers up to four child node ids from the source scratch
+ * region, writes a 128-byte internal box node with the children's AABBs,
+ * and emits its own node id to the destination scratch region.  On the
+ * final level (fill_header bit set) it also writes the acceleration
+ * structure header (root id + total bounds). */
+static nir_shader *
+build_internal_shader(struct radv_device *dev)
+{
+ const struct glsl_type *vec3_type = glsl_vector_type(GLSL_TYPE_FLOAT, 3);
+ nir_builder b =
+ nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, "accel_build_internal_shader");
+
+ b.shader->info.workgroup_size[0] = 64;
+ b.shader->info.workgroup_size[1] = 1;
+ b.shader->info.workgroup_size[2] = 1;
+
+ /*
+ * push constants:
+ * i32 x 2: node dst address
+ * i32 x 2: scratch address
+ * i32: dst offset
+ * i32: dst scratch offset
+ * i32: src scratch offset
+ * i32: src_node_count | (fill_header << 31)
+ */
+ nir_ssa_def *pconst0 =
+ nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0), .base = 0, .range = 16);
+ nir_ssa_def *pconst1 =
+ nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0), .base = 16, .range = 16);
+
+ nir_ssa_def *node_addr = nir_pack_64_2x32(&b, nir_channels(&b, pconst0, 3));
+ nir_ssa_def *scratch_addr = nir_pack_64_2x32(&b, nir_channels(&b, pconst0, 12));
+ nir_ssa_def *node_dst_offset = nir_channel(&b, pconst1, 0);
+ nir_ssa_def *dst_scratch_offset = nir_channel(&b, pconst1, 1);
+ nir_ssa_def *src_scratch_offset = nir_channel(&b, pconst1, 2);
+ nir_ssa_def *src_node_count =
+ nir_iand(&b, nir_channel(&b, pconst1, 3), nir_imm_int(&b, 0x7FFFFFFFU));
+ nir_ssa_def *fill_header =
+ nir_ine(&b, nir_iand(&b, nir_channel(&b, pconst1, 3), nir_imm_int(&b, 0x80000000U)),
+ nir_imm_int(&b, 0));
+
+ /* Flat 1D invocation index; each one consumes up to 4 source nodes. */
+ nir_ssa_def *global_id =
+ nir_iadd(&b,
+ nir_umul24(&b, nir_channels(&b, nir_load_workgroup_id(&b, 32), 1),
+ nir_imm_int(&b, b.shader->info.workgroup_size[0])),
+ nir_channels(&b, nir_load_local_invocation_id(&b), 1));
+ nir_ssa_def *src_idx = nir_imul(&b, global_id, nir_imm_int(&b, 4));
+ nir_ssa_def *src_count = nir_umin(&b, nir_imm_int(&b, 4), nir_isub(&b, src_node_count, src_idx));
+
+ /* Internal nodes are 128 bytes (global_id << 7). */
+ nir_ssa_def *node_offset =
+ nir_iadd(&b, node_dst_offset, nir_ishl(&b, global_id, nir_imm_int(&b, 7)));
+ nir_ssa_def *node_dst_addr = nir_iadd(&b, node_addr, nir_u2u64(&b, node_offset));
+ /* Load the four child ids (16 bytes) from the source scratch region. */
+ nir_ssa_def *src_nodes = nir_build_load_global(
+ &b, 4, 32,
+ nir_iadd(&b, scratch_addr,
+ nir_u2u64(&b, nir_iadd(&b, src_scratch_offset,
+ nir_ishl(&b, global_id, nir_imm_int(&b, 4))))),
+ .align_mul = 4, .align_offset = 0);
+
+ nir_build_store_global(&b, src_nodes, nir_iadd(&b, node_dst_addr, nir_imm_int64(&b, 0)),
+ .write_mask = 0xf, .align_mul = 4, .align_offset = 0);
+
+ nir_ssa_def *total_bounds[2] = {
+ nir_channels(&b, nir_imm_vec4(&b, NAN, NAN, NAN, NAN), 7),
+ nir_channels(&b, nir_imm_vec4(&b, NAN, NAN, NAN, NAN), 7),
+ };
+
+ for (unsigned i = 0; i < 4; ++i) {
+ nir_variable *bounds[2] = {
+ nir_variable_create(b.shader, nir_var_shader_temp, vec3_type, "min_bound"),
+ nir_variable_create(b.shader, nir_var_shader_temp, vec3_type, "max_bound"),
+ };
+ /* NaN bounds for slots beyond src_count (missing children). */
+ nir_store_var(&b, bounds[0], nir_channels(&b, nir_imm_vec4(&b, NAN, NAN, NAN, NAN), 7), 7);
+ nir_store_var(&b, bounds[1], nir_channels(&b, nir_imm_vec4(&b, NAN, NAN, NAN, NAN), 7), 7);
+
+ nir_push_if(&b, nir_ilt(&b, nir_imm_int(&b, i), src_count));
+ determine_bounds(&b, node_addr, nir_channel(&b, src_nodes, i), bounds);
+ nir_pop_if(&b, NULL);
+ /* Child AABBs at offset 16, 24 bytes per child (matches the reader in
+ * determine_bounds for node type 5). */
+ nir_build_store_global(&b, nir_load_var(&b, bounds[0]),
+ nir_iadd(&b, node_dst_addr, nir_imm_int64(&b, 16 + 24 * i)),
+ .write_mask = 0x7, .align_mul = 4, .align_offset = 0);
+ nir_build_store_global(&b, nir_load_var(&b, bounds[1]),
+ nir_iadd(&b, node_dst_addr, nir_imm_int64(&b, 28 + 24 * i)),
+ .write_mask = 0x7, .align_mul = 4, .align_offset = 0);
+ total_bounds[0] = nir_fmin(&b, total_bounds[0], nir_load_var(&b, bounds[0]));
+ total_bounds[1] = nir_fmax(&b, total_bounds[1], nir_load_var(&b, bounds[1]));
+ }
+
+ /* This node's id: (offset / 8) | 5 — internal box node tag. */
+ nir_ssa_def *node_id =
+ nir_iadd(&b, nir_ushr(&b, node_offset, nir_imm_int(&b, 3)), nir_imm_int(&b, 5));
+ nir_ssa_def *dst_scratch_addr = nir_iadd(
+ &b, scratch_addr,
+ nir_u2u64(&b, nir_iadd(&b, dst_scratch_offset, nir_ishl(&b, global_id, nir_imm_int(&b, 2)))));
+ nir_build_store_global(&b, node_id, dst_scratch_addr, .write_mask = 1, .align_mul = 4,
+ .align_offset = 0);
+
+ /* Final level: write header (root node id at 0, total bounds at 8/20 —
+ * the offsets the instance path in build_leaf_shader reads back). */
+ nir_push_if(&b, fill_header);
+ nir_build_store_global(&b, node_id, node_addr, .write_mask = 1, .align_mul = 4,
+ .align_offset = 0);
+ nir_build_store_global(&b, total_bounds[0], nir_iadd(&b, node_addr, nir_imm_int64(&b, 8)),
+ .write_mask = 7, .align_mul = 4, .align_offset = 0);
+ nir_build_store_global(&b, total_bounds[1], nir_iadd(&b, node_addr, nir_imm_int64(&b, 20)),
+ .write_mask = 7, .align_mul = 4, .align_offset = 0);
+ nir_pop_if(&b, NULL);
+ return b.shader;
+}
+
+/* Destroy the pipelines and layouts created by
+ * radv_device_init_accel_struct_build_state().  Safe to call on partially
+ * initialized state: the radv_Destroy* entry points accept VK_NULL_HANDLE. */
+void
+radv_device_finish_accel_struct_build_state(struct radv_device *device)
+{
+ struct radv_meta_state *state = &device->meta_state;
+ radv_DestroyPipeline(radv_device_to_handle(device), state->accel_struct_build.internal_pipeline,
+ &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device), state->accel_struct_build.leaf_pipeline,
+ &state->alloc);
+ radv_DestroyPipelineLayout(radv_device_to_handle(device),
+ state->accel_struct_build.internal_p_layout, &state->alloc);
+ radv_DestroyPipelineLayout(radv_device_to_handle(device),
+ state->accel_struct_build.leaf_p_layout, &state->alloc);
+}
+
+/* Create the leaf and internal compute pipelines (push-constant-only
+ * layouts, no descriptor sets) used for device-side BVH builds.  On failure,
+ * tears down whatever was created and returns the failing VkResult.  The
+ * NIR shaders are owned by the shader-module handles after creation; they
+ * are only explicitly freed on the error path here. */
+VkResult
+radv_device_init_accel_struct_build_state(struct radv_device *device)
+{
+ VkResult result;
+ nir_shader *leaf_cs = build_leaf_shader(device);
+ nir_shader *internal_cs = build_internal_shader(device);
+
+ const VkPipelineLayoutCreateInfo leaf_pl_create_info = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+ .setLayoutCount = 0,
+ .pushConstantRangeCount = 1,
+ .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0,
+ sizeof(struct build_primitive_constants)},
+ };
+
+ result = radv_CreatePipelineLayout(radv_device_to_handle(device), &leaf_pl_create_info,
+ &device->meta_state.alloc,
+ &device->meta_state.accel_struct_build.leaf_p_layout);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ VkPipelineShaderStageCreateInfo leaf_shader_stage = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_COMPUTE_BIT,
+ .module = vk_shader_module_handle_from_nir(leaf_cs),
+ .pName = "main",
+ .pSpecializationInfo = NULL,
+ };
+
+ VkComputePipelineCreateInfo leaf_pipeline_info = {
+ .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
+ .stage = leaf_shader_stage,
+ .flags = 0,
+ .layout = device->meta_state.accel_struct_build.leaf_p_layout,
+ };
+
+ result = radv_CreateComputePipelines(
+ radv_device_to_handle(device), radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
+ &leaf_pipeline_info, NULL, &device->meta_state.accel_struct_build.leaf_pipeline);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ const VkPipelineLayoutCreateInfo internal_pl_create_info = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+ .setLayoutCount = 0,
+ .pushConstantRangeCount = 1,
+ .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0,
+ sizeof(struct build_internal_constants)},
+ };
+
+ result = radv_CreatePipelineLayout(radv_device_to_handle(device), &internal_pl_create_info,
+ &device->meta_state.alloc,
+ &device->meta_state.accel_struct_build.internal_p_layout);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ VkPipelineShaderStageCreateInfo internal_shader_stage = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_COMPUTE_BIT,
+ .module = vk_shader_module_handle_from_nir(internal_cs),
+ .pName = "main",
+ .pSpecializationInfo = NULL,
+ };
+
+ VkComputePipelineCreateInfo internal_pipeline_info = {
+ .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
+ .stage = internal_shader_stage,
+ .flags = 0,
+ .layout = device->meta_state.accel_struct_build.internal_p_layout,
+ };
+
+ result = radv_CreateComputePipelines(
+ radv_device_to_handle(device), radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
+ &internal_pipeline_info, NULL, &device->meta_state.accel_struct_build.internal_pipeline);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ return VK_SUCCESS;
+
+fail:
+ radv_device_finish_accel_struct_build_state(device);
+ ralloc_free(internal_cs);
+ ralloc_free(leaf_cs);
+ return result;
+}
+
+/* Per-infoCount build progress tracked across the internal-node iterations
+ * of radv_CmdBuildAccelerationStructuresKHR(). */
+struct bvh_state {
+ uint32_t node_offset;
+ uint32_t node_count;
+ uint32_t scratch_offset;
+};
+
+/* Record device-side BVH builds: first one leaf-shader dispatch per geometry
+ * (writing leaf nodes and their ids into scratch), then repeated internal-
+ * shader dispatches that reduce the node list 4:1 per iteration, ping-
+ * ponging between two scratch regions, until each BVH has a single root. */
+void
+radv_CmdBuildAccelerationStructuresKHR(
+ VkCommandBuffer commandBuffer, uint32_t infoCount,
+ const VkAccelerationStructureBuildGeometryInfoKHR *pInfos,
+ const VkAccelerationStructureBuildRangeInfoKHR *const *ppBuildRangeInfos)
+{
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ struct radv_meta_saved_state saved_state;
+
+ radv_meta_save(
+ &saved_state, cmd_buffer,
+ RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_CONSTANTS);
+ /* NOTE(review): calloc result is not checked before use — confirm the
+ * intended OOM policy for command recording here. */
+ struct bvh_state *bvh_states = calloc(infoCount, sizeof(struct bvh_state));
+
+ radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
+ cmd_buffer->device->meta_state.accel_struct_build.leaf_pipeline);
+
+ for (uint32_t i = 0; i < infoCount; ++i) {
+ RADV_FROM_HANDLE(radv_acceleration_structure, accel_struct,
+ pInfos[i].dstAccelerationStructure);
+
+ /* Leaf nodes start after the (aligned) header plus 128 bytes reserved
+ * for the root node. */
+ struct build_primitive_constants prim_consts = {
+ .node_dst_addr = radv_accel_struct_get_va(accel_struct),
+ .scratch_addr = pInfos[i].scratchData.deviceAddress,
+ .dst_offset = ALIGN(sizeof(struct radv_accel_struct_header), 64) + 128,
+ .dst_scratch_offset = 0,
+ };
+
+ for (unsigned j = 0; j < pInfos[i].geometryCount; ++j) {
+ const VkAccelerationStructureGeometryKHR *geom =
+ pInfos[i].pGeometries ? &pInfos[i].pGeometries[j] : pInfos[i].ppGeometries[j];
+
+ prim_consts.geometry_type = geom->geometryType;
+ prim_consts.geometry_id = j | (geom->flags << 28);
+ unsigned prim_size;
+ switch (geom->geometryType) {
+ case VK_GEOMETRY_TYPE_TRIANGLES_KHR:
+ /* primitiveOffset applies to the vertex buffer only for
+ * non-indexed geometry; with an index buffer it offsets the
+ * indices instead. */
+ prim_consts.vertex_addr =
+ geom->geometry.triangles.vertexData.deviceAddress +
+ ppBuildRangeInfos[i][j].firstVertex * geom->geometry.triangles.vertexStride +
+ (geom->geometry.triangles.indexType != VK_INDEX_TYPE_NONE_KHR
+ ? ppBuildRangeInfos[i][j].primitiveOffset
+ : 0);
+ /* NOTE(review): index/transform range offsets are added even when
+ * the base deviceAddress is 0 (transformData is optional); the
+ * leaf shader tests transform_addr != 0, so a zero base with a
+ * nonzero transformOffset would be misread as present — confirm. */
+ prim_consts.index_addr = geom->geometry.triangles.indexData.deviceAddress +
+ ppBuildRangeInfos[i][j].primitiveOffset;
+ prim_consts.transform_addr = geom->geometry.triangles.transformData.deviceAddress +
+ ppBuildRangeInfos[i][j].transformOffset;
+ prim_consts.vertex_stride = geom->geometry.triangles.vertexStride;
+ prim_consts.vertex_format = geom->geometry.triangles.vertexFormat;
+ prim_consts.index_format = geom->geometry.triangles.indexType;
+ prim_size = 64;
+ break;
+ case VK_GEOMETRY_TYPE_AABBS_KHR:
+ prim_consts.aabb_addr =
+ geom->geometry.aabbs.data.deviceAddress + ppBuildRangeInfos[i][j].primitiveOffset;
+ prim_consts.aabb_stride = geom->geometry.aabbs.stride;
+ prim_size = 64;
+ break;
+ case VK_GEOMETRY_TYPE_INSTANCES_KHR:
+ prim_consts.instance_data = geom->geometry.instances.data.deviceAddress;
+ prim_size = 128;
+ break;
+ default:
+ unreachable("Unknown geometryType");
+ }
+
+ radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
+ cmd_buffer->device->meta_state.accel_struct_build.leaf_p_layout,
+ VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(prim_consts), &prim_consts);
+ radv_unaligned_dispatch(cmd_buffer, ppBuildRangeInfos[i][j].primitiveCount, 1, 1);
+ prim_consts.dst_offset += prim_size * ppBuildRangeInfos[i][j].primitiveCount;
+ prim_consts.dst_scratch_offset += 4 * ppBuildRangeInfos[i][j].primitiveCount;
+ }
+ /* Internal nodes go after all leaves; node_count = leaf ids written. */
+ bvh_states[i].node_offset = prim_consts.dst_offset;
+ bvh_states[i].node_count = prim_consts.dst_scratch_offset / 4;
+ }
+
+ radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
+ cmd_buffer->device->meta_state.accel_struct_build.internal_pipeline);
+ bool progress = true;
+ for (unsigned iter = 0; progress; ++iter) {
+ progress = false;
+ for (uint32_t i = 0; i < infoCount; ++i) {
+ RADV_FROM_HANDLE(radv_acceleration_structure, accel_struct,
+ pInfos[i].dstAccelerationStructure);
+
+ /* Done once reduced to a single root (but always run iter 0, so a
+ * single-leaf BVH still gets its root node and header). */
+ if (iter && bvh_states[i].node_count == 1)
+ continue;
+
+ /* Flush once per iteration, before the first dispatch that reads
+ * the previous iteration's writes. */
+ if (!progress) {
+ cmd_buffer->state.flush_bits |=
+ RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
+ radv_src_access_flush(cmd_buffer, VK_ACCESS_SHADER_WRITE_BIT, NULL) |
+ radv_dst_access_flush(cmd_buffer,
+ VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT, NULL);
+ }
+ progress = true;
+ uint32_t dst_node_count = MAX2(1, DIV_ROUND_UP(bvh_states[i].node_count, 4));
+ bool final_iter = dst_node_count == 1;
+ /* Ping-pong: read from the current region, write to the other one. */
+ uint32_t src_scratch_offset = bvh_states[i].scratch_offset;
+ uint32_t dst_scratch_offset = src_scratch_offset ? 0 : bvh_states[i].node_count * 4;
+ uint32_t dst_node_offset = bvh_states[i].node_offset;
+ /* The root node lives at the fixed slot right after the header. */
+ if (final_iter)
+ dst_node_offset = ALIGN(sizeof(struct radv_accel_struct_header), 64);
+
+ const struct build_internal_constants consts = {
+ .node_dst_addr = radv_accel_struct_get_va(accel_struct),
+ .scratch_addr = pInfos[i].scratchData.deviceAddress,
+ .dst_offset = dst_node_offset,
+ .dst_scratch_offset = dst_scratch_offset,
+ .src_scratch_offset = src_scratch_offset,
+ .fill_header = bvh_states[i].node_count | (final_iter ? 0x80000000U : 0),
+ };
+
+ radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
+ cmd_buffer->device->meta_state.accel_struct_build.internal_p_layout,
+ VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(consts), &consts);
+ radv_unaligned_dispatch(cmd_buffer, dst_node_count, 1, 1);
+ bvh_states[i].node_offset += dst_node_count * 128;
+ bvh_states[i].node_count = dst_node_count;
+ bvh_states[i].scratch_offset = dst_scratch_offset;
+ }
+ }
+ free(bvh_states);
+ radv_meta_restore(&saved_state, cmd_buffer);
+}
\ No newline at end of file
diff --git a/src/amd/vulkan/radv_meta.c b/src/amd/vulkan/radv_meta.c
index 8416e9adfda..b54e4bd183c 100644
--- a/src/amd/vulkan/radv_meta.c
+++ b/src/amd/vulkan/radv_meta.c
@@ -474,8 +474,14 @@ radv_device_init_meta(struct radv_device *device)
if (result != VK_SUCCESS)
goto fail_fmask_expand;
+ result = radv_device_init_accel_struct_build_state(device);
+ if (result != VK_SUCCESS)
+ goto fail_accel_struct_build;
+
return VK_SUCCESS;
+fail_accel_struct_build:
+ radv_device_finish_meta_fmask_expand_state(device);
fail_fmask_expand:
radv_device_finish_meta_resolve_fragment_state(device);
fail_resolve_fragment:
@@ -507,6 +513,7 @@ fail_clear:
void
radv_device_finish_meta(struct radv_device *device)
{
+ radv_device_finish_accel_struct_build_state(device);
radv_device_finish_meta_clear_state(device);
radv_device_finish_meta_resolve_state(device);
radv_device_finish_meta_blit_state(device);
diff --git a/src/amd/vulkan/radv_meta.h b/src/amd/vulkan/radv_meta.h
index c827baf969c..d04da9d3a2b 100644
--- a/src/amd/vulkan/radv_meta.h
+++ b/src/amd/vulkan/radv_meta.h
@@ -133,6 +133,9 @@ void radv_device_finish_meta_dcc_retile_state(struct radv_device *device);
void radv_device_finish_meta_copy_vrs_htile_state(struct radv_device *device);
+VkResult radv_device_init_accel_struct_build_state(struct radv_device *device);
+void radv_device_finish_accel_struct_build_state(struct radv_device *device);
+
void radv_meta_save(struct radv_meta_saved_state *saved_state, struct radv_cmd_buffer *cmd_buffer,
uint32_t flags);
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index b5abef0827d..bf99e928b9f 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -661,6 +661,13 @@ struct radv_meta_state {
VkPipelineLayout p_layout;
VkPipeline pipeline;
} dcc_retile;
+
+ struct {
+ VkPipelineLayout leaf_p_layout;
+ VkPipeline leaf_pipeline;
+ VkPipelineLayout internal_p_layout;
+ VkPipeline internal_pipeline;
+ } accel_struct_build;
};
/* queue types */
More information about the mesa-commit
mailing list