[Mesa-dev] [PATCH v2] radv: only load 2-dwords for vertex buffers when robustness is disabled
Samuel Pitoiset
samuel.pitoiset at gmail.com
Tue Mar 19 08:45:04 UTC 2019
This patch requires the typed vertex fetches series.
Totals from affected shaders:
SGPRS: 445574 -> 452638 (1.59 %)
VGPRS: 373392 -> 370436 (-0.79 %)
Spilled SGPRs: 77 -> 14 (-81.82 %)
Spilled VGPRs: 0 -> 0 (0.00 %)
Code Size: 14162288 -> 14413036 (1.77 %) bytes
Max Waves: 119999 -> 120509 (0.43 %)
v2: - fix vertex descriptors
Signed-off-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
---
src/amd/vulkan/radv_cmd_buffer.c | 32 +++++++++++++++++++------------
src/amd/vulkan/radv_device.c | 2 ++
src/amd/vulkan/radv_nir_to_llvm.c | 21 +++++++++++++++++++-
src/amd/vulkan/radv_private.h | 1 +
src/amd/vulkan/radv_shader.c | 1 +
src/amd/vulkan/radv_shader.h | 1 +
6 files changed, 45 insertions(+), 13 deletions(-)
diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index ae8f50d0348..0c8572bd1e5 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -1991,6 +1991,7 @@ radv_flush_vertex_descriptors(struct radv_cmd_buffer *cmd_buffer,
cmd_buffer->state.pipeline->num_vertex_bindings &&
radv_get_shader(cmd_buffer->state.pipeline, MESA_SHADER_VERTEX)->info.info.vs.has_vertex_buffers) {
struct radv_vertex_elements_info *velems = &cmd_buffer->state.pipeline->vertex_elements;
+ unsigned desc_size_bytes = cmd_buffer->device->robust_buffer_access ? 16 : 8;
unsigned vb_offset;
void *vb_ptr;
uint32_t i = 0;
@@ -1998,12 +1999,13 @@ radv_flush_vertex_descriptors(struct radv_cmd_buffer *cmd_buffer,
uint64_t va;
/* allocate some descriptor state for vertex buffers */
- if (!radv_cmd_buffer_upload_alloc(cmd_buffer, count * 16, 256,
+ if (!radv_cmd_buffer_upload_alloc(cmd_buffer,
+ count * desc_size_bytes, 256,
&vb_offset, &vb_ptr))
return;
for (i = 0; i < count; i++) {
- uint32_t *desc = &((uint32_t *)vb_ptr)[i * 4];
+ uint32_t *desc = &((uint32_t *)vb_ptr)[i * (desc_size_bytes / 4)];
uint32_t offset;
struct radv_buffer *buffer = cmd_buffer->vertex_bindings[i].buffer;
uint32_t stride = cmd_buffer->state.pipeline->binding_stride[i];
@@ -2017,16 +2019,22 @@ radv_flush_vertex_descriptors(struct radv_cmd_buffer *cmd_buffer,
va += offset + buffer->offset;
desc[0] = va;
desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) | S_008F04_STRIDE(stride);
- if (cmd_buffer->device->physical_device->rad_info.chip_class <= CIK && stride)
- desc[2] = (buffer->size - offset - velems->format_size[i]) / stride + 1;
- else
- desc[2] = buffer->size - offset;
- desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
- S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
- S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
- S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
- S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_UINT) |
- S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
+
+ if (cmd_buffer->device->robust_buffer_access) {
+ /* Enable out of bounds checking only when
+ * robust buffer access is requested.
+ */
+ if (cmd_buffer->device->physical_device->rad_info.chip_class <= CIK && stride)
+ desc[2] = (buffer->size - offset - velems->format_size[i]) / stride + 1;
+ else
+ desc[2] = buffer->size - offset;
+ desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
+ S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
+ S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
+ S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
+ S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_UINT) |
+ S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
+ }
}
va = radv_buffer_get_va(cmd_buffer->upload.upload_bo);
diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index dc4346b7498..895e7d67d50 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -1798,6 +1798,8 @@ VkResult radv_CreateDevice(
device->has_distributed_tess =
device->physical_device->rad_info.chip_class >= VI &&
device->physical_device->rad_info.max_se >= 2;
+ device->robust_buffer_access = pCreateInfo->pEnabledFeatures &&
+ pCreateInfo->pEnabledFeatures->robustBufferAccess;
if (getenv("RADV_TRACE_FILE")) {
const char *filename = getenv("RADV_TRACE_FILE");
diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c
index 58a3cf18fe1..f0ed27e0a53 100644
--- a/src/amd/vulkan/radv_nir_to_llvm.c
+++ b/src/amd/vulkan/radv_nir_to_llvm.c
@@ -807,8 +807,11 @@ declare_vs_specific_input_sgprs(struct radv_shader_context *ctx,
(stage == MESA_SHADER_VERTEX ||
(has_previous_stage && previous_stage == MESA_SHADER_VERTEX))) {
if (ctx->shader_info->info.vs.has_vertex_buffers) {
+ LLVMTypeRef type =
+ ctx->options->robust_buffer_access ? ctx->ac.v4i32 : ctx->ac.v2i32;
+
add_arg(args, ARG_SGPR,
- ac_array_in_const32_addr_space(ctx->ac.v4i32),
+ ac_array_in_const32_addr_space(type),
&ctx->vertex_buffers);
}
add_arg(args, ARG_SGPR, ctx->ac.i32, &ctx->abi.base_vertex);
@@ -2188,6 +2191,22 @@ handle_vs_input_decl(struct radv_shader_context *ctx,
t_offset = LLVMConstInt(ctx->ac.i32, attrib_binding, false);
t_list = ac_build_load_to_sgpr(&ctx->ac, t_list_ptr, t_offset);
+ if (!ctx->options->robust_buffer_access) {
+ LLVMValueRef desc[4];
+ desc[0] = ac_llvm_extract_elem(&ctx->ac, t_list, 0);
+ desc[1] = ac_llvm_extract_elem(&ctx->ac, t_list, 1);
+ desc[2] = LLVMConstInt(ctx->ac.i32, 0xffffffff, 0);
+ desc[3] = LLVMConstInt(ctx->ac.i32,
+ S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
+ S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
+ S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
+ S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
+ S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_UINT) |
+ S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32), 0);
+
+ t_list = ac_build_gather_values(&ctx->ac, desc, 4);
+ }
+
input = ac_build_struct_tbuffer_load(&ctx->ac, t_list,
buffer_index,
LLVMConstInt(ctx->ac.i32, attrib_offset, false),
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index 39fa6110fde..f81ba4c602c 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -676,6 +676,7 @@ struct radv_device {
bool always_use_syncobj;
bool has_distributed_tess;
+ bool robust_buffer_access;
bool pbb_allowed;
bool dfsm_allowed;
uint32_t tess_offchip_block_dw_size;
diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index bd045a0b92f..ae7e315652e 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -606,6 +606,7 @@ shader_variant_create(struct radv_device *device,
options->check_ir = device->instance->debug_flags & RADV_DEBUG_CHECKIR;
options->tess_offchip_block_dw_size = device->tess_offchip_block_dw_size;
options->address32_hi = device->physical_device->rad_info.address32_hi;
+ options->robust_buffer_access = device->robust_buffer_access;
if (options->supports_spill)
tm_options |= AC_TM_SUPPORTS_SPILL;
diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h
index fe2f2868630..9e214457ad8 100644
--- a/src/amd/vulkan/radv_shader.h
+++ b/src/amd/vulkan/radv_shader.h
@@ -127,6 +127,7 @@ struct radv_nir_compiler_options {
bool dump_preoptir;
bool record_llvm_ir;
bool check_ir;
+ bool robust_buffer_access;
enum radeon_family family;
enum chip_class chip_class;
uint32_t tess_offchip_block_dw_size;
--
2.21.0
More information about the mesa-dev
mailing list