Mesa (main): radv: allocate shaders to 32-bit address to skip PGM_HI
GitLab Mirror
gitlab-mirror at kemper.freedesktop.org
Mon Aug 23 11:45:06 UTC 2021
Module: Mesa
Branch: main
Commit: e0353296daa4e7f65ccb1f97b6baa16755422c16
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=e0353296daa4e7f65ccb1f97b6baa16755422c16
Author: Samuel Pitoiset <samuel.pitoiset at gmail.com>
Date: Thu Aug 19 09:04:46 2021 +0200
radv: allocate shaders to 32-bit address to skip PGM_HI
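Shader buffers are now allocated with RADEON_FLAG_32BIT, so the upper
address bits are the same for every shader (rad_info.address32_hi). The
LS, ES and compute PGM_HI registers are therefore written once in
si_emit_graphics()/si_emit_compute(), and the pipeline code only emits
PGM_LO. This reduces the number of emitted registers.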
Signed-off-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas at basnieuwenhuizen.nl>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12466>
---
src/amd/vulkan/radv_pipeline.c | 29 ++++++++---------------------
src/amd/vulkan/radv_shader.c | 2 +-
src/amd/vulkan/si_cmd_buffer.c | 20 ++++++++++++++++++++
3 files changed, 29 insertions(+), 22 deletions(-)
diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
index 4bf7a02b243..ce211086d9d 100644
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -4420,9 +4420,7 @@ radv_pipeline_generate_hw_ls(struct radeon_cmdbuf *cs, const struct radv_pipelin
uint64_t va = radv_buffer_get_va(shader->bo) + shader->bo_offset;
uint32_t rsrc2 = shader->config.rsrc2;
- radeon_set_sh_reg_seq(cs, R_00B520_SPI_SHADER_PGM_LO_LS, 2);
- radeon_emit(cs, va >> 8);
- radeon_emit(cs, S_00B524_MEM_BASE(va >> 40));
+ radeon_set_sh_reg(cs, R_00B520_SPI_SHADER_PGM_LO_LS, va >> 8);
rsrc2 |= S_00B52C_LDS_SIZE(num_lds_blocks);
if (pipeline->device->physical_device->rad_info.chip_class == GFX7 &&
@@ -4447,9 +4445,8 @@ radv_pipeline_generate_hw_ngg(struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf
: pipeline->shaders[MESA_SHADER_VERTEX];
const struct gfx10_ngg_info *ngg_state = &shader->info.ngg_info;
- radeon_set_sh_reg_seq(cs, R_00B320_SPI_SHADER_PGM_LO_ES, 2);
- radeon_emit(cs, va >> 8);
- radeon_emit(cs, S_00B324_MEM_BASE(va >> 40));
+ radeon_set_sh_reg(cs, R_00B320_SPI_SHADER_PGM_LO_ES, va >> 8);
+
radeon_set_sh_reg_seq(cs, R_00B228_SPI_SHADER_PGM_RSRC1_GS, 2);
radeon_emit(cs, shader->config.rsrc1);
radeon_emit(cs, shader->config.rsrc2);
@@ -4592,13 +4589,9 @@ radv_pipeline_generate_hw_hs(struct radeon_cmdbuf *cs, const struct radv_pipelin
if (pipeline->device->physical_device->rad_info.chip_class >= GFX9) {
if (pipeline->device->physical_device->rad_info.chip_class >= GFX10) {
- radeon_set_sh_reg_seq(cs, R_00B520_SPI_SHADER_PGM_LO_LS, 2);
- radeon_emit(cs, va >> 8);
- radeon_emit(cs, S_00B524_MEM_BASE(va >> 40));
+ radeon_set_sh_reg(cs, R_00B520_SPI_SHADER_PGM_LO_LS, va >> 8);
} else {
- radeon_set_sh_reg_seq(cs, R_00B410_SPI_SHADER_PGM_LO_LS, 2);
- radeon_emit(cs, va >> 8);
- radeon_emit(cs, S_00B414_MEM_BASE(va >> 40));
+ radeon_set_sh_reg(cs, R_00B410_SPI_SHADER_PGM_LO_LS, va >> 8);
}
radeon_set_sh_reg_seq(cs, R_00B428_SPI_SHADER_PGM_RSRC1_HS, 2);
@@ -4793,13 +4786,9 @@ radv_pipeline_generate_hw_gs(struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf
if (pipeline->device->physical_device->rad_info.chip_class >= GFX9) {
if (pipeline->device->physical_device->rad_info.chip_class >= GFX10) {
- radeon_set_sh_reg_seq(cs, R_00B320_SPI_SHADER_PGM_LO_ES, 2);
- radeon_emit(cs, va >> 8);
- radeon_emit(cs, S_00B324_MEM_BASE(va >> 40));
+ radeon_set_sh_reg(cs, R_00B320_SPI_SHADER_PGM_LO_ES, va >> 8);
} else {
- radeon_set_sh_reg_seq(cs, R_00B210_SPI_SHADER_PGM_LO_ES, 2);
- radeon_emit(cs, va >> 8);
- radeon_emit(cs, S_00B214_MEM_BASE(va >> 40));
+ radeon_set_sh_reg(cs, R_00B210_SPI_SHADER_PGM_LO_ES, va >> 8);
}
radeon_set_sh_reg_seq(cs, R_00B228_SPI_SHADER_PGM_RSRC1_GS, 2);
@@ -5576,9 +5565,7 @@ radv_pipeline_generate_hw_cs(struct radeon_cmdbuf *cs, const struct radv_pipelin
uint64_t va = radv_buffer_get_va(shader->bo) + shader->bo_offset;
struct radv_device *device = pipeline->device;
- radeon_set_sh_reg_seq(cs, R_00B830_COMPUTE_PGM_LO, 2);
- radeon_emit(cs, va >> 8);
- radeon_emit(cs, S_00B834_DATA(va >> 40));
+ radeon_set_sh_reg(cs, R_00B830_COMPUTE_PGM_LO, va >> 8);
radeon_set_sh_reg_seq(cs, R_00B848_COMPUTE_PGM_RSRC1, 2);
radeon_emit(cs, shader->config.rsrc1);
diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index 82dade3dee4..f38ca7c8f45 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -1059,7 +1059,7 @@ radv_alloc_shader_memory(struct radv_device *device, struct radv_shader_variant
slab->size = MAX2(256 * 1024, shader->code_size);
VkResult result = device->ws->buffer_create(
device->ws, slab->size, 256, RADEON_DOMAIN_VRAM,
- RADEON_FLAG_NO_INTERPROCESS_SHARING |
+ RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_32BIT |
(device->physical_device->rad_info.cpdma_prefetch_writes_memory ? 0
: RADEON_FLAG_READ_ONLY),
RADV_BO_PRIORITY_SHADER, 0, &slab->bo);
diff --git a/src/amd/vulkan/si_cmd_buffer.c b/src/amd/vulkan/si_cmd_buffer.c
index a4471d87910..b1c4f25fe1f 100644
--- a/src/amd/vulkan/si_cmd_buffer.c
+++ b/src/amd/vulkan/si_cmd_buffer.c
@@ -79,6 +79,9 @@ si_emit_compute(struct radv_device *device, struct radeon_cmdbuf *cs)
radeon_emit(cs, 0);
radeon_emit(cs, 0);
+ radeon_set_sh_reg(cs, R_00B834_COMPUTE_PGM_HI,
+ S_00B834_DATA(device->physical_device->rad_info.address32_hi >> 8));
+
radeon_set_sh_reg_seq(cs, R_00B858_COMPUTE_STATIC_THREAD_MGMT_SE0, 2);
/* R_00B858_COMPUTE_STATIC_THREAD_MGMT_SE0 / SE1,
* renamed COMPUTE_DESTINATION_EN_SEn on gfx10. */
@@ -291,6 +294,23 @@ si_emit_graphics(struct radv_device *device, struct radeon_cmdbuf *cs)
radeon_set_context_reg(cs, R_028408_VGT_INDX_OFFSET, 0);
}
+ if (device->physical_device->rad_info.chip_class >= GFX10) {
+ radeon_set_sh_reg(cs, R_00B524_SPI_SHADER_PGM_HI_LS,
+ S_00B524_MEM_BASE(device->physical_device->rad_info.address32_hi >> 8));
+ radeon_set_sh_reg(cs, R_00B324_SPI_SHADER_PGM_HI_ES,
+ S_00B324_MEM_BASE(device->physical_device->rad_info.address32_hi >> 8));
+ } else if (device->physical_device->rad_info.chip_class == GFX9) {
+ radeon_set_sh_reg(cs, R_00B414_SPI_SHADER_PGM_HI_LS,
+ S_00B414_MEM_BASE(device->physical_device->rad_info.address32_hi >> 8));
+ radeon_set_sh_reg(cs, R_00B214_SPI_SHADER_PGM_HI_ES,
+ S_00B214_MEM_BASE(device->physical_device->rad_info.address32_hi >> 8));
+ } else {
+ radeon_set_sh_reg(cs, R_00B524_SPI_SHADER_PGM_HI_LS,
+ S_00B524_MEM_BASE(device->physical_device->rad_info.address32_hi >> 8));
+ radeon_set_sh_reg(cs, R_00B324_SPI_SHADER_PGM_HI_ES,
+ S_00B324_MEM_BASE(device->physical_device->rad_info.address32_hi >> 8));
+ }
+
unsigned cu_mask_ps = 0xffffffff;
/* It's wasteful to enable all CUs for PS if shader arrays have a
More information about the mesa-commit
mailing list