[Mesa-dev] [PATCH 4/4] radv: move compute related code to radv_compute.c
Samuel Pitoiset
samuel.pitoiset at gmail.com
Thu Sep 14 16:50:07 UTC 2017
Signed-off-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
---
src/amd/vulkan/Makefile.sources | 2 +
src/amd/vulkan/radv_cmd_buffer.c | 239 +---------------------------------
src/amd/vulkan/radv_compute.c | 275 +++++++++++++++++++++++++++++++++++++++
src/amd/vulkan/radv_compute.h | 69 ++++++++++
src/amd/vulkan/radv_meta.h | 1 +
src/amd/vulkan/radv_private.h | 35 +++--
src/amd/vulkan/si_cmd_buffer.c | 50 +------
7 files changed, 381 insertions(+), 290 deletions(-)
create mode 100644 src/amd/vulkan/radv_compute.c
create mode 100644 src/amd/vulkan/radv_compute.h
diff --git a/src/amd/vulkan/Makefile.sources b/src/amd/vulkan/Makefile.sources
index 9489219f5b..7cef56b43d 100644
--- a/src/amd/vulkan/Makefile.sources
+++ b/src/amd/vulkan/Makefile.sources
@@ -32,6 +32,8 @@ RADV_WS_AMDGPU_FILES := \
VULKAN_FILES := \
radv_cmd_buffer.c \
+ radv_compute.c \
+ radv_compute.h \
radv_cs.h \
radv_debug.c \
radv_debug.h \
diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index 10a071c3d6..af9f8210bf 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -34,6 +34,7 @@
#include "vk_format.h"
#include "radv_debug.h"
#include "radv_meta.h"
+#include "radv_compute.h"
#include "ac_debug.h"
@@ -366,7 +367,7 @@ void radv_cmd_buffer_trace_emit(struct radv_cmd_buffer *cmd_buffer)
radeon_emit(cs, AC_ENCODE_TRACE_POINT(cmd_buffer->state.trace_id));
}
-static void
+void
radv_cmd_buffer_after_draw(struct radv_cmd_buffer *cmd_buffer)
{
if (cmd_buffer->device->debug_flags & RADV_DEBUG_SYNC_SHADERS) {
@@ -386,7 +387,7 @@ radv_cmd_buffer_after_draw(struct radv_cmd_buffer *cmd_buffer)
radv_cmd_buffer_trace_emit(cmd_buffer);
}
-static void
+void
radv_save_pipeline(struct radv_cmd_buffer *cmd_buffer,
struct radv_pipeline *pipeline, enum ring_type ring)
{
@@ -601,14 +602,6 @@ radv_emit_graphics_raster_state(struct radv_cmd_buffer *cmd_buffer,
raster->pa_su_sc_mode_cntl);
}
-static inline void
-radv_emit_prefetch(struct radv_cmd_buffer *cmd_buffer, uint64_t va,
- unsigned size)
-{
- if (cmd_buffer->device->physical_device->rad_info.chip_class >= CIK)
- si_cp_dma_prefetch(cmd_buffer, va, size);
-}
-
static void
radv_emit_hw_vs(struct radv_cmd_buffer *cmd_buffer,
struct radv_pipeline *pipeline,
@@ -1577,7 +1570,7 @@ radv_flush_indirect_descriptor_sets(struct radv_cmd_buffer *cmd_buffer)
AC_UD_INDIRECT_DESCRIPTOR_SETS, va);
}
-static void
+void
radv_flush_descriptors(struct radv_cmd_buffer *cmd_buffer,
VkShaderStageFlags stages)
{
@@ -1615,7 +1608,7 @@ radv_flush_descriptors(struct radv_cmd_buffer *cmd_buffer,
assert(cmd_buffer->cs->cdw <= cdw_max);
}
-static void
+void
radv_flush_constants(struct radv_cmd_buffer *cmd_buffer,
struct radv_pipeline *pipeline,
VkShaderStageFlags stages)
@@ -2108,7 +2101,8 @@ VkResult radv_BeginCommandBuffer(
radv_set_db_count_control(cmd_buffer);
break;
case RADV_QUEUE_COMPUTE:
- si_init_compute(cmd_buffer);
+ radv_init_compute(cmd_buffer->device->physical_device,
+ cmd_buffer->cs);
break;
case RADV_QUEUE_TRANSFER:
default:
@@ -2378,58 +2372,6 @@ VkResult radv_EndCommandBuffer(
return cmd_buffer->record_result;
}
-static void
-radv_emit_compute_pipeline(struct radv_cmd_buffer *cmd_buffer)
-{
- struct radeon_winsys *ws = cmd_buffer->device->ws;
- struct radv_shader_variant *compute_shader;
- struct radv_pipeline *pipeline = cmd_buffer->state.compute_pipeline;
- uint64_t va;
-
- if (!pipeline || pipeline == cmd_buffer->state.emitted_compute_pipeline)
- return;
-
- cmd_buffer->state.emitted_compute_pipeline = pipeline;
-
- compute_shader = pipeline->shaders[MESA_SHADER_COMPUTE];
- va = ws->buffer_get_va(compute_shader->bo) + compute_shader->bo_offset;
-
- ws->cs_add_buffer(cmd_buffer->cs, compute_shader->bo, 8);
- radv_emit_prefetch(cmd_buffer, va, compute_shader->code_size);
-
- MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws,
- cmd_buffer->cs, 16);
-
- radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B830_COMPUTE_PGM_LO, 2);
- radeon_emit(cmd_buffer->cs, va >> 8);
- radeon_emit(cmd_buffer->cs, va >> 40);
-
- radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B848_COMPUTE_PGM_RSRC1, 2);
- radeon_emit(cmd_buffer->cs, compute_shader->rsrc1);
- radeon_emit(cmd_buffer->cs, compute_shader->rsrc2);
-
-
- cmd_buffer->compute_scratch_size_needed =
- MAX2(cmd_buffer->compute_scratch_size_needed,
- pipeline->max_waves * pipeline->scratch_bytes_per_wave);
-
- /* change these once we have scratch support */
- radeon_set_sh_reg(cmd_buffer->cs, R_00B860_COMPUTE_TMPRING_SIZE,
- S_00B860_WAVES(pipeline->max_waves) |
- S_00B860_WAVESIZE(pipeline->scratch_bytes_per_wave >> 10));
-
- radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B81C_COMPUTE_NUM_THREAD_X, 3);
- radeon_emit(cmd_buffer->cs,
- S_00B81C_NUM_THREAD_FULL(compute_shader->info.cs.block_size[0]));
- radeon_emit(cmd_buffer->cs,
- S_00B81C_NUM_THREAD_FULL(compute_shader->info.cs.block_size[1]));
- radeon_emit(cmd_buffer->cs,
- S_00B81C_NUM_THREAD_FULL(compute_shader->info.cs.block_size[2]));
-
- assert(cmd_buffer->cs->cdw <= cdw_max);
- radv_save_pipeline(cmd_buffer, pipeline, RING_COMPUTE);
-}
-
static void radv_mark_descriptor_sets_dirty(struct radv_cmd_buffer *cmd_buffer)
{
for (unsigned i = 0; i < MAX_SETS; i++) {
@@ -3124,157 +3066,6 @@ void radv_CmdDrawIndexedIndirectCountAMD(
maxDrawCount, stride);
}
-struct radv_dispatch_info {
- /**
- * Determine the layout of the grid (in block units) to be used.
- */
- uint32_t blocks[3];
-
- /**
- * Whether it's an unaligned compute dispatch.
- */
- bool unaligned;
-
- /**
- * Indirect compute parameters resource.
- */
- struct radv_buffer *indirect;
- uint64_t indirect_offset;
-};
-
-static void
-radv_emit_dispatch_packets(struct radv_cmd_buffer *cmd_buffer,
- const struct radv_dispatch_info *info)
-{
- struct radv_pipeline *pipeline = cmd_buffer->state.compute_pipeline;
- struct radv_shader_variant *compute_shader = pipeline->shaders[MESA_SHADER_COMPUTE];
- struct radeon_winsys *ws = cmd_buffer->device->ws;
- struct radeon_winsys_cs *cs = cmd_buffer->cs;
- struct ac_userdata_info *loc;
- uint8_t grid_used;
-
- grid_used = compute_shader->info.info.cs.grid_components_used;
-
- loc = radv_lookup_user_sgpr(pipeline, MESA_SHADER_COMPUTE,
- AC_UD_CS_GRID_SIZE);
-
- MAYBE_UNUSED unsigned cdw_max = radeon_check_space(ws, cs, 25);
-
- if (info->indirect) {
- uint64_t va = ws->buffer_get_va(info->indirect->bo);
-
- va += info->indirect->offset + info->indirect_offset;
-
- ws->cs_add_buffer(cs, info->indirect->bo, 8);
-
- if (loc->sgpr_idx != -1) {
- for (unsigned i = 0; i < grid_used; ++i) {
- radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
- radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_MEM) |
- COPY_DATA_DST_SEL(COPY_DATA_REG));
- radeon_emit(cs, (va + 4 * i));
- radeon_emit(cs, (va + 4 * i) >> 32);
- radeon_emit(cs, ((R_00B900_COMPUTE_USER_DATA_0
- + loc->sgpr_idx * 4) >> 2) + i);
- radeon_emit(cs, 0);
- }
- }
-
- if (radv_cmd_buffer_uses_mec(cmd_buffer)) {
- radeon_emit(cs, PKT3(PKT3_DISPATCH_INDIRECT, 2, 0) |
- PKT3_SHADER_TYPE_S(1));
- radeon_emit(cs, va);
- radeon_emit(cs, va >> 32);
- radeon_emit(cs, 1);
- } else {
- radeon_emit(cs, PKT3(PKT3_SET_BASE, 2, 0) |
- PKT3_SHADER_TYPE_S(1));
- radeon_emit(cs, 1);
- radeon_emit(cs, va);
- radeon_emit(cs, va >> 32);
-
- radeon_emit(cs, PKT3(PKT3_DISPATCH_INDIRECT, 1, 0) |
- PKT3_SHADER_TYPE_S(1));
- radeon_emit(cs, 0);
- radeon_emit(cs, 1);
- }
- } else {
- unsigned blocks[3] = { info->blocks[0], info->blocks[1], info->blocks[2] };
- unsigned dispatch_initiator = S_00B800_COMPUTE_SHADER_EN(1);
-
- if (info->unaligned) {
- unsigned *cs_block_size = compute_shader->info.cs.block_size;
- unsigned remainder[3];
-
- /* If aligned, these should be an entire block size,
- * not 0.
- */
- remainder[0] = blocks[0] + cs_block_size[0] -
- align_u32_npot(blocks[0], cs_block_size[0]);
- remainder[1] = blocks[1] + cs_block_size[1] -
- align_u32_npot(blocks[1], cs_block_size[1]);
- remainder[2] = blocks[2] + cs_block_size[2] -
- align_u32_npot(blocks[2], cs_block_size[2]);
-
- blocks[0] = round_up_u32(blocks[0], cs_block_size[0]);
- blocks[1] = round_up_u32(blocks[1], cs_block_size[1]);
- blocks[2] = round_up_u32(blocks[2], cs_block_size[2]);
-
- radeon_set_sh_reg_seq(cs, R_00B81C_COMPUTE_NUM_THREAD_X, 3);
- radeon_emit(cs,
- S_00B81C_NUM_THREAD_FULL(cs_block_size[0]) |
- S_00B81C_NUM_THREAD_PARTIAL(remainder[0]));
- radeon_emit(cs,
- S_00B81C_NUM_THREAD_FULL(cs_block_size[1]) |
- S_00B81C_NUM_THREAD_PARTIAL(remainder[1]));
- radeon_emit(cs,
- S_00B81C_NUM_THREAD_FULL(cs_block_size[2]) |
- S_00B81C_NUM_THREAD_PARTIAL(remainder[2]));
-
- dispatch_initiator |= S_00B800_PARTIAL_TG_EN(1);
- }
-
- if (loc->sgpr_idx != -1) {
- assert(!loc->indirect);
- assert(loc->num_sgprs == grid_used);
-
- radeon_set_sh_reg_seq(cs, R_00B900_COMPUTE_USER_DATA_0 +
- loc->sgpr_idx * 4, grid_used);
- radeon_emit(cs, blocks[0]);
- if (grid_used > 1)
- radeon_emit(cs, blocks[1]);
- if (grid_used > 2)
- radeon_emit(cs, blocks[2]);
- }
-
- radeon_emit(cs, PKT3(PKT3_DISPATCH_DIRECT, 3, 0) |
- PKT3_SHADER_TYPE_S(1));
- radeon_emit(cs, blocks[0]);
- radeon_emit(cs, blocks[1]);
- radeon_emit(cs, blocks[2]);
- radeon_emit(cs, dispatch_initiator);
- }
-
- assert(cmd_buffer->cs->cdw <= cdw_max);
-}
-
-static void
-radv_dispatch(struct radv_cmd_buffer *cmd_buffer,
- const struct radv_dispatch_info *info)
-{
- radv_emit_compute_pipeline(cmd_buffer);
-
- radv_flush_descriptors(cmd_buffer, VK_SHADER_STAGE_COMPUTE_BIT);
- radv_flush_constants(cmd_buffer, cmd_buffer->state.compute_pipeline,
- VK_SHADER_STAGE_COMPUTE_BIT);
-
- si_emit_cache_flush(cmd_buffer);
-
- radv_emit_dispatch_packets(cmd_buffer, info);
-
- radv_cmd_buffer_after_draw(cmd_buffer);
-}
-
void radv_CmdDispatch(
VkCommandBuffer commandBuffer,
uint32_t x,
@@ -3306,22 +3097,6 @@ void radv_CmdDispatchIndirect(
radv_dispatch(cmd_buffer, &info);
}
-void radv_unaligned_dispatch(
- struct radv_cmd_buffer *cmd_buffer,
- uint32_t x,
- uint32_t y,
- uint32_t z)
-{
- struct radv_dispatch_info info = {};
-
- info.blocks[0] = x;
- info.blocks[1] = y;
- info.blocks[2] = z;
- info.unaligned = 1;
-
- radv_dispatch(cmd_buffer, &info);
-}
-
void radv_CmdEndRenderPass(
VkCommandBuffer commandBuffer)
{
diff --git a/src/amd/vulkan/radv_compute.c b/src/amd/vulkan/radv_compute.c
new file mode 100644
index 0000000000..b230686223
--- /dev/null
+++ b/src/amd/vulkan/radv_compute.c
@@ -0,0 +1,275 @@
+/*
+ * Copyright © 2016 Red Hat.
+ * Copyright © 2016 Bas Nieuwenhuizen
+ *
+ * based in part on anv driver which is:
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "radv_private.h"
+#include "radv_radeon_winsys.h"
+#include "radv_shader.h"
+#include "radv_cs.h"
+#include "sid.h"
+#include "gfx9d.h"
+#include "vk_format.h"
+#include "radv_debug.h"
+#include "radv_meta.h"
+#include "radv_compute.h"
+#include "ac_debug.h"
+
+void
+radv_init_compute(struct radv_physical_device *physical_device,
+ struct radeon_winsys_cs *cs)
+{
+ radeon_set_sh_reg_seq(cs, R_00B810_COMPUTE_START_X, 3);
+ radeon_emit(cs, 0);
+ radeon_emit(cs, 0);
+ radeon_emit(cs, 0);
+
+ radeon_set_sh_reg_seq(cs, R_00B854_COMPUTE_RESOURCE_LIMITS, 3);
+ radeon_emit(cs, 0);
+ /* R_00B858_COMPUTE_STATIC_THREAD_MGMT_SE0 / SE1 */
+ radeon_emit(cs, S_00B858_SH0_CU_EN(0xffff) | S_00B858_SH1_CU_EN(0xffff));
+ radeon_emit(cs, S_00B85C_SH0_CU_EN(0xffff) | S_00B85C_SH1_CU_EN(0xffff));
+
+ if (physical_device->rad_info.chip_class >= CIK) {
+ /* Also set R_00B864_COMPUTE_STATIC_THREAD_MGMT_SE2 / SE3 */
+ radeon_set_sh_reg_seq(cs,
+ R_00B864_COMPUTE_STATIC_THREAD_MGMT_SE2, 2);
+ radeon_emit(cs, S_00B864_SH0_CU_EN(0xffff) |
+ S_00B864_SH1_CU_EN(0xffff));
+ radeon_emit(cs, S_00B868_SH0_CU_EN(0xffff) |
+ S_00B868_SH1_CU_EN(0xffff));
+ }
+
+ /* This register has been moved to R_00CD20_COMPUTE_MAX_WAVE_ID
+ * and is now per pipe, so it should be handled in the
+ * kernel if we want to use something other than the default value,
+ * which is now 0x22f.
+ */
+ if (physical_device->rad_info.chip_class <= SI) {
+ /* XXX: This should be:
+ * (number of compute units) * 4 * (waves per simd) - 1 */
+
+ radeon_set_sh_reg(cs, R_00B82C_COMPUTE_MAX_WAVE_ID,
+ 0x190 /* Default value */);
+ }
+}
+
+static void
+radv_emit_compute_pipeline(struct radv_cmd_buffer *cmd_buffer)
+{
+ struct radv_pipeline *pipeline = cmd_buffer->state.compute_pipeline;
+ struct radeon_winsys *ws = cmd_buffer->device->ws;
+ struct radeon_winsys_cs *cs = cmd_buffer->cs;
+ struct radv_shader_variant *compute_shader;
+ uint64_t va;
+
+ if (!pipeline || pipeline == cmd_buffer->state.emitted_compute_pipeline)
+ return;
+
+ cmd_buffer->state.emitted_compute_pipeline = pipeline;
+
+ compute_shader = pipeline->shaders[MESA_SHADER_COMPUTE];
+ va = ws->buffer_get_va(compute_shader->bo) + compute_shader->bo_offset;
+
+ ws->cs_add_buffer(cs, compute_shader->bo, 8);
+ radv_emit_prefetch(cmd_buffer, va, compute_shader->code_size);
+
+ MAYBE_UNUSED unsigned cdw_max = radeon_check_space(ws, cs, 16);
+
+ radeon_set_sh_reg_seq(cs, R_00B830_COMPUTE_PGM_LO, 2);
+ radeon_emit(cs, va >> 8);
+ radeon_emit(cs, va >> 40);
+
+ radeon_set_sh_reg_seq(cs, R_00B848_COMPUTE_PGM_RSRC1, 2);
+ radeon_emit(cs, compute_shader->rsrc1);
+ radeon_emit(cs, compute_shader->rsrc2);
+
+ cmd_buffer->compute_scratch_size_needed =
+ MAX2(cmd_buffer->compute_scratch_size_needed,
+ pipeline->max_waves * pipeline->scratch_bytes_per_wave);
+
+ /* change these once we have scratch support */
+ radeon_set_sh_reg(cs, R_00B860_COMPUTE_TMPRING_SIZE,
+ S_00B860_WAVES(pipeline->max_waves) |
+ S_00B860_WAVESIZE(pipeline->scratch_bytes_per_wave >> 10));
+
+ radeon_set_sh_reg_seq(cs, R_00B81C_COMPUTE_NUM_THREAD_X, 3);
+ radeon_emit(cs,
+ S_00B81C_NUM_THREAD_FULL(compute_shader->info.cs.block_size[0]));
+ radeon_emit(cs,
+ S_00B81C_NUM_THREAD_FULL(compute_shader->info.cs.block_size[1]));
+ radeon_emit(cs,
+ S_00B81C_NUM_THREAD_FULL(compute_shader->info.cs.block_size[2]));
+
+ assert(cmd_buffer->cs->cdw <= cdw_max);
+ radv_save_pipeline(cmd_buffer, pipeline, RING_COMPUTE);
+}
+
+static void
+radv_emit_dispatch_packets(struct radv_cmd_buffer *cmd_buffer,
+ const struct radv_dispatch_info *info)
+{
+ struct radv_pipeline *pipeline = cmd_buffer->state.compute_pipeline;
+ struct radv_shader_variant *compute_shader = pipeline->shaders[MESA_SHADER_COMPUTE];
+ struct radeon_winsys *ws = cmd_buffer->device->ws;
+ struct radeon_winsys_cs *cs = cmd_buffer->cs;
+ struct ac_userdata_info *loc;
+ uint8_t grid_used;
+
+ grid_used = compute_shader->info.info.cs.grid_components_used;
+
+ loc = radv_lookup_user_sgpr(pipeline, MESA_SHADER_COMPUTE,
+ AC_UD_CS_GRID_SIZE);
+
+ MAYBE_UNUSED unsigned cdw_max = radeon_check_space(ws, cs, 25);
+
+ if (info->indirect) {
+ uint64_t va = ws->buffer_get_va(info->indirect->bo);
+
+ va += info->indirect->offset + info->indirect_offset;
+
+ ws->cs_add_buffer(cs, info->indirect->bo, 8);
+
+ if (loc->sgpr_idx != -1) {
+ for (unsigned i = 0; i < grid_used; ++i) {
+ radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
+ radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_MEM) |
+ COPY_DATA_DST_SEL(COPY_DATA_REG));
+ radeon_emit(cs, (va + 4 * i));
+ radeon_emit(cs, (va + 4 * i) >> 32);
+ radeon_emit(cs, ((R_00B900_COMPUTE_USER_DATA_0
+ + loc->sgpr_idx * 4) >> 2) + i);
+ radeon_emit(cs, 0);
+ }
+ }
+
+ if (radv_cmd_buffer_uses_mec(cmd_buffer)) {
+ radeon_emit(cs, PKT3(PKT3_DISPATCH_INDIRECT, 2, 0) |
+ PKT3_SHADER_TYPE_S(1));
+ radeon_emit(cs, va);
+ radeon_emit(cs, va >> 32);
+ radeon_emit(cs, 1);
+ } else {
+ radeon_emit(cs, PKT3(PKT3_SET_BASE, 2, 0) |
+ PKT3_SHADER_TYPE_S(1));
+ radeon_emit(cs, 1);
+ radeon_emit(cs, va);
+ radeon_emit(cs, va >> 32);
+
+ radeon_emit(cs, PKT3(PKT3_DISPATCH_INDIRECT, 1, 0) |
+ PKT3_SHADER_TYPE_S(1));
+ radeon_emit(cs, 0);
+ radeon_emit(cs, 1);
+ }
+ } else {
+ unsigned blocks[3] = { info->blocks[0], info->blocks[1], info->blocks[2] };
+ unsigned dispatch_initiator = S_00B800_COMPUTE_SHADER_EN(1);
+
+ if (info->unaligned) {
+ unsigned *cs_block_size = compute_shader->info.cs.block_size;
+ unsigned remainder[3];
+
+ /* If aligned, these should be an entire block size,
+ * not 0.
+ */
+ remainder[0] = blocks[0] + cs_block_size[0] -
+ align_u32_npot(blocks[0], cs_block_size[0]);
+ remainder[1] = blocks[1] + cs_block_size[1] -
+ align_u32_npot(blocks[1], cs_block_size[1]);
+ remainder[2] = blocks[2] + cs_block_size[2] -
+ align_u32_npot(blocks[2], cs_block_size[2]);
+
+ blocks[0] = round_up_u32(blocks[0], cs_block_size[0]);
+ blocks[1] = round_up_u32(blocks[1], cs_block_size[1]);
+ blocks[2] = round_up_u32(blocks[2], cs_block_size[2]);
+
+ radeon_set_sh_reg_seq(cs, R_00B81C_COMPUTE_NUM_THREAD_X, 3);
+ radeon_emit(cs,
+ S_00B81C_NUM_THREAD_FULL(cs_block_size[0]) |
+ S_00B81C_NUM_THREAD_PARTIAL(remainder[0]));
+ radeon_emit(cs,
+ S_00B81C_NUM_THREAD_FULL(cs_block_size[1]) |
+ S_00B81C_NUM_THREAD_PARTIAL(remainder[1]));
+ radeon_emit(cs,
+ S_00B81C_NUM_THREAD_FULL(cs_block_size[2]) |
+ S_00B81C_NUM_THREAD_PARTIAL(remainder[2]));
+
+ dispatch_initiator |= S_00B800_PARTIAL_TG_EN(1);
+ }
+
+ if (loc->sgpr_idx != -1) {
+ assert(!loc->indirect);
+ assert(loc->num_sgprs == grid_used);
+
+ radeon_set_sh_reg_seq(cs, R_00B900_COMPUTE_USER_DATA_0 +
+ loc->sgpr_idx * 4, grid_used);
+ radeon_emit(cs, blocks[0]);
+ if (grid_used > 1)
+ radeon_emit(cs, blocks[1]);
+ if (grid_used > 2)
+ radeon_emit(cs, blocks[2]);
+ }
+
+ radeon_emit(cs, PKT3(PKT3_DISPATCH_DIRECT, 3, 0) |
+ PKT3_SHADER_TYPE_S(1));
+ radeon_emit(cs, blocks[0]);
+ radeon_emit(cs, blocks[1]);
+ radeon_emit(cs, blocks[2]);
+ radeon_emit(cs, dispatch_initiator);
+ }
+
+ assert(cmd_buffer->cs->cdw <= cdw_max);
+}
+
+void
+radv_dispatch(struct radv_cmd_buffer *cmd_buffer,
+ const struct radv_dispatch_info *info)
+{
+ radv_emit_compute_pipeline(cmd_buffer);
+
+ radv_flush_descriptors(cmd_buffer, VK_SHADER_STAGE_COMPUTE_BIT);
+ radv_flush_constants(cmd_buffer, cmd_buffer->state.compute_pipeline,
+ VK_SHADER_STAGE_COMPUTE_BIT);
+
+ si_emit_cache_flush(cmd_buffer);
+
+ radv_emit_dispatch_packets(cmd_buffer, info);
+
+ radv_cmd_buffer_after_draw(cmd_buffer);
+}
+
+void
+radv_unaligned_dispatch(struct radv_cmd_buffer *cmd_buffer,
+ uint32_t x, uint32_t y, uint32_t z)
+{
+ struct radv_dispatch_info info = {};
+
+ info.blocks[0] = x;
+ info.blocks[1] = y;
+ info.blocks[2] = z;
+ info.unaligned = 1;
+
+ radv_dispatch(cmd_buffer, &info);
+}
diff --git a/src/amd/vulkan/radv_compute.h b/src/amd/vulkan/radv_compute.h
new file mode 100644
index 0000000000..fc9770b772
--- /dev/null
+++ b/src/amd/vulkan/radv_compute.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright © 2016 Red Hat.
+ * Copyright © 2016 Bas Nieuwenhuizen
+ *
+ * based in part on anv driver which is:
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef RADV_COMPUTE_H
+#define RADV_COMPUTE_H
+
+#include "radv_private.h"
+
+struct radv_dispatch_info {
+ /**
+ * Determine the layout of the grid (in block units) to be used.
+ */
+ uint32_t blocks[3];
+
+ /**
+ * Whether it's an unaligned compute dispatch.
+ */
+ bool unaligned;
+
+ /**
+ * Indirect compute parameters resource.
+ */
+ struct radv_buffer *indirect;
+ uint64_t indirect_offset;
+};
+
+void
+radv_init_compute(struct radv_physical_device *physical_device,
+ struct radeon_winsys_cs *cs);
+
+void
+radv_dispatch(struct radv_cmd_buffer *cmd_buffer,
+ const struct radv_dispatch_info *info);
+
+/**
+ * Takes x,y,z as exact numbers of invocations, instead of blocks.
+ *
+ * Limitations: Can't call normal dispatch functions without binding or rebinding
+ * the compute pipeline.
+ */
+void
+radv_unaligned_dispatch(struct radv_cmd_buffer *cmd_buffer,
+ uint32_t x, uint32_t y, uint32_t z);
+
+#endif /* RADV_COMPUTE_H */
diff --git a/src/amd/vulkan/radv_meta.h b/src/amd/vulkan/radv_meta.h
index 5d28cc5f0f..a35dfee184 100644
--- a/src/amd/vulkan/radv_meta.h
+++ b/src/amd/vulkan/radv_meta.h
@@ -27,6 +27,7 @@
#define RADV_META_H
#include "radv_private.h"
+#include "radv_compute.h"
#include "radv_shader.h"
#ifdef __cplusplus
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index e5092a8923..9cb4570100 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -937,17 +937,6 @@ void radv_cmd_buffer_trace_emit(struct radv_cmd_buffer *cmd_buffer);
bool radv_get_memory_fd(struct radv_device *device,
struct radv_device_memory *memory,
int *pFD);
-/*
- * Takes x,y,z as exact numbers of invocations, instead of blocks.
- *
- * Limitations: Can't call normal dispatch functions without binding or rebinding
- * the compute pipeline.
- */
-void radv_unaligned_dispatch(
- struct radv_cmd_buffer *cmd_buffer,
- uint32_t x,
- uint32_t y,
- uint32_t z);
struct radv_event {
struct radeon_winsys_bo *bo;
@@ -1521,6 +1510,30 @@ void radv_initialise_cmask(struct radv_cmd_buffer *cmd_buffer,
void radv_initialize_dcc(struct radv_cmd_buffer *cmd_buffer,
struct radv_image *image, uint32_t value);
+static inline void
+radv_emit_prefetch(struct radv_cmd_buffer *cmd_buffer, uint64_t va,
+ unsigned size)
+{
+ if (cmd_buffer->device->physical_device->rad_info.chip_class >= CIK)
+ si_cp_dma_prefetch(cmd_buffer, va, size);
+}
+
+void
+radv_save_pipeline(struct radv_cmd_buffer *cmd_buffer,
+ struct radv_pipeline *pipeline, enum ring_type ring);
+
+void
+radv_flush_descriptors(struct radv_cmd_buffer *cmd_buffer,
+ VkShaderStageFlags stages);
+
+void
+radv_flush_constants(struct radv_cmd_buffer *cmd_buffer,
+ struct radv_pipeline *pipeline,
+ VkShaderStageFlags stages);
+
+void
+radv_cmd_buffer_after_draw(struct radv_cmd_buffer *cmd_buffer);
+
struct radv_fence {
struct radeon_winsys_fence *fence;
bool submitted;
diff --git a/src/amd/vulkan/si_cmd_buffer.c b/src/amd/vulkan/si_cmd_buffer.c
index 9f8d881d27..a63a57de06 100644
--- a/src/amd/vulkan/si_cmd_buffer.c
+++ b/src/amd/vulkan/si_cmd_buffer.c
@@ -30,6 +30,7 @@
#include "radv_private.h"
#include "radv_shader.h"
#include "radv_cs.h"
+#include "radv_compute.h"
#include "sid.h"
#include "gfx9d.h"
#include "radv_util.h"
@@ -170,52 +171,6 @@ si_write_harvested_raster_configs(struct radv_physical_device *physical_device,
S_030800_INSTANCE_BROADCAST_WRITES(1));
}
-static void
-si_emit_compute(struct radv_physical_device *physical_device,
- struct radeon_winsys_cs *cs)
-{
- radeon_set_sh_reg_seq(cs, R_00B810_COMPUTE_START_X, 3);
- radeon_emit(cs, 0);
- radeon_emit(cs, 0);
- radeon_emit(cs, 0);
-
- radeon_set_sh_reg_seq(cs, R_00B854_COMPUTE_RESOURCE_LIMITS, 3);
- radeon_emit(cs, 0);
- /* R_00B858_COMPUTE_STATIC_THREAD_MGMT_SE0 / SE1 */
- radeon_emit(cs, S_00B858_SH0_CU_EN(0xffff) | S_00B858_SH1_CU_EN(0xffff));
- radeon_emit(cs, S_00B85C_SH0_CU_EN(0xffff) | S_00B85C_SH1_CU_EN(0xffff));
-
- if (physical_device->rad_info.chip_class >= CIK) {
- /* Also set R_00B858_COMPUTE_STATIC_THREAD_MGMT_SE2 / SE3 */
- radeon_set_sh_reg_seq(cs,
- R_00B864_COMPUTE_STATIC_THREAD_MGMT_SE2, 2);
- radeon_emit(cs, S_00B864_SH0_CU_EN(0xffff) |
- S_00B864_SH1_CU_EN(0xffff));
- radeon_emit(cs, S_00B868_SH0_CU_EN(0xffff) |
- S_00B868_SH1_CU_EN(0xffff));
- }
-
- /* This register has been moved to R_00CD20_COMPUTE_MAX_WAVE_ID
- * and is now per pipe, so it should be handled in the
- * kernel if we want to use something other than the default value,
- * which is now 0x22f.
- */
- if (physical_device->rad_info.chip_class <= SI) {
- /* XXX: This should be:
- * (number of compute units) * 4 * (waves per simd) - 1 */
-
- radeon_set_sh_reg(cs, R_00B82C_COMPUTE_MAX_WAVE_ID,
- 0x190 /* Default value */);
- }
-}
-
-void
-si_init_compute(struct radv_cmd_buffer *cmd_buffer)
-{
- struct radv_physical_device *physical_device = cmd_buffer->device->physical_device;
- si_emit_compute(physical_device, cmd_buffer->cs);
-}
-
static void
si_emit_config(struct radv_physical_device *physical_device,
struct radeon_winsys_cs *cs)
@@ -486,7 +441,8 @@ si_emit_config(struct radv_physical_device *physical_device,
S_028C4C_NULL_SQUAD_AA_MASK_ENABLE(1));
radeon_set_uconfig_reg(cs, R_030968_VGT_INSTANCE_BASE_ID, 0);
}
- si_emit_compute(physical_device, cs);
+
+ radv_init_compute(physical_device, cs);
}
void si_init_config(struct radv_cmd_buffer *cmd_buffer)
--
2.14.1
More information about the mesa-dev
mailing list