[PATCH 20/22] drm/amdgpu: implement ring set_priority for gfx_v8 compute
Andres Rodriguez
andresx7 at gmail.com
Tue Feb 28 22:14:47 UTC 2017
Programming CP_HQD_QUEUE_PRIORITY enables a queue to take priority over
other queues on the same pipe. Multiple queues on a pipe are timesliced
so this gives us full precedence over other queues.
Programming CP_HQD_PIPE_PRIORITY changes the SPI_ARB_PRIORITY of the
wave as follows:
0x2: CS_H
0x1: CS_M
0x0: CS_L
The SPI block will then dispatch work according to the policy set by
SPI_ARB_PRIORITY. In the current policy CS_H is higher priority than
gfx.
In order to prevent getting stuck in loops of CUs bouncing between GFX
and high priority compute and introducing further latency, we reserve
CUs 2+ for high priority compute on-demand.
Signed-off-by: Andres Rodriguez <andresx7 at gmail.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu.h | 3 ++
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 1 +
drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 85 +++++++++++++++++++++++++++++-
3 files changed, 88 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 0676495..e37f20d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -913,20 +913,23 @@ struct amdgpu_gfx {
uint32_t me_feature_version;
uint32_t ce_feature_version;
uint32_t pfp_feature_version;
uint32_t rlc_feature_version;
uint32_t mec_feature_version;
uint32_t mec2_feature_version;
struct amdgpu_ring gfx_ring[AMDGPU_MAX_GFX_RINGS];
unsigned num_gfx_rings;
struct amdgpu_ring compute_ring[AMDGPU_MAX_COMPUTE_RINGS];
unsigned num_compute_rings;
+ spinlock_t cu_reserve_lock;
+ uint32_t cu_reserve_pipe_mask;
+ uint32_t cu_reserve_queue_mask[AMDGPU_MAX_COMPUTE_RINGS];
struct amdgpu_irq_src eop_irq;
struct amdgpu_irq_src priv_reg_irq;
struct amdgpu_irq_src priv_inst_irq;
/* gfx status */
uint32_t gfx_current_status;
/* ce ram size*/
unsigned ce_ram_size;
struct amdgpu_cu_info cu_info;
const struct amdgpu_gfx_funcs *funcs;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 1fb1303..86d76e5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1701,20 +1701,21 @@ int amdgpu_device_init(struct amdgpu_device *adev,
/* Registers mapping */
/* TODO: block userspace mapping of io register */
spin_lock_init(&adev->mmio_idx_lock);
spin_lock_init(&adev->smc_idx_lock);
spin_lock_init(&adev->pcie_idx_lock);
spin_lock_init(&adev->uvd_ctx_idx_lock);
spin_lock_init(&adev->didt_idx_lock);
spin_lock_init(&adev->gc_cac_idx_lock);
spin_lock_init(&adev->audio_endpt_idx_lock);
spin_lock_init(&adev->mm_stats.lock);
+ spin_lock_init(&adev->gfx.cu_reserve_lock);
INIT_LIST_HEAD(&adev->shadow_list);
mutex_init(&adev->shadow_list_lock);
INIT_LIST_HEAD(&adev->gtt_list);
spin_lock_init(&adev->gtt_list_lock);
INIT_LIST_HEAD(&adev->ring_lru_list);
mutex_init(&adev->ring_lru_list_lock);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index a778d58..4fdec23 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -46,21 +46,23 @@
#include "gca/gfx_8_0_sh_mask.h"
#include "gca/gfx_8_0_enum.h"
#include "dce/dce_10_0_d.h"
#include "dce/dce_10_0_sh_mask.h"
#include "smu/smu_7_1_3_d.h"
#define GFX8_NUM_GFX_RINGS 1
#define GFX8_MEC_HPD_SIZE 2048
-
+#define GFX8_CU_RESERVE_RESOURCES 0x45888
+#define GFX8_CU_NUM 8
+#define GFX8_CU_RESERVE_PIPE_SHIFT 7
#define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
#define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003
#define ARRAY_MODE(x) ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
#define PIPE_CONFIG(x) ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
#define TILE_SPLIT(x) ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
#define MICRO_TILE_MODE_NEW(x) ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
@@ -6667,20 +6669,100 @@ static u32 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
/* XXX check if swapping is necessary on BE */
adev->wb.wb[ring->wptr_offs] = ring->wptr;
WDOORBELL32(ring->doorbell_index, ring->wptr);
}
+static void gfx_v8_0_cu_reserve(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring, bool acquire)
+{
+ int i, resources;
+ int tmp = 0, queue_mask = 0, type_mask = 0;
+ int reserve_res_reg, reserve_en_reg;
+
+ /* gfx_v8_0_cu_reserve only supports compute path */
+ if (ring->hw_ip != AMDGPU_HW_IP_COMPUTE)
+ return;
+
+ spin_lock(&adev->gfx.cu_reserve_lock);
+ if (acquire) {
+ adev->gfx.cu_reserve_pipe_mask |= (1 << ring->pipe);
+ adev->gfx.cu_reserve_queue_mask[ring->pipe] |= (1 << ring->queue);
+ } else {
+ adev->gfx.cu_reserve_pipe_mask &= ~(1 << ring->pipe);
+ adev->gfx.cu_reserve_queue_mask[ring->pipe] &= ~(1 << ring->queue);
+ }
+
+ /* compute pipe 0 starts at GFX8_CU_RESERVE_PIPE_SHIFT */
+ type_mask = (adev->gfx.cu_reserve_pipe_mask << GFX8_CU_RESERVE_PIPE_SHIFT);
+
+ /* HW only has one register for queue mask, so we collaspse them */
+ for (i = 0; i < AMDGPU_MAX_COMPUTE_RINGS; i++)
+ queue_mask |= adev->gfx.cu_reserve_queue_mask[i];
+
+ /* leave the first 2 CUs for general processing */
+ for (i = 2; i < GFX8_CU_NUM; i++) {
+ reserve_res_reg = mmSPI_RESOURCE_RESERVE_CU_0 + i;
+ reserve_en_reg = mmSPI_RESOURCE_RESERVE_EN_CU_0 + i;
+
+ tmp = REG_SET_FIELD(tmp, SPI_RESOURCE_RESERVE_EN_CU_0,
+ TYPE_MASK, type_mask);
+ tmp = REG_SET_FIELD(tmp, SPI_RESOURCE_RESERVE_EN_CU_0,
+ QUEUE_MASK, queue_mask);
+ if (queue_mask) {
+ resources = GFX8_CU_RESERVE_RESOURCES;
+ tmp = REG_SET_FIELD(tmp, SPI_RESOURCE_RESERVE_EN_CU_0,
+ EN, 1);
+ } else {
+ resources = 0;
+ tmp = REG_SET_FIELD(tmp, SPI_RESOURCE_RESERVE_EN_CU_0,
+ EN, 0);
+ }
+ /* Commit */
+ WREG32(reserve_res_reg, resources);
+ WREG32(reserve_en_reg, tmp);
+ }
+
+ spin_unlock(&adev->gfx.cu_reserve_lock);
+}
+
+static void gfx_v8_0_ring_set_priority_compute(struct amdgpu_ring *ring,
+ int priority)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring->hw_ip != AMDGPU_HW_IP_COMPUTE)
+ return;
+
+ mutex_lock(&adev->srbm_mutex);
+ vi_srbm_select(adev, ring->me, ring->pipe, 0, 0);
+
+ switch (priority) {
+ case AMDGPU_CTX_PRIORITY_NORMAL:
+ WREG32(mmCP_HQD_PIPE_PRIORITY, 0x0);
+ WREG32(mmCP_HQD_QUEUE_PRIORITY, 0x0);
+ gfx_v8_0_cu_reserve(adev, ring, false);
+ break;
+ case AMDGPU_CTX_PRIORITY_HIGH:
+ WREG32(mmCP_HQD_PIPE_PRIORITY, 0x2);
+ WREG32(mmCP_HQD_QUEUE_PRIORITY, 0xf);
+ gfx_v8_0_cu_reserve(adev, ring, true);
+ break;
+ }
+
+ vi_srbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+}
static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
u64 addr, u64 seq,
unsigned flags)
{
bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
/* RELEASE_MEM - flush caches, send int */
amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
@@ -7074,20 +7156,21 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
.emit_fence = gfx_v8_0_ring_emit_fence_compute,
.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
.test_ring = gfx_v8_0_ring_test_ring,
.test_ib = gfx_v8_0_ring_test_ib,
.insert_nop = amdgpu_ring_insert_nop,
.pad_ib = amdgpu_ring_generic_pad_ib,
+ .set_priority = gfx_v8_0_ring_set_priority_compute,
};
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
.type = AMDGPU_RING_TYPE_KIQ,
.align_mask = 0xff,
.nop = PACKET3(PACKET3_NOP, 0x3FFF),
.get_rptr = gfx_v8_0_ring_get_rptr,
.get_wptr = gfx_v8_0_ring_get_wptr_compute,
.set_wptr = gfx_v8_0_ring_set_wptr_compute,
.emit_frame_size =
--
2.7.4
More information about the amd-gfx
mailing list