[PATCH 20/22] drm/amdgpu: implement ring set_priority for gfx_v8 compute v2
Andres Rodriguez
andresx7 at gmail.com
Thu Mar 2 07:03:22 UTC 2017
Programming CP_HQD_QUEUE_PRIORITY enables a queue to take priority over
other queues on the same pipe. Multiple queues on a pipe are timesliced
so this gives us full precedence over other queues.
Programming CP_HQD_PIPE_PRIORITY changes the SPI_ARB_PRIORITY of the
wave as follows:
0x2: CS_H
0x1: CS_M
0x0: CS_L
The SPI block will then dispatch work according to the policy set by
SPI_ARB_PRIORITY. In the current policy CS_H is higher priority than
gfx.
To avoid CUs bouncing back and forth between GFX and high priority
compute work, which would introduce further latency, we reserve CUs 2
and above for high priority compute on demand.
v2: fix srbm_select to ring->queue and use ring->funcs->type
Signed-off-by: Andres Rodriguez <andresx7 at gmail.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu.h | 3 +
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 1 +
drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 96 +++++++++++++++++++++++++++++-
3 files changed, 99 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index accb885..3d13127 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -921,6 +921,9 @@ struct amdgpu_gfx {
unsigned num_gfx_rings;
struct amdgpu_ring compute_ring[AMDGPU_MAX_COMPUTE_RINGS];
unsigned num_compute_rings;
+ spinlock_t cu_reserve_lock;
+ uint32_t cu_reserve_pipe_mask;
+ uint32_t cu_reserve_queue_mask[AMDGPU_MAX_COMPUTE_RINGS];
struct amdgpu_irq_src eop_irq;
struct amdgpu_irq_src priv_reg_irq;
struct amdgpu_irq_src priv_inst_irq;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 1fb1303..86d76e5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1708,6 +1708,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
spin_lock_init(&adev->gc_cac_idx_lock);
spin_lock_init(&adev->audio_endpt_idx_lock);
spin_lock_init(&adev->mm_stats.lock);
+ spin_lock_init(&adev->gfx.cu_reserve_lock);
INIT_LIST_HEAD(&adev->shadow_list);
mutex_init(&adev->shadow_list_lock);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 5db5bac..141c964 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -53,7 +53,10 @@
#define GFX8_NUM_GFX_RINGS 1
#define GFX8_MEC_HPD_SIZE 2048
-
+#define GFX8_CU_RESERVE_RESOURCES 0x45888 /* value written to SPI_RESOURCE_RESERVE_CU_n while a reservation is active */
+#define GFX8_CU_NUM 8 /* exclusive upper bound of the CU indices programmed by gfx_v8_0_cu_reserve() */
+#define GFX8_UNRESERVED_CU_NUM 2 /* CUs below this index are left for general processing */
+#define GFX8_CU_RESERVE_PIPE_SHIFT 7 /* bit position of compute pipe 0 within the reserve TYPE_MASK */
#define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
@@ -6674,6 +6677,96 @@ static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
WDOORBELL32(ring->doorbell_index, ring->wptr);
}
+/**
+ * gfx_v8_0_cu_reserve - add or drop a compute ring's CU reservation
+ * @adev: device owning the reservation bookkeeping and the SPI registers
+ * @ring: ring whose pipe/queue is added to or removed from the reservation
+ * @acquire: true to reserve CUs for @ring, false to release its reservation
+ *
+ * Updates the per-device pipe and queue reservation masks and reprograms
+ * SPI_RESOURCE_RESERVE_CU_n / SPI_RESOURCE_RESERVE_EN_CU_n for every CU in
+ * [GFX8_UNRESERVED_CU_NUM, GFX8_CU_NUM). Does nothing for non-compute rings.
+ *
+ * A pipe stays in the reservation mask for as long as at least one of its
+ * queues still holds a reservation; releasing one queue must not strip the
+ * reservation from its siblings on the same pipe.
+ *
+ * NOTE(review): the masks are indexed by ring->pipe only; ring->me is not
+ * taken into account. Confirm this is intended when more than one MEC is used.
+ */
+static void gfx_v8_0_cu_reserve(struct amdgpu_device *adev,
+				struct amdgpu_ring *ring, bool acquire)
+{
+	uint32_t pipe_bit, queue_bit;
+	uint32_t type_mask, queue_mask = 0;
+	uint32_t resources, en_val = 0;
+	int i;
+
+	/* gfx_v8_0_cu_reserve only supports compute path */
+	if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
+		return;
+
+	/* unsigned shifts: avoid shifting into the sign bit of an int */
+	pipe_bit = 1u << ring->pipe;
+	queue_bit = 1u << ring->queue;
+
+	spin_lock(&adev->gfx.cu_reserve_lock);
+	if (acquire) {
+		adev->gfx.cu_reserve_queue_mask[ring->pipe] |= queue_bit;
+		adev->gfx.cu_reserve_pipe_mask |= pipe_bit;
+	} else {
+		adev->gfx.cu_reserve_queue_mask[ring->pipe] &= ~queue_bit;
+		/* only drop the pipe once none of its queues is reserved */
+		if (!adev->gfx.cu_reserve_queue_mask[ring->pipe])
+			adev->gfx.cu_reserve_pipe_mask &= ~pipe_bit;
+	}
+
+	/* compute pipe 0 starts at GFX8_CU_RESERVE_PIPE_SHIFT */
+	type_mask = adev->gfx.cu_reserve_pipe_mask << GFX8_CU_RESERVE_PIPE_SHIFT;
+
+	/* HW only has one register for queue mask, so we collapse them */
+	for (i = 0; i < AMDGPU_MAX_COMPUTE_RINGS; i++)
+		queue_mask |= adev->gfx.cu_reserve_queue_mask[i];
+
+	/* the same values go to every reserved CU, so build them once */
+	resources = queue_mask ? GFX8_CU_RESERVE_RESOURCES : 0;
+	en_val = REG_SET_FIELD(en_val, SPI_RESOURCE_RESERVE_EN_CU_0,
+			       TYPE_MASK, type_mask);
+	en_val = REG_SET_FIELD(en_val, SPI_RESOURCE_RESERVE_EN_CU_0,
+			       QUEUE_MASK, queue_mask);
+	en_val = REG_SET_FIELD(en_val, SPI_RESOURCE_RESERVE_EN_CU_0,
+			       EN, queue_mask ? 1 : 0);
+
+	/* leave the first CUs for general processing */
+	for (i = GFX8_UNRESERVED_CU_NUM; i < GFX8_CU_NUM; i++) {
+		/* Commit: resources first, then the enable/mask register */
+		WREG32(mmSPI_RESOURCE_RESERVE_CU_0 + i, resources);
+		WREG32(mmSPI_RESOURCE_RESERVE_EN_CU_0 + i, en_val);
+	}
+
+	spin_unlock(&adev->gfx.cu_reserve_lock);
+}
+
+/**
+ * gfx_v8_0_set_spi_priority - program the HQD priority registers of a ring
+ * @adev: device the ring belongs to
+ * @ring: ring whose me/pipe/queue is selected through SRBM
+ * @priority: AMDGPU_CTX_PRIORITY_NORMAL or AMDGPU_CTX_PRIORITY_HIGH
+ *
+ * With the ring's queue selected under srbm_mutex, writes
+ * CP_HQD_PIPE_PRIORITY and CP_HQD_QUEUE_PRIORITY: 0x0/0x0 for normal
+ * priority, 0x2/0xf for high priority. Any other priority value triggers a
+ * WARN and leaves both registers untouched.
+ */
+static void gfx_v8_0_set_spi_priority(struct amdgpu_device *adev,
+				      struct amdgpu_ring *ring,
+				      int priority)
+{
+	mutex_lock(&adev->srbm_mutex);
+	vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+
+	if (priority == AMDGPU_CTX_PRIORITY_HIGH) {
+		WREG32(mmCP_HQD_PIPE_PRIORITY, 0x2);
+		WREG32(mmCP_HQD_QUEUE_PRIORITY, 0xf);
+	} else if (priority == AMDGPU_CTX_PRIORITY_NORMAL) {
+		WREG32(mmCP_HQD_PIPE_PRIORITY, 0x0);
+		WREG32(mmCP_HQD_QUEUE_PRIORITY, 0x0);
+	} else {
+		WARN(1, "Attempt to set invalid SPI priority for ring:%d\n",
+		     ring->idx);
+	}
+
+	/* restore the default SRBM selection before dropping the mutex */
+	vi_srbm_select(adev, 0, 0, 0, 0);
+	mutex_unlock(&adev->srbm_mutex);
+}
+
+/**
+ * gfx_v8_0_ring_set_priority_compute - set_priority callback for compute rings
+ * @ring: ring to reprioritize; ignored unless it is a compute ring
+ * @priority: requested context priority
+ *
+ * Programs the ring's HQD priority registers, then acquires the CU
+ * reservation when @priority is AMDGPU_CTX_PRIORITY_HIGH and releases it
+ * for any other value.
+ */
+static void gfx_v8_0_ring_set_priority_compute(struct amdgpu_ring *ring,
+					       int priority)
+{
+	bool want_reserve = (priority == AMDGPU_CTX_PRIORITY_HIGH);
+
+	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
+		gfx_v8_0_set_spi_priority(ring->adev, ring, priority);
+		gfx_v8_0_cu_reserve(ring->adev, ring, want_reserve);
+	}
+}
+
static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
u64 addr, u64 seq,
unsigned flags)
@@ -7081,6 +7174,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
.test_ib = gfx_v8_0_ring_test_ib,
.insert_nop = amdgpu_ring_insert_nop,
.pad_ib = amdgpu_ring_generic_pad_ib,
+ .set_priority = gfx_v8_0_ring_set_priority_compute,
};
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
--
2.9.3
More information about the amd-gfx
mailing list