[PATCH 17/25] drm/amdgpu: implement lru amdgpu_queue_mgr policy for compute v4

Andres Rodriguez andresx7 at gmail.com
Tue Apr 4 22:05:43 UTC 2017


Use an LRU policy to map usermode rings to HW compute queues.

Most compute clients use one queue, and usually the first queue
available. This results in poor pipe/queue work distribution when
multiple compute apps are running. In most cases pipe 0 queue 0 is
the only queue that gets used.

In order to better distribute work across multiple HW queues, we adopt
a policy to map the usermode ring ids to the LRU HW queue.

This fixes the large majority of cases where multi-app compute workloads
end up sharing the same HW queue, even though 7 other queues are available.

v2: use ring->funcs->type instead of ring->hw_ip
v3: remove amdgpu_queue_mapper_funcs
v4: change ring_lru_list_lock to spinlock, grab only once in lru_get()

Signed-off-by: Andres Rodriguez <andresx7 at gmail.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h           |  3 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c    |  3 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c | 38 +++++++++++++++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c      | 63 +++++++++++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h      |  4 ++
 5 files changed, 110 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 9ca6479..62bfb53 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1644,20 +1644,23 @@ struct amdgpu_device {
 	struct kfd_dev          *kfd;
 
 	struct amdgpu_virt	virt;
 
 	/* link all shadow bo */
 	struct list_head                shadow_list;
 	struct mutex                    shadow_list_lock;
 	/* link all gtt */
 	spinlock_t			gtt_list_lock;
 	struct list_head                gtt_list;
+	/* keep an lru list of rings by HW IP */
+	struct list_head		ring_lru_list;
+	spinlock_t			ring_lru_list_lock;
 
 	/* record hw reset is performed */
 	bool has_hw_reset;
 
 };
 
 static inline struct amdgpu_device *amdgpu_ttm_adev(struct ttm_bo_device *bdev)
 {
 	return container_of(bdev, struct amdgpu_device, mman.bdev);
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 50bdf55..f48cc4d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1889,20 +1889,23 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	spin_lock_init(&adev->gc_cac_idx_lock);
 	spin_lock_init(&adev->audio_endpt_idx_lock);
 	spin_lock_init(&adev->mm_stats.lock);
 
 	INIT_LIST_HEAD(&adev->shadow_list);
 	mutex_init(&adev->shadow_list_lock);
 
 	INIT_LIST_HEAD(&adev->gtt_list);
 	spin_lock_init(&adev->gtt_list_lock);
 
+	INIT_LIST_HEAD(&adev->ring_lru_list);
+	spin_lock_init(&adev->ring_lru_list_lock);
+
 	if (adev->asic_type >= CHIP_BONAIRE) {
 		adev->rmmio_base = pci_resource_start(adev->pdev, 5);
 		adev->rmmio_size = pci_resource_len(adev->pdev, 5);
 	} else {
 		adev->rmmio_base = pci_resource_start(adev->pdev, 2);
 		adev->rmmio_size = pci_resource_len(adev->pdev, 2);
 	}
 
 	adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
 	if (adev->rmmio == NULL) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c
index 3e9ac80..054d750 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c
@@ -84,20 +84,54 @@ static int amdgpu_identity_map(struct amdgpu_device *adev,
 		break;
 	default:
 		*out_ring = NULL;
 		DRM_ERROR("unknown HW IP type: %d\n", mapper->hw_ip);
 		return -EINVAL;
 	}
 
 	return amdgpu_update_cached_map(mapper, ring, *out_ring);
 }
 
+static int amdgpu_hw_ip_to_ring_type(int hw_ip)
+{
+	switch (hw_ip) {
+	case AMDGPU_HW_IP_GFX:
+		return AMDGPU_RING_TYPE_GFX;
+	case AMDGPU_HW_IP_COMPUTE:
+		return AMDGPU_RING_TYPE_COMPUTE;
+	case AMDGPU_HW_IP_DMA:
+		return AMDGPU_RING_TYPE_SDMA;
+	case AMDGPU_HW_IP_UVD:
+		return AMDGPU_RING_TYPE_UVD;
+	case AMDGPU_HW_IP_VCE:
+		return AMDGPU_RING_TYPE_VCE;
+	default:
+		DRM_ERROR("Invalid HW IP specified %d\n", hw_ip);
+		return -1;	/* error sentinel: why this returns int, not the enum */
+	}
+}
+
+static int amdgpu_lru_map(struct amdgpu_device *adev,
+			  struct amdgpu_queue_mapper *mapper,
+			  int user_ring,
+			  struct amdgpu_ring **out_ring)
+{
+	/* amdgpu_ring_lru_get() matches on ring type, not on HW IP */
+	int type = amdgpu_hw_ip_to_ring_type(mapper->hw_ip);
+	int err = amdgpu_ring_lru_get(adev, type, out_ring);
+
+	if (err)
+		return err;
+	/* record the chosen ring for user_ring in the mapper's cached map */
+	return amdgpu_update_cached_map(mapper, user_ring, *out_ring);
+}
+
 /**
  * amdgpu_queue_mgr_init - init an amdgpu_queue_mgr struct
  *
  * @adev: amdgpu_device pointer
  * @mgr: amdgpu_queue_mgr structure holding queue information
  *
  * Initialize the the selected @mgr (all asics).
  *
  * Returns 0 on success, error on failure.
  */
@@ -205,26 +239,28 @@ int amdgpu_queue_mgr_map(struct amdgpu_device *adev,
 
 	*out_ring = amdgpu_get_cached_map(mapper, ring);
 	if (*out_ring) {
 		/* cache hit */
 		r = 0;
 		goto out_unlock;
 	}
 
 	switch (mapper->hw_ip) {
 	case AMDGPU_HW_IP_GFX:
-	case AMDGPU_HW_IP_COMPUTE:
 	case AMDGPU_HW_IP_DMA:
 	case AMDGPU_HW_IP_UVD:
 	case AMDGPU_HW_IP_VCE:
 		r = amdgpu_identity_map(adev, mapper, ring, out_ring);
 		break;
+	case AMDGPU_HW_IP_COMPUTE:
+		r = amdgpu_lru_map(adev, mapper, ring, out_ring);
+		break;
 	default:
 		*out_ring = NULL;
 		r = -EINVAL;
 		DRM_ERROR("unknown HW IP type: %d\n", mapper->hw_ip);
 	}
 
 out_unlock:
 	mutex_unlock(&mapper->lock);
 	return r;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index 12fc815..2b452b0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -173,20 +173,22 @@ void amdgpu_ring_commit(struct amdgpu_ring *ring)
 	count = ring->funcs->align_mask + 1 -
 		(ring->wptr & ring->funcs->align_mask);
 	count %= ring->funcs->align_mask + 1;
 	ring->funcs->insert_nop(ring, count);
 
 	mb();
 	amdgpu_ring_set_wptr(ring);
 
 	if (ring->funcs->end_use)
 		ring->funcs->end_use(ring);
+
+	amdgpu_ring_lru_touch(ring->adev, ring);
 }
 
 /**
  * amdgpu_ring_undo - reset the wptr
  *
  * @ring: amdgpu_ring structure holding ring information
  *
  * Reset the driver's copy of the wptr (all asics).
  */
 void amdgpu_ring_undo(struct amdgpu_ring *ring)
@@ -291,20 +293,22 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
 					    &ring->gpu_addr,
 					    (void **)&ring->ring);
 		if (r) {
 			dev_err(adev->dev, "(%d) ring create failed\n", r);
 			return r;
 		}
 		amdgpu_ring_clear_ring(ring);
 	}
 
 	ring->max_dw = max_dw;
+	INIT_LIST_HEAD(&ring->lru_list);
+	amdgpu_ring_lru_touch(adev, ring);
 
 	if (amdgpu_debugfs_ring_init(adev, ring)) {
 		DRM_ERROR("Failed to register debugfs file for rings !\n");
 	}
 	return 0;
 }
 
 /**
  * amdgpu_ring_fini - tear down the driver ring struct.
  *
@@ -332,20 +336,79 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring)
 
 	amdgpu_bo_free_kernel(&ring->ring_obj,
 			      &ring->gpu_addr,
 			      (void **)&ring->ring);
 
 	amdgpu_debugfs_ring_fini(ring);
 
 	ring->adev->rings[ring->idx] = NULL;
 }
 
+static void amdgpu_ring_lru_touch_locked(struct amdgpu_device *adev,
+					 struct amdgpu_ring *ring)
+{
+	/* lock held by caller; an initialized but unlisted ring moves fine too */
+	list_move_tail(&ring->lru_list, &adev->ring_lru_list);
+}
+
+/**
+ * amdgpu_ring_lru_get - pick the least recently used ring of a given type
+ *
+ * @adev: amdgpu_device pointer
+ * @type: ring type, compared against each ring's funcs->type
+ * @ring: output ring, set to NULL when no ring matches
+ *
+ * Walk the device LRU list under ring_lru_list_lock, take the first ring
+ * whose type matches and move it to the tail of the list.
+ * Returns 0 on success, -EINVAL if no ring of @type is on the list.
+ */
+int amdgpu_ring_lru_get(struct amdgpu_device *adev, int type,
+			struct amdgpu_ring **ring)
+{
+	struct amdgpu_ring *cur;
+	int ret = -EINVAL;
+
+	*ring = NULL;
+	spin_lock(&adev->ring_lru_list_lock);
+	/* the list head is the least recently used ring: first match wins */
+	list_for_each_entry(cur, &adev->ring_lru_list, lru_list) {
+		if (cur->funcs->type != type)
+			continue;
+		/* handing the ring out counts as a use, so requeue it now */
+		amdgpu_ring_lru_touch_locked(adev, cur);
+		*ring = cur;
+		ret = 0;
+		break;
+	}
+	spin_unlock(&adev->ring_lru_list_lock);
+
+	if (ret)
+		DRM_ERROR("Ring LRU contains no entries for ring type:%d\n", type);
+
+	return ret;
+}
+
+/**
+ * amdgpu_ring_lru_touch - mark a ring as the most recently used one
+ *
+ * @adev: amdgpu_device pointer owning ring_lru_list and its spinlock
+ * @ring: ring to touch (amdgpu_ring_init() initializes its lru_list first)
+ *
+ * Takes ring_lru_list_lock and moves @ring to the tail of ring_lru_list.
+ */
+void amdgpu_ring_lru_touch(struct amdgpu_device *adev, struct amdgpu_ring *ring)
+{
+	spin_lock(&adev->ring_lru_list_lock);
+	amdgpu_ring_lru_touch_locked(adev, ring);
+	spin_unlock(&adev->ring_lru_list_lock);
+}
+
 /*
  * Debugfs info
  */
 #if defined(CONFIG_DEBUG_FS)
 
 /* Layout of file is 12 bytes consisting of
  * - rptr
  * - wptr
  * - driver's copy of wptr
  *
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index cf411f7..41ab767 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -141,20 +141,21 @@ struct amdgpu_ring_funcs {
 	void (*emit_cntxcntl) (struct amdgpu_ring *ring, uint32_t flags);
 	void (*emit_rreg)(struct amdgpu_ring *ring, uint32_t reg);
 	void (*emit_wreg)(struct amdgpu_ring *ring, uint32_t reg, uint32_t val);
 };
 
 struct amdgpu_ring {
 	struct amdgpu_device		*adev;
 	const struct amdgpu_ring_funcs	*funcs;
 	struct amdgpu_fence_driver	fence_drv;
 	struct amd_gpu_scheduler	sched;
+	struct list_head		lru_list;
 
 	struct amdgpu_bo	*ring_obj;
 	volatile uint32_t	*ring;
 	unsigned		rptr_offs;
 	u64			wptr;
 	u64			wptr_old;
 	unsigned		ring_size;
 	unsigned		max_dw;
 	int			count_dw;
 	uint64_t		gpu_addr;
@@ -187,19 +188,22 @@ int amdgpu_ring_is_valid_index(struct amdgpu_device *adev,
 			       int hw_ip, int ring);
 int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned ndw);
 void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count);
 void amdgpu_ring_generic_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib);
 void amdgpu_ring_commit(struct amdgpu_ring *ring);
 void amdgpu_ring_undo(struct amdgpu_ring *ring);
 int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
 		     unsigned ring_size, struct amdgpu_irq_src *irq_src,
 		     unsigned irq_type);
 void amdgpu_ring_fini(struct amdgpu_ring *ring);
+int amdgpu_ring_lru_get(struct amdgpu_device *adev, int type,
+			struct amdgpu_ring **ring);
+void amdgpu_ring_lru_touch(struct amdgpu_device *adev, struct amdgpu_ring *ring);
 static inline void amdgpu_ring_clear_ring(struct amdgpu_ring *ring)
 {
 	int i = 0;
 	while (i <= ring->buf_mask)
 		ring->ring[i++] = ring->funcs->nop;
 
 }
 
 #endif
-- 
2.9.3



More information about the amd-gfx mailing list