[PATCH] drm/amdgpu: optimize out a spin lock (v2)
Alex Xie
AlexBin.Xie at amd.com
Thu Jun 22 02:45:54 UTC 2017
Use atomic operations instead of a spin lock.
v2: Adjust commit message
Signed-off-by: Alex Xie <AlexBin.Xie at amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu.h | 5 +-
drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 110 +++++++++++++++++++----------
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 1 -
3 files changed, 76 insertions(+), 40 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 7caf514..21d318b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1588,9 +1588,8 @@ struct amdgpu_device {
/* data for buffer migration throttling */
struct {
- spinlock_t lock;
- s64 last_update_us;
- s64 accum_us; /* accumulated microseconds */
+ atomic64_t last_update_us;
+ atomic64_t accum_us; /* accumulated microseconds */
u32 log2_max_MBps;
} mm_stats;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 82131d7..7b6f42e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -225,6 +225,9 @@ static u64 amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev)
s64 time_us, increment_us;
u64 max_bytes;
u64 free_vram, total_vram, used_vram;
+ s64 old_update_us, head_time_us;
+ s64 accum_us;
+ s64 old_accum_us, head_accum_us;
/* Allow a maximum of 200 accumulated ms. This is basically per-IB
* throttling.
@@ -242,47 +245,83 @@ static u64 amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev)
used_vram = atomic64_read(&adev->vram_usage);
free_vram = used_vram >= total_vram ? 0 : total_vram - used_vram;
- spin_lock(&adev->mm_stats.lock);
-
/* Increase the amount of accumulated us. */
- time_us = ktime_to_us(ktime_get());
- increment_us = time_us - adev->mm_stats.last_update_us;
- adev->mm_stats.last_update_us = time_us;
- adev->mm_stats.accum_us = min(adev->mm_stats.accum_us + increment_us,
- us_upper_bound);
-
- /* This prevents the short period of low performance when the VRAM
- * usage is low and the driver is in debt or doesn't have enough
- * accumulated us to fill VRAM quickly.
- *
- * The situation can occur in these cases:
- * - a lot of VRAM is freed by userspace
- * - the presence of a big buffer causes a lot of evictions
- * (solution: split buffers into smaller ones)
- *
- * If 128 MB or 1/8th of VRAM is free, start filling it now by setting
- * accum_us to a positive number.
- */
- if (free_vram >= 128 * 1024 * 1024 || free_vram >= total_vram / 8) {
- s64 min_us;
-
- /* Be more aggresive on dGPUs. Try to fill a portion of free
- * VRAM now.
- */
- if (!(adev->flags & AMD_IS_APU))
- min_us = bytes_to_us(adev, free_vram / 4);
+ old_update_us = atomic64_read(&adev->mm_stats.last_update_us);
+ for (;;) {
+ time_us = ktime_to_us(ktime_get());
+ head_time_us = atomic64_cmpxchg(&adev->mm_stats.last_update_us,
+ old_update_us, time_us);
+
+ if (likely(head_time_us == old_update_us))
+ /*
+ * No other task modified adev->mm_stats.last_update_us.
+ * Update was successful.
+ */
+ break;
else
- min_us = 0; /* Reset accum_us on APUs. */
+ /* Another task modified the value after we read it.
+ * A rare contention happens, let us retry.
+ * In most cases, one retry can do the job.
+ * See function atomic64_add_unless as a similar idea.
+ */
+ old_update_us = head_time_us;
+ }
+ increment_us = time_us - old_update_us;
+
+ old_accum_us = atomic64_read(&adev->mm_stats.accum_us);
+
+ for (;;) {
+ accum_us = min(old_accum_us + increment_us, us_upper_bound);
+
+ /* This prevents the short period of low performance when the
+ * VRAM usage is low and the driver is in debt or doesn't have
+ * enough accumulated us to fill VRAM quickly.
+ *
+ * The situation can occur in these cases:
+ * - a lot of VRAM is freed by userspace
+ * - the presence of a big buffer causes a lot of evictions
+ * (solution: split buffers into smaller ones)
+ *
+ * If 128 MB or 1/8th of VRAM is free, start filling it now by
+ * setting accum_us to a positive number.
+ */
+ if (free_vram >= 128 * 1024 * 1024 ||
+ free_vram >= total_vram / 8) {
+ s64 min_us;
+
+ /* Be more aggressive on dGPUs. Try to fill a portion of
+ * free VRAM now.
+ */
+ if (!(adev->flags & AMD_IS_APU))
+ min_us = bytes_to_us(adev, free_vram / 4);
+ else
+ min_us = 0; /* Reset accum_us on APUs. */
+
+ accum_us = max(min_us, accum_us);
+ }
+
+ head_accum_us = atomic64_cmpxchg(&adev->mm_stats.accum_us,
+ old_accum_us, accum_us);
- adev->mm_stats.accum_us = max(min_us, adev->mm_stats.accum_us);
+ if (likely(head_accum_us == old_accum_us))
+ /*
+ * No other task modified adev->mm_stats.accum_us.
+ * Update was successful.
+ */
+ break;
+ else
+ /* Another task modified the value after we read it.
+ * A rare contention happens, let us retry.
+ * In most cases, one retry can do the job.
+ * See function atomic64_add_unless as a similar idea.
+ */
+ old_accum_us = head_accum_us;
}
/* This returns 0 if the driver is in debt to disallow (optional)
* buffer moves.
*/
- max_bytes = us_to_bytes(adev, adev->mm_stats.accum_us);
-
- spin_unlock(&adev->mm_stats.lock);
+ max_bytes = us_to_bytes(adev, accum_us);
return max_bytes;
}
@@ -292,9 +331,8 @@ static u64 amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev)
*/
void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes)
{
- spin_lock(&adev->mm_stats.lock);
- adev->mm_stats.accum_us -= bytes_to_us(adev, num_bytes);
- spin_unlock(&adev->mm_stats.lock);
+ s64 i = bytes_to_us(adev, num_bytes);
+ atomic64_sub(i, &adev->mm_stats.accum_us);
}
static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index ff90f78..9e9d592 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2117,7 +2117,6 @@ int amdgpu_device_init(struct amdgpu_device *adev,
spin_lock_init(&adev->didt_idx_lock);
spin_lock_init(&adev->gc_cac_idx_lock);
spin_lock_init(&adev->audio_endpt_idx_lock);
- spin_lock_init(&adev->mm_stats.lock);
INIT_LIST_HEAD(&adev->shadow_list);
mutex_init(&adev->shadow_list_lock);
--
2.7.4
More information about the amd-gfx
mailing list