[PATCH 3/3] drm/msm: Use Hardware counters for perf profiling
Sharat Masetty
smasetty at codeaurora.org
Wed Oct 17 13:04:01 UTC 2018
This patch attempts to make use of the hardware counters for GPU busy %
estimation when possible and to skip the software counters, since the
latter also account for software-side delays. This should help give a
more accurate representation of the GPU workload.
Signed-off-by: Sharat Masetty <smasetty at codeaurora.org>
---
drivers/gpu/drm/msm/msm_gpu.c | 30 ++++++++++++++++++++++++++----
drivers/gpu/drm/msm/msm_gpu.h | 5 +++--
drivers/gpu/drm/msm/msm_perf.c | 10 +++++-----
3 files changed, 34 insertions(+), 11 deletions(-)
diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c
index e9b5426..a896541 100644
--- a/drivers/gpu/drm/msm/msm_gpu.c
+++ b/drivers/gpu/drm/msm/msm_gpu.c
@@ -592,6 +592,9 @@ static void update_sw_cntrs(struct msm_gpu *gpu)
uint32_t elapsed;
unsigned long flags;
+ if (gpu->funcs->gpu_busy)
+ return;
+
spin_lock_irqsave(&gpu->perf_lock, flags);
if (!gpu->perfcntr_active)
goto out;
@@ -620,6 +623,7 @@ void msm_gpu_perfcntr_start(struct msm_gpu *gpu)
/* we could dynamically enable/disable perfcntr registers too.. */
gpu->last_sample.active = msm_gpu_active(gpu);
gpu->last_sample.time = ktime_get();
+ gpu->last_sample.busy_cycles = 0;
gpu->activetime = gpu->totaltime = 0;
gpu->perfcntr_active = true;
update_hw_cntrs(gpu, 0, NULL);
@@ -632,9 +636,22 @@ void msm_gpu_perfcntr_stop(struct msm_gpu *gpu)
pm_runtime_put_sync(&gpu->pdev->dev);
}
+static void msm_gpu_hw_sample(struct msm_gpu *gpu, uint64_t *activetime,
+ uint64_t *totaltime)
+{
+ ktime_t time;
+
+ *activetime = gpu->funcs->gpu_busy(gpu,
+ &gpu->last_sample.busy_cycles);
+
+ time = ktime_get();
+ *totaltime = ktime_us_delta(time, gpu->last_sample.time);
+ gpu->last_sample.time = time;
+}
+
/* returns -errno or # of cntrs sampled */
-int msm_gpu_perfcntr_sample(struct msm_gpu *gpu, uint32_t *activetime,
- uint32_t *totaltime, uint32_t ncntrs, uint32_t *cntrs)
+int msm_gpu_perfcntr_sample(struct msm_gpu *gpu, uint64_t *activetime,
+ uint64_t *totaltime, uint32_t ncntrs, uint32_t *cntrs)
{
unsigned long flags;
int ret;
@@ -646,13 +663,18 @@ int msm_gpu_perfcntr_sample(struct msm_gpu *gpu, uint32_t *activetime,
goto out;
}
+ ret = update_hw_cntrs(gpu, ncntrs, cntrs);
+
+ if (gpu->funcs->gpu_busy) {
+ msm_gpu_hw_sample(gpu, activetime, totaltime);
+ goto out;
+ }
+
*activetime = gpu->activetime;
*totaltime = gpu->totaltime;
gpu->activetime = gpu->totaltime = 0;
- ret = update_hw_cntrs(gpu, ncntrs, cntrs);
-
out:
spin_unlock_irqrestore(&gpu->perf_lock, flags);
diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h
index 0ff23ca..7dc775f 100644
--- a/drivers/gpu/drm/msm/msm_gpu.h
+++ b/drivers/gpu/drm/msm/msm_gpu.h
@@ -90,6 +90,7 @@ struct msm_gpu {
struct {
bool active;
ktime_t time;
+ u64 busy_cycles;
} last_sample;
uint32_t totaltime, activetime; /* sw counters */
uint32_t last_cntrs[5]; /* hw counters */
@@ -275,8 +276,8 @@ static inline void gpu_write64(struct msm_gpu *gpu, u32 lo, u32 hi, u64 val)
void msm_gpu_perfcntr_start(struct msm_gpu *gpu);
void msm_gpu_perfcntr_stop(struct msm_gpu *gpu);
-int msm_gpu_perfcntr_sample(struct msm_gpu *gpu, uint32_t *activetime,
- uint32_t *totaltime, uint32_t ncntrs, uint32_t *cntrs);
+int msm_gpu_perfcntr_sample(struct msm_gpu *gpu, uint64_t *activetime,
+ uint64_t *totaltime, uint32_t ncntrs, uint32_t *cntrs);
void msm_gpu_retire(struct msm_gpu *gpu);
void msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
diff --git a/drivers/gpu/drm/msm/msm_perf.c b/drivers/gpu/drm/msm/msm_perf.c
index 5ab21bd..318f7dd 100644
--- a/drivers/gpu/drm/msm/msm_perf.c
+++ b/drivers/gpu/drm/msm/msm_perf.c
@@ -17,7 +17,7 @@
/* For profiling, userspace can:
*
- * tail -f /sys/kernel/debug/dri/<minor>/gpu
+ * tail -f /sys/kernel/debug/dri/<minor>/perf
*
* This will enable performance counters/profiling to track the busy time
* and any gpu specific performance counters that are supported.
@@ -85,9 +85,9 @@ static int refill_buf(struct msm_perf_state *perf)
}
} else {
/* Sample line: */
- uint32_t activetime = 0, totaltime = 0;
+ uint64_t activetime = 0, totaltime = 0;
uint32_t cntrs[5];
- uint32_t val;
+ uint64_t val;
int ret;
/* sleep until next sample time: */
@@ -101,14 +101,14 @@ static int refill_buf(struct msm_perf_state *perf)
return ret;
val = totaltime ? 1000 * activetime / totaltime : 0;
- n = snprintf(ptr, rem, "%3d.%d%%", val / 10, val % 10);
+ n = snprintf(ptr, rem, "%3llu.%llu%%", val / 10, val % 10);
ptr += n;
rem -= n;
for (i = 0; i < ret; i++) {
/* cycle counters (I think).. convert to MHz.. */
val = cntrs[i] / 10000;
- n = snprintf(ptr, rem, "\t%5d.%02d",
+ n = snprintf(ptr, rem, "\t%5llu.%02llu",
val / 100, val % 100);
ptr += n;
rem -= n;
--
1.9.1
More information about the dri-devel
mailing list