[PATCH 3/3] drm/msm: Use Hardware counters for perf profiling

Sharat Masetty smasetty at codeaurora.org
Wed Oct 17 13:04:01 UTC 2018


This patch makes use of the hardware counters for GPU busy % estimation
when they are available, and skips the software counters in that case
because they also account for software-side delays. This should give a
more accurate representation of the GPU workload.

Signed-off-by: Sharat Masetty <smasetty at codeaurora.org>
---
 drivers/gpu/drm/msm/msm_gpu.c  | 30 ++++++++++++++++++++++++++----
 drivers/gpu/drm/msm/msm_gpu.h  |  5 +++--
 drivers/gpu/drm/msm/msm_perf.c | 10 +++++-----
 3 files changed, 34 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c
index e9b5426..a896541 100644
--- a/drivers/gpu/drm/msm/msm_gpu.c
+++ b/drivers/gpu/drm/msm/msm_gpu.c
@@ -592,6 +592,9 @@ static void update_sw_cntrs(struct msm_gpu *gpu)
 	uint32_t elapsed;
 	unsigned long flags;
 
+	if (gpu->funcs->gpu_busy)
+		return;
+
 	spin_lock_irqsave(&gpu->perf_lock, flags);
 	if (!gpu->perfcntr_active)
 		goto out;
@@ -620,6 +623,7 @@ void msm_gpu_perfcntr_start(struct msm_gpu *gpu)
 	/* we could dynamically enable/disable perfcntr registers too.. */
 	gpu->last_sample.active = msm_gpu_active(gpu);
 	gpu->last_sample.time = ktime_get();
+	gpu->last_sample.busy_cycles = 0;
 	gpu->activetime = gpu->totaltime = 0;
 	gpu->perfcntr_active = true;
 	update_hw_cntrs(gpu, 0, NULL);
@@ -632,9 +636,22 @@ void msm_gpu_perfcntr_stop(struct msm_gpu *gpu)
 	pm_runtime_put_sync(&gpu->pdev->dev);
 }
 
+static void msm_gpu_hw_sample(struct msm_gpu *gpu, uint64_t *activetime,
+		uint64_t *totaltime)
+{
+	ktime_t time;
+
+	*activetime = gpu->funcs->gpu_busy(gpu,
+			&gpu->last_sample.busy_cycles);
+
+	time = ktime_get();
+	*totaltime = ktime_us_delta(time, gpu->last_sample.time);
+	gpu->last_sample.time = time;
+}
+
 /* returns -errno or # of cntrs sampled */
-int msm_gpu_perfcntr_sample(struct msm_gpu *gpu, uint32_t *activetime,
-		uint32_t *totaltime, uint32_t ncntrs, uint32_t *cntrs)
+int msm_gpu_perfcntr_sample(struct msm_gpu *gpu, uint64_t *activetime,
+		uint64_t *totaltime, uint32_t ncntrs, uint32_t *cntrs)
 {
 	unsigned long flags;
 	int ret;
@@ -646,13 +663,18 @@ int msm_gpu_perfcntr_sample(struct msm_gpu *gpu, uint32_t *activetime,
 		goto out;
 	}
 
+	ret = update_hw_cntrs(gpu, ncntrs, cntrs);
+
+	if (gpu->funcs->gpu_busy) {
+		msm_gpu_hw_sample(gpu, activetime, totaltime);
+		goto out;
+	}
+
 	*activetime = gpu->activetime;
 	*totaltime = gpu->totaltime;
 
 	gpu->activetime = gpu->totaltime = 0;
 
-	ret = update_hw_cntrs(gpu, ncntrs, cntrs);
-
 out:
 	spin_unlock_irqrestore(&gpu->perf_lock, flags);
 
diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h
index 0ff23ca..7dc775f 100644
--- a/drivers/gpu/drm/msm/msm_gpu.h
+++ b/drivers/gpu/drm/msm/msm_gpu.h
@@ -90,6 +90,7 @@ struct msm_gpu {
 	struct {
 		bool active;
 		ktime_t time;
+		u64 busy_cycles;
 	} last_sample;
 	uint32_t totaltime, activetime;    /* sw counters */
 	uint32_t last_cntrs[5];            /* hw counters */
@@ -275,8 +276,8 @@ static inline void gpu_write64(struct msm_gpu *gpu, u32 lo, u32 hi, u64 val)
 
 void msm_gpu_perfcntr_start(struct msm_gpu *gpu);
 void msm_gpu_perfcntr_stop(struct msm_gpu *gpu);
-int msm_gpu_perfcntr_sample(struct msm_gpu *gpu, uint32_t *activetime,
-		uint32_t *totaltime, uint32_t ncntrs, uint32_t *cntrs);
+int msm_gpu_perfcntr_sample(struct msm_gpu *gpu, uint64_t *activetime,
+		uint64_t *totaltime, uint32_t ncntrs, uint32_t *cntrs);
 
 void msm_gpu_retire(struct msm_gpu *gpu);
 void msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
diff --git a/drivers/gpu/drm/msm/msm_perf.c b/drivers/gpu/drm/msm/msm_perf.c
index 5ab21bd..318f7dd 100644
--- a/drivers/gpu/drm/msm/msm_perf.c
+++ b/drivers/gpu/drm/msm/msm_perf.c
@@ -17,7 +17,7 @@
 
 /* For profiling, userspace can:
  *
- *   tail -f /sys/kernel/debug/dri/<minor>/gpu
+ *   tail -f /sys/kernel/debug/dri/<minor>/perf
  *
  * This will enable performance counters/profiling to track the busy time
  * and any gpu specific performance counters that are supported.
@@ -85,9 +85,9 @@ static int refill_buf(struct msm_perf_state *perf)
 		}
 	} else {
 		/* Sample line: */
-		uint32_t activetime = 0, totaltime = 0;
+		uint64_t activetime = 0, totaltime = 0;
 		uint32_t cntrs[5];
-		uint32_t val;
+		uint64_t val;
 		int ret;
 
 		/* sleep until next sample time: */
@@ -101,14 +101,14 @@ static int refill_buf(struct msm_perf_state *perf)
 			return ret;
 
 		val = totaltime ? 1000 * activetime / totaltime : 0;
-		n = snprintf(ptr, rem, "%3d.%d%%", val / 10, val % 10);
+		n = snprintf(ptr, rem, "%3llu.%llu%%", val / 10, val % 10);
 		ptr += n;
 		rem -= n;
 
 		for (i = 0; i < ret; i++) {
 			/* cycle counters (I think).. convert to MHz.. */
 			val = cntrs[i] / 10000;
-			n = snprintf(ptr, rem, "\t%5d.%02d",
+			n = snprintf(ptr, rem, "\t%5llu.%02llu",
 					val / 100, val % 100);
 			ptr += n;
 			rem -= n;
-- 
1.9.1



More information about the dri-devel mailing list