[Intel-gfx] [RFC 8/8] drm/i915: Support for retrieving MMIO register values alongwith timestamps through perf

sourab.gupta at intel.com sourab.gupta at intel.com
Tue Aug 4 22:55:44 PDT 2015


From: Sourab Gupta <sourab.gupta at intel.com>

This patch adds support for retrieving MMIO register values alongwith
timestamps and forwarding them to userspace through perf.
The userspace can request upto 8 MMIO register values to be dumped.
The addresses of upto 8 MMIO registers can be passed through perf attr
config. The registers are checked against a whitelist before passing them
on. The commands to dump the values of these MMIO registers are then
inserted into the ring alongwith commands to dump the timestamps.

v2: Implement suggestions by Chris, pertaining to code restructuring, using
BUILD_BUG_ON etc.

Signed-off-by: Sourab Gupta <sourab.gupta at intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h     |  2 +
 drivers/gpu/drm/i915/i915_oa_perf.c | 99 ++++++++++++++++++++++++++++++++++---
 include/uapi/drm/i915_drm.h         | 11 ++++-
 3 files changed, 104 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index c3e823f..5c6e37a 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2022,7 +2022,9 @@ struct drm_i915_private {
 #define I915_GEN_PMU_SAMPLE_RING		(1<<0)
 #define I915_GEN_PMU_SAMPLE_PID			(1<<1)
 #define I915_GEN_PMU_SAMPLE_TAG			(1<<2)
+#define I915_GEN_PMU_SAMPLE_MMIO		(1<<3)
 		int sample_info_flags;
+		u32 mmio_list[I915_PMU_MMIO_NUM];
 	} gen_pmu;
 
 	void (*emit_profiling_data[I915_PROFILE_MAX])
diff --git a/drivers/gpu/drm/i915/i915_oa_perf.c b/drivers/gpu/drm/i915/i915_oa_perf.c
index e065e06..4197dbd 100644
--- a/drivers/gpu/drm/i915/i915_oa_perf.c
+++ b/drivers/gpu/drm/i915/i915_oa_perf.c
@@ -12,6 +12,7 @@
 #define PERIOD max_t(u64, 10000, NSEC_PER_SEC / FREQUENCY)
 
 #define TS_DATA_SIZE sizeof(struct drm_i915_ts_data)
+#define MMIO_DATA_SIZE sizeof(struct drm_i915_mmio_data)
 #define CTX_INFO_SIZE sizeof(struct drm_i915_ts_node_ctx_id)
 #define RING_INFO_SIZE sizeof(struct drm_i915_ts_node_ring_id)
 #define PID_INFO_SIZE sizeof(struct drm_i915_ts_node_pid)
@@ -129,8 +130,8 @@ static void i915_gen_emit_ts_data(struct drm_i915_gem_request *req,
 	struct drm_i915_private *dev_priv = ring->dev->dev_private;
 	struct drm_i915_gem_object *obj = dev_priv->gen_pmu.buffer.obj;
 	struct i915_gen_pmu_node *entry;
-	u32 addr = 0;
-	int ret;
+	u32 mmio_addr, addr = 0;
+	int ret, i, count = 0;
 
 	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
 	if (entry == NULL) {
@@ -138,7 +139,12 @@ static void i915_gen_emit_ts_data(struct drm_i915_gem_request *req,
 		return;
 	}
 
-	ret = intel_ring_begin(ring, 6);
+	for (count = 0; count < I915_PMU_MMIO_NUM; count++) {
+		if (0 == dev_priv->gen_pmu.mmio_list[count])
+			break;
+	}
+
+	ret = intel_ring_begin(ring, 6 + 4*count);
 	if (ret) {
 		kfree(entry);
 		return;
@@ -173,6 +179,7 @@ static void i915_gen_emit_ts_data(struct drm_i915_gem_request *req,
 	spin_unlock(&dev_priv->gen_pmu.lock);
 
 	addr = dev_priv->gen_pmu.buffer.gtt_offset + entry->offset;
+	mmio_addr = addr + TS_DATA_SIZE;
 
 	if (ring->id == RCS) {
 		intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
@@ -192,6 +199,32 @@ static void i915_gen_emit_ts_data(struct drm_i915_gem_request *req,
 		intel_ring_emit(ring, MI_NOOP);
 		intel_ring_emit(ring, MI_NOOP);
 	}
+
+	/*
+	 * Note:
+	 * 1) The optimization to store the register array with a single
+	 * command doesn't seem to be working with SRM commands. Hence, have a
+	 * loop with a single SRM command repeated. Missing anything here?
+	 * 2) This fn is presently called before and after batch buffer. As
+	 * such, there should already be the CS stall commands called after BB.
+	 * Is there a need/necessity for a command barrier to be inserted in
+	 * ring here? If so, which commands? (CS Stall?)
+	 */
+	for (i = 0; i < I915_PMU_MMIO_NUM; i++) {
+		if (0 == dev_priv->gen_pmu.mmio_list[i])
+			break;
+
+		addr = mmio_addr +
+			i * sizeof(dev_priv->gen_pmu.mmio_list[i]);
+
+		intel_ring_emit(ring,
+				MI_STORE_REGISTER_MEM(1) |
+				MI_SRM_LRM_GLOBAL_GTT);
+		intel_ring_emit(ring, dev_priv->gen_pmu.mmio_list[i]);
+		intel_ring_emit(ring, addr);
+		intel_ring_emit(ring, MI_NOOP);
+	}
+
 	intel_ring_advance(ring);
 
 	obj->base.write_domain = I915_GEM_DOMAIN_RENDER;
@@ -553,7 +586,7 @@ static void forward_one_gen_pmu_sample(struct drm_i915_private *dev_priv,
 {
 	struct perf_sample_data data;
 	struct perf_event *event = dev_priv->gen_pmu.exclusive_event;
-	int snapshot_size;
+	int snapshot_size, mmio_size;
 	u8 *snapshot, *current_ptr;
 	struct drm_i915_ts_node_ctx_id *ctx_info;
 	struct drm_i915_ts_node_ring_id *ring_info;
@@ -565,10 +598,16 @@ static void forward_one_gen_pmu_sample(struct drm_i915_private *dev_priv,
 			(RING_INFO_SIZE != 8) || (PID_INFO_SIZE != 8) ||
 			(TAG_INFO_SIZE != 8));
 
+	if (dev_priv->gen_pmu.sample_info_flags & I915_GEN_PMU_SAMPLE_MMIO)
+		mmio_size = MMIO_DATA_SIZE;
+	else
+		mmio_size = 0;
+
 	snapshot = dev_priv->gen_pmu.buffer.addr + node->offset;
-	snapshot_size = TS_DATA_SIZE + CTX_INFO_SIZE;
+	snapshot_size = TS_DATA_SIZE + mmio_size + CTX_INFO_SIZE;
 
-	ctx_info = (struct drm_i915_ts_node_ctx_id *)(snapshot + TS_DATA_SIZE);
+	ctx_info = (struct drm_i915_ts_node_ctx_id *)
+				(snapshot + mmio_size +	TS_DATA_SIZE);
 	ctx_info->ctx_id = node->ctx_id;
 	current_ptr = snapshot + snapshot_size;
 
@@ -1046,6 +1085,9 @@ static int init_gen_pmu_buffer(struct perf_event *event)
 	if (dev_priv->gen_pmu.sample_info_flags & I915_GEN_PMU_SAMPLE_TAG)
 		node_size += TAG_INFO_SIZE;
 
+	if (dev_priv->gen_pmu.sample_info_flags & I915_GEN_PMU_SAMPLE_MMIO)
+		node_size += MMIO_DATA_SIZE;
+
 	/* size has to be aligned to 8 bytes */
 	node_size = ALIGN(node_size, 8);
 	dev_priv->gen_pmu.buffer.node_size = node_size;
@@ -1641,6 +1683,40 @@ err_size:
 	goto out;
 }
 
+
+static int check_mmio_whitelist(struct drm_i915_private *dev_priv,
+				struct drm_i915_gen_pmu_attr *gen_attr)
+{
+
+#define GEN_RANGE(l, h) GENMASK(h, l)
+	static const struct register_whitelist {
+		uint64_t offset;
+		uint32_t size;
+		/* supported gens, 0x10 for 4, 0x30 for 4 and 5, etc. */
+		uint32_t gen_bitmask;
+	} whitelist[] = {
+		{ GEN6_GT_GFX_RC6, 4, GEN_RANGE(7, 9) },
+		{ GEN6_GT_GFX_RC6p, 4, GEN_RANGE(7, 9) },
+	};
+	int i, count;
+
+	for (count = 0; count < I915_PMU_MMIO_NUM; count++) {
+		if (!gen_attr->mmio_list[count])
+			break;
+
+		for (i = 0; i < ARRAY_SIZE(whitelist); i++) {
+			if (whitelist[i].offset == gen_attr->mmio_list[count] &&
+			    (1 << INTEL_INFO(dev_priv)->gen &
+					whitelist[i].gen_bitmask))
+				break;
+		}
+
+		if (i == ARRAY_SIZE(whitelist))
+			return -EINVAL;
+	}
+	return 0;
+}
+
 static int i915_gen_event_init(struct perf_event *event)
 {
 	struct drm_i915_private *dev_priv =
@@ -1670,6 +1746,17 @@ static int i915_gen_event_init(struct perf_event *event)
 	if (gen_attr.sample_tag)
 		dev_priv->gen_pmu.sample_info_flags |= I915_GEN_PMU_SAMPLE_TAG;
 
+	if (gen_attr.sample_mmio) {
+		ret = check_mmio_whitelist(dev_priv, &gen_attr);
+		if (ret)
+			return ret;
+
+		dev_priv->gen_pmu.sample_info_flags |=
+				I915_GEN_PMU_SAMPLE_MMIO;
+		memcpy(dev_priv->gen_pmu.mmio_list, gen_attr.mmio_list,
+				sizeof(dev_priv->gen_pmu.mmio_list));
+	}
+
 	/* To avoid the complexity of having to accurately filter
 	 * data and marshal to the appropriate client
 	 * we currently only allow exclusive access */
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index db91098..4153cdf 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -81,7 +81,7 @@
 
 #define I915_OA_ATTR_SIZE_VER0		32  /* sizeof first published struct */
 
-#define I915_GEN_PMU_ATTR_SIZE_VER0	8  /* sizeof first published struct */
+#define I915_GEN_PMU_ATTR_SIZE_VER0	40  /* sizeof first published struct */
 
 typedef struct _drm_i915_oa_attr {
 	__u32 size;
@@ -105,7 +105,9 @@ struct drm_i915_gen_pmu_attr {
 	__u32 sample_ring:1,
 		sample_pid:1,
 		sample_tag:1,
-		__reserved_1:29;
+		sample_mmio:1,
+		__reserved_1:28;
+	__u32 mmio_list[8];
 };
 
 /* Header for PERF_RECORD_DEVICE type events */
@@ -155,6 +157,11 @@ struct drm_i915_ts_data {
 	__u32 ts_high;
 };
 
+struct drm_i915_mmio_data {
+#define I915_PMU_MMIO_NUM	8
+	__u32 mmio[I915_PMU_MMIO_NUM];
+};
+
 struct drm_i915_ts_node_ctx_id {
 	__u32 ctx_id;
 	__u32 pad;
-- 
1.8.5.1



More information about the Intel-gfx mailing list