[Intel-gfx] [PATCH 14/14] drm/i915: Support for capturing MMIO register values

Sagar Arun Kamble sagar.a.kamble at intel.com
Thu Sep 7 10:06:14 UTC 2017


From: Sourab Gupta <sourab.gupta at intel.com>

This patch adds support for capturing MMIO register values through the
i915 perf interface.
Userspace can request up to 8 MMIO register values to be dumped.
The addresses of these registers are passed through the 'value' field of
the corresponding property when opening the stream.
The commands to dump the values of these MMIO registers are then
inserted into the ring along with the other commands.

v2: Updated error return on copy_from_user failure. (Chris)

v3: Adjusted the patching of request ringbuffer contents w.r.t. the new
way of allocating samples.

Testcase: igt/intel_perf_dapc/perf-mmio
Signed-off-by: Sourab Gupta <sourab.gupta at intel.com>
Signed-off-by: Sagar Arun Kamble <sagar.a.kamble at intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h         |  10 ++
 drivers/gpu/drm/i915/i915_gem_request.h |   3 +
 drivers/gpu/drm/i915/i915_perf.c        | 193 +++++++++++++++++++++++++++++++-
 drivers/gpu/drm/i915/intel_ringbuffer.h |   3 +
 include/uapi/drm/i915_drm.h             |  14 +++
 5 files changed, 220 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index d9f12a5..caf855c 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2274,6 +2274,13 @@ struct i915_perf_cs_sample {
 	u32 ts_offset;
 
 	/**
+	 * @mmio_offset: Offset into ``&stream->cs_buffer`` where the mmio reg
+	 * values for this perf sample will be collected (if the stream is
+	 * configured for collection of mmio data)
+	 */
+	u32 mmio_offset;
+
+	/**
 	 * @size: buffer size corresponding to this perf sample
 	 */
 	u32 size;
@@ -2751,6 +2758,9 @@ struct drm_i915_private {
 			struct i915_oa_ops ops;
 			const struct i915_oa_format *oa_formats;
 		} oa;
+
+		u32 num_mmio;
+		u32 mmio_list[I915_PERF_MMIO_NUM_MAX];
 	} perf;
 
 	/* Abstract the submission mechanism (legacy ringbuffer or execlists) away */
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index 691a0eb..60aee3f 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -29,6 +29,7 @@
 
 #include "i915_gem.h"
 #include "i915_sw_fence.h"
+#include <drm/i915_drm.h>
 
 struct drm_file;
 struct drm_i915_gem_object;
@@ -202,6 +203,8 @@ struct drm_i915_gem_request {
 	u32 tag;
 	u32 *pre_ts_offset;
 	u32 *post_ts_offset;
+	u32 *pre_mmio_offset[I915_PERF_MMIO_NUM_MAX];
+	u32 *post_mmio_offset[I915_PERF_MMIO_NUM_MAX];
 };
 
 extern const struct dma_fence_ops i915_fence_ops;
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 630af3e..35d18f9 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -304,6 +304,7 @@ struct i915_perf_sample_data {
 	u64 tag;
 	u64 ts;
 	const u8 *report;
+	const u8 *mmio;
 };
 
 /* For sysctl proc_dointvec_minmax of i915_oa_max_sample_rate
@@ -361,6 +362,7 @@ struct i915_perf_sample_data {
 #define SAMPLE_PID	      (1<<3)
 #define SAMPLE_TAG	      (1<<4)
 #define SAMPLE_TS	      (1<<5)
+#define SAMPLE_MMIO	      (1<<6)
 
 /**
  * struct perf_open_properties - for validated properties given to open a stream
@@ -584,6 +586,64 @@ static int i915_emit_ts_capture(struct drm_i915_gem_request *request,
 }
 
 /**
+ * i915_emit_mmio_capture - Insert the commands to capture mmio register
+ * values into the GPU command stream
+ * @request: request in whose context the mmio data is being collected; the
+ *           register list comes from its engine.
+ * @preallocate: true records the address slots in pre_mmio_offset[] and adds
+ *               the command length to reserved_space; false uses
+ *               post_mmio_offset[] and subtracts it again.
+ *
+ * One four-dword store-register packet is written per entry of
+ * engine->mmio_list. The destination-address dword of each packet is skipped
+ * here; a pointer to it is saved in request->pre_mmio_offset[] or
+ * request->post_mmio_offset[] so it can be filled in later.
+ *
+ * NOTE(review): the same count is passed to intel_ring_begin() and applied
+ * to reserved_space - confirm both are expressed in the same unit.
+ *
+ * Returns: 0 on success, otherwise the error carried by the pointer that
+ * intel_ring_begin() returned.
+ */
+static int i915_emit_mmio_capture(struct drm_i915_gem_request *request,
+				  bool preallocate)
+{
+	struct drm_i915_private *dev_priv = request->i915;
+	struct intel_engine_cs *engine = request->engine;
+	const bool gen8_plus = INTEL_GEN(dev_priv) >= 8;
+	u32 **addr_slot = preallocate ? request->pre_mmio_offset :
+					request->post_mmio_offset;
+	int n, num_mmio = engine->num_mmio;
+	u32 len = 4 * num_mmio;
+	u32 srm, *cs;
+
+	if (preallocate)
+		request->reserved_space += len;
+	else
+		request->reserved_space -= len;
+
+	cs = intel_ring_begin(request, 4 * num_mmio);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
+
+	srm = gen8_plus ? MI_STORE_REGISTER_MEM_GEN8 : MI_STORE_REGISTER_MEM;
+	srm |= MI_SRM_LRM_GLOBAL_GTT;
+
+	for (n = 0; n < num_mmio; n++) {
+		*cs++ = srm;
+		*cs++ = engine->mmio_list[n];
+		/* Address dword: written later, once a sample is assigned */
+		addr_slot[n] = cs++;
+		/* gen8+ takes a zero dword here, earlier gens an MI_NOOP */
+		*cs++ = gen8_plus ? 0 : MI_NOOP;
+	}
+
+	intel_ring_advance(request, cs);
+
+	return 0;
+}
+
+/**
  * i915_perf_stream_emit_sample_capture - Insert the commands to capture perf
  * metrics into the GPU command stream
  * @stream: Stream to which this request corresponds.
@@ -622,6 +671,12 @@ static void i915_perf_stream_emit_sample_capture(
 	if (stream->sample_flags & SAMPLE_TAG)
 		request->tag = tag;
 
+	if (stream->sample_flags & SAMPLE_MMIO) {
+		ret = i915_emit_mmio_capture(request, preallocate);
+		if (ret)
+			DRM_ERROR("Emit of MMIO capture commands failed\n");
+	}
+
 	reservation_object_lock(resv, NULL);
 	if (reservation_object_reserve_shared(resv) == 0)
 		reservation_object_add_shared_fence(resv, &request->fence);
@@ -738,6 +793,37 @@ static void i915_perf_stream_patch_sample_ts(struct i915_perf_stream *stream,
 	}
 }
 
+/*
+ * Redirect the MMIO store commands recorded for @request to the slice of the
+ * stream's CS buffer assigned to @sample: register i lands at
+ * cs_buffer start + sample->mmio_offset + 4 * i. sample->id selects whether
+ * the pre- or the post-request commands are patched.
+ */
+static void i915_perf_stream_patch_sample_mmio(struct i915_perf_stream *stream,
+					struct drm_i915_gem_request *request,
+					struct i915_perf_cs_sample *sample)
+{
+	const int num_mmio = stream->engine->num_mmio;
+	u32 base = stream->cs_buffer.vma->node.start + sample->mmio_offset;
+	u32 **addr_slot;
+	int n;
+
+	switch (sample->id) {
+	case PRE_REQUEST_SAMPLE_ID:
+		addr_slot = request->pre_mmio_offset;
+		break;
+	case POST_REQUEST_SAMPLE_ID:
+		addr_slot = request->post_mmio_offset;
+		break;
+	default:
+		DRM_ERROR("Invalid sample being patched\n");
+		return;
+	}
+
+	for (n = 0; n < num_mmio; n++)
+		*addr_slot[n] = base + 4 * n;
+}
+
 /**
  * i915_perf_stream_patch_request - Assign free sample. If none available,
  * remove one. Patch offset of the perf sample address with the one from
@@ -776,6 +854,9 @@ static void i915_perf_stream_patch_request(struct i915_perf_stream *stream,
 		else if (stream->sample_flags & SAMPLE_TS)
 			i915_perf_stream_patch_sample_ts(stream, request,
 							 sample);
+		if (stream->sample_flags & SAMPLE_MMIO)
+			i915_perf_stream_patch_sample_mmio(stream, request,
+							   sample);
 
 		spin_unlock_irqrestore(&stream->samples_lock, flags);
 		sample_id++;
@@ -1109,6 +1190,12 @@ static int append_perf_sample(struct i915_perf_stream *stream,
 		buf += I915_PERF_TS_SAMPLE_SIZE;
 	}
 
+	if (sample_flags & SAMPLE_MMIO) {
+		if (copy_to_user(buf, data->mmio, 4 * stream->engine->num_mmio))
+			return -EFAULT;
+		buf += 4 * stream->engine->num_mmio;
+	}
+
 	if (sample_flags & SAMPLE_OA_REPORT) {
 		if (copy_to_user(buf, data->report, report_size))
 			return -EFAULT;
@@ -1158,6 +1245,7 @@ static int append_oa_buffer_sample(struct i915_perf_stream *stream,
 	struct drm_i915_private *dev_priv = stream->dev_priv;
 	u32 sample_flags = stream->sample_flags;
 	struct i915_perf_sample_data data = { 0 };
+	u32 mmio_list_dummy[I915_PERF_MMIO_NUM_MAX] = { 0 };
 
 	if (sample_flags & SAMPLE_OA_SOURCE)
 		data.source = I915_PERF_SAMPLE_OA_SOURCE_OABUFFER;
@@ -1175,6 +1263,10 @@ static int append_oa_buffer_sample(struct i915_perf_stream *stream,
 	if (sample_flags & SAMPLE_TS)
 		data.ts = get_gpu_ts_from_oa_report(dev_priv, report);
 
+	/* Periodic OA samples don't have mmio associated with them */
+	if (sample_flags & SAMPLE_MMIO)
+		data.mmio = (u8 *)mmio_list_dummy;
+
 	if (sample_flags & SAMPLE_OA_REPORT)
 		data.report = report;
 
@@ -1813,6 +1905,9 @@ static int append_cs_buffer_sample(struct i915_perf_stream *stream,
 		data.ts = gpu_ts;
 	}
 
+	if (sample_flags & SAMPLE_MMIO)
+		data.mmio = stream->cs_buffer.vaddr + node->mmio_offset;
+
 	return append_perf_sample(stream, buf, count, offset, &data);
 }
 
@@ -2443,6 +2538,9 @@ static int init_perf_samples(struct i915_perf_stream *stream)
 		sample_size += I915_PERF_TS_SAMPLE_SIZE;
 	}
 
+	if (stream->sample_flags & SAMPLE_MMIO)
+		sample_size += 4 * stream->engine->num_mmio;
+
 	while ((offset + sample_size) < stream->cs_buffer.vma->size) {
 		sample = kzalloc(sizeof(*sample), GFP_KERNEL);
 		if (sample == NULL) {
@@ -2464,6 +2562,12 @@ static int init_perf_samples(struct i915_perf_stream *stream)
 			offset = sample->ts_offset + I915_PERF_TS_SAMPLE_SIZE;
 		}
 
+		if (stream->sample_flags & SAMPLE_MMIO) {
+			sample->mmio_offset = offset;
+			offset = sample->mmio_offset +
+						4 * stream->engine->num_mmio;
+		}
+
 		list_add_tail(&sample->link, &stream->free_samples);
 	}
 
@@ -3062,9 +3166,12 @@ static int i915_perf_stream_init(struct i915_perf_stream *stream,
 	bool require_oa_unit = props->sample_flags & (SAMPLE_OA_REPORT |
 						      SAMPLE_OA_SOURCE);
 	bool cs_sample_data = props->sample_flags & (SAMPLE_OA_REPORT |
-						     SAMPLE_TS);
+						     SAMPLE_TS |
+						     SAMPLE_MMIO);
 	bool require_cs_mode = props->sample_flags & (SAMPLE_PID |
-						      SAMPLE_TAG);
+						      SAMPLE_TAG |
+						      SAMPLE_MMIO);
+
 	struct i915_perf_stream *curr_stream;
 	struct intel_engine_cs *engine = NULL;
 	int ret;
@@ -3240,7 +3347,7 @@ static int i915_perf_stream_init(struct i915_perf_stream *stream,
 	}
 
 	if (require_cs_mode && !props->cs_mode) {
-		DRM_ERROR("PID/TAG/TS sampling requires engine "
+		DRM_ERROR("PID/TAG/TS/MMIO sampling requires engine "
 			  "to be specified");
 		ret = -EINVAL;
 		goto err_enable;
@@ -3294,6 +3401,22 @@ static int i915_perf_stream_init(struct i915_perf_stream *stream,
 		srcu_read_unlock(&engine->srcu, idx);
 
 		stream->engine = engine;
+
+		if (props->sample_flags & SAMPLE_MMIO) {
+			/*
+			 * Take a per-engine copy of the validated register
+			 * list. memset() counts bytes, so clear sizeof() the
+			 * array rather than its element count.
+			 */
+			memset(engine->mmio_list, 0, sizeof(engine->mmio_list));
+			engine->num_mmio = dev_priv->perf.num_mmio;
+			memcpy(engine->mmio_list, dev_priv->perf.mmio_list,
+			       4 * engine->num_mmio);
+
+			stream->sample_flags |= SAMPLE_MMIO;
+			stream->sample_size += 4 * engine->num_mmio;
+		}
+
 		ret = alloc_cs_buffer(stream);
 		if (ret)
 			goto err_enable;
@@ -3860,6 +3978,93 @@ static u64 oa_exponent_to_ns(struct drm_i915_private *dev_priv, int exponent)
 		       dev_priv->perf.oa.timestamp_frequency);
 }
 
+/*
+ * check_mmio_whitelist - validate the register list staged in dev_priv->perf
+ * @dev_priv: i915 device instance
+ * @num_mmio: number of leading entries of dev_priv->perf.mmio_list to check
+ *
+ * Each checked entry must be non-zero and equal the offset of a whitelisted
+ * register whose gen_bitmask has the bit for this device's gen set.
+ *
+ * Returns: 0 if every entry passes, -EINVAL otherwise.
+ */
+static int check_mmio_whitelist(struct drm_i915_private *dev_priv, u32 num_mmio)
+{
+#define GEN_RANGE(l, h) GENMASK(h, l)
+	static const struct register_whitelist {
+		i915_reg_t mmio;
+		uint32_t size;
+		/* supported gens, 0x10 for 4, 0x30 for 4 and 5, etc. */
+		uint32_t gen_bitmask;
+	} whitelist[] = {
+		{ GEN6_GT_GFX_RC6, 4, GEN_RANGE(7, 9) },
+		{ GEN6_GT_GFX_RC6p, 4, GEN_RANGE(7, 9) },
+	};
+	int i, count;
+
+	for (count = 0; count < num_mmio; count++) {
+		/* Coarse check on mmio reg addresses being non zero */
+		if (!dev_priv->perf.mmio_list[count])
+			return -EINVAL;
+
+		for (i = 0; i < ARRAY_SIZE(whitelist); i++) {
+			if ((i915_mmio_reg_offset(whitelist[i].mmio) ==
+				dev_priv->perf.mmio_list[count]) &&
+			    (1 << INTEL_INFO(dev_priv)->gen &
+					whitelist[i].gen_bitmask))
+				break;
+		}
+
+		if (i == ARRAY_SIZE(whitelist))
+			return -EINVAL;
+	}
+	return 0;
+}
+
+/*
+ * copy_mmio_list - fetch and validate the MMIO register list from userspace
+ * @dev_priv: i915 device instance
+ * @mmio: user pointer to a u32 count followed by that many u32 register
+ *        offsets (the layout of struct drm_i915_perf_mmio_list)
+ *
+ * The list is copied into dev_priv->perf.mmio_list and then checked against
+ * the whitelist; dev_priv->perf.num_mmio is updated only if the check passes.
+ *
+ * Returns: 0 on success; -EINVAL for a NULL @mmio, a count larger than
+ * I915_PERF_MMIO_NUM_MAX or a rejected register; the get_user() error or
+ * -EFAULT if user memory cannot be read.
+ */
+static int copy_mmio_list(struct drm_i915_private *dev_priv,
+				void __user *mmio)
+{
+	void __user *mmio_list = ((u8 __user *)mmio + 4);
+	u32 num_mmio;
+	int ret;
+
+	if (!mmio)
+		return -EINVAL;
+
+	ret = get_user(num_mmio, (u32 __user *)mmio);
+	if (ret)
+		return ret;
+
+	if (num_mmio > I915_PERF_MMIO_NUM_MAX)
+		return -EINVAL;
+
+	/* memset() counts bytes: clear sizeof() the array, not its length */
+	memset(dev_priv->perf.mmio_list, 0, sizeof(dev_priv->perf.mmio_list));
+	if (copy_from_user(dev_priv->perf.mmio_list, mmio_list, 4 * num_mmio))
+		return -EFAULT;
+
+	ret = check_mmio_whitelist(dev_priv, num_mmio);
+	if (ret)
+		return ret;
+
+	dev_priv->perf.num_mmio = num_mmio;
+
+	return 0;
+}
+
 /**
  * read_properties_unlocked - validate + copy userspace stream open properties
  * @dev_priv: i915 device instance
@@ -4012,6 +4193,13 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv,
 		case DRM_I915_PERF_PROP_SAMPLE_TS:
 			props->sample_flags |= SAMPLE_TS;
 			break;
+		case DRM_I915_PERF_PROP_SAMPLE_MMIO:
+			/* value is the user address of the register list */
+			ret = copy_mmio_list(dev_priv, u64_to_user_ptr(value));
+			if (ret)
+				return ret;
+			props->sample_flags |= SAMPLE_MMIO;
+			break;
 		case DRM_I915_PERF_PROP_MAX:
 			MISSING_CASE(id);
 			return -EINVAL;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 2fcfaf4..3ad429a 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -449,6 +449,9 @@ struct intel_engine_cs {
 	 */
 	struct i915_perf_stream __rcu *exclusive_stream;
 	struct srcu_struct srcu;
+
+	u32 num_mmio;
+	u32 mmio_list[I915_PERF_MMIO_NUM_MAX];
 };
 
 static inline unsigned int
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 257418b..76e2b17 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1393,6 +1393,12 @@ enum drm_i915_perf_sample_oa_source {
 	I915_PERF_SAMPLE_OA_SOURCE_MAX	/* non-ABI */
 };
 
+#define I915_PERF_MMIO_NUM_MAX	8	/* capacity of mmio_list[] below */
+struct drm_i915_perf_mmio_list {
+	__u32 num_mmio;	/* number of entries used in mmio_list[] */
+	__u32 mmio_list[I915_PERF_MMIO_NUM_MAX];	/* register offsets */
+};
+
 enum drm_i915_perf_property_id {
 	/**
 	 * Open the stream for a specific context handle (as used with
@@ -1465,6 +1471,13 @@ enum drm_i915_perf_property_id {
 	 */
 	DRM_I915_PERF_PROP_SAMPLE_TS,
 
+	/**
+	 * This property requests inclusion of mmio register values in the perf
+	 * sample data. The value of this property specifies the address of user
+	 * struct having the register addresses.
+	 */
+	DRM_I915_PERF_PROP_SAMPLE_MMIO,
+
 	DRM_I915_PERF_PROP_MAX /* non-ABI */
 };
 
@@ -1535,6 +1548,7 @@ enum drm_i915_perf_record_type {
 	 *     { u64 pid; } && DRM_I915_PERF_PROP_SAMPLE_PID
 	 *     { u64 tag; } && DRM_I915_PERF_PROP_SAMPLE_TAG
 	 *     { u64 timestamp; } && DRM_I915_PERF_PROP_SAMPLE_TS
+	 *     { u32 mmio[]; } && DRM_I915_PERF_PROP_SAMPLE_MMIO
 	 *     { u32 oa_report[]; } && DRM_I915_PERF_PROP_SAMPLE_OA
 	 * };
 	 */
-- 
1.9.1



More information about the Intel-gfx mailing list