[Intel-gfx] [PATCH 15/15] drm/i915: Support for capturing MMIO register values
sourab.gupta at intel.com
Fri Nov 4 09:30:44 UTC 2016
From: Sourab Gupta <sourab.gupta at intel.com>
This patch adds support for capturing MMIO register values through the
i915 perf interface.

Userspace can request up to 8 MMIO register values to be dumped with each
sample. The addresses of these registers are passed through the 'value'
field of the corresponding property while opening the stream.

The commands to dump the values of these MMIO registers are then
inserted into the ring along with the other commands.
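For illustration, a minimal userspace sketch of opening a stream with MMIO
sampling. This is not part of the patch: it assumes the perf open ioctl,
open param struct and property-pair layout from the base i915 perf series,
elides the ring/engine selection property and error handling, and uses the
RC6 residency register offsets matching the whitelist added here.

  #include <stdint.h>
  #include <sys/ioctl.h>
  #include <drm/i915_drm.h>

  static int open_mmio_stream(int drm_fd)
  {
          /* Registers to sample with each CS sample; offsets assumed to
           * match the whitelisted GEN6_GT_GFX_RC6 (0x138108) and
           * GEN6_GT_GFX_RC6p (0x13810c) registers.
           */
          struct drm_i915_perf_mmio_list mmio = {
                  .num_mmio = 2,
                  .mmio_list = { 0x138108, 0x13810c },
          };
          uint64_t properties[] = {
                  /* ring/engine selection and other sample properties
                   * elided for brevity */
                  DRM_I915_PERF_PROP_SAMPLE_MMIO,
                  (uintptr_t)&mmio,
          };
          struct drm_i915_perf_open_param param = {
                  .flags = I915_PERF_FLAG_FD_CLOEXEC,
                  .num_properties = sizeof(properties) /
                                    (2 * sizeof(uint64_t)),
                  .properties_ptr = (uintptr_t)properties,
          };

          /* Returns the stream fd on success, which can then be read to
           * retrieve samples containing the requested register values. */
          return ioctl(drm_fd, DRM_IOCTL_I915_PERF_OPEN, &param);
  }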
Signed-off-by: Sourab Gupta <sourab.gupta at intel.com>
---
drivers/gpu/drm/i915/i915_drv.h | 4 +
drivers/gpu/drm/i915/i915_perf.c | 153 ++++++++++++++++++++++++++++++++++++++-
include/uapi/drm/i915_drm.h | 14 ++++
3 files changed, 168 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 557a124..14cd9cf 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1885,6 +1885,7 @@ struct i915_perf_cs_data_node {
u32 start_offset;
u32 oa_offset;
u32 ts_offset;
+ u32 mmio_offset;
/* buffer size corresponding to this entry */
u32 size;
@@ -2230,6 +2231,9 @@ struct drm_i915_private {
wait_queue_head_t poll_wq[I915_NUM_ENGINES];
atomic_t pollin[I915_NUM_ENGINES];
+ u32 num_mmio;
+ u32 mmio_list[I915_PERF_MMIO_NUM_MAX];
+
struct {
u32 specific_ctx_id;
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index b11e953..ed6b31f 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -277,6 +277,7 @@ struct sample_data {
u64 gpu_ts;
u64 clk_monoraw;
const u8 *report;
+ const u8 *mmio;
};
/* for sysctl proc_dointvec_minmax of i915_oa_min_timer_exponent */
@@ -335,6 +336,7 @@ static const enum intel_engine_id user_ring_map[I915_USER_RINGS + 1] = {
#define SAMPLE_TAG (1<<4)
#define SAMPLE_TS (1<<5)
#define SAMPLE_CLK_MONO_RAW (1<<6)
+#define SAMPLE_MMIO (1<<7)
struct perf_open_properties {
u32 sample_flags;
@@ -567,6 +569,9 @@ static int insert_perf_entry(struct drm_i915_private *dev_priv,
sample_ts = true;
}
+ if (sample_flags & SAMPLE_MMIO)
+ entry_size += 4*dev_priv->perf.num_mmio;
+
spin_lock(&dev_priv->perf.node_list_lock[id]);
if (list_empty(&dev_priv->perf.node_list[id])) {
offset = 0;
@@ -644,6 +649,10 @@ out:
entry->ts_offset = ALIGN(entry->ts_offset, TS_ADDR_ALIGN);
offset = entry->ts_offset + I915_PERF_TS_SAMPLE_SIZE;
}
+ if (sample_flags & SAMPLE_MMIO) {
+ entry->mmio_offset = offset;
+ offset = entry->mmio_offset + 4*dev_priv->perf.num_mmio;
+ }
list_add_tail(&entry->link, &dev_priv->perf.node_list[id]);
#ifndef CMD_STREAM_BUF_OVERFLOW_ALLOWED
@@ -744,6 +753,47 @@ static int i915_ring_stream_capture_ts(struct drm_i915_gem_request *req,
return 0;
}
+static int i915_ring_stream_capture_mmio(struct drm_i915_gem_request *req,
+ u32 offset)
+{
+ struct drm_i915_private *dev_priv = req->i915;
+ enum intel_engine_id id = req->engine->id;
+ struct intel_ring *ring = req->ring;
+ int num_mmio = dev_priv->perf.num_mmio;
+ u32 mmio_addr, addr = 0;
+ int ret, i;
+
+ ret = intel_ring_begin(req, 4*num_mmio);
+ if (ret)
+ return ret;
+
+ mmio_addr =
+ dev_priv->perf.command_stream_buf[id].vma->node.start + offset;
+
+ for (i = 0; i < num_mmio; i++) {
+ uint32_t cmd;
+
+ addr = mmio_addr + 4*i;
+
+ if (INTEL_INFO(dev_priv)->gen >= 8)
+ cmd = MI_STORE_REGISTER_MEM_GEN8 |
+ MI_SRM_LRM_GLOBAL_GTT;
+ else
+ cmd = MI_STORE_REGISTER_MEM |
+ MI_SRM_LRM_GLOBAL_GTT;
+
+ intel_ring_emit(ring, cmd);
+ intel_ring_emit(ring, dev_priv->perf.mmio_list[i]);
+ intel_ring_emit(ring, addr);
+ if (INTEL_INFO(dev_priv)->gen >= 8)
+ intel_ring_emit(ring, 0);
+ else
+ intel_ring_emit(ring, MI_NOOP);
+ }
+ intel_ring_advance(ring);
+ return 0;
+}
+
static void i915_ring_stream_cs_hook(struct i915_perf_stream *stream,
struct drm_i915_gem_request *req, u32 tag)
{
@@ -784,6 +834,12 @@ static void i915_ring_stream_cs_hook(struct i915_perf_stream *stream,
goto err_unref;
}
+ if (sample_flags & SAMPLE_MMIO) {
+ ret = i915_ring_stream_capture_mmio(req,
+ entry->mmio_offset);
+ if (ret)
+ goto err_unref;
+ }
i915_vma_move_to_active(dev_priv->perf.command_stream_buf[id].vma, req,
EXEC_OBJECT_WRITE);
@@ -1045,6 +1101,12 @@ static int append_sample(struct i915_perf_stream *stream,
buf += I915_PERF_TS_SAMPLE_SIZE;
}
+ if (sample_flags & SAMPLE_MMIO) {
+ if (copy_to_user(buf, data->mmio, 4*dev_priv->perf.num_mmio))
+ return -EFAULT;
+ buf += 4*dev_priv->perf.num_mmio;
+ }
+
if (sample_flags & SAMPLE_OA_REPORT) {
if (copy_to_user(buf, data->report, report_size))
return -EFAULT;
@@ -1102,6 +1164,7 @@ static int append_oa_buffer_sample(struct i915_perf_stream *stream,
struct drm_i915_private *dev_priv = stream->dev_priv;
u32 sample_flags = stream->sample_flags;
struct sample_data data = { 0 };
+ u32 mmio_list_dummy[I915_PERF_MMIO_NUM_MAX] = { 0 };
if (sample_flags & SAMPLE_OA_SOURCE_INFO) {
enum drm_i915_perf_oa_event_source source;
@@ -1142,6 +1205,10 @@ static int append_oa_buffer_sample(struct i915_perf_stream *stream,
data.clk_monoraw = get_clk_monoraw_from_gpu_ts(stream, gpu_ts);
}
+ /* Periodic OA samples don't have mmio associated with them */
+ if (sample_flags & SAMPLE_MMIO)
+ data.mmio = (u8 *)mmio_list_dummy;
+
if (sample_flags & SAMPLE_OA_REPORT)
data.report = report;
@@ -1696,6 +1763,10 @@ static int append_one_cs_sample(struct i915_perf_stream *stream,
stream, gpu_ts);
}
+ if (sample_flags & SAMPLE_MMIO)
+ data.mmio = dev_priv->perf.command_stream_buf[id].addr +
+ node->mmio_offset;
+
return append_sample(stream, buf, count, offset, &data);
}
@@ -2564,10 +2635,12 @@ static int i915_ring_stream_init(struct i915_perf_stream *stream,
bool require_oa_unit = props->sample_flags & (SAMPLE_OA_REPORT |
SAMPLE_OA_SOURCE_INFO);
bool require_cs_mode = props->sample_flags & (SAMPLE_PID |
- SAMPLE_TAG);
+ SAMPLE_TAG |
+ SAMPLE_MMIO);
bool cs_sample_data = props->sample_flags & (SAMPLE_OA_REPORT |
SAMPLE_TS |
- SAMPLE_CLK_MONO_RAW);
+ SAMPLE_CLK_MONO_RAW |
+ SAMPLE_MMIO);
int ret;
if ((props->sample_flags & SAMPLE_CTX_ID) && !props->cs_mode) {
@@ -2744,7 +2817,7 @@ static int i915_ring_stream_init(struct i915_perf_stream *stream,
if (require_cs_mode && !props->cs_mode) {
DRM_ERROR(
- "PID, TAG or TS sampling require a ring to be specified");
+ "PID, TAG, TS or MMIO sampling require a ring to be specified");
ret = -EINVAL;
goto cs_error;
}
@@ -2792,6 +2865,11 @@ static int i915_ring_stream_init(struct i915_perf_stream *stream,
stream->sample_size += 4;
}
+ if (props->sample_flags & SAMPLE_MMIO) {
+ stream->sample_flags |= SAMPLE_MMIO;
+ stream->sample_size += 4 * dev_priv->perf.num_mmio;
+ }
+
ret = alloc_command_stream_buf(dev_priv, stream->engine);
if (ret)
goto cs_error;
@@ -3349,6 +3427,69 @@ err:
return ret;
}
+static int check_mmio_whitelist(struct drm_i915_private *dev_priv, u32 num_mmio)
+{
+#define GEN_RANGE(l, h) GENMASK(h, l)
+ static const struct register_whitelist {
+ i915_reg_t mmio;
+ uint32_t size;
+ /* supported gens, 0x10 for 4, 0x30 for 4 and 5, etc. */
+ uint32_t gen_bitmask;
+ } whitelist[] = {
+ { GEN6_GT_GFX_RC6, 4, GEN_RANGE(7, 9) },
+ { GEN6_GT_GFX_RC6p, 4, GEN_RANGE(7, 9) },
+ };
+ int i, count;
+
+ for (count = 0; count < num_mmio; count++) {
+ /* Coarse check on mmio reg addresses being non zero */
+ if (!dev_priv->perf.mmio_list[count])
+ return -EINVAL;
+
+ for (i = 0; i < ARRAY_SIZE(whitelist); i++) {
+ if ((i915_mmio_reg_offset(whitelist[i].mmio) ==
+ dev_priv->perf.mmio_list[count]) &&
+ (1 << INTEL_INFO(dev_priv)->gen &
+ whitelist[i].gen_bitmask))
+ break;
+ }
+
+ if (i == ARRAY_SIZE(whitelist))
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static int copy_mmio_list(struct drm_i915_private *dev_priv,
+ void __user *mmio)
+{
+ void __user *mmio_list = ((u8 __user *)mmio + 4);
+ u32 num_mmio;
+ int ret;
+
+ if (!mmio)
+ return -EINVAL;
+
+ ret = get_user(num_mmio, (u32 __user *)mmio);
+ if (ret)
+ return ret;
+
+ if (num_mmio > I915_PERF_MMIO_NUM_MAX)
+ return -EINVAL;
+
+ memset(dev_priv->perf.mmio_list, 0, sizeof(dev_priv->perf.mmio_list));
+ if (copy_from_user(dev_priv->perf.mmio_list, mmio_list, 4*num_mmio))
+ return -EINVAL;
+
+ ret = check_mmio_whitelist(dev_priv, num_mmio);
+ if (ret)
+ return ret;
+
+ dev_priv->perf.num_mmio = num_mmio;
+
+ return 0;
+}
+
/* Note we copy the properties from userspace outside of the i915 perf
* mutex to avoid an awkward lockdep with mmap_sem.
*
@@ -3469,6 +3610,12 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv,
case DRM_I915_PERF_PROP_SAMPLE_CLOCK_MONOTONIC_RAW:
props->sample_flags |= SAMPLE_CLK_MONO_RAW;
break;
+ case DRM_I915_PERF_PROP_SAMPLE_MMIO:
+ ret = copy_mmio_list(dev_priv, (u64 __user *)value);
+ if (ret)
+ return ret;
+ props->sample_flags |= SAMPLE_MMIO;
+ break;
case DRM_I915_PERF_PROP_MAX:
BUG();
}
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 0108284..ef5d737 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1258,6 +1258,12 @@ enum drm_i915_perf_oa_event_source {
I915_PERF_OA_EVENT_SOURCE_MAX /* non-ABI */
};
+#define I915_PERF_MMIO_NUM_MAX 8
+struct drm_i915_perf_mmio_list {
+ __u32 num_mmio;
+ __u32 mmio_list[I915_PERF_MMIO_NUM_MAX];
+};
+
enum drm_i915_perf_property_id {
/**
* Open the stream for a specific context handle (as used with
@@ -1336,6 +1342,13 @@ enum drm_i915_perf_property_id {
*/
DRM_I915_PERF_PROP_SAMPLE_CLOCK_MONOTONIC_RAW,
+ /**
+ * This property requests inclusion of mmio register values in the perf
+ * sample data. The value of this property is the address of a userspace
+ * struct drm_i915_perf_mmio_list holding the register addresses.
+ */
+ DRM_I915_PERF_PROP_SAMPLE_MMIO,
+
DRM_I915_PERF_PROP_MAX /* non-ABI */
};
@@ -1406,6 +1419,7 @@ enum drm_i915_perf_record_type {
* { u32 tag; } && DRM_I915_PERF_PROP_SAMPLE_TAG
* { u64 gpu_ts; } && DRM_I915_PERF_PROP_SAMPLE_TS
* { u64 clk_mono; } && DRM_I915_PERF_PROP_SAMPLE_CLOCK_MONOTONIC
+ * { u32 mmio[]; } && DRM_I915_PERF_PROP_SAMPLE_MMIO
* { u32 oa_report[]; } && DRM_I915_PERF_PROP_SAMPLE_OA
* };
*/
--
1.9.1