[Intel-gfx] [RFC 7/7] drm/i915: Add support for retrieving MMIO register values in Gen Perf PMU
sourab.gupta at intel.com
Mon Jun 22 02:55:09 PDT 2015
From: Sourab Gupta <sourab.gupta at intel.com>
This patch adds support for retrieving MMIO register values through the Gen
Perf PMU interface. Through this interface, userspace can now request up to 8
MMIO register values to be dumped, along with the timestamp values that are
already dumped across batchbuffer boundaries.
Userspace can pass the addresses of up to 8 MMIO registers through the perf
attr config field. The commands to dump the values of these MMIO registers are
then inserted into the ring along with the commands that dump the timestamps.
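As an illustration, a minimal userspace sketch of programming the MMIO list
follows. The PMU type id and the register offset below are placeholder
assumptions, not defined by this patch; per the kernel side, the first zero
entry terminates the list:

	#include <stdint.h>
	#include <string.h>
	#include <unistd.h>
	#include <sys/syscall.h>
	#include <linux/perf_event.h>
	#include <drm/i915_drm.h>

	int open_gen_pmu_event(int gen_pmu_type)
	{
		struct drm_i915_gen_pmu_attr gen_attr;
		struct perf_event_attr pe;

		memset(&gen_attr, 0, sizeof(gen_attr));
		gen_attr.size = sizeof(gen_attr);  /* I915_GEN_PMU_ATTR_SIZE_VER0 */
		gen_attr.mmio_list[0] = 0x2358;    /* placeholder register offset */
		/* remaining entries stay zero: the first zero ends the list */

		memset(&pe, 0, sizeof(pe));
		pe.type = gen_pmu_type;            /* PMU type id, e.g. from sysfs */
		pe.size = sizeof(pe);
		pe.config = (uint64_t)(uintptr_t)&gen_attr; /* kernel copies the attr */
		pe.sample_type = PERF_SAMPLE_RAW;

		return syscall(__NR_perf_event_open, &pe, -1, 0, -1, 0);
	}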
Signed-off-by: Sourab Gupta <sourab.gupta at intel.com>
---
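Not for merging: for completeness, a read-side sketch of decoding one
PERF_SAMPLE_RAW payload laid out as struct drm_i915_ts_mmio_usernode (declared
below in i915_drm.h). The surrounding perf mmap ring-buffer walking is assumed
from standard perf usage and is not shown:

	#include <stdint.h>
	#include <stdio.h>
	#include <drm/i915_drm.h>

	static void decode_ts_mmio_node(const void *raw)
	{
		const struct drm_i915_ts_mmio_usernode *node = raw;
		uint64_t ts = ((uint64_t)node->timestamp.ts_high << 32) |
			      node->timestamp.ts_low;
		int i;

		printf("ts=%llu\n", (unsigned long long)ts);
		for (i = 0; i < 8; i++)
			printf("mmio[%d]=0x%08x\n", i, node->mmio[i]);
	}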
drivers/gpu/drm/i915/i915_drv.h | 4 +-
drivers/gpu/drm/i915/i915_oa_perf.c | 119 ++++++++++++++++++++++++++++++++----
include/uapi/drm/i915_drm.h | 9 ++-
3 files changed, 117 insertions(+), 15 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index a0e1d17..1f86358 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1718,9 +1718,10 @@ struct drm_i915_ts_node_info {
struct drm_i915_gem_request *req;
};
-struct drm_i915_ts_node {
+struct drm_i915_ts_mmio_node {
/* ensure timestamp starts on a qword boundary */
struct drm_i915_ts_data timestamp;
+ __u32 mmio[8];
struct drm_i915_ts_node_info node_info;
};
#endif
@@ -2024,6 +2025,7 @@ struct drm_i915_private {
struct work_struct work_timer;
struct work_struct work_event_stop;
struct completion complete;
+ u32 mmio_list[8];
} gen_pmu;
struct list_head profile_cmd;
diff --git a/drivers/gpu/drm/i915/i915_oa_perf.c b/drivers/gpu/drm/i915/i915_oa_perf.c
index ed0bdc9..465e823 100644
--- a/drivers/gpu/drm/i915/i915_oa_perf.c
+++ b/drivers/gpu/drm/i915/i915_oa_perf.c
@@ -113,10 +113,10 @@ void i915_gen_insert_cmd_ts(struct intel_ringbuffer *ringbuf, u32 ctx_id,
dev_priv->gen_pmu.buffer.addr;
void *data_ptr = (u8 *)queue_hdr + queue_hdr->data_offset;
int data_size = (queue_hdr->size_in_bytes - queue_hdr->data_offset);
- u32 node_offset, timestamp_offset, addr = 0;
- int ret;
+ u32 node_offset, timestamp_offset, mmio_offset, addr = 0;
+ int ret, i = 0;
- struct drm_i915_ts_node *nodes = data_ptr;
+ struct drm_i915_ts_mmio_node *nodes = data_ptr;
int num_nodes = 0;
int index = 0;
@@ -124,12 +124,14 @@ void i915_gen_insert_cmd_ts(struct intel_ringbuffer *ringbuf, u32 ctx_id,
index = queue_hdr->node_count % num_nodes;
timestamp_offset = offsetof(struct drm_i915_ts_data, ts_low);
+ mmio_offset =
+ offsetof(struct drm_i915_ts_mmio_node, mmio);
node_offset = i915_gem_obj_ggtt_offset(dev_priv->gen_pmu.buffer.obj) +
queue_hdr->data_offset +
- index * sizeof(struct drm_i915_ts_node);
+ index * sizeof(struct drm_i915_ts_mmio_node);
addr = node_offset +
- offsetof(struct drm_i915_ts_node, timestamp) +
+ offsetof(struct drm_i915_ts_mmio_node, timestamp) +
timestamp_offset;
if (ring->id == RCS) {
@@ -158,6 +160,27 @@ void i915_gen_insert_cmd_ts(struct intel_ringbuffer *ringbuf, u32 ctx_id,
intel_ring_emit(ring, 0); /* imm high, must be zero */
intel_ring_advance(ring);
}
+
+ for (i = 0; i < 8; i++) {
+ if (dev_priv->gen_pmu.mmio_list[i] == 0)
+ break;
+
+ addr = node_offset + mmio_offset +
+ i * sizeof(dev_priv->gen_pmu.mmio_list[i]);
+
+ ret = intel_ring_begin(ring, 4);
+ if (ret)
+ return;
+
+ intel_ring_emit(ring,
+ MI_STORE_REGISTER_MEM(1) |
+ MI_SRM_LRM_GLOBAL_GTT);
+ intel_ring_emit(ring, dev_priv->gen_pmu.mmio_list[i]);
+ intel_ring_emit(ring, addr);
+ intel_ring_emit(ring, MI_NOOP);
+ intel_ring_advance(ring);
+ }
+
node_info = &nodes[index].node_info;
i915_gem_request_assign(&node_info->req,
ring->outstanding_lazy_request);
@@ -314,11 +337,11 @@ static void init_gen_pmu_buf_queue(struct drm_i915_private *dev_priv)
}
static void forward_one_gen_pmu_sample(struct drm_i915_private *dev_priv,
- struct drm_i915_ts_node *node)
+ struct drm_i915_ts_mmio_node *node)
{
struct perf_sample_data data;
struct perf_event *event = dev_priv->gen_pmu.exclusive_event;
- int snapshot_size = sizeof(struct drm_i915_ts_usernode);
+ int snapshot_size = sizeof(struct drm_i915_ts_mmio_usernode);
struct perf_raw_record raw;
perf_sample_data_init(&data, 0, event->hw.last_period);
@@ -338,11 +361,11 @@ void i915_gen_pmu_wait_gpu(struct drm_i915_private *dev_priv)
struct drm_i915_ts_queue_header *hdr =
(struct drm_i915_ts_queue_header *)
dev_priv->gen_pmu.buffer.addr;
- struct drm_i915_ts_node *first_node, *node;
+ struct drm_i915_ts_mmio_node *first_node, *node;
int head, tail, num_nodes, ret;
struct drm_i915_gem_request *req;
- first_node = (struct drm_i915_ts_node *)
+ first_node = (struct drm_i915_ts_mmio_node *)
((char *)hdr + hdr->data_offset);
num_nodes = (hdr->size_in_bytes - hdr->data_offset) /
sizeof(*node);
@@ -375,14 +398,14 @@ void forward_gen_pmu_snapshots_work(struct work_struct *__work)
struct drm_i915_ts_queue_header *hdr =
(struct drm_i915_ts_queue_header *)
dev_priv->gen_pmu.buffer.addr;
- struct drm_i915_ts_node *first_node, *node;
+ struct drm_i915_ts_mmio_node *first_node, *node;
int head, tail, num_nodes, ret;
struct drm_i915_gem_request *req;
if (dev_priv->gen_pmu.event_active == false)
return;
- first_node = (struct drm_i915_ts_node *)
+ first_node = (struct drm_i915_ts_mmio_node *)
((char *)hdr + hdr->data_offset);
num_nodes = (hdr->size_in_bytes - hdr->data_offset) /
sizeof(*node);
@@ -421,11 +444,11 @@ void i915_gen_pmu_stop_work_fn(struct work_struct *__work)
struct drm_i915_ts_queue_header *hdr =
(struct drm_i915_ts_queue_header *)
dev_priv->gen_pmu.buffer.addr;
- struct drm_i915_ts_node *first_node, *node;
+ struct drm_i915_ts_mmio_node *first_node, *node;
int head, tail, num_nodes, ret;
struct drm_i915_gem_request *req;
- first_node = (struct drm_i915_ts_node *)
+ first_node = (struct drm_i915_ts_mmio_node *)
((char *)hdr + hdr->data_offset);
num_nodes = (hdr->size_in_bytes - hdr->data_offset) /
sizeof(*node);
@@ -1467,15 +1490,85 @@ static int i915_oa_event_event_idx(struct perf_event *event)
return 0;
}
+static int i915_gen_pmu_copy_attr(struct drm_i915_gen_pmu_attr __user *uattr,
+ struct drm_i915_gen_pmu_attr *attr)
+{
+ u32 size;
+ int ret;
+
+ if (!access_ok(VERIFY_WRITE, uattr, I915_GEN_PMU_ATTR_SIZE_VER0))
+ return -EFAULT;
+
+ /*
+ * zero the full structure, so that a short copy will be nice.
+ */
+ memset(attr, 0, sizeof(*attr));
+
+ ret = get_user(size, &uattr->size);
+ if (ret)
+ return ret;
+
+ if (size > PAGE_SIZE) /* silly large */
+ goto err_size;
+
+ if (size < I915_GEN_PMU_ATTR_SIZE_VER0)
+ goto err_size;
+
+ /*
+ * If we're handed a bigger struct than we know of,
+ * ensure all the unknown bits are 0 - i.e. new
+ * user-space does not rely on any kernel feature
+ * extensions we don't know about yet.
+ */
+ if (size > sizeof(*attr)) {
+ unsigned char __user *addr;
+ unsigned char __user *end;
+ unsigned char val;
+
+ addr = (void __user *)uattr + sizeof(*attr);
+ end = (void __user *)uattr + size;
+
+ for (; addr < end; addr++) {
+ ret = get_user(val, addr);
+ if (ret)
+ return ret;
+ if (val)
+ goto err_size;
+ }
+ size = sizeof(*attr);
+ }
+
+ ret = copy_from_user(attr, uattr, size);
+ if (ret)
+ return -EFAULT;
+
+out:
+ return ret;
+
+err_size:
+ put_user(sizeof(*attr), &uattr->size);
+ ret = -E2BIG;
+ goto out;
+}
+
static int i915_gen_event_init(struct perf_event *event)
{
struct drm_i915_private *dev_priv =
container_of(event->pmu, typeof(*dev_priv), gen_pmu.pmu);
+ struct drm_i915_gen_pmu_attr gen_attr;
int ret = 0;
if (event->attr.type != event->pmu->type)
return -ENOENT;
+ ret = i915_gen_pmu_copy_attr(to_user_ptr(event->attr.config),
+ &gen_attr);
+ if (ret)
+ return ret;
+
+ memcpy(dev_priv->gen_pmu.mmio_list, gen_attr.mmio_list,
+ sizeof(dev_priv->gen_pmu.mmio_list));
+
/* To avoid the complexity of having to accurately filter
* data and marshal to the appropriate client
* we currently only allow exclusive access */
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index a7da421..8d4deec 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -80,6 +80,7 @@
#define I915_OA_METRICS_SET_MAX I915_OA_METRICS_SET_SAMPLER_BALANCE
#define I915_OA_ATTR_SIZE_VER0 32 /* sizeof first published struct */
+#define I915_GEN_PMU_ATTR_SIZE_VER0 36 /* sizeof first published struct */
typedef struct _drm_i915_oa_attr {
__u32 size;
@@ -97,6 +98,11 @@ typedef struct _drm_i915_oa_attr {
__reserved_2:31;
} drm_i915_oa_attr_t;
+struct drm_i915_gen_pmu_attr {
+ __u32 size;
+ __u32 mmio_list[8];
+};
+
/* Header for PERF_RECORD_DEVICE type events */
typedef struct _drm_i915_oa_event_header {
__u32 type;
@@ -143,9 +149,10 @@ struct drm_i915_ts_data {
__u32 ts_high;
};
-struct drm_i915_ts_usernode {
+struct drm_i915_ts_mmio_usernode {
/* ensure timestamp starts on a qword boundary */
struct drm_i915_ts_data timestamp;
+ __u32 mmio[8];
struct drm_i915_ts_node_footer node_info;
};
--
1.8.5.1