[Intel-gfx] [RFC 6/8] drm/i915: Add support for forwarding pid in timestamp sample metadata through perf
sourab.gupta at intel.com
sourab.gupta at intel.com
Tue Aug 4 22:55:42 PDT 2015
From: Sourab Gupta <sourab.gupta at intel.com>
This patch introduces the I915_GEN_PMU_SAMPLE_PID sample-info flag and the
sample_pid attr bit, and adds support for recording the pid alongside each
timestamp sample and forwarding it through perf.
When userspace requests the pid by setting the sample_pid bit of
struct drm_i915_gen_pmu_attr during event init, each generated sample has an
additional field appended that carries the pid information.
Signed-off-by: Sourab Gupta <sourab.gupta at intel.com>
---
drivers/gpu/drm/i915/i915_drv.h | 2 ++
drivers/gpu/drm/i915/i915_oa_perf.c | 19 ++++++++++++++++++-
include/uapi/drm/i915_drm.h | 8 +++++++-
3 files changed, 27 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 70f1bd6..f46687a 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1684,6 +1684,7 @@ struct i915_gen_pmu_node {
bool discard;
u32 ctx_id;
u32 ring;
+ u32 pid;
};
extern const struct i915_oa_reg i915_oa_3d_mux_config_hsw[];
@@ -2018,6 +2019,7 @@ struct drm_i915_private {
struct work_struct forward_work;
struct work_struct event_destroy_work;
#define I915_GEN_PMU_SAMPLE_RING (1<<0)
+#define I915_GEN_PMU_SAMPLE_PID (1<<1)
int sample_info_flags;
} gen_pmu;
diff --git a/drivers/gpu/drm/i915/i915_oa_perf.c b/drivers/gpu/drm/i915/i915_oa_perf.c
index 41e2407..f73d23c 100644
--- a/drivers/gpu/drm/i915/i915_oa_perf.c
+++ b/drivers/gpu/drm/i915/i915_oa_perf.c
@@ -14,6 +14,7 @@
#define TS_DATA_SIZE sizeof(struct drm_i915_ts_data)
#define CTX_INFO_SIZE sizeof(struct drm_i915_ts_node_ctx_id)
#define RING_INFO_SIZE sizeof(struct drm_i915_ts_node_ring_id)
+#define PID_INFO_SIZE sizeof(struct drm_i915_ts_node_pid)
static u32 i915_oa_event_paranoid = true;
@@ -145,6 +146,8 @@ static void i915_gen_emit_ts_data(struct drm_i915_gem_request *req,
entry->ctx_id = global_ctx_id;
if (dev_priv->gen_pmu.sample_info_flags & I915_GEN_PMU_SAMPLE_RING)
entry->ring = ring_id_mask(ring);
+ if (dev_priv->gen_pmu.sample_info_flags & I915_GEN_PMU_SAMPLE_PID)
+ entry->pid = current->pid;
i915_gem_request_assign(&entry->req, ring->outstanding_lazy_request);
spin_lock(&dev_priv->gen_pmu.lock);
@@ -551,10 +554,11 @@ static void forward_one_gen_pmu_sample(struct drm_i915_private *dev_priv,
u8 *snapshot, *current_ptr;
struct drm_i915_ts_node_ctx_id *ctx_info;
struct drm_i915_ts_node_ring_id *ring_info;
+ struct drm_i915_ts_node_pid *pid_info;
struct perf_raw_record raw;
BUILD_BUG_ON((TS_DATA_SIZE != 8) || (CTX_INFO_SIZE != 8) ||
- (RING_INFO_SIZE != 8));
+ (RING_INFO_SIZE != 8) || (PID_INFO_SIZE != 8));
snapshot = dev_priv->gen_pmu.buffer.addr + node->offset;
snapshot_size = TS_DATA_SIZE + CTX_INFO_SIZE;
@@ -570,6 +574,13 @@ static void forward_one_gen_pmu_sample(struct drm_i915_private *dev_priv,
current_ptr = snapshot + snapshot_size;
}
+ if (dev_priv->gen_pmu.sample_info_flags & I915_GEN_PMU_SAMPLE_PID) {
+ pid_info = (struct drm_i915_ts_node_pid *)current_ptr;
+ pid_info->pid = node->pid;
+ snapshot_size += PID_INFO_SIZE;
+ current_ptr = snapshot + snapshot_size;
+ }
+
/* Note: the raw sample consists of a u32 size member and raw data. The
* combined size of these two fields is required to be 8 byte aligned.
* The size of raw data field is assumed to be 8 byte aligned already.
@@ -1017,6 +1028,9 @@ static int init_gen_pmu_buffer(struct perf_event *event)
if (dev_priv->gen_pmu.sample_info_flags & I915_GEN_PMU_SAMPLE_RING)
node_size += RING_INFO_SIZE;
+ if (dev_priv->gen_pmu.sample_info_flags & I915_GEN_PMU_SAMPLE_PID)
+ node_size += PID_INFO_SIZE;
+
/* size has to be aligned to 8 bytes */
node_size = ALIGN(node_size, 8);
dev_priv->gen_pmu.buffer.node_size = node_size;
@@ -1635,6 +1649,9 @@ static int i915_gen_event_init(struct perf_event *event)
dev_priv->gen_pmu.sample_info_flags |=
I915_GEN_PMU_SAMPLE_RING;
+ if (gen_attr.sample_pid)
+ dev_priv->gen_pmu.sample_info_flags |= I915_GEN_PMU_SAMPLE_PID;
+
/* To avoid the complexity of having to accurately filter
* data and marshal to the appropriate client
* we currently only allow exclusive access */
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 5b484fb..3dcc862 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -103,7 +103,8 @@ typedef struct _drm_i915_oa_attr {
struct drm_i915_gen_pmu_attr {
__u32 size;
__u32 sample_ring:1,
- __reserved_1:31;
+ sample_pid:1,
+ __reserved_1:30;
};
/* Header for PERF_RECORD_DEVICE type events */
@@ -163,6 +164,11 @@ struct drm_i915_ts_node_ring_id {
__u32 pad;
};
+struct drm_i915_ts_node_pid {
+ __u32 pid;
+ __u32 pad;
+};
+
/* Each region is a minimum of 16k, and there are at most 255 of them.
*/
#define I915_NR_TEX_REGIONS 255 /* table size 2k - maximum due to use
--
1.8.5.1
More information about the Intel-gfx
mailing list