[Intel-gfx] [RFC 6/8] drm/i915: Routines for inserting OA capture commands in the ringbuffer
sourab.gupta at intel.com
sourab.gupta at intel.com
Mon Jun 22 02:50:17 PDT 2015
From: Sourab Gupta <sourab.gupta at intel.com>
This patch introduces routines that insert commands for capturing OA
snapshots into the ringbuffer of the RCS engine.
The MI_REPORT_PERF_COUNT command can be used to capture snapshots of the OA
counters. The routines introduced in this patch can be called to insert these
commands at appropriate points during workload submission.
Signed-off-by: Sourab Gupta <sourab.gupta at intel.com>
---
drivers/gpu/drm/i915/i915_dma.c | 1 +
drivers/gpu/drm/i915/i915_drv.h | 3 ++
drivers/gpu/drm/i915/i915_oa_perf.c | 86 +++++++++++++++++++++++++++++++++++++
3 files changed, 90 insertions(+)
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 0553f20..f12feaa 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -821,6 +821,7 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags)
/* Must at least be registered before trying to pin any context
* otherwise i915_oa_context_pin_notify() will lock an un-initialized
* spinlock, upsetting lockdep checks */
+ INIT_LIST_HEAD(&dev_priv->profile_cmd);
i915_oa_pmu_register(dev);
intel_pm_setup(dev);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 5453842..798da49 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1982,6 +1982,8 @@ struct drm_i915_private {
struct work_struct work_event_stop;
struct completion complete;
} oa_pmu;
+
+ struct list_head profile_cmd;
#endif
/* Abstract the submission mechanism (legacy ringbuffer or execlists) away */
@@ -3162,6 +3164,7 @@ void i915_oa_context_pin_notify(struct drm_i915_private *dev_priv,
struct intel_context *context);
void i915_oa_context_unpin_notify(struct drm_i915_private *dev_priv,
struct intel_context *context);
+void i915_insert_profiling_cmd(struct intel_ringbuffer *ringbuf, u32 ctx_id);
#else
static inline void
i915_oa_context_pin_notify(struct drm_i915_private *dev_priv,
diff --git a/drivers/gpu/drm/i915/i915_oa_perf.c b/drivers/gpu/drm/i915/i915_oa_perf.c
index 5d63dab..b02850c 100644
--- a/drivers/gpu/drm/i915/i915_oa_perf.c
+++ b/drivers/gpu/drm/i915/i915_oa_perf.c
@@ -25,6 +25,76 @@ static int hsw_perf_format_sizes[] = {
64 /* C4_B8_HSW */
};
+struct drm_i915_insert_cmd { /* one registered command-insertion hook */
+ struct list_head list; /* link in the dev_priv->profile_cmd list */
+ void (*insert_cmd)(struct intel_ringbuffer *ringbuf, u32 ctx_id); /* callback run by i915_insert_profiling_cmd() for each list entry */
+};
+
+/*
+ * Run every command-insertion hook registered on dev_priv->profile_cmd,
+ * in list order, handing each one the same ringbuffer and context id.
+ */
+void i915_insert_profiling_cmd(struct intel_ringbuffer *ringbuf, u32 ctx_id)
+{
+	struct drm_i915_private *i915 = ringbuf->ring->dev->dev_private;
+	struct drm_i915_insert_cmd *hook;
+
+	list_for_each_entry(hook, &i915->profile_cmd, list) {
+		hook->insert_cmd(ringbuf, ctx_id);
+	}
+}
+
+/*
+ * Emit an MI_REPORT_PERF_COUNT command into the render ring, targeting the
+ * report_perf slot of the next node in the async OA buffer, and record the
+ * pending request, the current pid and the context id in that node.
+ *
+ * @ringbuf: ringbuffer the command is emitted into; nothing is done unless
+ *           its engine is RCS
+ * @ctx_id:  context id stored in the node's bookkeeping
+ *
+ * Returns nothing. If the buffer cannot hold a single node, the computed
+ * address is not 64-byte aligned, or ring space cannot be reserved, the
+ * snapshot is dropped and the queue header is left untouched.
+ */
+void i915_oa_insert_cmd(struct intel_ringbuffer *ringbuf, u32 ctx_id)
+{
+	struct intel_engine_cs *ring = ringbuf->ring;
+	struct drm_i915_private *dev_priv;
+	struct drm_i915_oa_async_queue_header *queue_hdr;
+	struct drm_i915_oa_async_node_info *node_info;
+	struct drm_i915_oa_async_node *nodes;
+	int data_size, num_nodes, index, ret;
+	u32 addr;
+
+	/*
+	 * OA counters are only supported on the render ring. Check this
+	 * before touching the async buffer so other rings never read it.
+	 */
+	if (ring->id != RCS)
+		return;
+
+	dev_priv = ring->dev->dev_private;
+	queue_hdr = (struct drm_i915_oa_async_queue_header *)
+			dev_priv->oa_pmu.oa_async_buffer.addr;
+
+	/* The node array starts data_offset bytes into the buffer. */
+	nodes = (void *)((u8 *)queue_hdr + queue_hdr->data_offset);
+	data_size = queue_hdr->size_in_bytes - queue_hdr->data_offset;
+
+	/* Without room for at least one node the modulo below would trap. */
+	if (WARN_ON(data_size < (int)sizeof(*nodes)))
+		return;
+
+	num_nodes = data_size / sizeof(*nodes);
+	index = queue_hdr->node_count % num_nodes;
+
+	/* GGTT address of the report_perf member of the selected node. */
+	addr = i915_gem_obj_ggtt_offset(dev_priv->oa_pmu.oa_async_buffer.obj) +
+		queue_hdr->data_offset +
+		index * sizeof(struct drm_i915_oa_async_node) +
+		offsetof(struct drm_i915_oa_async_node, report_perf);
+
+	/* addr should be 64 byte aligned; drop the snapshot rather than crash */
+	if (WARN_ON(addr & 0x3f))
+		return;
+
+	ret = intel_ring_begin(ring, 4);
+	if (ret)
+		return;
+
+	intel_ring_emit(ring, MI_REPORT_PERF_COUNT | (1<<0));
+	intel_ring_emit(ring, addr | MI_REPORT_PERF_COUNT_GGTT);
+	/* third dword carries the seqno of the pending request */
+	intel_ring_emit(ring, ring->outstanding_lazy_request->seqno);
+	intel_ring_emit(ring, MI_NOOP);
+	intel_ring_advance(ring);
+
+	node_info = &nodes[index].node_info;
+	i915_gem_request_assign(&node_info->req,
+				ring->outstanding_lazy_request);
+
+	node_info->pid = current->pid;
+	node_info->ctx_id = ctx_id;
+
+	queue_hdr->node_count++;
+	/*
+	 * NOTE(review): once node_count exceeds num_nodes this bumps
+	 * wrap_count on every call, not once per lap of the buffer. Confirm
+	 * against the consumer of wrap_count that this is intended.
+	 */
+	if (queue_hdr->node_count > num_nodes)
+		queue_hdr->wrap_count++;
+}
+
static void init_oa_async_buf_queue(struct drm_i915_private *dev_priv)
{
struct drm_i915_oa_async_queue_header *hdr =
@@ -865,6 +935,7 @@ void i915_oa_async_stop_work_fn(struct work_struct *__work)
container_of(__work, typeof(*dev_priv),
oa_pmu.work_event_stop);
struct perf_event *event = dev_priv->oa_pmu.exclusive_event;
+ struct drm_i915_insert_cmd *entry, *next;
struct drm_i915_oa_async_queue_header *hdr =
(struct drm_i915_oa_async_queue_header *)
dev_priv->oa_pmu.oa_async_buffer.addr;
@@ -882,6 +953,13 @@ void i915_oa_async_stop_work_fn(struct work_struct *__work)
if (ret)
return;
+ list_for_each_entry_safe(entry, next, &dev_priv->profile_cmd, list) {
+ if (entry->insert_cmd == i915_oa_insert_cmd) {
+ list_del(&entry->list);
+ kfree(entry);
+ }
+ }
+
dev_priv->oa_pmu.event_active = false;
i915_oa_async_wait_gpu(dev_priv);
@@ -920,8 +998,14 @@ static void i915_oa_event_start(struct perf_event *event, int flags)
struct drm_i915_private *dev_priv =
container_of(event->pmu, typeof(*dev_priv), oa_pmu.pmu);
unsigned long lock_flags;
+ struct drm_i915_insert_cmd *entry;
u32 oastatus1, tail;
+ entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
+ if (!entry)
+ return;
+ entry->insert_cmd = i915_oa_insert_cmd;
+
if (dev_priv->oa_pmu.metrics_set == I915_OA_METRICS_SET_3D) {
config_oa_regs(dev_priv, i915_oa_3d_mux_config_hsw,
i915_oa_3d_mux_config_hsw_len);
@@ -976,6 +1060,8 @@ static void i915_oa_event_start(struct perf_event *event, int flags)
dev_priv->oa_pmu.event_active = true;
update_oacontrol(dev_priv);
+ list_add_tail(&entry->list, &dev_priv->profile_cmd);
+
/* Reset the head ptr to ensure we don't forward reports relating
* to a previous perf event */
oastatus1 = I915_READ(GEN7_OASTATUS1);
--
1.8.5.1
More information about the Intel-gfx
mailing list