[Intel-gfx] [RFC 5/8] drm/i915: Wait for GPU to finish before event stop, in async OA counter mode

sourab.gupta at intel.com sourab.gupta at intel.com
Mon Jun 22 02:50:16 PDT 2015


From: Sourab Gupta <sourab.gupta at intel.com>

The mode of asynchronous OA counter snapshot collection requires insertion
of MI_REPORT_PERF_COUNT commands into the ringbuffer. Therefore, during the
stop event call, we need to wait for the GPU to complete processing the last
request for which an MI_RPC command was inserted. We need to ensure the
processing is completed before the event_destroy callback, which deallocates
the buffer.

Signed-off-by: Sourab Gupta <sourab.gupta at intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h     |  2 +
 drivers/gpu/drm/i915/i915_oa_perf.c | 95 ++++++++++++++++++++++++++++++-------
 2 files changed, 81 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index d738f7a..5453842 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1979,6 +1979,8 @@ struct drm_i915_private {
 			u8 *snapshot;
 		} oa_async_buffer;
 		struct work_struct work_timer;
+		struct work_struct work_event_stop;
+		struct completion complete;
 	} oa_pmu;
 #endif
 
diff --git a/drivers/gpu/drm/i915/i915_oa_perf.c b/drivers/gpu/drm/i915/i915_oa_perf.c
index 3bf4c47..5d63dab 100644
--- a/drivers/gpu/drm/i915/i915_oa_perf.c
+++ b/drivers/gpu/drm/i915/i915_oa_perf.c
@@ -118,6 +118,9 @@ void forward_oa_async_snapshots_work(struct work_struct *__work)
 	int ret, head, tail, num_nodes;
 	struct drm_i915_gem_request *req;
 
+	if (dev_priv->oa_pmu.event_active == false)
+		return;
+
 	first_node = (struct drm_i915_oa_async_node *)
 			((char *)hdr + hdr->data_offset);
 	num_nodes = (hdr->size_in_bytes - hdr->data_offset) /
@@ -298,6 +301,7 @@ static void flush_oa_snapshots(struct drm_i915_private *dev_priv,
 static void
 oa_async_buffer_destroy(struct drm_i915_private *i915)
 {
+	wait_for_completion(&i915->oa_pmu.complete);
 	mutex_lock(&i915->dev->struct_mutex);
 
 	vunmap(i915->oa_pmu.oa_async_buffer.addr);
@@ -854,6 +858,63 @@ static void config_oa_regs(struct drm_i915_private *dev_priv,
 	}
 }
 
+
+void i915_oa_async_stop_work_fn(struct work_struct *__work)
+{
+	struct drm_i915_private *dev_priv =
+		container_of(__work, typeof(*dev_priv),
+			oa_pmu.work_event_stop);
+	struct perf_event *event = dev_priv->oa_pmu.exclusive_event;
+	struct drm_i915_oa_async_queue_header *hdr =
+		(struct drm_i915_oa_async_queue_header *)
+		dev_priv->oa_pmu.oa_async_buffer.addr;
+	struct drm_i915_oa_async_node *first_node, *node;
+	struct drm_i915_gem_request *req;
+	int ret, head, tail, num_nodes;
+
+	first_node = (struct drm_i915_oa_async_node *)
+			((char *)hdr + hdr->data_offset);
+	num_nodes = (hdr->size_in_bytes - hdr->data_offset) /
+			sizeof(*node);
+
+
+	ret = i915_mutex_lock_interruptible(dev_priv->dev);
+	if (ret)
+		return;
+
+	dev_priv->oa_pmu.event_active = false;
+
+	i915_oa_async_wait_gpu(dev_priv);
+
+	update_oacontrol(dev_priv);
+	mmiowb();
+
+	/* Ensure that all requests are completed*/
+	tail = hdr->node_count;
+	head = dev_priv->oa_pmu.oa_async_buffer.head;
+	while ((head % num_nodes) != (tail % num_nodes)) {
+		node = &first_node[head % num_nodes];
+		req = node->node_info.req;
+		if (req && !i915_gem_request_completed(req, true))
+			WARN_ON(1);
+		head++;
+	}
+
+	if (event->attr.sample_period) {
+		hrtimer_cancel(&dev_priv->oa_pmu.timer);
+		flush_oa_snapshots(dev_priv, false);
+	}
+	cancel_work_sync(&dev_priv->oa_pmu.work_timer);
+
+	dev_priv->oa_pmu.oa_async_buffer.tail = 0;
+	dev_priv->oa_pmu.oa_async_buffer.head = 0;
+
+	mutex_unlock(&dev_priv->dev->struct_mutex);
+
+	event->hw.state = PERF_HES_STOPPED;
+	complete(&dev_priv->oa_pmu.complete);
+}
+
 static void i915_oa_event_start(struct perf_event *event, int flags)
 {
 	struct drm_i915_private *dev_priv =
@@ -939,25 +1000,23 @@ static void i915_oa_event_stop(struct perf_event *event, int flags)
 		container_of(event->pmu, typeof(*dev_priv), oa_pmu.pmu);
 	unsigned long lock_flags;
 
-	spin_lock_irqsave(&dev_priv->oa_pmu.lock, lock_flags);
-
-	dev_priv->oa_pmu.event_active = false;
-	update_oacontrol(dev_priv);
-
-	mmiowb();
-	spin_unlock_irqrestore(&dev_priv->oa_pmu.lock, lock_flags);
+	if (dev_priv->oa_pmu.async_sample_mode)
+		schedule_work(&dev_priv->oa_pmu.work_event_stop);
+	else {
+		spin_lock_irqsave(&dev_priv->oa_pmu.lock, lock_flags);
+		dev_priv->oa_pmu.event_active = false;
+		update_oacontrol(dev_priv);
 
-	if (event->attr.sample_period) {
-		hrtimer_cancel(&dev_priv->oa_pmu.timer);
-		flush_oa_snapshots(dev_priv, false);
-	}
+		mmiowb();
+		spin_unlock_irqrestore(&dev_priv->oa_pmu.lock, lock_flags);
+		if (event->attr.sample_period) {
+			hrtimer_cancel(&dev_priv->oa_pmu.timer);
+			flush_oa_snapshots(dev_priv, false);
+		}
 
-	if (dev_priv->oa_pmu.async_sample_mode) {
-		dev_priv->oa_pmu.oa_async_buffer.tail = 0;
-		dev_priv->oa_pmu.oa_async_buffer.head = 0;
+		event->hw.state = PERF_HES_STOPPED;
 	}
 
-	event->hw.state = PERF_HES_STOPPED;
 }
 
 static int i915_oa_event_add(struct perf_event *event, int flags)
@@ -1092,6 +1151,8 @@ void i915_oa_pmu_register(struct drm_device *dev)
 	i915->oa_pmu.timer.function = hrtimer_sample;
 
 	INIT_WORK(&i915->oa_pmu.work_timer, forward_oa_async_snapshots_work);
+	INIT_WORK(&i915->oa_pmu.work_event_stop, i915_oa_async_stop_work_fn);
+	init_completion(&i915->oa_pmu.complete);
 
 	spin_lock_init(&i915->oa_pmu.lock);
 
@@ -1122,8 +1183,10 @@ void i915_oa_pmu_unregister(struct drm_device *dev)
 	if (i915->oa_pmu.pmu.event_init == NULL)
 		return;
 
-	if (i915->oa_pmu.async_sample_mode)
+	if (i915->oa_pmu.async_sample_mode) {
 		cancel_work_sync(&i915->oa_pmu.work_timer);
+		cancel_work_sync(&i915->oa_pmu.work_event_stop);
+	}
 
 	unregister_sysctl_table(i915->oa_pmu.sysctl_header);
 
-- 
1.8.5.1



More information about the Intel-gfx mailing list