[Intel-gfx] [PATCH] drm/i915: Tracing infrastructure

Chris Wilson chris at chris-wilson.co.uk
Wed Jul 8 20:35:11 CEST 2009


By adding tracepoints for WATCH_BUF/EXEC we are able to monitor the
lifetime of objects and requests. These events can be relayed to userspace
via the per-device i915_trace channel in debugfs and investigated for
detailed performance analysis.

Overhead:
without          poppler-alt-20090608 1265.996 1293.744   1.04%    3/3
disabled         poppler-alt-20090608 1264.857 1284.478   0.75%    3/3
enabled          poppler-alt-20090608 1757.622 1786.996   0.79%    3/3

(Not a fully representative set of benchmarks - limited to the one that
will run on tiny using this kernel without the shrinker patches.)
As can be seen from above the effect of compiling in the tracepoint,
without enabling them, is negligible.

Or they can be simply monitored by the perf tool (or via systemtap etc).
For example, we can record the complate stack for anything that triggers
a CPU stall (i.e. a call that waits on a request to complete) with:

$ CAIRO_TEST_TARGET=drm perf record -e tp=i915/i915_gem_request_wait_begin \
  -c 1 -g -f \ ./cairo-perf-trace
[ # ]  backend                         test   min(s) median(s) stddev.  count
[  0]      drm             poppler-20090608  566.724  569.368   3.07%   3/3
$ perf report --callchain fractal,5
    97.23%   lt-cairo-perf-t  [vdso]
                |
                 --99.09%-- _i915_glyph_cache_add_glyph
                            _i915_surface_show_glyphs
                            _cairo_surface_show_text_glyphs
                            _cairo_gstate_show_text_glyphs
                            cairo_show_glyphs
                            _show_glyphs
                            0xb7efa87e
                            0xb7f06a99
                            _csi_scan_file
                            0xb7efa89f
                            cairo_script_interpreter_run
                            cairo_perf_trace
                            cairo_perf_trace_dir
                            main
                            __libc_start_main
                            0x804acd1

     2.77%   lt-cairo-perf-t  /lib/tls/i686/cmov/libc-2.9.so

which tells me:

1. Stalls are now very infrequent, just 795 i915_wait_request() during
that profile (due to no longer having to wait for fences)
2. The main culprit is having to wait for the glyph cache, and maybe I
should investigate the cache efficacy more closely.

Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
---
 drivers/gpu/drm/Kconfig           |    9 +
 drivers/gpu/drm/i915/Makefile     |    7 +-
 drivers/gpu/drm/i915/i915_dma.c   |    3 +
 drivers/gpu/drm/i915/i915_drv.h   |   14 +-
 drivers/gpu/drm/i915/i915_gem.c   |   66 ++++-
 drivers/gpu/drm/i915/i915_irq.c   |   10 +-
 drivers/gpu/drm/i915/i915_trace.c |  511 +++++++++++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_trace.h |  264 +++++++++++++++++++
 include/drm/i915_drm.h            |   35 +++
 9 files changed, 899 insertions(+), 20 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/i915_trace.c
 create mode 100644 drivers/gpu/drm/i915/i915_trace.h

diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index 39b393d..1a138d6 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -111,6 +111,15 @@ config DRM_I915_KMS
 	  the driver to bind to PCI devices, which precludes loading things
 	  like intelfb.
 
+config DRM_I915_TRACE
+	bool "i915 trace"
+	depends on DRM_I915
+	select GENERIC_TRACER
+	help
+	  Choose this option if you want to enable event tracing in the
+	  i915 driver. This is used to identify performance problems
+	  within the driver and applications. If unsure, say N.
+
 endchoice
 
 config DRM_MGA
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 1142be2..a89f764 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -29,7 +29,8 @@ i915-y := i915_drv.o i915_dma.o i915_irq.o i915_mem.o \
 	  dvo_tfp410.o \
 	  dvo_sil164.o
 
-i915-$(CONFIG_ACPI)	+= i915_opregion.o
-i915-$(CONFIG_COMPAT)   += i915_ioc32.o
+i915-$(CONFIG_ACPI)           += i915_opregion.o
+i915-$(CONFIG_COMPAT)         += i915_ioc32.o
+i915-$(CONFIG_DRM_I915_TRACE) += i915_trace.o
 
-obj-$(CONFIG_DRM_I915)  += i915.o
+obj-$(CONFIG_DRM_I915) += i915.o
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 1ba716b..389ba92 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -32,6 +32,7 @@
 #include "intel_drv.h"
 #include "i915_drm.h"
 #include "i915_drv.h"
+#include "i915_trace.h"
 
 #define I915_DRV	"i915_drv"
 
@@ -1246,6 +1247,7 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags)
 		intel_opregion_init(dev, 0);
 
 	i915_pmu_init(dev);
+	i915_trace_init(dev);
 
 	return 0;
 
@@ -1263,6 +1265,7 @@ int i915_driver_unload(struct drm_device *dev)
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
 	i915_pmu_uninit(dev);
+	i915_trace_cleanup(dev);
 
 	io_mapping_free(dev_priv->mm.gtt_mapping);
 	if (dev_priv->mm.gtt_mtrr >= 0) {
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index f3eb646..1e49f81 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -149,6 +149,8 @@ struct drm_i915_error_state {
 	struct timeval time;
 };
 
+struct i915_trace;
+
 typedef struct drm_i915_private {
 	struct drm_device *dev;
 
@@ -438,6 +440,7 @@ typedef struct drm_i915_private {
 	struct sdvo_device_mapping sdvo_mappings[2];
 
 	struct list_head samplers;
+	struct i915_trace *trace;
 } drm_i915_private_t;
 
 /** driver private structure attached to each drm_gem_object */
@@ -735,9 +738,14 @@ extern int i915_restore_state(struct drm_device *dev);
 extern int i915_pmu_init(struct drm_device *dev);
 extern void i915_pmu_uninit(struct drm_device *dev);
 
-/* i915_suspend.c */
-extern int i915_save_state(struct drm_device *dev);
-extern int i915_restore_state(struct drm_device *dev);
+#if CONFIG_DRM_I915_TRACE
+/* i915_trace.c */
+extern int i915_trace_init(struct drm_device *dev);
+extern void i915_trace_cleanup(struct drm_device *dev);
+#else
+static inline int i915_trace_init(struct drm_device *dev) { return 0 }
+static inline void i915_trace_cleanup(struct drm_device *dev) { }
+#endif
 
 #ifdef CONFIG_ACPI
 /* i915_opregion.c */
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 876b65c..f0237f9 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -29,6 +29,7 @@
 #include "drm.h"
 #include "i915_drm.h"
 #include "i915_drv.h"
+#include "i915_trace.h"
 #include <linux/swap.h>
 #include <linux/pci.h>
 
@@ -1539,6 +1540,8 @@ i915_add_request(struct drm_device *dev, struct drm_file *file_priv,
 			    obj->write_domain) {
 				obj->write_domain = 0;
 				i915_gem_object_move_to_active(obj, seqno);
+
+				trace_i915_gem_object_change_domain(obj);
 			}
 		}
 
@@ -1583,6 +1586,8 @@ i915_gem_retire_request(struct drm_device *dev,
 {
 	drm_i915_private_t *dev_priv = dev->dev_private;
 
+	trace_i915_gem_request_retire(dev, request->seqno);
+
 	/* Move any buffers on the active list that are no longer referenced
 	 * by the ringbuffer to the flushing/inactive lists as appropriate.
 	 */
@@ -1723,6 +1728,8 @@ i915_wait_request(struct drm_device *dev, uint32_t seqno)
 			i915_driver_irq_postinstall(dev);
 		}
 
+		trace_i915_gem_request_wait_begin(dev, seqno);
+
 		dev_priv->mm.waiting_gem_seqno = seqno;
 		i915_user_irq_get(dev);
 		ret = wait_event_interruptible(dev_priv->irq_queue,
@@ -1731,6 +1738,8 @@ i915_wait_request(struct drm_device *dev, uint32_t seqno)
 					       dev_priv->mm.wedged);
 		i915_user_irq_put(dev);
 		dev_priv->mm.waiting_gem_seqno = 0;
+
+		trace_i915_gem_request_wait_end(dev, seqno);
 	}
 	if (dev_priv->mm.wedged)
 		ret = -EIO;
@@ -1763,6 +1772,8 @@ i915_gem_flush(struct drm_device *dev,
 	DRM_INFO("%s: invalidate %08x flush %08x\n", __func__,
 		  invalidate_domains, flush_domains);
 #endif
+	trace_i915_gem_request_flush(dev, dev_priv->mm.next_gem_seqno,
+				     invalidate_domains, flush_domains);
 
 	if (flush_domains & I915_GEM_DOMAIN_CPU)
 		drm_agp_chipset_flush(dev);
@@ -1919,6 +1930,8 @@ i915_gem_object_unbind(struct drm_gem_object *obj)
 	if (!list_empty(&obj_priv->list))
 		list_del_init(&obj_priv->list);
 
+	trace_i915_gem_object_unbind(obj);
+
 	return 0;
 }
 
@@ -2291,6 +2304,8 @@ try_again:
 	else
 		i830_write_fence_reg(reg);
 
+	trace_i915_gem_object_get_fence(obj, i, obj_priv->tiling_mode);
+
 	return 0;
 }
 
@@ -2458,6 +2473,8 @@ i915_gem_object_bind_to_gtt(struct drm_gem_object *obj, unsigned alignment)
 	BUG_ON(obj->read_domains & I915_GEM_GPU_DOMAINS);
 	BUG_ON(obj->write_domain & I915_GEM_GPU_DOMAINS);
 
+	trace_i915_gem_object_bind(obj, obj_priv->gtt_offset);
+
 	return 0;
 }
 
@@ -2501,6 +2518,8 @@ i915_gem_object_flush_gpu_write_domain(struct drm_gem_object *obj)
 	seqno = i915_add_request(dev, NULL, obj->write_domain);
 	obj->write_domain = 0;
 	i915_gem_object_move_to_active(obj, seqno);
+
+	trace_i915_gem_object_change_domain(obj);
 }
 
 /** Flushes the GTT write domain for the object if it's dirty. */
@@ -2515,6 +2534,8 @@ i915_gem_object_flush_gtt_write_domain(struct drm_gem_object *obj)
 	 * no chipset flush.  It also doesn't land in render cache.
 	 */
 	obj->write_domain = 0;
+
+	trace_i915_gem_object_change_domain(obj);
 }
 
 /** Flushes the CPU write domain for the object if it's dirty. */
@@ -2529,6 +2550,8 @@ i915_gem_object_flush_cpu_write_domain(struct drm_gem_object *obj)
 	i915_gem_clflush_object(obj);
 	drm_agp_chipset_flush(dev);
 	obj->write_domain = 0;
+
+	trace_i915_gem_object_change_domain(obj);
 }
 
 /**
@@ -2571,6 +2594,8 @@ i915_gem_object_set_to_gtt_domain(struct drm_gem_object *obj, int write)
 		obj_priv->dirty = 1;
 	}
 
+	trace_i915_gem_object_change_domain(obj);
+
 	return 0;
 }
 
@@ -2618,6 +2643,8 @@ i915_gem_object_set_to_cpu_domain(struct drm_gem_object *obj, int write)
 		obj->write_domain = I915_GEM_DOMAIN_CPU;
 	}
 
+	trace_i915_gem_object_change_domain(obj);
+
 	return 0;
 }
 
@@ -2801,6 +2828,8 @@ i915_gem_object_set_to_gpu_domain(struct drm_gem_object *obj)
 		 obj->read_domains, obj->write_domain,
 		 dev->invalidate_domains, dev->flush_domains);
 #endif
+
+	trace_i915_gem_object_change_domain(obj);
 }
 
 /**
@@ -2901,6 +2930,8 @@ i915_gem_object_set_cpu_read_domain_range(struct drm_gem_object *obj,
 
 	obj->read_domains |= I915_GEM_DOMAIN_CPU;
 
+	trace_i915_gem_object_change_domain(obj);
+
 	return 0;
 }
 
@@ -3086,6 +3117,8 @@ i915_dispatch_gem_execbuffer(struct drm_device *dev,
 	exec_start = (uint32_t) exec_offset + exec->batch_start_offset;
 	exec_len = (uint32_t) exec->batch_len;
 
+	trace_i915_gem_request_submit(dev, dev_priv->mm.next_gem_seqno);
+
 	count = nbox ? nbox : 1;
 
 	for (i = 0; i < count; i++) {
@@ -3451,6 +3484,7 @@ i915_gem_execbuffer(struct drm_device *dev, void *data,
 		struct drm_gem_object *obj = object_list[i];
 
 		obj->write_domain = obj->pending_write_domain;
+		trace_i915_gem_object_change_domain(obj);
 	}
 
 	i915_verify_inactive(dev, __FILE__, __LINE__);
@@ -3789,6 +3823,8 @@ int i915_gem_init_object(struct drm_gem_object *obj)
 	obj_priv->fence_reg = I915_FENCE_REG_NONE;
 	INIT_LIST_HEAD(&obj_priv->list);
 
+	trace_i915_gem_object_create(obj);
+
 	return 0;
 }
 
@@ -3797,6 +3833,8 @@ void i915_gem_free_object(struct drm_gem_object *obj)
 	struct drm_device *dev = obj->dev;
 	struct drm_i915_gem_object *obj_priv = obj->driver_private;
 
+	trace_i915_gem_object_destroy(obj);
+
 	while (obj_priv->pin_count > 0)
 		i915_gem_object_unpin(obj);
 
@@ -3923,24 +3961,28 @@ i915_gem_idle(struct drm_device *dev)
 	 * the GPU domains and just stuff them onto inactive.
 	 */
 	while (!list_empty(&dev_priv->mm.active_list)) {
-		struct drm_i915_gem_object *obj_priv;
+		struct drm_gem_object *obj;
 
-		obj_priv = list_first_entry(&dev_priv->mm.active_list,
-					    struct drm_i915_gem_object,
-					    list);
-		obj_priv->obj->write_domain &= ~I915_GEM_GPU_DOMAINS;
-		i915_gem_object_move_to_inactive(obj_priv->obj);
+		obj = list_first_entry(&dev_priv->mm.active_list,
+				       struct drm_i915_gem_object,
+				       list)->obj;
+		obj->write_domain &= ~I915_GEM_GPU_DOMAINS;
+		i915_gem_object_move_to_inactive(obj);
+
+		trace_i915_gem_object_change_domain(obj);
 	}
 	spin_unlock(&dev_priv->mm.active_list_lock);
 
 	while (!list_empty(&dev_priv->mm.flushing_list)) {
-		struct drm_i915_gem_object *obj_priv;
+		struct drm_gem_object *obj;
 
-		obj_priv = list_first_entry(&dev_priv->mm.flushing_list,
-					    struct drm_i915_gem_object,
-					    list);
-		obj_priv->obj->write_domain &= ~I915_GEM_GPU_DOMAINS;
-		i915_gem_object_move_to_inactive(obj_priv->obj);
+		obj = list_first_entry(&dev_priv->mm.flushing_list,
+				       struct drm_i915_gem_object,
+				       list)->obj;
+		obj->write_domain &= ~I915_GEM_GPU_DOMAINS;
+		i915_gem_object_move_to_inactive(obj);
+
+		trace_i915_gem_object_change_domain(obj);
 	}
 
 
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 7ba23a6..95b95d3 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -26,11 +26,13 @@
  *
  */
 
+#include <linux/errno.h>
 #include <linux/sysrq.h>
 #include "drmP.h"
 #include "drm.h"
 #include "i915_drm.h"
 #include "i915_drv.h"
+#include "i915_trace.h"
 #include "intel_drv.h"
 
 #define MAX_NOPID ((u32)~0)
@@ -279,7 +281,9 @@ irqreturn_t igdng_irq_handler(struct drm_device *dev)
 		}
 
 		if (gt_iir & GT_USER_INTERRUPT) {
-			dev_priv->mm.irq_gem_seqno = i915_get_gem_seqno(dev);
+			u32 seqno = i915_get_gem_seqno(dev);
+			dev_priv->mm.irq_gem_seqno = seqno;
+			trace_i915_gem_request_complete(dev, seqno);
 			DRM_WAKEUP(&dev_priv->irq_queue);
 		}
 
@@ -494,7 +498,9 @@ irqreturn_t i915_driver_irq_handler(DRM_IRQ_ARGS)
 		}
 
 		if (iir & I915_USER_INTERRUPT) {
-			dev_priv->mm.irq_gem_seqno = i915_get_gem_seqno(dev);
+			u32 seqno = i915_get_gem_seqno(dev);
+			dev_priv->mm.irq_gem_seqno = seqno;
+			trace_i915_gem_request_complete(dev, seqno);
 			DRM_WAKEUP(&dev_priv->irq_queue);
 		}
 
diff --git a/drivers/gpu/drm/i915/i915_trace.c b/drivers/gpu/drm/i915/i915_trace.c
new file mode 100644
index 0000000..aeb2960
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_trace.c
@@ -0,0 +1,511 @@
+/*
+ * Copyright © 2009 Chris Wilson
+ *
+ * Tracing infrastructure for i915 performance monitoring
+ *
+ * See intel-gpu-tools/trace
+ */
+
+#include <linux/ctype.h>
+#include <linux/debugfs.h>
+#include <linux/ring_buffer.h>
+
+#include "drm/drmP.h"
+#include "drm/i915_drm.h"
+#include "i915_drv.h"
+
+#define CREATE_TRACE_POINTS
+#include "i915_trace.h"
+
+#define RING_BUFFER_SIZE (16*4096)
+
+struct i915_trace {
+	struct drm_device *dev;
+	struct dentry *dentry;
+	struct ring_buffer *ring_buffer;
+
+	unsigned long flags;
+	wait_queue_head_t wait;
+};
+
+enum {
+	TRACE_ACTIVE_FLAG,
+	TRACE_PROBED_FLAG,
+	TRACE_IRQ_FLAG
+};
+
+static void
+i915_trace_header(struct drm_device *dev)
+{
+	drm_i915_private_t *dev_priv = dev->dev_private;
+	struct i915_trace *trace = dev_priv->trace;
+	struct i915_trace_event event;
+
+	event.time  = ktime_to_ns(ktime_get());
+	event.minor = dev->primary->index;
+	event.seqno = I915_TRACE_MAGIC;
+	event.id    = I915_TRACE_HEADER;
+	event.arg1  = I915_TRACE_VERSION;
+	event.arg2  = 0;
+
+	ring_buffer_write(trace->ring_buffer, sizeof(event), &event);
+}
+
+/* objects */
+EXPORT_TRACEPOINT_SYMBOL_GPL(i915_gem_object_create);
+EXPORT_TRACEPOINT_SYMBOL_GPL(i915_gem_object_bind);
+EXPORT_TRACEPOINT_SYMBOL_GPL(i915_gem_object_change_domain);
+EXPORT_TRACEPOINT_SYMBOL_GPL(i915_gem_object_get_fence);
+EXPORT_TRACEPOINT_SYMBOL_GPL(i915_gem_object_unbind);
+EXPORT_TRACEPOINT_SYMBOL_GPL(i915_gem_object_destroy);
+
+static void
+i915_trace_object(struct drm_device *dev,
+		  int event_id,
+		  struct drm_gem_object *obj,
+		  int arg1,
+		  int arg2)
+{
+	drm_i915_private_t *dev_priv = dev->dev_private;
+	struct i915_trace *trace = dev_priv->trace;
+	struct i915_trace_event event;
+
+	if (!test_bit(TRACE_ACTIVE_FLAG, &trace->flags))
+		return;
+
+	event.time = ktime_to_ns(ktime_get());
+	event.minor = dev->primary->index;
+	event.obj = (u64) (uintptr_t) obj;
+	event.id = event_id;
+	event.arg1 = arg1;
+	event.arg2 = arg2;
+
+	ring_buffer_write(trace->ring_buffer, sizeof(event), &event);
+	wake_up(&trace->wait);
+}
+
+static void
+i915_trace_gem_object_create(struct drm_gem_object *obj)
+{
+	i915_trace_object(obj->dev,
+			  I915_TRACE_OBJECT_CREATE, obj, obj->size, 0);
+}
+
+static void
+i915_trace_gem_object_bind(struct drm_gem_object *obj, u32 gtt_offset)
+{
+	i915_trace_object(obj->dev,
+			  I915_TRACE_OBJECT_BIND, obj, gtt_offset, 0);
+}
+
+static void
+i915_trace_gem_object_change_domain(struct drm_gem_object *obj)
+{
+	i915_trace_object(obj->dev,
+			  I915_TRACE_OBJECT_CHANGE_DOMAIN, obj,
+			  obj->read_domains, obj->write_domain);
+}
+
+static void
+i915_trace_gem_object_get_fence(struct drm_gem_object *obj,
+				int fence,
+				int tiling_mode)
+{
+	i915_trace_object(obj->dev,
+			  I915_TRACE_OBJECT_GET_FENCE, obj,
+			  fence, tiling_mode);
+}
+
+static void
+i915_trace_gem_object_unbind(struct drm_gem_object *obj)
+{
+	i915_trace_object(obj->dev,
+			  I915_TRACE_OBJECT_UNBIND, obj, 0, 0);
+}
+
+static void
+i915_trace_gem_object_destroy(struct drm_gem_object *obj)
+{
+	i915_trace_object(obj->dev,
+			  I915_TRACE_OBJECT_DESTROY, obj, 0, 0);
+}
+
+/* requests */
+EXPORT_TRACEPOINT_SYMBOL_GPL(i915_gem_request_submit);
+EXPORT_TRACEPOINT_SYMBOL_GPL(i915_gem_request_flush);
+EXPORT_TRACEPOINT_SYMBOL_GPL(i915_gem_request_retire);
+EXPORT_TRACEPOINT_SYMBOL_GPL(i915_gem_request_complete);
+EXPORT_TRACEPOINT_SYMBOL_GPL(i915_gem_request_wait_begin);
+EXPORT_TRACEPOINT_SYMBOL_GPL(i915_gem_request_wait_end);
+
+static void
+i915_trace_request(struct drm_device *dev,
+		   int event_id, u32 seqno,
+		   u32 arg1, u32 arg2)
+{
+	drm_i915_private_t *dev_priv = dev->dev_private;
+	struct i915_trace *trace = dev_priv->trace;
+	struct i915_trace_event event;
+
+	if (!test_bit(TRACE_ACTIVE_FLAG, &trace->flags))
+		return;
+
+	event.time = ktime_to_ns(ktime_get());
+	event.minor = dev->primary->index;
+	event.seqno = seqno;
+	event.id = event_id;
+	event.arg1 = arg1;
+	event.arg2 = arg2;
+
+	ring_buffer_write(trace->ring_buffer, sizeof(event), &event);
+	wake_up(&trace->wait);
+}
+
+static void
+i915_trace_gem_request_submit(struct drm_device *dev, u32 seqno)
+{
+	i915_trace_request(dev, I915_TRACE_REQUEST_SUBMIT, seqno, 0, 0);
+}
+
+static void
+i915_trace_gem_request_flush(struct drm_device *dev, u32 seqno,
+			     u32 flush_domains, u32 invalidate_domains)
+{
+	i915_trace_request(dev, I915_TRACE_REQUEST_FLUSH, seqno,
+			   flush_domains, invalidate_domains);
+}
+
+static void
+i915_trace_gem_request_complete(struct drm_device *dev, u32 seqno)
+{
+	i915_trace_request(dev, I915_TRACE_REQUEST_COMPLETE, seqno, 0, 0);
+}
+
+static void
+i915_trace_gem_request_retire(struct drm_device *dev, u32 seqno)
+{
+	i915_trace_request(dev, I915_TRACE_REQUEST_RETIRE, seqno, 0, 0);
+}
+
+static void
+i915_trace_gem_request_wait_begin(struct drm_device *dev, u32 seqno)
+{
+	i915_trace_request(dev, I915_TRACE_REQUEST_WAIT_BEGIN, seqno, 0, 0);
+
+}
+
+static void
+i915_trace_gem_request_wait_end(struct drm_device *dev, u32 seqno)
+{
+	i915_trace_request(dev, I915_TRACE_REQUEST_WAIT_END, seqno, 0, 0);
+}
+
+/* debugfs interface */
+static atomic_t probes_refcnt = ATOMIC_INIT(-1);
+
+static int
+i915_probes_register(void)
+{
+	int ret;
+
+	if (atomic_inc_return(&probes_refcnt))
+		return 0;
+
+#define R(x) ret = register_trace_i915_##x(i915_trace_##x); if (ret) return ret
+
+	R(gem_object_create);
+	R(gem_object_bind);
+	R(gem_object_change_domain);
+	R(gem_object_get_fence);
+	R(gem_object_unbind);
+	R(gem_object_destroy);
+
+	R(gem_request_submit);
+	R(gem_request_flush);
+	R(gem_request_complete);
+	R(gem_request_retire);
+	R(gem_request_wait_begin);
+	R(gem_request_wait_end);
+
+#undef R
+
+	return 0;
+}
+
+static void
+i915_probes_unregister(void)
+{
+	if (!atomic_add_negative(-1, &probes_refcnt))
+		return;
+
+#define U(x)  unregister_trace_i915_##x(i915_trace_##x)
+
+	U(gem_object_create);
+	U(gem_object_bind);
+	U(gem_object_change_domain);
+	U(gem_object_get_fence);
+	U(gem_object_unbind);
+	U(gem_object_destroy);
+
+	U(gem_request_submit);
+	U(gem_request_flush);
+	U(gem_request_complete);
+	U(gem_request_retire);
+	U(gem_request_wait_begin);
+	U(gem_request_wait_end);
+
+#undef U
+
+	tracepoint_synchronize_unregister();
+}
+
+static int
+i915_trace_open(struct inode *inode, struct file *filp)
+{
+	struct i915_trace *trace = inode->i_private;
+	int err;
+
+	filp->private_data = trace;
+
+	if (test_and_set_bit_lock(TRACE_ACTIVE_FLAG, &trace->flags))
+		return -EBUSY;
+
+	if (!test_and_set_bit(TRACE_PROBED_FLAG, &trace->flags)) {
+		err = i915_probes_register();
+		if (err)
+			goto fail;
+	}
+
+	if (!test_and_set_bit(TRACE_IRQ_FLAG, &trace->flags))
+		i915_user_irq_get(trace->dev);
+
+	ring_buffer_reset(trace->ring_buffer);
+	i915_trace_header(trace->dev);
+
+	return 0;
+
+fail:
+	__clear_bit_unlock(TRACE_ACTIVE_FLAG, &trace->flags);
+	return err;
+}
+
+static struct ring_buffer_event *
+_ring_buffer_consume_next(struct ring_buffer *ring_buffer, u64 *ts_out)
+{
+	int cpu, next_cpu = -1;
+	struct ring_buffer_event *event;
+	u64 next_ts = (u64) - 1, ts;
+
+	for_each_possible_cpu(cpu) {
+		event = ring_buffer_peek(ring_buffer, cpu, &ts);
+		if (event == NULL)
+			continue;
+
+		if (ts < next_ts) {
+			next_cpu = cpu;
+			next_ts = ts;
+		}
+	}
+
+	if (next_cpu < 0)
+		return NULL;
+
+	return ring_buffer_consume(ring_buffer, next_cpu, ts_out);
+}
+
+static ssize_t
+i915_trace_read(struct file *filp, char __user *ubuf,
+		size_t max, loff_t *ppos)
+{
+	struct i915_trace *trace = filp->private_data;
+	u64 ts;
+	size_t copied;
+
+	/* ignore partial reads */
+	if (*ppos || max < sizeof(struct i915_trace_event))
+		return -EINVAL;
+
+	wait_event_interruptible(trace->wait,
+				 !ring_buffer_empty(trace->ring_buffer));
+
+	if (signal_pending(current))
+		return -EINTR;
+
+	copied = 0;
+	do {
+		struct ring_buffer_event *rb_event;
+
+		rb_event = _ring_buffer_consume_next(trace->ring_buffer, &ts);
+		if (rb_event == NULL)
+		    break;
+
+		if (copy_to_user(ubuf,
+				 ring_buffer_event_data(rb_event),
+				 sizeof(struct i915_trace_event)))
+			return -EFAULT;
+
+		copied += sizeof(struct i915_trace_event);
+	} while (copied + sizeof(struct i915_trace_event) <= max);
+
+	return copied;
+}
+
+static ssize_t
+i915_trace_write(struct file *filp, const char __user *ubuf,
+		 size_t cnt, loff_t *ppos)
+{
+	size_t read = 0;
+	int i, set = 1;
+	ssize_t ret;
+	char buf[128];
+	char *event;
+	char ch;
+
+	if (!cnt || cnt < 0)
+		return 0;
+
+	ret = get_user(ch, ubuf++);
+	if (ret)
+		return ret;
+	read++;
+	cnt--;
+
+	/* skip white space */
+	while (cnt && isspace(ch)) {
+		ret = get_user(ch, ubuf++);
+		if (ret)
+			return ret;
+
+		read++;
+		cnt--;
+	}
+	if (cnt == 0) {
+		filp->f_pos += read;
+		return read;
+	}
+
+	i = 0;
+	while (cnt && !isspace(ch)) {
+		if (!i && ch == '!')
+			set = 0;
+		else
+			buf[i++] = ch;
+
+		ret = get_user(ch, ubuf++);
+		if (ret)
+			return ret;
+
+		read++;
+		cnt--;
+
+		if (i == sizeof (buf) - 1)
+			break;
+	}
+	buf[i] = 0;
+
+	event = buf;
+	if (i == 0 || (i == 1 && buf[0] == '*'))
+		event = NULL;
+
+	ret = trace_set_clr_event(TRACE_SYSTEM_STRING, event, set);
+	if (ret)
+		return ret;
+
+	filp->f_pos += read;
+	return read;
+}
+
+static unsigned int
+i915_trace_poll(struct file *filp, struct poll_table_struct *wait)
+{
+	struct i915_trace *trace = filp->private_data;
+	unsigned int mask = POLLOUT | POLLWRNORM;
+
+	if (!ring_buffer_empty(trace->ring_buffer))
+		return mask | POLLIN | POLLRDNORM;
+
+	poll_wait(filp, &trace->wait, wait);
+
+	if (!ring_buffer_empty(trace->ring_buffer))
+		mask |= POLLIN | POLLRDNORM;
+
+	return mask;
+}
+
+static int
+i915_trace_release(struct inode *inode, struct file *filp)
+{
+    struct i915_trace *trace = filp->private_data;
+
+    if (test_and_clear_bit(TRACE_IRQ_FLAG, &trace->flags))
+	    i915_user_irq_put(trace->dev);
+
+    __clear_bit_unlock(TRACE_ACTIVE_FLAG, &trace->flags);
+
+    return 0;
+}
+
+static const struct file_operations i915_trace_fops = {
+    .open    = i915_trace_open,
+    .read    = i915_trace_read,
+    .write   = i915_trace_write,
+    .poll    = i915_trace_poll,
+    .release = i915_trace_release,
+};
+
+int
+i915_trace_init(struct drm_device *dev)
+{
+	drm_i915_private_t *dev_priv = dev->dev_private;
+	struct i915_trace *trace;
+
+	trace = kcalloc(1, sizeof(*trace), GFP_KERNEL);
+	if (trace == NULL)
+		return -ENOMEM;
+
+	trace->dev = dev;
+
+	init_waitqueue_head(&trace->wait);
+
+	trace->ring_buffer = ring_buffer_alloc(RING_BUFFER_SIZE, 0);
+	if (trace->ring_buffer == NULL) {
+		kfree(trace);
+		return -ENOMEM;
+	}
+
+	trace->dentry = debugfs_create_file("i915_trace", S_IRUGO | S_IWUGO,
+					     dev->primary->debugfs_root,
+					     trace,
+					     &i915_trace_fops);
+	if (IS_ERR(trace->dentry)) {
+		int err = PTR_ERR(trace->dentry);
+		ring_buffer_free(trace->ring_buffer);
+		kfree(trace);
+		return err;
+	}
+
+	trace->flags = 0;
+
+	dev_priv->trace = trace;
+
+	return 0;
+}
+
+void
+i915_trace_cleanup(struct drm_device *dev)
+{
+	drm_i915_private_t *dev_priv = dev->dev_private;
+	struct i915_trace *trace = dev_priv->trace;
+
+	if (trace == NULL)
+		return;
+
+	dev_priv->trace = NULL;
+
+	if (test_bit(TRACE_PROBED_FLAG, &trace->flags))
+		i915_probes_unregister();
+
+	ring_buffer_free(trace->ring_buffer);
+	debugfs_remove(trace->dentry);
+	kfree(trace);
+}
diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
new file mode 100644
index 0000000..66e4f24
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_trace.h
@@ -0,0 +1,264 @@
+#if !defined(_I915_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ)
+#define _I915_TRACE_H_
+
+#include <linux/stringify.h>
+#include <linux/types.h>
+#include <linux/tracepoint.h>
+
+#include <drm/drmP.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM i915
+#define TRACE_SYSTEM_STRING __stringify(TRACE_SYSTEM)
+#define TRACE_INCLUDE_FILE i915_trace
+
+/* object tracking */
+
+TRACE_EVENT(i915_gem_object_create,
+
+	    TP_PROTO(struct drm_gem_object *obj),
+
+	    TP_ARGS(obj),
+
+	    TP_STRUCT__entry(
+			     __field(struct drm_gem_object *, obj)
+			     __field(u32, size)
+			     ),
+
+	    TP_fast_assign(
+			   __entry->obj = obj;
+			   __entry->size = obj->size;
+			   ),
+
+	    TP_printk("obj=%p, size=%u", __entry->obj, __entry->size)
+);
+
+TRACE_EVENT(i915_gem_object_bind,
+
+	    TP_PROTO(struct drm_gem_object *obj, u32 gtt_offset),
+
+	    TP_ARGS(obj, gtt_offset),
+
+	    TP_STRUCT__entry(
+			     __field(struct drm_gem_object *, obj)
+			     __field(u32, gtt_offset)
+			     ),
+
+	    TP_fast_assign(
+			   __entry->obj = obj;
+			   __entry->gtt_offset = gtt_offset;
+			   ),
+
+	    TP_printk("obj=%p, gtt_offset=%08x",
+		      __entry->obj, __entry->gtt_offset)
+);
+
+TRACE_EVENT(i915_gem_object_change_domain,
+
+	    TP_PROTO(struct drm_gem_object *obj),
+
+	    TP_ARGS(obj),
+
+	    TP_STRUCT__entry(
+			     __field(struct drm_gem_object *, obj)
+			     __field(u32, read_domains)
+			     __field(u32, write_domain)
+			     ),
+
+	    TP_fast_assign(
+			   __entry->obj = obj;
+			   __entry->read_domains = obj->read_domains;
+			   __entry->write_domain = obj->write_domain;
+			   ),
+
+	    TP_printk("obj=%p, read=%04x, write=%04x",
+		      __entry->obj,
+		      __entry->read_domains, __entry->write_domain)
+);
+
+TRACE_EVENT(i915_gem_object_get_fence,
+
+	    TP_PROTO(struct drm_gem_object *obj, int fence, int tiling_mode),
+
+	    TP_ARGS(obj, fence, tiling_mode),
+
+	    TP_STRUCT__entry(
+			     __field(struct drm_gem_object *, obj)
+			     __field(int, fence)
+			     __field(int, tiling_mode)
+			     ),
+
+	    TP_fast_assign(
+			   __entry->obj = obj;
+			   __entry->fence = fence;
+			   __entry->tiling_mode = tiling_mode;
+			   ),
+
+	    TP_printk("obj=%p, fence=%d, tiling=%d",
+		      __entry->obj, __entry->fence, __entry->tiling_mode)
+);
+
+TRACE_EVENT(i915_gem_object_unbind,
+
+	    TP_PROTO(struct drm_gem_object *obj),
+
+	    TP_ARGS(obj),
+
+	    TP_STRUCT__entry(
+			     __field(struct drm_gem_object *, obj)
+			     ),
+
+	    TP_fast_assign(
+			   __entry->obj = obj;
+			   ),
+
+	    TP_printk("obj=%p", __entry->obj)
+);
+
+TRACE_EVENT(i915_gem_object_destroy,
+
+	    TP_PROTO(struct drm_gem_object *obj),
+
+	    TP_ARGS(obj),
+
+	    TP_STRUCT__entry(
+			     __field(struct drm_gem_object *, obj)
+			     ),
+
+	    TP_fast_assign(
+			   __entry->obj = obj;
+			   ),
+
+	    TP_printk("obj=%p", __entry->obj)
+);
+
+/* batch tracing */
+
+TRACE_EVENT(i915_gem_request_submit,
+
+	    TP_PROTO(struct drm_device *dev, u32 seqno),
+
+	    TP_ARGS(dev, seqno),
+
+	    TP_STRUCT__entry(
+			     __field(struct drm_device *, dev)
+			     __field(u32, seqno)
+			     ),
+
+	    TP_fast_assign(
+			   __entry->dev = dev;
+			   __entry->seqno = seqno;
+			   ),
+
+	    TP_printk("dev=%p, seqno=%u", __entry->dev, __entry->seqno)
+);
+
+TRACE_EVENT(i915_gem_request_flush,
+
+	    TP_PROTO(struct drm_device *dev, u32 seqno,
+		     u32 flush_domains, u32 invalidate_domains),
+
+	    TP_ARGS(dev, seqno, flush_domains, invalidate_domains),
+
+	    TP_STRUCT__entry(
+			     __field(struct drm_device *, dev)
+			     __field(u32, seqno)
+			     __field(u32, flush_domains)
+			     __field(u32, invalidate_domains)
+			     ),
+
+	    TP_fast_assign(
+			   __entry->dev = dev;
+			   __entry->seqno = seqno;
+			   __entry->flush_domains = flush_domains;
+			   __entry->invalidate_domains = invalidate_domains;
+			   ),
+
+	    TP_printk("dev=%p, seqno=%u, flush=%04x, invalidate=%04x",
+		      __entry->dev, __entry->seqno,
+		      __entry->flush_domains, __entry->invalidate_domains)
+);
+
+
+TRACE_EVENT(i915_gem_request_complete,
+
+	    TP_PROTO(struct drm_device *dev, u32 seqno),
+
+	    TP_ARGS(dev, seqno),
+
+	    TP_STRUCT__entry(
+			     __field(struct drm_device *, dev)
+			     __field(u32, seqno)
+			     ),
+
+	    TP_fast_assign(
+			   __entry->dev = dev;
+			   __entry->seqno = seqno;
+			   ),
+
+	    TP_printk("dev=%p, seqno=%u", __entry->dev, __entry->seqno)
+);
+
+TRACE_EVENT(i915_gem_request_retire,
+
+	    TP_PROTO(struct drm_device *dev, u32 seqno),
+
+	    TP_ARGS(dev, seqno),
+
+	    TP_STRUCT__entry(
+			     __field(struct drm_device *, dev)
+			     __field(u32, seqno)
+			     ),
+
+	    TP_fast_assign(
+			   __entry->dev = dev;
+			   __entry->seqno = seqno;
+			   ),
+
+	    TP_printk("dev=%p, seqno=%u", __entry->dev, __entry->seqno)
+);
+
+TRACE_EVENT(i915_gem_request_wait_begin,
+
+	    TP_PROTO(struct drm_device *dev, u32 seqno),
+
+	    TP_ARGS(dev, seqno),
+
+	    TP_STRUCT__entry(
+			     __field(struct drm_device *, dev)
+			     __field(u32, seqno)
+			     ),
+
+	    TP_fast_assign(
+			   __entry->dev = dev;
+			   __entry->seqno = seqno;
+			   ),
+
+	    TP_printk("dev=%p, seqno=%u", __entry->dev, __entry->seqno)
+);
+
+TRACE_EVENT(i915_gem_request_wait_end,
+
+	    TP_PROTO(struct drm_device *dev, u32 seqno),
+
+	    TP_ARGS(dev, seqno),
+
+	    TP_STRUCT__entry(
+			     __field(struct drm_device *, dev)
+			     __field(u32, seqno)
+			     ),
+
+	    TP_fast_assign(
+			   __entry->dev = dev;
+			   __entry->seqno = seqno;
+			   ),
+
+	    TP_printk("dev=%p, seqno=%u", __entry->dev, __entry->seqno)
+);
+
+#endif /* _I915_TRACE_H_ */
+
+/* This part must be outside protection */
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH ../../drivers/gpu/drm/i915
+#include <trace/define_trace.h>
diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h
index 8e1e925..b768b70 100644
--- a/include/drm/i915_drm.h
+++ b/include/drm/i915_drm.h
@@ -667,4 +667,39 @@ struct drm_i915_get_pipe_from_crtc_id {
 	__u32 pipe;
 };
 
+/* i915 tracing interface */
+
+enum {
+	I915_TRACE_HEADER,
+
+	I915_TRACE_OBJECT_CREATE,
+	I915_TRACE_OBJECT_BIND,
+	I915_TRACE_OBJECT_CHANGE_DOMAIN,
+	I915_TRACE_OBJECT_GET_FENCE,
+	I915_TRACE_OBJECT_UNBIND,
+	I915_TRACE_OBJECT_DESTROY,
+
+	I915_TRACE_REQUEST_SUBMIT,
+	I915_TRACE_REQUEST_FLUSH,
+	I915_TRACE_REQUEST_COMPLETE,
+	I915_TRACE_REQUEST_RETIRE,
+	I915_TRACE_REQUEST_WAIT_BEGIN,
+	I915_TRACE_REQUEST_WAIT_END,
+};
+
+#define I915_TRACE_VERSION 0
+#define I915_TRACE_MAGIC 0xdeadbeef
+
+struct i915_trace_event {
+	__u64 time;
+	__u32 minor;
+	__u32 id;
+	union {
+		__u64 obj;
+		__u32 seqno;
+	};
+	__u32 arg1;
+	__u32 arg2;
+};
+
 #endif				/* _I915_DRM_H_ */
-- 
1.6.3.3




More information about the Intel-gfx mailing list