[Intel-gfx] [PATCH] [DRM] Implement wait-time accounting for i915
Ben Gamari
bgamari.foss at gmail.com
Tue May 26 19:09:32 CEST 2009
Implement a mechanism for tracking CPU waits. This will hopefully aid in
identifying GPU stalls. Likely wait points are surrounded by a pair
of I915_BEGIN_WAIT/I915_END_WAIT macros that time the enclosed region,
crediting the elapsed wait time to a given wait source. Thanks to ickle
for the guidance in creating this patch.
---
drivers/gpu/drm/Kconfig | 8 +++
drivers/gpu/drm/i915/i915_drv.h | 44 ++++++++++++++-
drivers/gpu/drm/i915/i915_gem.c | 93 ++++++++++++++++++++----------
drivers/gpu/drm/i915/i915_gem_debugfs.c | 32 +++++++++++
drivers/gpu/drm/i915/i915_gem_tiling.c | 3 +-
drivers/gpu/drm/i915/intel_display.c | 3 +-
drivers/gpu/drm/i915/intel_fb.c | 2 +-
7 files changed, 149 insertions(+), 36 deletions(-)
diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index 4cd35d8..1bb7724 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -97,6 +97,14 @@ config DRM_I915_KMS
the driver to bind to PCI devices, which precludes loading things
like intelfb.
+config DRM_I915_WAIT_ACCOUNTING
+ bool "Enable i915 wait accounting"
+ depends on DRM_I915
+ help
+ Choose this option if you want to enable wait accounting in the i915
+ driver. This is used to identify performance problems within the
+ driver. If unsure, say N.
+
endchoice
config DRM_MGA
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 9b149fe..3249c4f 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -33,6 +33,7 @@
#include "i915_reg.h"
#include "intel_bios.h"
#include <linux/io-mapping.h>
+#include <linux/time.h>
/* General customization:
*/
@@ -126,6 +127,40 @@ struct drm_i915_fence_reg {
struct drm_gem_object *obj;
};
+enum drm_i915_wait_srcs {
+ I915_WAITSRC_RELOCATE,
+ I915_WAITSRC_EVICT,
+ I915_WAITSRC_EVICT_WAIT,
+ I915_WAITSRC_EVICT_FLUSH,
+ I915_WAITSRC_PWRITE,
+ I915_WAITSRC_PREAD,
+ I915_WAITSRC_CHANGE_DOMAIN,
+ I915_WAITSRC_FREE,
+ I915_WAITSRC_GET_FENCE,
+ I915_WAITSRC_PUT_FENCE,
+ I915_WAITSRC_CHANGE_TILING,
+ I915_WAITSRC_THROTTLE,
+ I915_WAITSRC_LEAVEVT,
+ I915_WAITSRC_FBO,
+ I915_WAITSRC_LAST
+};
+
+#ifdef CONFIG_DRM_I915_WAIT_ACCOUNTING
+#define I915_MIGHT_WAIT() struct timeval _wait_ts_begin, _wait_ts_end;
+#define I915_BEGIN_WAIT() do_gettimeofday(&_wait_ts_begin);
+#define I915_END_WAIT(priv, src) do { \
+ drm_i915_private_t *dev_priv__ = (priv); \
+ do_gettimeofday(&_wait_ts_end); \
+ dev_priv__->mm.wait_time[src] += _wait_ts_end.tv_usec - _wait_ts_begin.tv_usec; \
+ dev_priv__->mm.wait_time[src] += 1000000*(_wait_ts_end.tv_sec - _wait_ts_begin.tv_sec); \
+ dev_priv__->mm.wait_count[src]++; \
+} while(0)
+#else
+#define I915_MIGHT_WAIT() {}
+#define I915_BEGIN_WAIT() {}
+#define I915_END_WAIT(priv, src) {}
+#endif
+
typedef struct drm_i915_private {
struct drm_device *dev;
@@ -380,6 +415,10 @@ typedef struct drm_i915_private {
/* storage for physical objects */
struct drm_i915_gem_phys_object *phys_objs[I915_MAX_PHYS_OBJECT];
+
+ /* wait source accumulators */
+ long int wait_time[I915_WAITSRC_LAST];
+ long int wait_count[I915_WAITSRC_LAST];
} mm;
} drm_i915_private_t;
@@ -617,7 +656,7 @@ int i915_gem_init_object(struct drm_gem_object *obj);
void i915_gem_free_object(struct drm_gem_object *obj);
int i915_gem_object_pin(struct drm_gem_object *obj, uint32_t alignment);
void i915_gem_object_unpin(struct drm_gem_object *obj);
-int i915_gem_object_unbind(struct drm_gem_object *obj);
+int i915_gem_object_unbind(struct drm_gem_object *obj, int wait_reason);
void i915_gem_lastclose(struct drm_device *dev);
uint32_t i915_get_gem_seqno(struct drm_device *dev);
void i915_gem_retire_requests(struct drm_device *dev);
@@ -633,7 +672,8 @@ int i915_gem_do_init(struct drm_device *dev, unsigned long start,
int i915_gem_idle(struct drm_device *dev);
int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
int i915_gem_object_set_to_gtt_domain(struct drm_gem_object *obj,
- int write);
+ int write,
+ int reason);
int i915_gem_attach_phys_object(struct drm_device *dev,
struct drm_gem_object *obj, int id);
void i915_gem_detach_phys_object(struct drm_device *dev,
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index b189b49..252ffdc 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -38,12 +38,15 @@ static void i915_gem_object_flush_gpu_write_domain(struct drm_gem_object *obj);
static void i915_gem_object_flush_gtt_write_domain(struct drm_gem_object *obj);
static void i915_gem_object_flush_cpu_write_domain(struct drm_gem_object *obj);
static int i915_gem_object_set_to_cpu_domain(struct drm_gem_object *obj,
- int write);
+ int write,
+ int reason);
static int i915_gem_object_set_cpu_read_domain_range(struct drm_gem_object *obj,
uint64_t offset,
- uint64_t size);
+ uint64_t size,
+ int reason);
static void i915_gem_object_set_to_full_cpu_read_domain(struct drm_gem_object *obj);
-static int i915_gem_object_wait_rendering(struct drm_gem_object *obj);
+static int i915_gem_object_wait_rendering(struct drm_gem_object *obj,
+ int wait_reason);
static int i915_gem_object_bind_to_gtt(struct drm_gem_object *obj,
unsigned alignment);
static int i915_gem_object_get_fence_reg(struct drm_gem_object *obj, bool write);
@@ -276,7 +279,8 @@ i915_gem_shmem_pread_fast(struct drm_device *dev, struct drm_gem_object *obj,
goto fail_unlock;
ret = i915_gem_object_set_cpu_read_domain_range(obj, args->offset,
- args->size);
+ args->size,
+ I915_WAITSRC_PREAD);
if (ret != 0)
goto fail_put_pages;
@@ -371,7 +375,8 @@ i915_gem_shmem_pread_slow(struct drm_device *dev, struct drm_gem_object *obj,
goto fail_unlock;
ret = i915_gem_object_set_cpu_read_domain_range(obj, args->offset,
- args->size);
+ args->size,
+ I915_WAITSRC_PREAD);
if (ret != 0)
goto fail_put_pages;
@@ -573,7 +578,7 @@ i915_gem_gtt_pwrite_fast(struct drm_device *dev, struct drm_gem_object *obj,
mutex_unlock(&dev->struct_mutex);
return ret;
}
- ret = i915_gem_object_set_to_gtt_domain(obj, 1);
+ ret = i915_gem_object_set_to_gtt_domain(obj, 1, I915_WAITSRC_PWRITE);
if (ret)
goto fail;
@@ -667,7 +672,7 @@ i915_gem_gtt_pwrite_slow(struct drm_device *dev, struct drm_gem_object *obj,
if (ret)
goto out_unlock;
- ret = i915_gem_object_set_to_gtt_domain(obj, 1);
+ ret = i915_gem_object_set_to_gtt_domain(obj, 1, I915_WAITSRC_PWRITE);
if (ret)
goto out_unpin_object;
@@ -749,7 +754,7 @@ i915_gem_shmem_pwrite_fast(struct drm_device *dev, struct drm_gem_object *obj,
if (ret != 0)
goto fail_unlock;
- ret = i915_gem_object_set_to_cpu_domain(obj, 1);
+ ret = i915_gem_object_set_to_cpu_domain(obj, 1, I915_WAITSRC_PWRITE);
if (ret != 0)
goto fail_put_pages;
@@ -845,7 +850,7 @@ i915_gem_shmem_pwrite_slow(struct drm_device *dev, struct drm_gem_object *obj,
if (ret != 0)
goto fail_unlock;
- ret = i915_gem_object_set_to_cpu_domain(obj, 1);
+ ret = i915_gem_object_set_to_cpu_domain(obj, 1, I915_WAITSRC_PWRITE);
if (ret != 0)
goto fail_put_pages;
@@ -1011,7 +1016,9 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
obj, obj->size, read_domains, write_domain);
#endif
if (read_domains & I915_GEM_DOMAIN_GTT) {
- ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);
+ ret = i915_gem_object_set_to_gtt_domain(obj,
+ write_domain != 0,
+ I915_WAITSRC_CHANGE_DOMAIN);
/* Silently promote "you're not bound, there was nothing to do"
* to success, since the client was just asking us to
@@ -1020,7 +1027,9 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
if (ret == -EINVAL)
ret = 0;
} else {
- ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);
+ ret = i915_gem_object_set_to_cpu_domain(obj,
+ write_domain != 0,
+ I915_WAITSRC_CHANGE_DOMAIN);
}
drm_gem_object_unreference(obj);
@@ -1688,7 +1697,7 @@ i915_gem_retire_work_handler(struct work_struct *work)
* request and object lists appropriately for that event.
*/
static int
-i915_wait_request(struct drm_device *dev, uint32_t seqno)
+i915_wait_request(struct drm_device *dev, uint32_t seqno, int reason)
{
drm_i915_private_t *dev_priv = dev->dev_private;
u32 ier;
@@ -1697,6 +1706,8 @@ i915_wait_request(struct drm_device *dev, uint32_t seqno)
BUG_ON(seqno == 0);
if (!i915_seqno_passed(i915_get_gem_seqno(dev), seqno)) {
+ I915_MIGHT_WAIT();
+
ier = I915_READ(IER);
if (!ier) {
DRM_ERROR("something (likely vbetool) disabled "
@@ -1705,6 +1716,8 @@ i915_wait_request(struct drm_device *dev, uint32_t seqno)
i915_driver_irq_postinstall(dev);
}
+ I915_BEGIN_WAIT();
+
dev_priv->mm.waiting_gem_seqno = seqno;
i915_user_irq_get(dev);
ret = wait_event_interruptible(dev_priv->irq_queue,
@@ -1713,6 +1726,9 @@ i915_wait_request(struct drm_device *dev, uint32_t seqno)
dev_priv->mm.wedged);
i915_user_irq_put(dev);
dev_priv->mm.waiting_gem_seqno = 0;
+
+ if (ret == 0)
+ I915_END_WAIT(dev_priv, reason);
}
if (dev_priv->mm.wedged)
ret = -EIO;
@@ -1809,7 +1825,7 @@ i915_gem_flush(struct drm_device *dev,
* safe to unbind from the GTT or access from the CPU.
*/
static int
-i915_gem_object_wait_rendering(struct drm_gem_object *obj)
+i915_gem_object_wait_rendering(struct drm_gem_object *obj, int wait_reason)
{
struct drm_device *dev = obj->dev;
struct drm_i915_gem_object *obj_priv = obj->driver_private;
@@ -1828,7 +1844,9 @@ i915_gem_object_wait_rendering(struct drm_gem_object *obj)
DRM_INFO("%s: object %p wait for seqno %08x\n",
__func__, obj, obj_priv->last_rendering_seqno);
#endif
- ret = i915_wait_request(dev, obj_priv->last_rendering_seqno);
+ ret = i915_wait_request(dev,
+ obj_priv->last_rendering_seqno,
+ wait_reason);
if (ret != 0)
return ret;
}
@@ -1840,7 +1858,7 @@ i915_gem_object_wait_rendering(struct drm_gem_object *obj)
* Unbinds an object from the GTT aperture.
*/
int
-i915_gem_object_unbind(struct drm_gem_object *obj)
+i915_gem_object_unbind(struct drm_gem_object *obj, int wait_reason)
{
struct drm_device *dev = obj->dev;
struct drm_i915_gem_object *obj_priv = obj->driver_private;
@@ -1865,7 +1883,7 @@ i915_gem_object_unbind(struct drm_gem_object *obj)
* also ensure that all pending GPU writes are finished
* before we unbind.
*/
- ret = i915_gem_object_set_to_cpu_domain(obj, 1);
+ ret = i915_gem_object_set_to_cpu_domain(obj, 1, wait_reason);
if (ret) {
if (ret != -ERESTARTSYS)
DRM_ERROR("set_domain failed: %d\n", ret);
@@ -1912,6 +1930,7 @@ i915_gem_evict_something(struct drm_device *dev)
struct drm_gem_object *obj;
struct drm_i915_gem_object *obj_priv;
int ret = 0;
+ int wait_reason = I915_WAITSRC_EVICT_WAIT;
for (;;) {
/* If there's an inactive buffer available now, grab it
@@ -1929,7 +1948,8 @@ i915_gem_evict_something(struct drm_device *dev)
BUG_ON(obj_priv->active);
/* Wait on the rendering and unbind the buffer. */
- ret = i915_gem_object_unbind(obj);
+ ret = i915_gem_object_unbind(obj,
+ I915_WAITSRC_EVICT);
break;
}
@@ -1944,7 +1964,9 @@ i915_gem_evict_something(struct drm_device *dev)
struct drm_i915_gem_request,
list);
- ret = i915_wait_request(dev, request->seqno);
+ ret = i915_wait_request(dev,
+ request->seqno,
+ wait_reason);
if (ret)
break;
@@ -1968,7 +1990,8 @@ i915_gem_evict_something(struct drm_device *dev)
struct drm_i915_gem_object,
list);
obj = obj_priv->obj;
-
+
+ wait_reason = I915_WAITSRC_EVICT_FLUSH;
i915_gem_flush(dev,
obj->write_domain,
obj->write_domain);
@@ -2239,6 +2262,7 @@ try_again:
*/
if (i == dev_priv->num_fence_regs) {
if (seqno == dev_priv->mm.next_gem_seqno) {
+
i915_gem_flush(dev,
I915_GEM_GPU_DOMAINS,
I915_GEM_GPU_DOMAINS);
@@ -2248,7 +2272,8 @@ try_again:
return -ENOMEM;
}
- ret = i915_wait_request(dev, seqno);
+ ret = i915_wait_request(dev, seqno,
+ I915_WAITSRC_GET_FENCE);
if (ret)
return ret;
goto try_again;
@@ -2479,7 +2504,9 @@ i915_gem_object_flush_cpu_write_domain(struct drm_gem_object *obj)
* flushes to occur.
*/
int
-i915_gem_object_set_to_gtt_domain(struct drm_gem_object *obj, int write)
+i915_gem_object_set_to_gtt_domain(struct drm_gem_object *obj,
+ int write,
+ int reason)
{
struct drm_i915_gem_object *obj_priv = obj->driver_private;
int ret;
@@ -2490,7 +2517,7 @@ i915_gem_object_set_to_gtt_domain(struct drm_gem_object *obj, int write)
i915_gem_object_flush_gpu_write_domain(obj);
/* Wait on any GPU rendering and flushing to occur. */
- ret = i915_gem_object_wait_rendering(obj);
+ ret = i915_gem_object_wait_rendering(obj, reason);
if (ret != 0)
return ret;
@@ -2522,13 +2549,15 @@ i915_gem_object_set_to_gtt_domain(struct drm_gem_object *obj, int write)
* flushes to occur.
*/
static int
-i915_gem_object_set_to_cpu_domain(struct drm_gem_object *obj, int write)
+i915_gem_object_set_to_cpu_domain(struct drm_gem_object *obj,
+ int write,
+ int reason)
{
int ret;
i915_gem_object_flush_gpu_write_domain(obj);
/* Wait on any GPU rendering and flushing to occur. */
- ret = i915_gem_object_wait_rendering(obj);
+ ret = i915_gem_object_wait_rendering(obj, reason);
if (ret != 0)
return ret;
@@ -2792,17 +2821,18 @@ i915_gem_object_set_to_full_cpu_read_domain(struct drm_gem_object *obj)
*/
static int
i915_gem_object_set_cpu_read_domain_range(struct drm_gem_object *obj,
- uint64_t offset, uint64_t size)
+ uint64_t offset, uint64_t size,
+ int reason)
{
struct drm_i915_gem_object *obj_priv = obj->driver_private;
int i, ret;
if (offset == 0 && size == obj->size)
- return i915_gem_object_set_to_cpu_domain(obj, 0);
+ return i915_gem_object_set_to_cpu_domain(obj, 0, reason);
i915_gem_object_flush_gpu_write_domain(obj);
/* Wait on any GPU rendering and flushing to occur. */
- ret = i915_gem_object_wait_rendering(obj);
+ ret = i915_gem_object_wait_rendering(obj, reason);
if (ret != 0)
return ret;
i915_gem_object_flush_gtt_write_domain(obj);
@@ -2970,7 +3000,8 @@ i915_gem_object_pin_and_relocate(struct drm_gem_object *obj,
continue;
}
- ret = i915_gem_object_set_to_gtt_domain(obj, 1);
+ ret = i915_gem_object_set_to_gtt_domain(obj, 1,
+ I915_WAITSRC_RELOCATE);
if (ret != 0) {
drm_gem_object_unreference(target_obj);
i915_gem_object_unpin(obj);
@@ -3091,7 +3122,7 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file_priv)
i915_file_priv->mm.last_gem_throttle_seqno =
i915_file_priv->mm.last_gem_seqno;
if (seqno)
- ret = i915_wait_request(dev, seqno);
+ ret = i915_wait_request(dev, seqno, I915_WAITSRC_THROTTLE);
mutex_unlock(&dev->struct_mutex);
return ret;
}
@@ -3721,7 +3752,7 @@ void i915_gem_free_object(struct drm_gem_object *obj)
if (obj_priv->phys_obj)
i915_gem_detach_phys_object(dev, obj);
- i915_gem_object_unbind(obj);
+ i915_gem_object_unbind(obj, I915_WAITSRC_FREE);
i915_gem_free_mmap_offset(obj);
@@ -3750,7 +3781,7 @@ i915_gem_evict_from_list(struct drm_device *dev, struct list_head *head)
return -EINVAL;
}
- ret = i915_gem_object_unbind(obj);
+ ret = i915_gem_object_unbind(obj, I915_WAITSRC_LEAVEVT);
if (ret != 0) {
DRM_ERROR("Error unbinding object in LeaveVT: %d\n",
ret);
diff --git a/drivers/gpu/drm/i915/i915_gem_debugfs.c b/drivers/gpu/drm/i915/i915_gem_debugfs.c
index 1bd3c66..c77f74f 100644
--- a/drivers/gpu/drm/i915/i915_gem_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_gem_debugfs.c
@@ -323,6 +323,35 @@ static int i915_ringbuffer_info(struct seq_file *m, void *data)
return 0;
}
+#ifdef CONFIG_DRM_I915_WAIT_ACCOUNTING
+static int i915_wait_source_info(struct seq_file *m, void *data)
+{
+ struct drm_info_node *node = (struct drm_info_node *) m->private;
+ struct drm_device *dev = node->minor->dev;
+ drm_i915_private_t *dev_priv = dev->dev_private;
+
+#define WAITSRC(SRC) seq_printf(m, "%15s %12ld %6ld\n", #SRC, \
+ dev_priv->mm.wait_time[I915_WAITSRC_##SRC], \
+ dev_priv->mm.wait_count[I915_WAITSRC_##SRC])
+ WAITSRC(RELOCATE);
+ WAITSRC(EVICT);
+ WAITSRC(EVICT_WAIT);
+ WAITSRC(EVICT_FLUSH);
+ WAITSRC(PWRITE);
+ WAITSRC(PREAD);
+ WAITSRC(CHANGE_DOMAIN);
+ WAITSRC(FREE);
+ WAITSRC(GET_FENCE);
+ WAITSRC(PUT_FENCE);
+ WAITSRC(CHANGE_TILING);
+ WAITSRC(THROTTLE);
+ WAITSRC(LEAVEVT);
+ WAITSRC(FBO);
+#undef WAITSRC
+
+ return 0;
+}
+#endif
static struct drm_info_list i915_gem_debugfs_list[] = {
{"i915_gem_active", i915_gem_object_list_info, 0, (void *) ACTIVE_LIST},
@@ -336,6 +365,9 @@ static struct drm_info_list i915_gem_debugfs_list[] = {
{"i915_ringbuffer_data", i915_ringbuffer_data, 0},
{"i915_ringbuffer_info", i915_ringbuffer_info, 0},
{"i915_batchbuffers", i915_batchbuffer_info, 0},
+#ifdef CONFIG_DRM_I915_WAIT_ACCOUNTING
+ {"i915_wait_sources", i915_wait_source_info, 0},
+#endif
};
#define I915_GEM_DEBUGFS_ENTRIES ARRAY_SIZE(i915_gem_debugfs_list)
diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
index 52a0593..eb9559f 100644
--- a/drivers/gpu/drm/i915/i915_gem_tiling.c
+++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
@@ -314,7 +314,8 @@ i915_gem_set_tiling(struct drm_device *dev, void *data,
/* Unbind the object, as switching tiling means we're
* switching the cache organization due to fencing, probably.
*/
- ret = i915_gem_object_unbind(obj);
+ ret = i915_gem_object_unbind(obj,
+ I915_WAITSRC_CHANGE_TILING);
if (ret != 0) {
WARN(ret != -ERESTARTSYS,
"failed to unbind object for tiling switch");
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 3387cf3..976f942 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -704,7 +704,8 @@ intel_pipe_set_base(struct drm_crtc *crtc, int x, int y,
return ret;
}
- ret = i915_gem_object_set_to_gtt_domain(intel_fb->obj, 1);
+ ret = i915_gem_object_set_to_gtt_domain(intel_fb->obj, 1,
+ I915_WAITSRC_FBO);
if (ret != 0) {
i915_gem_object_unpin(intel_fb->obj);
mutex_unlock(&dev->struct_mutex);
diff --git a/drivers/gpu/drm/i915/intel_fb.c b/drivers/gpu/drm/i915/intel_fb.c
index e4652dc..1e1aeb0 100644
--- a/drivers/gpu/drm/i915/intel_fb.c
+++ b/drivers/gpu/drm/i915/intel_fb.c
@@ -468,7 +468,7 @@ static int intelfb_create(struct drm_device *dev, uint32_t fb_width,
}
/* Flush everything out, we'll be doing GTT only from now on */
- i915_gem_object_set_to_gtt_domain(fbo, 1);
+ i915_gem_object_set_to_gtt_domain(fbo, 1, I915_WAITSRC_FBO);
ret = intel_framebuffer_create(dev, &mode_cmd, &fb, fbo);
if (ret) {
--
1.6.2.2
More information about the Intel-gfx
mailing list