[Intel-gfx] [PATCH] drm/i915: Implement wait-time accounting for i915
Ben Gamari
bgamari.foss at gmail.com
Wed Jul 1 04:11:04 CEST 2009
Implement a mechanism for tracking CPU waits. This will hopefully aid in
identifying GPU stalls. Likely wait-points are surrounded with a pair
of I915_BEGIN_WAIT/I915_END_WAIT macros which time the enclosed region,
crediting the wait to a given wait source. Thanks to ickle for the
guidance in creating this patch.
Signed-off-by: Ben Gamari <bgamari.foss at gmail.com>
---
drivers/gpu/drm/Kconfig | 8 +++
drivers/gpu/drm/i915/i915_drv.h | 45 ++++++++++++++-
drivers/gpu/drm/i915/i915_gem.c | 96 ++++++++++++++++++++-----------
drivers/gpu/drm/i915/i915_gem_debug.c | 6 ++
drivers/gpu/drm/i915/i915_gem_debugfs.c | 1 -
drivers/gpu/drm/i915/i915_gem_tiling.c | 4 +-
drivers/gpu/drm/i915/intel_display.c | 2 +-
drivers/gpu/drm/i915/intel_fb.c | 2 +-
8 files changed, 125 insertions(+), 39 deletions(-)
diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index c961fe4..73482e5 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -110,6 +110,14 @@ config DRM_I915_KMS
the driver to bind to PCI devices, which precludes loading things
like intelfb.
+config DRM_I915_WAIT_ACCOUNTING
+ bool "Enable i915 wait accounting"
+ depends on DRM_I915
+ help
+ Choose this option if you want to enable wait accounting in the i915
+ driver. This is used to identify performance problems within the
+ driver. If unsure, say N.
+
endchoice
config DRM_MGA
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 7a84f04..02f623a 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -33,6 +33,7 @@
#include "i915_reg.h"
#include "intel_bios.h"
#include <linux/io-mapping.h>
+#include <linux/time.h>
/* General customization:
*/
@@ -133,6 +134,42 @@ struct sdvo_device_mapping {
u8 initialized;
};
+enum drm_i915_wait_srcs {
+ I915_WAITSRC_RELOCATE,
+ I915_WAITSRC_EVICT,
+ I915_WAITSRC_EVICT_WAIT,
+ I915_WAITSRC_EVICT_FLUSH,
+ I915_WAITSRC_PWRITE,
+ I915_WAITSRC_PREAD,
+ I915_WAITSRC_CHANGE_DOMAIN_TO_CPU,
+ I915_WAITSRC_CHANGE_DOMAIN_TO_GTT,
+ I915_WAITSRC_FREE,
+ I915_WAITSRC_GET_FENCE,
+ I915_WAITSRC_PUT_FENCE,
+ I915_WAITSRC_CHANGE_TILING,
+ I915_WAITSRC_THROTTLE,
+ I915_WAITSRC_LEAVEVT,
+ I915_WAITSRC_FBO,
+ I915_WAITSRC_LAST
+};
+
+#ifdef CONFIG_DRM_I915_WAIT_ACCOUNTING
+#define I915_MIGHT_WAIT() struct timeval _wait_ts_begin, _wait_ts_end;
+#define I915_BEGIN_WAIT() do_gettimeofday(&_wait_ts_begin);
+#define I915_END_WAIT(priv, src) do { \
+	long int t; \
+	drm_i915_private_t *dev_priv__ = (priv); \
+	do_gettimeofday(&_wait_ts_end); \
+	t = _wait_ts_end.tv_usec - _wait_ts_begin.tv_usec \
+	 + 1000000*(_wait_ts_end.tv_sec - _wait_ts_begin.tv_sec);\
+	i915_wait_end(src, t); \
+} while(0)
+#else
+#define I915_MIGHT_WAIT()
+#define I915_BEGIN_WAIT() do { } while (0)
+#define I915_END_WAIT(priv, src) do { } while (0)
+#endif
+
typedef struct drm_i915_private {
struct drm_device *dev;
@@ -643,7 +680,7 @@ int i915_gem_init_object(struct drm_gem_object *obj);
void i915_gem_free_object(struct drm_gem_object *obj);
int i915_gem_object_pin(struct drm_gem_object *obj, uint32_t alignment);
void i915_gem_object_unpin(struct drm_gem_object *obj);
-int i915_gem_object_unbind(struct drm_gem_object *obj);
+int i915_gem_object_unbind(struct drm_gem_object *obj, int wait_reason);
void i915_gem_lastclose(struct drm_device *dev);
uint32_t i915_get_gem_seqno(struct drm_device *dev);
int i915_gem_object_get_fence_reg(struct drm_gem_object *obj);
@@ -661,7 +698,8 @@ int i915_gem_do_init(struct drm_device *dev, unsigned long start,
int i915_gem_idle(struct drm_device *dev);
int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
int i915_gem_object_set_to_gtt_domain(struct drm_gem_object *obj,
- int write);
+ int write,
+ int reason);
int i915_gem_attach_phys_object(struct drm_device *dev,
struct drm_gem_object *obj, int id);
void i915_gem_detach_phys_object(struct drm_device *dev,
@@ -677,6 +715,9 @@ void i915_gem_object_do_bit_17_swizzle(struct drm_gem_object *obj);
void i915_gem_object_save_bit_17_swizzle(struct drm_gem_object *obj);
/* i915_gem_debug.c */
+#ifdef CONFIG_DRM_I915_WAIT_ACCOUNTING
+void i915_wait_end(enum drm_i915_wait_srcs src, int time);
+#endif
void i915_gem_dump_object(struct drm_gem_object *obj, int len,
const char *where, uint32_t mark);
#if WATCH_INACTIVE
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index fd2b8bd..bdc0dcf 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -38,12 +38,15 @@ static void i915_gem_object_flush_gpu_write_domain(struct drm_gem_object *obj);
static void i915_gem_object_flush_gtt_write_domain(struct drm_gem_object *obj);
static void i915_gem_object_flush_cpu_write_domain(struct drm_gem_object *obj);
static int i915_gem_object_set_to_cpu_domain(struct drm_gem_object *obj,
- int write);
+ int write,
+ int wait_reason);
static int i915_gem_object_set_cpu_read_domain_range(struct drm_gem_object *obj,
uint64_t offset,
- uint64_t size);
+ uint64_t size,
+ int wait_reason);
static void i915_gem_object_set_to_full_cpu_read_domain(struct drm_gem_object *obj);
-static int i915_gem_object_wait_rendering(struct drm_gem_object *obj);
+static int i915_gem_object_wait_rendering(struct drm_gem_object *obj,
+ int wait_reason);
static int i915_gem_object_bind_to_gtt(struct drm_gem_object *obj,
unsigned alignment);
static void i915_gem_clear_fence_reg(struct drm_gem_object *obj);
@@ -275,7 +278,8 @@ i915_gem_shmem_pread_fast(struct drm_device *dev, struct drm_gem_object *obj,
goto fail_unlock;
ret = i915_gem_object_set_cpu_read_domain_range(obj, args->offset,
- args->size);
+ args->size,
+ I915_WAITSRC_PREAD);
if (ret != 0)
goto fail_put_pages;
@@ -370,7 +374,8 @@ i915_gem_shmem_pread_slow(struct drm_device *dev, struct drm_gem_object *obj,
goto fail_unlock;
ret = i915_gem_object_set_cpu_read_domain_range(obj, args->offset,
- args->size);
+ args->size,
+ I915_WAITSRC_PREAD);
if (ret != 0)
goto fail_put_pages;
@@ -572,7 +577,7 @@ i915_gem_gtt_pwrite_fast(struct drm_device *dev, struct drm_gem_object *obj,
mutex_unlock(&dev->struct_mutex);
return ret;
}
- ret = i915_gem_object_set_to_gtt_domain(obj, 1);
+ ret = i915_gem_object_set_to_gtt_domain(obj, 1, I915_WAITSRC_PWRITE);
if (ret)
goto fail;
@@ -666,7 +671,7 @@ i915_gem_gtt_pwrite_slow(struct drm_device *dev, struct drm_gem_object *obj,
if (ret)
goto out_unlock;
- ret = i915_gem_object_set_to_gtt_domain(obj, 1);
+ ret = i915_gem_object_set_to_gtt_domain(obj, 1, I915_WAITSRC_PWRITE);
if (ret)
goto out_unpin_object;
@@ -748,7 +753,7 @@ i915_gem_shmem_pwrite_fast(struct drm_device *dev, struct drm_gem_object *obj,
if (ret != 0)
goto fail_unlock;
- ret = i915_gem_object_set_to_cpu_domain(obj, 1);
+ ret = i915_gem_object_set_to_cpu_domain(obj, 1, I915_WAITSRC_PWRITE);
if (ret != 0)
goto fail_put_pages;
@@ -844,7 +849,7 @@ i915_gem_shmem_pwrite_slow(struct drm_device *dev, struct drm_gem_object *obj,
if (ret != 0)
goto fail_unlock;
- ret = i915_gem_object_set_to_cpu_domain(obj, 1);
+ ret = i915_gem_object_set_to_cpu_domain(obj, 1, I915_WAITSRC_PWRITE);
if (ret != 0)
goto fail_put_pages;
@@ -1010,7 +1015,9 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
obj, obj->size, read_domains, write_domain);
#endif
if (read_domains & I915_GEM_DOMAIN_GTT) {
- ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);
+ ret = i915_gem_object_set_to_gtt_domain(obj,
+ write_domain != 0,
+ I915_WAITSRC_CHANGE_DOMAIN_TO_GTT);
/* Silently promote "you're not bound, there was nothing to do"
* to success, since the client was just asking us to
@@ -1019,7 +1026,9 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
if (ret == -EINVAL)
ret = 0;
} else {
- ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);
+ ret = i915_gem_object_set_to_cpu_domain(obj,
+ write_domain != 0,
+ I915_WAITSRC_CHANGE_DOMAIN_TO_CPU);
}
drm_gem_object_unreference(obj);
@@ -1145,7 +1154,7 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
return VM_FAULT_SIGBUS;
}
- ret = i915_gem_object_set_to_gtt_domain(obj, write);
+ ret = i915_gem_object_set_to_gtt_domain(obj, write, I915_WAITSRC_CHANGE_DOMAIN_TO_GTT);
if (ret) {
mutex_unlock(&dev->struct_mutex);
return VM_FAULT_SIGBUS;
@@ -1703,7 +1712,7 @@ i915_gem_retire_work_handler(struct work_struct *work)
* request and object lists appropriately for that event.
*/
static int
-i915_wait_request(struct drm_device *dev, uint32_t seqno)
+i915_wait_request(struct drm_device *dev, uint32_t seqno, int reason)
{
drm_i915_private_t *dev_priv = dev->dev_private;
u32 ier;
@@ -1712,6 +1721,8 @@ i915_wait_request(struct drm_device *dev, uint32_t seqno)
BUG_ON(seqno == 0);
if (!i915_seqno_passed(i915_get_gem_seqno(dev), seqno)) {
+ I915_MIGHT_WAIT();
+
if (IS_IGDNG(dev))
ier = I915_READ(DEIER) | I915_READ(GTIER);
else
@@ -1723,6 +1734,8 @@ i915_wait_request(struct drm_device *dev, uint32_t seqno)
i915_driver_irq_postinstall(dev);
}
+ I915_BEGIN_WAIT();
+
dev_priv->mm.waiting_gem_seqno = seqno;
i915_user_irq_get(dev);
ret = wait_event_interruptible(dev_priv->irq_queue,
@@ -1731,6 +1744,9 @@ i915_wait_request(struct drm_device *dev, uint32_t seqno)
dev_priv->mm.wedged);
i915_user_irq_put(dev);
dev_priv->mm.waiting_gem_seqno = 0;
+
+ if (ret == 0)
+ I915_END_WAIT(dev_priv, reason);
}
if (dev_priv->mm.wedged)
ret = -EIO;
@@ -1826,7 +1842,7 @@ i915_gem_flush(struct drm_device *dev,
* safe to unbind from the GTT or access from the CPU.
*/
static int
-i915_gem_object_wait_rendering(struct drm_gem_object *obj)
+i915_gem_object_wait_rendering(struct drm_gem_object *obj, int wait_reason)
{
struct drm_device *dev = obj->dev;
struct drm_i915_gem_object *obj_priv = obj->driver_private;
@@ -1845,7 +1861,9 @@ i915_gem_object_wait_rendering(struct drm_gem_object *obj)
DRM_INFO("%s: object %p wait for seqno %08x\n",
__func__, obj, obj_priv->last_rendering_seqno);
#endif
- ret = i915_wait_request(dev, obj_priv->last_rendering_seqno);
+ ret = i915_wait_request(dev,
+ obj_priv->last_rendering_seqno,
+ wait_reason);
if (ret != 0)
return ret;
}
@@ -1857,7 +1875,7 @@ i915_gem_object_wait_rendering(struct drm_gem_object *obj)
* Unbinds an object from the GTT aperture.
*/
int
-i915_gem_object_unbind(struct drm_gem_object *obj)
+i915_gem_object_unbind(struct drm_gem_object *obj, int wait_reason)
{
struct drm_device *dev = obj->dev;
struct drm_i915_gem_object *obj_priv = obj->driver_private;
@@ -1882,7 +1900,7 @@ i915_gem_object_unbind(struct drm_gem_object *obj)
* also ensure that all pending GPU writes are finished
* before we unbind.
*/
- ret = i915_gem_object_set_to_cpu_domain(obj, 1);
+ ret = i915_gem_object_set_to_cpu_domain(obj, 1, wait_reason);
if (ret) {
if (ret != -ERESTARTSYS)
DRM_ERROR("set_domain failed: %d\n", ret);
@@ -1929,6 +1947,7 @@ i915_gem_evict_something(struct drm_device *dev)
struct drm_gem_object *obj;
struct drm_i915_gem_object *obj_priv;
int ret = 0;
+ int wait_reason = I915_WAITSRC_EVICT_WAIT;
for (;;) {
/* If there's an inactive buffer available now, grab it
@@ -1946,7 +1965,8 @@ i915_gem_evict_something(struct drm_device *dev)
BUG_ON(obj_priv->active);
/* Wait on the rendering and unbind the buffer. */
- ret = i915_gem_object_unbind(obj);
+ ret = i915_gem_object_unbind(obj,
+ I915_WAITSRC_EVICT);
break;
}
@@ -1961,7 +1981,9 @@ i915_gem_evict_something(struct drm_device *dev)
struct drm_i915_gem_request,
list);
- ret = i915_wait_request(dev, request->seqno);
+ ret = i915_wait_request(dev,
+ request->seqno,
+ wait_reason);
if (ret)
break;
@@ -1985,7 +2007,8 @@ i915_gem_evict_something(struct drm_device *dev)
struct drm_i915_gem_object,
list);
obj = obj_priv->obj;
-
+
+ wait_reason = I915_WAITSRC_EVICT_FLUSH;
i915_gem_flush(dev,
obj->write_domain,
obj->write_domain);
@@ -2264,7 +2287,8 @@ try_again:
return -ENOMEM;
}
- ret = i915_wait_request(dev, seqno);
+ ret = i915_wait_request(dev, seqno,
+ I915_WAITSRC_GET_FENCE);
if (ret)
return ret;
goto try_again;
@@ -2352,7 +2376,7 @@ i915_gem_object_put_fence_reg(struct drm_gem_object *obj)
i915_gem_object_flush_gpu_write_domain(obj);
i915_gem_object_flush_gtt_write_domain(obj);
- ret = i915_gem_object_wait_rendering(obj);
+ ret = i915_gem_object_wait_rendering(obj, I915_WAITSRC_PUT_FENCE);
if (ret != 0)
return ret;
}
@@ -2538,7 +2562,9 @@ i915_gem_object_flush_cpu_write_domain(struct drm_gem_object *obj)
* flushes to occur.
*/
int
-i915_gem_object_set_to_gtt_domain(struct drm_gem_object *obj, int write)
+i915_gem_object_set_to_gtt_domain(struct drm_gem_object *obj,
+ int write,
+ int wait_reason)
{
struct drm_i915_gem_object *obj_priv = obj->driver_private;
int ret;
@@ -2549,7 +2575,7 @@ i915_gem_object_set_to_gtt_domain(struct drm_gem_object *obj, int write)
i915_gem_object_flush_gpu_write_domain(obj);
/* Wait on any GPU rendering and flushing to occur. */
- ret = i915_gem_object_wait_rendering(obj);
+ ret = i915_gem_object_wait_rendering(obj, wait_reason);
if (ret != 0)
return ret;
@@ -2581,13 +2607,15 @@ i915_gem_object_set_to_gtt_domain(struct drm_gem_object *obj, int write)
* flushes to occur.
*/
static int
-i915_gem_object_set_to_cpu_domain(struct drm_gem_object *obj, int write)
+i915_gem_object_set_to_cpu_domain(struct drm_gem_object *obj,
+ int write,
+ int wait_reason)
{
int ret;
i915_gem_object_flush_gpu_write_domain(obj);
/* Wait on any GPU rendering and flushing to occur. */
- ret = i915_gem_object_wait_rendering(obj);
+ ret = i915_gem_object_wait_rendering(obj, wait_reason);
if (ret != 0)
return ret;
@@ -2850,17 +2878,18 @@ i915_gem_object_set_to_full_cpu_read_domain(struct drm_gem_object *obj)
*/
static int
i915_gem_object_set_cpu_read_domain_range(struct drm_gem_object *obj,
- uint64_t offset, uint64_t size)
+ uint64_t offset, uint64_t size,
+ int wait_reason)
{
struct drm_i915_gem_object *obj_priv = obj->driver_private;
int i, ret;
if (offset == 0 && size == obj->size)
- return i915_gem_object_set_to_cpu_domain(obj, 0);
+ return i915_gem_object_set_to_cpu_domain(obj, 0, wait_reason);
i915_gem_object_flush_gpu_write_domain(obj);
/* Wait on any GPU rendering and flushing to occur. */
- ret = i915_gem_object_wait_rendering(obj);
+ ret = i915_gem_object_wait_rendering(obj, wait_reason);
if (ret != 0)
return ret;
i915_gem_object_flush_gtt_write_domain(obj);
@@ -3028,7 +3057,8 @@ i915_gem_object_pin_and_relocate(struct drm_gem_object *obj,
continue;
}
- ret = i915_gem_object_set_to_gtt_domain(obj, 1);
+ ret = i915_gem_object_set_to_gtt_domain(obj, 1,
+ I915_WAITSRC_RELOCATE);
if (ret != 0) {
drm_gem_object_unreference(target_obj);
i915_gem_object_unpin(obj);
@@ -3151,7 +3181,7 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file_priv)
if (time_after_eq(request->emitted_jiffies, recent_enough))
break;
- ret = i915_wait_request(dev, request->seqno);
+ ret = i915_wait_request(dev, request->seqno, I915_WAITSRC_THROTTLE);
if (ret != 0)
break;
}
@@ -3803,7 +3833,7 @@ void i915_gem_free_object(struct drm_gem_object *obj)
if (obj_priv->phys_obj)
i915_gem_detach_phys_object(dev, obj);
- i915_gem_object_unbind(obj);
+ i915_gem_object_unbind(obj, I915_WAITSRC_FREE);
i915_gem_free_mmap_offset(obj);
@@ -3832,7 +3862,7 @@ i915_gem_evict_from_list(struct drm_device *dev, struct list_head *head)
return -EINVAL;
}
- ret = i915_gem_object_unbind(obj);
+ ret = i915_gem_object_unbind(obj, I915_WAITSRC_LEAVEVT);
if (ret != 0) {
DRM_ERROR("Error unbinding object in LeaveVT: %d\n",
ret);
diff --git a/drivers/gpu/drm/i915/i915_gem_debug.c b/drivers/gpu/drm/i915/i915_gem_debug.c
index 8d0b943..1a141ba 100644
--- a/drivers/gpu/drm/i915/i915_gem_debug.c
+++ b/drivers/gpu/drm/i915/i915_gem_debug.c
@@ -30,6 +30,12 @@
#include "i915_drm.h"
#include "i915_drv.h"
+#ifdef CONFIG_DRM_I915_WAIT_ACCOUNTING
+void i915_wait_end(enum drm_i915_wait_srcs src, int time) {
+ trace_mark(i915_wait, "source %d time %d", src, time);
+}
+#endif
+
#if WATCH_INACTIVE
void
i915_verify_inactive(struct drm_device *dev, char *file, int line)
diff --git a/drivers/gpu/drm/i915/i915_gem_debugfs.c b/drivers/gpu/drm/i915/i915_gem_debugfs.c
index 28146e4..cb91698 100644
--- a/drivers/gpu/drm/i915/i915_gem_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_gem_debugfs.c
@@ -323,7 +323,6 @@ static int i915_ringbuffer_info(struct seq_file *m, void *data)
return 0;
}
-
static struct drm_info_list i915_gem_debugfs_list[] = {
{"i915_gem_active", i915_gem_object_list_info, 0, (void *) ACTIVE_LIST},
{"i915_gem_flushing", i915_gem_object_list_info, 0, (void *) FLUSHING_LIST},
diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
index 5c1ceec..83cb84f 100644
--- a/drivers/gpu/drm/i915/i915_gem_tiling.c
+++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
@@ -508,9 +508,11 @@ i915_gem_set_tiling(struct drm_device *dev, void *data,
* need to ensure that any fence register is cleared.
*/
if (!i915_gem_object_fence_offset_ok(obj, args->tiling_mode))
- ret = i915_gem_object_unbind(obj);
+ ret = i915_gem_object_unbind(obj,
+ I915_WAITSRC_CHANGE_TILING);
else
ret = i915_gem_object_put_fence_reg(obj);
+
if (ret != 0) {
WARN(ret != -ERESTARTSYS,
"failed to reset object for tiling switch");
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 3e1c781..b34b976 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -834,7 +834,7 @@ intel_pipe_set_base(struct drm_crtc *crtc, int x, int y,
return ret;
}
- ret = i915_gem_object_set_to_gtt_domain(obj, 1);
+ ret = i915_gem_object_set_to_gtt_domain(obj, 1, I915_WAITSRC_FBO);
if (ret != 0) {
i915_gem_object_unpin(obj);
mutex_unlock(&dev->struct_mutex);
diff --git a/drivers/gpu/drm/i915/intel_fb.c b/drivers/gpu/drm/i915/intel_fb.c
index 1af7d68..1e64748 100644
--- a/drivers/gpu/drm/i915/intel_fb.c
+++ b/drivers/gpu/drm/i915/intel_fb.c
@@ -468,7 +468,7 @@ static int intelfb_create(struct drm_device *dev, uint32_t fb_width,
}
/* Flush everything out, we'll be doing GTT only from now on */
- i915_gem_object_set_to_gtt_domain(fbo, 1);
+ i915_gem_object_set_to_gtt_domain(fbo, 1, I915_WAITSRC_FBO);
ret = intel_framebuffer_create(dev, &mode_cmd, &fb, fbo);
if (ret) {
--
1.6.3.1
More information about the Intel-gfx
mailing list