[Intel-gfx] [RFC 08/13] drm/i915: TDR loose ends
Lister, Ian
ian.lister at intel.com
Mon Dec 16 17:03:18 CET 2013
From b6c143ee1f0b65053186be2602f876cc4fd944ab Mon Sep 17 00:00:00 2001
Message-Id: <b6c143ee1f0b65053186be2602f876cc4fd944ab.1387201899.git.ian.lister at intel.com>
In-Reply-To: <cover.1387201899.git.ian.lister at intel.com>
References: <cover.1387201899.git.ian.lister at intel.com>
From: ian-lister <ian.lister at intel.com>
Date: Tue, 10 Dec 2013 15:01:55 +0000
Subject: [RFC 08/13] drm/i915: TDR loose ends
Added check for HANGCHECK_IDLE in i915_set_reset_status as a request
should not be guilty if the ring is recorded as idle.
Cancel the gpu_error.work item in i915_gem_suspend.
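Take gpu_error.lock when clearing the wedged condition in
i915_gem_entervt_ioctl, and clear gpu_error.reset_requests there as well.
Add the i915_enable_watchdog module parameter and i915_init_watchdog(),
which is called from i915_driver_load to compute a per-ring watchdog
threshold.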
Signed-off-by: ian-lister <ian.lister at intel.com>
---
drivers/gpu/drm/i915/i915_dma.c | 2 ++
drivers/gpu/drm/i915/i915_drv.c | 46 +++++++++++++++++++++++++++++++++
drivers/gpu/drm/i915/i915_drv.h | 15 +++++++++++
drivers/gpu/drm/i915/i915_gem.c | 14 +++++++---
drivers/gpu/drm/i915/intel_ringbuffer.h | 1 +
5 files changed, 74 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index e17a5d20..d45e936 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -1661,6 +1661,8 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags)
acpi_video_register();
}
+ i915_init_watchdog(dev);
+
if (IS_GEN5(dev))
intel_gpu_ips_init(dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 38cd27d..42ccfee 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -154,6 +154,11 @@ module_param_named(prefault_disable, i915_prefault_disable, bool, 0600);
MODULE_PARM_DESC(prefault_disable,
"Disable page prefaulting for pread/pwrite/reloc (default:false). For developers only.");
+int i915_enable_watchdog __read_mostly = 1;
+module_param_named(i915_enable_watchdog, i915_enable_watchdog, int, 0644);
+MODULE_PARM_DESC(i915_enable_watchdog,
+ "Enable watchdog timers (default: true)");
+
static struct drm_driver driver;
#if IS_ENABLED(CONFIG_AGP_INTEL)
extern int intel_agp_enabled;
@@ -736,6 +741,47 @@ int i915_resume(struct drm_device *dev)
return 0;
}
+void i915_init_watchdog(struct drm_device *dev)
+{
+ drm_i915_private_t *dev_priv = dev->dev_private;
+ struct intel_ring_buffer *ring;
+ int freq;
+ int i;
+ u32 tout;
+
+ /* Set each ring's hangcheck.watchdog_threshold: the pre-defined
+ * per-engine timeout in ms (0 for rings other than RCS/VCS) times
+ * the number of timestamp counts per millisecond.
+ * For RCS:
+ * The timestamp resolution changed in Gen7 and beyond to 80ns
+ * for all pipes. Before that it was 640ns.
+ */
+ if (INTEL_INFO(dev)->gen >= 7)
+ freq = KM_TIMESTAMP_CNTS_PER_SEC_80NS;
+ else
+ freq = KM_TIMESTAMP_CNTS_PER_SEC_640NS;
+
+ for_each_ring(ring, dev_priv, i) {
+ switch (ring->id) {
+ case RCS:
+ tout = KM_MEDIA_ENGINE_TIMEOUT_VALUE_IN_MS;
+ break;
+ case VCS:
+ tout = KM_BSD_ENGINE_TIMEOUT_VALUE_IN_MS;
+ break;
+ default:
+ tout = 0;
+ break;
+ }
+
+ ring->hangcheck.watchdog_threshold =
+ (tout * (freq / KM_TIMER_MILLISECOND));
+
+ DRM_DEBUG_TDR("%s watchdog threshold = %u\n",
+ ring->name, ring->hangcheck.watchdog_threshold);
+ }
+}
+
/**
* i915_reset - reset chip after a hang
* @dev: drm device to reset
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 1087e4e..6e379f1 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1893,6 +1893,19 @@ struct drm_i915_file_private {
#define GT_FREQUENCY_MULTIPLIER 50
+#define KM_MEDIA_ENGINE_TIMEOUT_VALUE_IN_MS 60
+#define KM_BSD_ENGINE_TIMEOUT_VALUE_IN_MS 60
+#define KM_TIMER_MILLISECOND 1000
+
+/* Timestamp timer resolution = 0.080 uSec, or 12500000 counts per second*/
+#define KM_TIMESTAMP_CNTS_PER_SEC_80NS 12500000
+
+/* Timestamp timer resolution = 0.640 uSec, or 1562500 counts per second*/
+#define KM_TIMESTAMP_CNTS_PER_SEC_640NS 1562500
+
+#define KM_TIMER_MHZ 1000000
+#define KM_CD_CLK_FREQ (450 * KM_TIMER_MHZ)
+
#include "i915_trace.h"
extern const struct drm_ioctl_desc i915_ioctls[];
@@ -1908,6 +1921,7 @@ extern int i915_vbt_sdvo_panel_type __read_mostly;
extern int i915_enable_rc6 __read_mostly;
extern int i915_enable_fbc __read_mostly;
extern bool i915_enable_hangcheck __read_mostly;
+extern int i915_enable_watchdog __read_mostly;
extern int i915_enable_ppgtt __read_mostly;
extern int i915_enable_psr __read_mostly;
extern unsigned int i915_preliminary_hw_support __read_mostly;
@@ -1942,6 +1956,7 @@ extern long i915_compat_ioctl(struct file *filp, unsigned int cmd,
extern int i915_emit_box(struct drm_device *dev,
struct drm_clip_rect *box,
int DR1, int DR4);
+extern void i915_init_watchdog(struct drm_device *dev);
extern int intel_gpu_reset(struct drm_device *dev);
extern int intel_gpu_engine_reset(struct drm_device *dev,
enum intel_ring_id engine);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 5b76da4..a762444 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2251,7 +2251,7 @@ static bool i915_request_guilty(struct drm_i915_gem_request *request,
if (i915_head_inside_object(acthd, request->batch_obj,
request_to_vm(request))) {
*inside = true;
- DRM_DEBUG_TDR("Head in obj 0x%08x\n",
+ DRM_DEBUG_TDR("Head in obj %p\n",
request->batch_obj);
return true;
}
@@ -2259,7 +2259,7 @@ static bool i915_request_guilty(struct drm_i915_gem_request *request,
if (i915_head_inside_request(acthd, request->head, request->tail)) {
*inside = false;
- DRM_DEBUG_TDR("Head in req 0x%08x (HD: 0x%08x TL: 0x%08x)\n",
+ DRM_DEBUG_TDR("Head in req %p (HD: 0x%08x TL: 0x%08x)\n",
request, request->head, request->tail);
return true;
}
@@ -2297,8 +2297,9 @@ void i915_set_reset_status(struct intel_ring_buffer *ring,
offset = i915_gem_obj_offset(request->batch_obj,
request_to_vm(request));
- if (ring->hangcheck.action != HANGCHECK_WAIT &&
- i915_request_guilty(request, acthd, &inside)) {
+ if (!((ring->hangcheck.action == HANGCHECK_WAIT)
+ || (ring->hangcheck.action == HANGCHECK_IDLE))
+ && i915_request_guilty(request, acthd, &inside)) {
DRM_ERROR("%s hung %s bo (0x%lx ctx %d) at 0x%x\n",
ring->name,
inside ? "inside" : "flushing",
@@ -4300,6 +4301,7 @@ i915_gem_suspend(struct drm_device *dev)
del_timer_sync(&dev_priv->gpu_error.hangcheck_timer);
atomic_set(&dev_priv->gpu_error.active, 0);
+ cancel_work_sync(&dev_priv->gpu_error.work);
cancel_delayed_work_sync(&dev_priv->mm.retire_work);
cancel_delayed_work_sync(&dev_priv->mm.idle_work);
@@ -4527,6 +4529,7 @@ i915_gem_entervt_ioctl(struct drm_device *dev, void *data,
struct drm_file *file_priv)
{
struct drm_i915_private *dev_priv = dev->dev_private;
+ unsigned long irqflags;
int ret;
if (drm_core_check_feature(dev, DRIVER_MODESET))
@@ -4534,7 +4537,10 @@ i915_gem_entervt_ioctl(struct drm_device *dev, void *data,
if (i915_reset_in_progress(&dev_priv->gpu_error)) {
DRM_ERROR("Reenabling wedged hardware, good luck\n");
+ spin_lock_irqsave(&dev_priv->gpu_error.lock, irqflags);
+ dev_priv->gpu_error.reset_requests = 0;
atomic_set(&dev_priv->gpu_error.reset_counter, 0);
+ spin_unlock_irqrestore(&dev_priv->gpu_error.lock, irqflags);
}
mutex_lock(&dev->struct_mutex);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 7235720..c3d6cbb 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -69,6 +69,7 @@ struct intel_ring_hangcheck {
u32 total; /* Total resets applied to this ring/engine*/
u32 last_head; /* Head value recorded at last hang */
u32 status_updated;
+ u32 watchdog_threshold;
};
struct intel_ring_buffer {
--
1.8.5.1
More information about the Intel-gfx
mailing list