[PATCH 5/5] drm/i915: Rely on spinlock protection for GPU error capture
Chris Wilson
chris at chris-wilson.co.uk
Tue Jul 9 19:54:14 UTC 2019
Trust that we now have adequate protection over the low-level structures
via the engine->active.lock to allow ourselves to capture the GPU error
state without the heavy hammer of stop_machine().
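A useful side effect is that this allows us to restore error capturing
for Braswell and Broxton, where it had been disabled to prevent the VT-d
workaround from recursively calling stop_machine() and deadlocking.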
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
---
drivers/gpu/drm/i915/i915_gem_gtt.c | 5 --
drivers/gpu/drm/i915/i915_gpu_error.c | 78 ++++++++++++---------------
2 files changed, 35 insertions(+), 48 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 236c964dd761..e0645ce4fb84 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -3135,11 +3135,6 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
ggtt->vm.insert_page = bxt_vtd_ggtt_insert_page__BKL;
if (ggtt->vm.clear_range != nop_clear_range)
ggtt->vm.clear_range = bxt_vtd_ggtt_clear_range__BKL;
-
- /* Prevent recursively calling stop_machine() and deadlocks. */
- dev_info(dev_priv->drm.dev,
- "Disabling error capture for VT-d workaround\n");
- i915_disable_error_state(dev_priv, -ENODEV);
}
ggtt->invalidate = gen6_ggtt_invalidate;
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index f297a43df1e9..5374e57a56f8 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -30,7 +30,6 @@
#include <linux/ascii85.h>
#include <linux/nmi.h>
#include <linux/scatterlist.h>
-#include <linux/stop_machine.h>
#include <linux/utsname.h>
#include <linux/zlib.h>
@@ -46,6 +45,8 @@
#include "i915_scatterlist.h"
#include "intel_csr.h"
+#define ALLOW_FAIL (GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN)
+
static inline const struct intel_engine_cs *
engine_lookup(const struct drm_i915_private *i915, unsigned int id)
{
@@ -114,7 +115,7 @@ static bool __i915_error_grow(struct drm_i915_error_state_buf *e, size_t len)
if (e->cur == e->end) {
struct scatterlist *sgl;
- sgl = (typeof(sgl))__get_free_page(GFP_KERNEL);
+ sgl = (typeof(sgl))__get_free_page(ALLOW_FAIL);
if (!sgl) {
e->err = -ENOMEM;
return false;
@@ -134,7 +135,7 @@ static bool __i915_error_grow(struct drm_i915_error_state_buf *e, size_t len)
}
e->size = ALIGN(len + 1, SZ_64K);
- e->buf = kmalloc(e->size, GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY);
+ e->buf = kmalloc(e->size, ALLOW_FAIL);
if (!e->buf) {
e->size = PAGE_ALIGN(len + 1);
e->buf = kmalloc(e->size, GFP_KERNEL);
@@ -224,7 +225,7 @@ static bool compress_init(struct compress *c)
zstream->workspace =
kmalloc(zlib_deflate_workspacesize(MAX_WBITS, MAX_MEM_LEVEL),
- GFP_ATOMIC | __GFP_NOWARN);
+ ALLOW_FAIL);
if (!zstream->workspace)
return false;
@@ -235,7 +236,7 @@ static bool compress_init(struct compress *c)
c->tmp = NULL;
if (i915_has_memcpy_from_wc())
- c->tmp = (void *)__get_free_page(GFP_ATOMIC | __GFP_NOWARN);
+ c->tmp = (void *)__get_free_page(ALLOW_FAIL);
return true;
}
@@ -247,7 +248,7 @@ static void *compress_next_page(struct drm_i915_error_object *dst)
if (dst->page_count >= dst->num_pages)
return ERR_PTR(-ENOSPC);
- page = __get_free_page(GFP_ATOMIC | __GFP_NOWARN);
+ page = __get_free_page(GFP_KERNEL | __GFP_NOWARN);
if (!page)
return ERR_PTR(-ENOMEM);
@@ -349,7 +350,7 @@ static int compress_page(struct compress *c,
unsigned long page;
void *ptr;
- page = __get_free_page(GFP_ATOMIC | __GFP_NOWARN);
+ page = __get_free_page(ALLOW_FAIL);
if (!page)
return -ENOMEM;
@@ -1006,8 +1007,7 @@ i915_error_object_create(struct drm_i915_private *i915,
num_pages = min_t(u64, vma->size, vma->obj->base.size) >> PAGE_SHIFT;
num_pages = DIV_ROUND_UP(10 * num_pages, 8); /* worstcase zlib growth */
- dst = kmalloc(sizeof(*dst) + num_pages * sizeof(u32 *),
- GFP_ATOMIC | __GFP_NOWARN);
+ dst = kmalloc(sizeof(*dst) + num_pages * sizeof(u32 *), ALLOW_FAIL);
if (!dst)
return NULL;
@@ -1281,7 +1281,7 @@ static void engine_record_requests(struct intel_engine_cs *engine,
if (!count)
return;
- ee->requests = kcalloc(count, sizeof(*ee->requests), GFP_ATOMIC);
+ ee->requests = kcalloc(count, sizeof(*ee->requests), ALLOW_FAIL);
if (!ee->requests)
return;
@@ -1362,11 +1362,11 @@ static void request_record_user_bo(struct i915_request *request,
if (!max)
return;
- bo = kmalloc_array(max, sizeof(*bo), GFP_ATOMIC);
+ bo = kmalloc_array(max, sizeof(*bo), ALLOW_FAIL);
if (!bo) {
/* If we can't capture everything, try to capture something. */
max = min_t(long, max, PAGE_SIZE / sizeof(*bo));
- bo = kmalloc_array(max, sizeof(*bo), GFP_ATOMIC);
+ bo = kmalloc_array(max, sizeof(*bo), ALLOW_FAIL);
}
if (!bo)
return;
@@ -1491,7 +1491,7 @@ static void gem_capture_vm(struct i915_gpu_state *error,
active_bo = NULL;
if (count)
- active_bo = kcalloc(count, sizeof(*active_bo), GFP_ATOMIC);
+ active_bo = kcalloc(count, sizeof(*active_bo), ALLOW_FAIL);
if (active_bo)
count = capture_error_bo(active_bo,
count, &vm->bound_list,
@@ -1541,7 +1541,7 @@ static void capture_pinned_buffers(struct i915_gpu_state *error)
bo = NULL;
if (count)
- bo = kcalloc(count, sizeof(*bo), GFP_ATOMIC);
+ bo = kcalloc(count, sizeof(*bo), ALLOW_FAIL);
if (!bo)
return;
@@ -1566,8 +1566,8 @@ static void capture_uc_state(struct i915_gpu_state *error)
* As modparams are generally accesible from the userspace make
* explicit copies of the firmware paths.
*/
- error_uc->guc_fw.path = kstrdup(i915->guc.fw.path, GFP_ATOMIC);
- error_uc->huc_fw.path = kstrdup(i915->huc.fw.path, GFP_ATOMIC);
+ error_uc->guc_fw.path = kstrdup(i915->guc.fw.path, ALLOW_FAIL);
+ error_uc->huc_fw.path = kstrdup(i915->huc.fw.path, ALLOW_FAIL);
error_uc->guc_log = i915_error_object_create(i915, i915->guc.log.vma);
}
@@ -1752,9 +1752,26 @@ static void capture_finish(struct i915_gpu_state *error)
ggtt->vm.clear_range(&ggtt->vm, slot, PAGE_SIZE);
}
-static int capture(void *data)
+#define DAY_AS_SECONDS(x) (24 * 60 * 60 * (x))
+
+struct i915_gpu_state *
+i915_capture_gpu_state(struct drm_i915_private *i915)
{
- struct i915_gpu_state *error = data;
+ struct i915_gpu_state *error;
+
+ /* Check if GPU capture has been disabled */
+ error = READ_ONCE(i915->gpu_error.first_error);
+ if (IS_ERR(error))
+ return error;
+
+ error = kzalloc(sizeof(*error), GFP_KERNEL | __GFP_NOWARN);
+ if (!error) {
+ i915_disable_error_state(i915, -ENOMEM);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ kref_init(&error->ref);
+ error->i915 = i915;
error->time = ktime_get_real();
error->boottime = ktime_get_boottime();
@@ -1777,31 +1794,6 @@ static int capture(void *data)
error->epoch = capture_find_epoch(error);
capture_finish(error);
- return 0;
-}
-
-#define DAY_AS_SECONDS(x) (24 * 60 * 60 * (x))
-
-struct i915_gpu_state *
-i915_capture_gpu_state(struct drm_i915_private *i915)
-{
- struct i915_gpu_state *error;
-
- /* Check if GPU capture has been disabled */
- error = READ_ONCE(i915->gpu_error.first_error);
- if (IS_ERR(error))
- return error;
-
- error = kzalloc(sizeof(*error), GFP_ATOMIC);
- if (!error) {
- i915_disable_error_state(i915, -ENOMEM);
- return ERR_PTR(-ENOMEM);
- }
-
- kref_init(&error->ref);
- error->i915 = i915;
-
- stop_machine(capture, error, NULL);
return error;
}
--
2.22.0
More information about the Intel-gfx-trybot
mailing list