[Intel-gfx] [PATCH 1/2] drm/i915: Check for scratch page scribbling
Mika Kuoppala
mika.kuoppala at linux.intel.com
Wed Feb 10 10:49:55 UTC 2021
Chris Wilson <chris at chris-wilson.co.uk> writes:
> Periodically check, for example when idling and upon closing user
> contexts, whether or not some client has written into unallocated PTE in
> their ppGTT.
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> ---
> .../drm/i915/gem/selftests/i915_gem_context.c | 19 +++++++--
> drivers/gpu/drm/i915/gt/intel_engine_cs.c | 31 +-------------
> drivers/gpu/drm/i915/gt/intel_gt_pm.c | 4 ++
> drivers/gpu/drm/i915/gt/intel_gtt.c | 42 +++++++++++++++++++
> drivers/gpu/drm/i915/gt/intel_gtt.h | 1 +
> drivers/gpu/drm/i915/i915_scheduler.c | 33 +--------------
> drivers/gpu/drm/i915/i915_utils.c | 29 +++++++++++++
> drivers/gpu/drm/i915/i915_utils.h | 3 ++
> 8 files changed, 98 insertions(+), 64 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
> index df949320f2b5..b0c349a46e6a 100644
> --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
> @@ -1737,7 +1737,7 @@ static int read_from_scratch(struct i915_gem_context *ctx,
> return err;
> }
>
> -static int check_scratch_page(struct i915_gem_context *ctx, u32 *out)
> +static int check_ctx_scratch(struct i915_gem_context *ctx, u32 *out)
> {
> struct i915_address_space *vm;
> struct page *page;
> @@ -1770,6 +1770,17 @@ static int check_scratch_page(struct i915_gem_context *ctx, u32 *out)
> return err;
> }
>
> +static void reset_ctx_scratch(struct i915_gem_context *ctx, u32 value)
> +{
> + struct i915_address_space *vm = ctx_vm(ctx);
> + struct page *page = __px_page(vm->scratch[0]);
> + u32 *vaddr;
> +
> + vaddr = kmap(page);
> + memset32(vaddr, value, PAGE_SIZE / sizeof(value));
> + kunmap(page);
> +}
> +
> static int igt_vm_isolation(void *arg)
> {
> struct drm_i915_private *i915 = arg;
> @@ -1816,11 +1827,11 @@ static int igt_vm_isolation(void *arg)
> goto out_file;
>
> /* Read the initial state of the scratch page */
> - err = check_scratch_page(ctx_a, &expected);
> + err = check_ctx_scratch(ctx_a, &expected);
> if (err)
> goto out_file;
>
> - err = check_scratch_page(ctx_b, &expected);
> + err = check_ctx_scratch(ctx_b, &expected);
> if (err)
> goto out_file;
>
> @@ -1876,6 +1887,8 @@ static int igt_vm_isolation(void *arg)
> count, num_engines);
>
> out_file:
> + /* As we deliberately write into scratch, cover up our tracks */
> + reset_ctx_scratch(ctx_a, expected);
> if (igt_live_test_end(&t))
> err = -EIO;
> fput(file);
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> index 577ebd4a324f..8443794df3ee 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> @@ -1265,35 +1265,6 @@ bool intel_engine_can_store_dword(struct intel_engine_cs *engine)
> }
> }
>
> -static void hexdump(struct drm_printer *m, const void *buf, size_t len)
> -{
> - const size_t rowsize = 8 * sizeof(u32);
> - const void *prev = NULL;
> - bool skip = false;
> - size_t pos;
> -
> - for (pos = 0; pos < len; pos += rowsize) {
> - char line[128];
> -
> - if (prev && !memcmp(prev, buf + pos, rowsize)) {
> - if (!skip) {
> - drm_printf(m, "*\n");
> - skip = true;
> - }
> - continue;
> - }
> -
> - WARN_ON_ONCE(hex_dump_to_buffer(buf + pos, len - pos,
> - rowsize, sizeof(u32),
> - line, sizeof(line),
> - false) >= sizeof(line));
> - drm_printf(m, "[%04zx] %s\n", pos, line);
> -
> - prev = buf + pos;
> - skip = false;
> - }
> -}
> -
> static void intel_engine_print_registers(struct intel_engine_cs *engine,
> struct drm_printer *m)
> {
> @@ -1450,7 +1421,7 @@ void intel_engine_dump(struct intel_engine_cs *engine,
> }
>
> drm_printf(m, "HWSP:\n");
> - hexdump(m, engine->status_page.addr, PAGE_SIZE);
> + i915_hexdump(m, engine->status_page.addr, PAGE_SIZE);
>
> drm_printf(m, "Idle? %s\n", yesno(intel_engine_is_idle(engine)));
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
> index 0bd303d2823e..38375a006a99 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c
> +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
> @@ -11,6 +11,7 @@
> #include "intel_context.h"
> #include "intel_engine_pm.h"
> #include "intel_gt.h"
> +#include "intel_gtt.h"
> #include "intel_gt_clock_utils.h"
> #include "intel_gt_pm.h"
> #include "intel_gt_requests.h"
> @@ -100,6 +101,9 @@ static int __gt_park(struct intel_wakeref *wf)
> runtime_end(gt);
> intel_gt_park_requests(gt);
>
> + if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
> + check_scratch_page(gt->vm);
> +
> i915_vma_parked(gt);
> i915_pmu_gt_parked(i915);
> intel_rps_park(&gt->rps);
> diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c
> index d34770ae4c9a..5ac9eb4a3a92 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gtt.c
> +++ b/drivers/gpu/drm/i915/gt/intel_gtt.c
> @@ -158,10 +158,49 @@ static void poison_scratch_page(struct drm_i915_gem_object *scratch)
>
> vaddr = kmap(page);
> memset(vaddr, val, PAGE_SIZE);
> + set_page_dirty(page); /* keep the poisoned contents */
Should we use the locked version here?
-Mika
> kunmap(page);
> }
> }
>
> +void check_scratch_page(const struct i915_address_space *vm)
> +{
> + struct drm_i915_gem_object *scratch;
> + struct sgt_iter sgt;
> + struct page *page;
> + void *vaddr;
> + u8 val;
> +
> + scratch = vm->scratch[0];
> + if (!scratch || !i915_gem_object_has_struct_page(scratch))
> + return;
> +
> + val = 0;
> + if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
> + val = POISON_FREE;
> +
> + for_each_sgt_page(page, sgt, scratch->mm.pages) {
> + vaddr = kmap(page);
> + drm_clflush_virt_range(vaddr, PAGE_SIZE);
> + if (memchr_inv(vaddr, val, PAGE_SIZE)) {
> + struct drm_printer p = drm_err_printer(__func__);
> +
> + drm_err(&vm->i915->drm,
> + "%s scratch page overwitten!\n",
> + i915_is_ggtt(vm) ? "Global" : "Per-process");
> + i915_hexdump(&p, vaddr, PAGE_SIZE);
> + vaddr = NULL;
> + }
> + kunmap(page);
> + if (!vaddr)
> + break;
> + }
> +
> + /* Restore the poison, so fresh errors will be detected */
> + if (!vaddr)
> + poison_scratch_page(scratch);
> +}
> +
> int setup_scratch_page(struct i915_address_space *vm)
> {
> unsigned long size;
> @@ -229,6 +268,9 @@ void free_scratch(struct i915_address_space *vm)
> {
> int i;
>
> + if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
> + check_scratch_page(vm);
> +
> for (i = 0; i <= vm->top; i++)
> i915_gem_object_put(vm->scratch[i]);
> }
> diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h
> index 24b5808df16d..a5b312c6485a 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gtt.h
> +++ b/drivers/gpu/drm/i915/gt/intel_gtt.h
> @@ -519,6 +519,7 @@ fill_page_dma(struct drm_i915_gem_object *p, const u64 val, unsigned int count);
> } while (0)
>
> int setup_scratch_page(struct i915_address_space *vm);
> +void check_scratch_page(const struct i915_address_space *vm);
> void free_scratch(struct i915_address_space *vm);
>
> struct drm_i915_gem_object *alloc_pt_dma(struct i915_address_space *vm, int sz);
> diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
> index a8fb787278e6..7241f85c9967 100644
> --- a/drivers/gpu/drm/i915/i915_scheduler.c
> +++ b/drivers/gpu/drm/i915/i915_scheduler.c
> @@ -1095,35 +1095,6 @@ void i915_request_show_with_schedule(struct drm_printer *m,
> rcu_read_unlock();
> }
>
> -static void hexdump(struct drm_printer *m, const void *buf, size_t len)
> -{
> - const size_t rowsize = 8 * sizeof(u32);
> - const void *prev = NULL;
> - bool skip = false;
> - size_t pos;
> -
> - for (pos = 0; pos < len; pos += rowsize) {
> - char line[128];
> -
> - if (prev && !memcmp(prev, buf + pos, rowsize)) {
> - if (!skip) {
> - drm_printf(m, "*\n");
> - skip = true;
> - }
> - continue;
> - }
> -
> - WARN_ON_ONCE(hex_dump_to_buffer(buf + pos, len - pos,
> - rowsize, sizeof(u32),
> - line, sizeof(line),
> - false) >= sizeof(line));
> - drm_printf(m, "[%04zx] %s\n", pos, line);
> -
> - prev = buf + pos;
> - skip = false;
> - }
> -}
> -
> static void
> print_request_ring(struct drm_printer *m, const struct i915_request *rq)
> {
> @@ -1153,7 +1124,7 @@ print_request_ring(struct drm_printer *m, const struct i915_request *rq)
> }
> memcpy(ring + len, vaddr + head, size - len);
>
> - hexdump(m, ring, size);
> + i915_hexdump(m, ring, size);
> kfree(ring);
> }
> }
> @@ -1195,7 +1166,7 @@ void i915_sched_show(struct drm_printer *m,
>
> if (rq->context->lrc_reg_state) {
> drm_printf(m, "Logical Ring Context:\n");
> - hexdump(m, rq->context->lrc_reg_state, PAGE_SIZE);
> + i915_hexdump(m, rq->context->lrc_reg_state, PAGE_SIZE);
> }
> }
>
> diff --git a/drivers/gpu/drm/i915/i915_utils.c b/drivers/gpu/drm/i915/i915_utils.c
> index 894de60833ec..432ad0926586 100644
> --- a/drivers/gpu/drm/i915/i915_utils.c
> +++ b/drivers/gpu/drm/i915/i915_utils.c
> @@ -49,6 +49,35 @@ __i915_printk(struct drm_i915_private *dev_priv, const char *level,
> }
> }
>
> +void i915_hexdump(struct drm_printer *m, const void *buf, size_t len)
> +{
> + const size_t rowsize = 8 * sizeof(u32);
> + const void *prev = NULL;
> + bool skip = false;
> + size_t pos;
> +
> + for (pos = 0; pos < len; pos += rowsize) {
> + char line[128];
> +
> + if (prev && !memcmp(prev, buf + pos, rowsize)) {
> + if (!skip) {
> + drm_printf(m, "*\n");
> + skip = true;
> + }
> + continue;
> + }
> +
> + WARN_ON_ONCE(hex_dump_to_buffer(buf + pos, len - pos,
> + rowsize, sizeof(u32),
> + line, sizeof(line),
> + false) >= sizeof(line));
> + drm_printf(m, "[%04zx] %s\n", pos, line);
> +
> + prev = buf + pos;
> + skip = false;
> + }
> +}
> +
> void add_taint_for_CI(struct drm_i915_private *i915, unsigned int taint)
> {
> __i915_printk(i915, KERN_NOTICE, "CI tainted:%#x by %pS\n",
> diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h
> index 4618fe8aacb5..c82461d6ae71 100644
> --- a/drivers/gpu/drm/i915/i915_utils.h
> +++ b/drivers/gpu/drm/i915/i915_utils.h
> @@ -32,6 +32,7 @@
> #include <linux/workqueue.h>
>
> struct drm_i915_private;
> +struct drm_printer;
> struct timer_list;
>
> #define FDO_BUG_URL "https://gitlab.freedesktop.org/drm/intel/-/wikis/How-to-file-i915-bugs"
> @@ -82,6 +83,8 @@ bool i915_error_injected(void);
> __i915_printk(i915, i915_error_injected() ? KERN_DEBUG : KERN_ERR, \
> fmt, ##__VA_ARGS__)
>
> +void i915_hexdump(struct drm_printer *m, const void *buf, size_t len);
> +
> #if defined(GCC_VERSION) && GCC_VERSION >= 70000
> #define add_overflows_t(T, A, B) \
> __builtin_add_overflow_p((A), (B), (T)0)
> --
> 2.20.1
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
More information about the Intel-gfx mailing list