[Intel-gfx] [PATCH 2/2] drm/i915: Exclude low pages (128KiB) of stolen from use
Mika Kuoppala
mika.kuoppala at linux.intel.com
Tue Oct 20 08:11:59 UTC 2020
Chris Wilson <chris at chris-wilson.co.uk> writes:
> The GPU is trashing the low pages of its reserved memory upon reset. If
> we are using this memory for ringbuffers, then we will dutifully resubmit
> the trashed rings after the reset causing further resets, and worse. We
> must exclude this range from our own use. The value of 128KiB was found
> by empirical measurement (and verified now with a selftest) on gen9.
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: stable at vger.kernel.org
Reviewed-by: Mika Kuoppala <mika.kuoppala at linux.intel.com>
> ---
> drivers/gpu/drm/i915/Kconfig.debug | 1 +
> drivers/gpu/drm/i915/gem/i915_gem_stolen.c | 6 +-
> drivers/gpu/drm/i915/gem/i915_gem_stolen.h | 2 +
> drivers/gpu/drm/i915/gt/selftest_reset.c | 196 +++++++++++++++++++++
> 4 files changed, 203 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/Kconfig.debug b/drivers/gpu/drm/i915/Kconfig.debug
> index 206882e154bc..0fb7fd0ef717 100644
> --- a/drivers/gpu/drm/i915/Kconfig.debug
> +++ b/drivers/gpu/drm/i915/Kconfig.debug
> @@ -162,6 +162,7 @@ config DRM_I915_SELFTEST
> select DRM_EXPORT_FOR_TESTS if m
> select FAULT_INJECTION
> select PRIME_NUMBERS
> + select CRC32
> help
> Choose this option to allow the driver to perform selftests upon
> loading; also requires the i915.selftest=1 module parameter. To
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
> index 3954ec9981f0..4f923b8c43fb 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
> @@ -53,8 +53,10 @@ int i915_gem_stolen_insert_node(struct drm_i915_private *i915,
> struct drm_mm_node *node, u64 size,
> unsigned alignment)
> {
> - return i915_gem_stolen_insert_node_in_range(i915, node, size,
> - alignment, 0, U64_MAX);
> + return i915_gem_stolen_insert_node_in_range(i915, node,
> + size, alignment,
> + I915_GEM_STOLEN_BIAS,
> + U64_MAX);
> }
>
> void i915_gem_stolen_remove_node(struct drm_i915_private *i915,
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.h b/drivers/gpu/drm/i915/gem/i915_gem_stolen.h
> index e15c0adad8af..61e028063f9f 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.h
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.h
> @@ -30,4 +30,6 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv
> resource_size_t stolen_offset,
> resource_size_t size);
>
> +#define I915_GEM_STOLEN_BIAS SZ_128K
> +
> #endif /* __I915_GEM_STOLEN_H__ */
> diff --git a/drivers/gpu/drm/i915/gt/selftest_reset.c b/drivers/gpu/drm/i915/gt/selftest_reset.c
> index 35406ecdf0b2..ef5aeebbeeb0 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_reset.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_reset.c
> @@ -3,9 +3,203 @@
> * Copyright © 2018 Intel Corporation
> */
>
> +#include <linux/crc32.h>
> +
> +#include "gem/i915_gem_stolen.h"
> +
> +#include "i915_memcpy.h"
> #include "i915_selftest.h"
> #include "selftests/igt_reset.h"
> #include "selftests/igt_atomic.h"
> +#include "selftests/igt_spinner.h"
> +
> +static int
> +__igt_reset_stolen(struct intel_gt *gt,
> + intel_engine_mask_t mask,
> + const char *msg)
> +{
> + struct i915_ggtt *ggtt = &gt->i915->ggtt;
> + const struct resource *dsm = &gt->i915->dsm;
> + resource_size_t num_pages, page;
> + struct intel_engine_cs *engine;
> + intel_wakeref_t wakeref;
> + enum intel_engine_id id;
> + struct igt_spinner spin;
> + long max, count;
> + void *tmp;
> + u32 *crc;
> + int err;
> +
> + if (!drm_mm_node_allocated(&ggtt->error_capture))
> + return 0;
> +
> + num_pages = resource_size(dsm) >> PAGE_SHIFT;
> + if (!num_pages)
> + return 0;
> +
> + crc = kmalloc_array(num_pages, sizeof(u32), GFP_KERNEL);
> + if (!crc)
> + return -ENOMEM;
> +
> + tmp = kmalloc(PAGE_SIZE, GFP_KERNEL);
> + if (!tmp) {
> + err = -ENOMEM;
> + goto err_crc;
> + }
> +
> + igt_global_reset_lock(gt);
> + wakeref = intel_runtime_pm_get(gt->uncore->rpm);
> +
> + err = igt_spinner_init(&spin, gt);
> + if (err)
> + goto err_lock;
> +
> + for_each_engine(engine, gt, id) {
> + struct intel_context *ce;
> + struct i915_request *rq;
> +
> + if (!(mask & engine->mask))
> + continue;
> +
> + if (!intel_engine_can_store_dword(engine))
> + continue;
> +
> + ce = intel_context_create(engine);
> + if (IS_ERR(ce)) {
> + err = PTR_ERR(ce);
> + goto err_spin;
> + }
> + rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
> + intel_context_put(ce);
> + if (IS_ERR(rq)) {
> + err = PTR_ERR(rq);
> + goto err_spin;
> + }
> + i915_request_add(rq);
> + }
> +
> + for (page = 0; page < num_pages; page++) {
> + dma_addr_t dma = (dma_addr_t)dsm->start + (page << PAGE_SHIFT);
> + void __iomem *s;
> + void *in;
> +
> + ggtt->vm.insert_page(&ggtt->vm, dma,
> + ggtt->error_capture.start,
> + I915_CACHE_NONE, 0);
> + mb();
> +
> + s = io_mapping_map_wc(&ggtt->iomap,
> + ggtt->error_capture.start,
> + PAGE_SIZE);
> +
> + if (!__drm_mm_interval_first(&gt->i915->mm.stolen,
> + page << PAGE_SHIFT,
> + ((page + 1) << PAGE_SHIFT) - 1))
> + memset32(s, STACK_MAGIC, PAGE_SIZE / sizeof(u32));
> +
> + in = s;
> + if (i915_memcpy_from_wc(tmp, s, PAGE_SIZE))
> + in = tmp;
> + crc[page] = crc32_le(0, in, PAGE_SIZE);
> +
> + io_mapping_unmap(s);
> + }
> + mb();
> + ggtt->vm.clear_range(&ggtt->vm, ggtt->error_capture.start, PAGE_SIZE);
> +
> + if (mask == ALL_ENGINES) {
> + intel_gt_reset(gt, mask, NULL);
> + } else {
> + for_each_engine(engine, gt, id) {
> + if (mask & engine->mask)
> + intel_engine_reset(engine, NULL);
> + }
> + }
> +
> + max = -1;
> + count = 0;
> + for (page = 0; page < num_pages; page++) {
> + dma_addr_t dma = (dma_addr_t)dsm->start + (page << PAGE_SHIFT);
> + void __iomem *s;
> + void *in;
> + u32 x;
> +
> + ggtt->vm.insert_page(&ggtt->vm, dma,
> + ggtt->error_capture.start,
> + I915_CACHE_NONE, 0);
> + mb();
> +
> + s = io_mapping_map_wc(&ggtt->iomap,
> + ggtt->error_capture.start,
> + PAGE_SIZE);
> +
> + in = s;
> + if (i915_memcpy_from_wc(tmp, s, PAGE_SIZE))
> + in = tmp;
> + x = crc32_le(0, in, PAGE_SIZE);
> +
> + if (x != crc[page] &&
> + !__drm_mm_interval_first(&gt->i915->mm.stolen,
> + page << PAGE_SHIFT,
> + ((page + 1) << PAGE_SHIFT) - 1)) {
> + pr_debug("unused stolen page %pa modified by GPU reset\n",
> + &page);
> + if (count++ == 0)
> + igt_hexdump(in, PAGE_SIZE);
> + max = page;
> + }
> +
> + io_mapping_unmap(s);
> + }
> + mb();
> + ggtt->vm.clear_range(&ggtt->vm, ggtt->error_capture.start, PAGE_SIZE);
> +
> + if (count > 0) {
> + pr_info("%s reset clobbered %ld pages of stolen, last clobber at page %ld\n",
> + msg, count, max);
> + }
> + if (max >= I915_GEM_STOLEN_BIAS >> PAGE_SHIFT) {
> + pr_err("%s reset clobbered unreserved area [above %x] of stolen; may cause severe faults\n",
> + msg, I915_GEM_STOLEN_BIAS);
> + err = -EINVAL;
> + }
> +
> +err_spin:
> + igt_spinner_fini(&spin);
> +
> +err_lock:
> + intel_runtime_pm_put(gt->uncore->rpm, wakeref);
> + igt_global_reset_unlock(gt);
> +
> + kfree(tmp);
> +err_crc:
> + kfree(crc);
> + return err;
> +}
> +
> +static int igt_reset_device_stolen(void *arg)
> +{
> + return __igt_reset_stolen(arg, ALL_ENGINES, "device");
> +}
> +
> +static int igt_reset_engines_stolen(void *arg)
> +{
> + struct intel_gt *gt = arg;
> + struct intel_engine_cs *engine;
> + enum intel_engine_id id;
> + int err;
> +
> + if (!intel_has_reset_engine(gt))
> + return 0;
> +
> + for_each_engine(engine, gt, id) {
> + err = __igt_reset_stolen(gt, engine->mask, engine->name);
> + if (err)
> + return err;
> + }
> +
> + return 0;
> +}
>
> static int igt_global_reset(void *arg)
> {
> @@ -164,6 +358,8 @@ int intel_reset_live_selftests(struct drm_i915_private *i915)
> {
> static const struct i915_subtest tests[] = {
> SUBTEST(igt_global_reset), /* attempt to recover GPU first */
> + SUBTEST(igt_reset_device_stolen),
> + SUBTEST(igt_reset_engines_stolen),
> SUBTEST(igt_wedged_reset),
> SUBTEST(igt_atomic_reset),
> SUBTEST(igt_atomic_engine_reset),
> --
> 2.20.1
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
More information about the Intel-gfx
mailing list