[Intel-gfx] [PATCH 1/2] drm/i915/gt: Keep a no-frills swappable copy of the default context state
Matthew Auld
matthew.auld at intel.com
Wed Apr 29 16:11:38 UTC 2020
On 28/04/2020 22:55, Chris Wilson wrote:
> We need to keep the default context state around to instantiate new
> contexts (aka golden rendercontext), and we also keep it pinned while
> the engine is active so that we can quickly reset a hanging context.
> However, the default contexts are large enough to merit keeping in
> swappable memory as opposed to kernel memory, so we store them inside
> shmemfs. Currently, we use the normal GEM objects to create the default
> context image, but we can throw away all but the shmemfs file.
>
> This greatly simplifies the tricky power management code which wants to
> run underneath the normal GT locking, and we definitely do not want to
> use any high level objects that may appear to recurse back into the GT.
> Though perhaps the primary advantage of the complex GEM object is that
> we aggressively cache the mapping, but here we are recreating the
> vm_area everytime time we unpark. At the worst, we add a lightweight
> cache, but first find a microbenchmark that is impacted.
>
> Having started to create some utility functions to make working with
> shmemfs objects easier, we can start putting them to wider use, where
> GEM objects are overkill, such as storing persistent error state.
Is there any point in having the default state in device local-memory,
and if so does this change the story at all? I'm guessing not...
Anyway, the shmem helper stuff sounds like just what we need.
Reviewed-by: Matthew Auld <matthew.auld at intel.com>
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Matthew Auld <matthew.auld at intel.com>
> Cc: Ramalingam C <ramalingam.c at intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> ---
> drivers/gpu/drm/i915/Makefile | 1 +
> drivers/gpu/drm/i915/gt/intel_engine_cs.c | 2 +-
> drivers/gpu/drm/i915/gt/intel_engine_pm.c | 10 +-
> drivers/gpu/drm/i915/gt/intel_engine_types.h | 2 +-
> drivers/gpu/drm/i915/gt/intel_gt.c | 4 +-
> drivers/gpu/drm/i915/gt/intel_lrc.c | 25 +--
> .../gpu/drm/i915/gt/intel_ring_submission.c | 16 +-
> drivers/gpu/drm/i915/gt/selftest_context.c | 2 +-
> drivers/gpu/drm/i915/gt/selftest_lrc.c | 10 +-
> drivers/gpu/drm/i915/gt/shmem_utils.c | 176 ++++++++++++++++++
> drivers/gpu/drm/i915/gt/shmem_utils.h | 22 +++
> drivers/gpu/drm/i915/i915_gpu_error.c | 26 ---
> 12 files changed, 224 insertions(+), 72 deletions(-)
> create mode 100644 drivers/gpu/drm/i915/gt/shmem_utils.c
> create mode 100644 drivers/gpu/drm/i915/gt/shmem_utils.h
>
> diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
> index 21bb2fb5a6b8..caf00d92ea9d 100644
> --- a/drivers/gpu/drm/i915/Makefile
> +++ b/drivers/gpu/drm/i915/Makefile
> @@ -111,6 +111,7 @@ gt-y += \
> gt/intel_sseu.o \
> gt/intel_timeline.o \
> gt/intel_workarounds.o \
> + gt/shmem_utils.o \
> gt/sysfs_engines.o
> # autogenerated null render state
> gt-y += \
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> index 7c3cb5aedfdf..80bf71636c0f 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> @@ -834,7 +834,7 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine)
> intel_engine_cleanup_cmd_parser(engine);
>
> if (engine->default_state)
> - i915_gem_object_put(engine->default_state);
> + fput(engine->default_state);
>
> if (engine->kernel_context) {
> intel_context_unpin(engine->kernel_context);
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
> index 3be679741d22..446e35ac0224 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
> @@ -15,6 +15,7 @@
> #include "intel_gt_pm.h"
> #include "intel_rc6.h"
> #include "intel_ring.h"
> +#include "shmem_utils.h"
>
> static int __engine_unpark(struct intel_wakeref *wf)
> {
> @@ -30,10 +31,8 @@ static int __engine_unpark(struct intel_wakeref *wf)
> /* Pin the default state for fast resets from atomic context. */
> map = NULL;
> if (engine->default_state)
> - map = i915_gem_object_pin_map(engine->default_state,
> - I915_MAP_WB);
> - if (!IS_ERR_OR_NULL(map))
> - engine->pinned_default_state = map;
> + map = shmem_pin_map(engine->default_state);
> + engine->pinned_default_state = map;
>
> /* Discard stale context state from across idling */
> ce = engine->kernel_context;
> @@ -264,7 +263,8 @@ static int __engine_park(struct intel_wakeref *wf)
> engine->park(engine);
>
> if (engine->pinned_default_state) {
> - i915_gem_object_unpin_map(engine->default_state);
> + shmem_unpin_map(engine->default_state,
> + engine->pinned_default_state);
> engine->pinned_default_state = NULL;
> }
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> index cfe4feaee982..483d8ff39a0d 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> @@ -339,7 +339,7 @@ struct intel_engine_cs {
>
> unsigned long wakeref_serial;
> struct intel_wakeref wakeref;
> - struct drm_i915_gem_object *default_state;
> + struct file *default_state;
> void *pinned_default_state;
>
> struct {
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
> index d9cf8194c997..c3f25a2a5706 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt.c
> +++ b/drivers/gpu/drm/i915/gt/intel_gt.c
> @@ -16,6 +16,7 @@
> #include "intel_rps.h"
> #include "intel_uncore.h"
> #include "intel_pm.h"
> +#include "shmem_utils.h"
>
> void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915)
> {
> @@ -501,7 +502,8 @@ static int __engines_record_defaults(struct intel_gt *gt)
> goto out;
> }
>
> - rq->engine->default_state = i915_gem_object_get(state->obj);
> + rq->engine->default_state =
> + shmem_create_from_object(state->obj);
> i915_gem_object_unpin_map(state->obj);
> }
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index 90acc3d2440b..7fc4081c34fe 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -147,6 +147,7 @@
> #include "intel_reset.h"
> #include "intel_ring.h"
> #include "intel_workarounds.h"
> +#include "shmem_utils.h"
>
> #define RING_EXECLIST_QFULL (1 << 0x2)
> #define RING_EXECLIST1_VALID (1 << 0x3)
> @@ -5083,30 +5084,18 @@ populate_lr_context(struct intel_context *ce,
> {
> bool inhibit = true;
> void *vaddr;
> - int ret;
>
> vaddr = i915_gem_object_pin_map(ctx_obj, I915_MAP_WB);
> if (IS_ERR(vaddr)) {
> - ret = PTR_ERR(vaddr);
> - drm_dbg(&engine->i915->drm,
> - "Could not map object pages! (%d)\n", ret);
> - return ret;
> + drm_dbg(&engine->i915->drm, "Could not map object pages!\n");
> + return PTR_ERR(vaddr);
> }
>
> set_redzone(vaddr, engine);
>
> if (engine->default_state) {
> - void *defaults;
> -
> - defaults = i915_gem_object_pin_map(engine->default_state,
> - I915_MAP_WB);
> - if (IS_ERR(defaults)) {
> - ret = PTR_ERR(defaults);
> - goto err_unpin_ctx;
> - }
> -
> - memcpy(vaddr, defaults, engine->context_size);
> - i915_gem_object_unpin_map(engine->default_state);
> + shmem_read(engine->default_state, 0,
> + vaddr, engine->context_size);
> __set_bit(CONTEXT_VALID_BIT, &ce->flags);
> inhibit = false;
> }
> @@ -5121,11 +5110,9 @@ populate_lr_context(struct intel_context *ce,
> execlists_init_reg_state(vaddr + LRC_STATE_OFFSET,
> ce, engine, ring, inhibit);
>
> - ret = 0;
> -err_unpin_ctx:
> __i915_gem_object_flush_map(ctx_obj, 0, engine->context_size);
> i915_gem_object_unpin_map(ctx_obj);
> - return ret;
> + return 0;
> }
>
> static int __execlists_context_alloc(struct intel_context *ce,
> diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
> index d015f7b8b28e..ca7286e58409 100644
> --- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c
> +++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
> @@ -42,6 +42,7 @@
> #include "intel_reset.h"
> #include "intel_ring.h"
> #include "intel_workarounds.h"
> +#include "shmem_utils.h"
>
> /* Rough estimate of the typical request size, performing a flush,
> * set-context and then emitting the batch.
> @@ -1241,7 +1242,7 @@ alloc_context_vma(struct intel_engine_cs *engine)
> i915_gem_object_set_cache_coherency(obj, I915_CACHE_L3_LLC);
>
> if (engine->default_state) {
> - void *defaults, *vaddr;
> + void *vaddr;
>
> vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
> if (IS_ERR(vaddr)) {
> @@ -1249,15 +1250,8 @@ alloc_context_vma(struct intel_engine_cs *engine)
> goto err_obj;
> }
>
> - defaults = i915_gem_object_pin_map(engine->default_state,
> - I915_MAP_WB);
> - if (IS_ERR(defaults)) {
> - err = PTR_ERR(defaults);
> - goto err_map;
> - }
> -
> - memcpy(vaddr, defaults, engine->context_size);
> - i915_gem_object_unpin_map(engine->default_state);
> + shmem_read(engine->default_state, 0,
> + vaddr, engine->context_size);
>
> i915_gem_object_flush_map(obj);
> i915_gem_object_unpin_map(obj);
> @@ -1271,8 +1265,6 @@ alloc_context_vma(struct intel_engine_cs *engine)
>
> return vma;
>
> -err_map:
> - i915_gem_object_unpin_map(obj);
> err_obj:
> i915_gem_object_put(obj);
> return ERR_PTR(err);
> diff --git a/drivers/gpu/drm/i915/gt/selftest_context.c b/drivers/gpu/drm/i915/gt/selftest_context.c
> index e874dfaa5316..b8ed3cbe1277 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_context.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_context.c
> @@ -155,7 +155,7 @@ static int live_context_size(void *arg)
>
> for_each_engine(engine, gt, id) {
> struct {
> - struct drm_i915_gem_object *state;
> + struct file *state;
> void *pinned;
> } saved;
>
> diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
> index d3fa91aed7de..d900bbccd5db 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
> @@ -4452,8 +4452,7 @@ static int live_lrc_layout(void *arg)
> if (!engine->default_state)
> continue;
>
> - hw = i915_gem_object_pin_map(engine->default_state,
> - I915_MAP_WB);
> + hw = shmem_pin_map(engine->default_state);
> if (IS_ERR(hw)) {
> err = PTR_ERR(hw);
> break;
> @@ -4525,7 +4524,7 @@ static int live_lrc_layout(void *arg)
> hexdump(lrc, PAGE_SIZE);
> }
>
> - i915_gem_object_unpin_map(engine->default_state);
> + shmem_unpin_map(engine->default_state, hw);
> if (err)
> break;
> }
> @@ -4630,8 +4629,7 @@ static int live_lrc_fixed(void *arg)
> if (!engine->default_state)
> continue;
>
> - hw = i915_gem_object_pin_map(engine->default_state,
> - I915_MAP_WB);
> + hw = shmem_pin_map(engine->default_state);
> if (IS_ERR(hw)) {
> err = PTR_ERR(hw);
> break;
> @@ -4652,7 +4650,7 @@ static int live_lrc_fixed(void *arg)
> }
> }
>
> - i915_gem_object_unpin_map(engine->default_state);
> + shmem_unpin_map(engine->default_state, hw);
> }
>
> return err;
> diff --git a/drivers/gpu/drm/i915/gt/shmem_utils.c b/drivers/gpu/drm/i915/gt/shmem_utils.c
> new file mode 100644
> index 000000000000..8cfdfddba643
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/gt/shmem_utils.c
> @@ -0,0 +1,176 @@
> +// SPDX-License-Identifier: MIT
> +/*
> + * Copyright © 2020 Intel Corporation
> + */
> +
> +#include <linux/mm.h>
> +#include <linux/pagemap.h>
> +#include <linux/shmem_fs.h>
> +
> +#include "gem/i915_gem_object.h"
> +#include "shmem_utils.h"
> +
> +struct file *shmem_create_from_data(const char *name, void *data, size_t len)
> +{
> + struct file *file;
> + unsigned long pfn;
> + unsigned long sz;
> + int err;
> +
> + sz = PAGE_ALIGN(len);
> + file = shmem_file_setup(name, sz, VM_NORESERVE);
> + if (IS_ERR(file))
> + return file;
> +
> + for (pfn = 0; pfn < sz >> PAGE_SHIFT; pfn++) {
> + unsigned int this = min_t(size_t, PAGE_SIZE, len);
> + struct page *page;
> + void *vaddr;
> +
> + page = shmem_read_mapping_page_gfp(file->f_mapping, pfn,
> + GFP_KERNEL);
> + if (IS_ERR(page)) {
> + err = PTR_ERR(page);
> + goto err_file;
> + }
> +
> + vaddr = kmap(page);
> + memcpy(vaddr, data, this);
> + kunmap(page);
> + put_page(page);
> +
> + len -= this;
> + data += this;
> + }
> +
> + return file;
> +
> +err_file:
> + fput(file);
> + return ERR_PTR(err);
> +}
> +
> +struct file *shmem_create_from_object(struct drm_i915_gem_object *obj)
> +{
> + struct file *file;
> + void *ptr;
> +
> + if (obj->ops == &i915_gem_shmem_ops) {
> + file = obj->base.filp;
> + atomic_long_inc(&file->f_count);
> + return file;
> + }
> +
> + ptr = i915_gem_object_pin_map(obj, I915_MAP_WB);
> + if (IS_ERR(ptr))
> + return ERR_CAST(ptr);
> +
> + file = shmem_create_from_data("", ptr, obj->base.size);
> + i915_gem_object_unpin_map(obj);
> +
> + return file;
> +}
> +
> +static size_t shmem_npte(struct file *file)
> +{
> + return file->f_mapping->host->i_size >> PAGE_SHIFT;
> +}
> +
> +static void __shmem_unpin_map(struct file *file, void *ptr, size_t n_pte)
> +{
> + unsigned long pfn;
> +
> + vunmap(ptr);
> +
> + for (pfn = 0; pfn < n_pte; pfn++) {
> + struct page *page;
> +
> + page = shmem_read_mapping_page_gfp(file->f_mapping, pfn,
> + GFP_KERNEL);
> + if (!WARN_ON(IS_ERR(page))) {
> + put_page(page);
> + put_page(page);
> + }
> + }
> +}
> +
> +void *shmem_pin_map(struct file *file)
> +{
> + const size_t n_pte = shmem_npte(file);
> + pte_t *stack[32], **ptes, **mem;
> + struct vm_struct *area;
> + unsigned long pfn;
> +
> + mem = stack;
> + if (n_pte > ARRAY_SIZE(stack)) {
> + mem = kvmalloc_array(n_pte, sizeof(*mem), GFP_KERNEL);
> + if (!mem)
> + return NULL;
> + }
> +
> + area = alloc_vm_area(n_pte << PAGE_SHIFT, mem);
> + if (!area) {
> + if (mem != stack)
> + kvfree(mem);
> + return NULL;
> + }
> +
> + ptes = mem;
> + for (pfn = 0; pfn < n_pte; pfn++) {
> + struct page *page;
> +
> + page = shmem_read_mapping_page_gfp(file->f_mapping, pfn,
> + GFP_KERNEL);
> + if (IS_ERR(page))
> + goto err_page;
> +
> + **ptes++ = mk_pte(page, PAGE_KERNEL);
> + }
> +
> + if (mem != stack)
> + kvfree(mem);
> +
> + mapping_set_unevictable(file->f_mapping);
> + return area->addr;
> +
> +err_page:
> + if (mem != stack)
> + kvfree(mem);
> +
> + __shmem_unpin_map(file, area->addr, pfn);
> + return NULL;
> +}
> +
> +void shmem_unpin_map(struct file *file, void *ptr)
> +{
> + mapping_clear_unevictable(file->f_mapping);
> + __shmem_unpin_map(file, ptr, shmem_npte(file));
> +}
> +
> +int shmem_read(struct file *file, loff_t off, void *dst, size_t len)
> +{
> + unsigned long pfn;
> +
> + for (pfn = off >> PAGE_SHIFT; len; pfn++) {
> + unsigned int this =
> + min_t(size_t, PAGE_SIZE - offset_in_page(off), len);
> + struct page *page;
> + void *vaddr;
> +
> + page = shmem_read_mapping_page_gfp(file->f_mapping, pfn,
> + GFP_KERNEL);
> + if (IS_ERR(page))
> + return PTR_ERR(page);
> +
> + vaddr = kmap(page);
> + memcpy(dst, vaddr + offset_in_page(off), this);
> + kunmap(page);
> + put_page(page);
> +
> + len -= this;
> + dst += this;
> + off = 0;
> + }
> +
> + return 0;
> +}
> diff --git a/drivers/gpu/drm/i915/gt/shmem_utils.h b/drivers/gpu/drm/i915/gt/shmem_utils.h
> new file mode 100644
> index 000000000000..213fae8a943a
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/gt/shmem_utils.h
> @@ -0,0 +1,22 @@
> +/* SPDX-License-Identifier: MIT */
> +/*
> + * Copyright © 2020 Intel Corporation
> + */
> +
> +#ifndef SHMEM_UTILS_H
> +#define SHMEM_UTILS_H
> +
> +#include <linux/types.h>
> +
> +struct drm_i915_gem_object;
> +struct file;
> +
> +struct file *shmem_create_from_data(const char *name, void *data, size_t len);
> +struct file *shmem_create_from_object(struct drm_i915_gem_object *obj);
> +
> +void *shmem_pin_map(struct file *file);
> +void shmem_unpin_map(struct file *file, void *ptr);
> +
> +int shmem_read(struct file *file, loff_t off, void *dst, size_t len);
> +
> +#endif /* SHMEM_UTILS_H */
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
> index 4d54dba35302..8506ca43eca6 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.c
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> @@ -1318,26 +1318,6 @@ capture_user(struct intel_engine_capture_vma *capture,
> return capture;
> }
>
> -static struct i915_vma_coredump *
> -capture_object(const struct intel_gt *gt,
> - struct drm_i915_gem_object *obj,
> - const char *name,
> - struct i915_vma_compress *compress)
> -{
> - if (obj && i915_gem_object_has_pages(obj)) {
> - struct i915_vma fake = {
> - .node = { .start = U64_MAX, .size = obj->base.size },
> - .size = obj->base.size,
> - .pages = obj->mm.pages,
> - .obj = obj,
> - };
> -
> - return i915_vma_coredump_create(gt, &fake, name, compress);
> - } else {
> - return NULL;
> - }
> -}
> -
> static void add_vma(struct intel_engine_coredump *ee,
> struct i915_vma_coredump *vma)
> {
> @@ -1426,12 +1406,6 @@ intel_engine_coredump_add_vma(struct intel_engine_coredump *ee,
> engine->wa_ctx.vma,
> "WA context",
> compress));
> -
> - add_vma(ee,
> - capture_object(engine->gt,
> - engine->default_state,
> - "NULL context",
> - compress));
> }
>
> static struct intel_engine_coredump *
>
More information about the Intel-gfx
mailing list