[Intel-gfx] [PATCH 1/2] drm/i915/gt: Keep a no-frills swappable copy of the default context state

Matthew Auld matthew.auld at intel.com
Wed Apr 29 16:11:38 UTC 2020


On 28/04/2020 22:55, Chris Wilson wrote:
> We need to keep the default context state around to instantiate new
> contexts (aka golden rendercontext), and we also keep it pinned while
> the engine is active so that we can quickly reset a hanging context.
> However, the default contexts are large enough to merit keeping in
> swappable memory as opposed to kernel memory, so we store them inside
> shmemfs. Currently, we use the normal GEM objects to create the default
> context image, but we can throw away all but the shmemfs file.
> 
> This greatly simplifies the tricky power management code which wants to
> run underneath the normal GT locking, and we definitely do not want to
> use any high level objects that may appear to recurse back into the GT.
> Though perhaps the primary advantage of the complex GEM object is that
> we aggressively cache the mapping, but here we are recreating the
> vm_area everytime time we unpark. At the worst, we add a lightweight
> cache, but first find a microbenchmark that is impacted.
> 
> Having started to create some utility functions to make working with
> shmemfs objects easier, we can start putting them to wider use, where
> GEM objects are overkill, such as storing persistent error state.

Is there any point in having the default state in device local-memory, 
and if so does this change the story at all? I'm guessing not...

Anyway, the shmem helper stuff sounds like just what we need.
Reviewed-by: Matthew Auld <matthew.auld at intel.com>

> 
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Matthew Auld <matthew.auld at intel.com>
> Cc: Ramalingam C <ramalingam.c at intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> ---
>   drivers/gpu/drm/i915/Makefile                 |   1 +
>   drivers/gpu/drm/i915/gt/intel_engine_cs.c     |   2 +-
>   drivers/gpu/drm/i915/gt/intel_engine_pm.c     |  10 +-
>   drivers/gpu/drm/i915/gt/intel_engine_types.h  |   2 +-
>   drivers/gpu/drm/i915/gt/intel_gt.c            |   4 +-
>   drivers/gpu/drm/i915/gt/intel_lrc.c           |  25 +--
>   .../gpu/drm/i915/gt/intel_ring_submission.c   |  16 +-
>   drivers/gpu/drm/i915/gt/selftest_context.c    |   2 +-
>   drivers/gpu/drm/i915/gt/selftest_lrc.c        |  10 +-
>   drivers/gpu/drm/i915/gt/shmem_utils.c         | 176 ++++++++++++++++++
>   drivers/gpu/drm/i915/gt/shmem_utils.h         |  22 +++
>   drivers/gpu/drm/i915/i915_gpu_error.c         |  26 ---
>   12 files changed, 224 insertions(+), 72 deletions(-)
>   create mode 100644 drivers/gpu/drm/i915/gt/shmem_utils.c
>   create mode 100644 drivers/gpu/drm/i915/gt/shmem_utils.h
> 
> diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
> index 21bb2fb5a6b8..caf00d92ea9d 100644
> --- a/drivers/gpu/drm/i915/Makefile
> +++ b/drivers/gpu/drm/i915/Makefile
> @@ -111,6 +111,7 @@ gt-y += \
>   	gt/intel_sseu.o \
>   	gt/intel_timeline.o \
>   	gt/intel_workarounds.o \
> +	gt/shmem_utils.o \
>   	gt/sysfs_engines.o
>   # autogenerated null render state
>   gt-y += \
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> index 7c3cb5aedfdf..80bf71636c0f 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> @@ -834,7 +834,7 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine)
>   	intel_engine_cleanup_cmd_parser(engine);
>   
>   	if (engine->default_state)
> -		i915_gem_object_put(engine->default_state);
> +		fput(engine->default_state);
>   
>   	if (engine->kernel_context) {
>   		intel_context_unpin(engine->kernel_context);
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
> index 3be679741d22..446e35ac0224 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
> @@ -15,6 +15,7 @@
>   #include "intel_gt_pm.h"
>   #include "intel_rc6.h"
>   #include "intel_ring.h"
> +#include "shmem_utils.h"
>   
>   static int __engine_unpark(struct intel_wakeref *wf)
>   {
> @@ -30,10 +31,8 @@ static int __engine_unpark(struct intel_wakeref *wf)
>   	/* Pin the default state for fast resets from atomic context. */
>   	map = NULL;
>   	if (engine->default_state)
> -		map = i915_gem_object_pin_map(engine->default_state,
> -					      I915_MAP_WB);
> -	if (!IS_ERR_OR_NULL(map))
> -		engine->pinned_default_state = map;
> +		map = shmem_pin_map(engine->default_state);
> +	engine->pinned_default_state = map;
>   
>   	/* Discard stale context state from across idling */
>   	ce = engine->kernel_context;
> @@ -264,7 +263,8 @@ static int __engine_park(struct intel_wakeref *wf)
>   		engine->park(engine);
>   
>   	if (engine->pinned_default_state) {
> -		i915_gem_object_unpin_map(engine->default_state);
> +		shmem_unpin_map(engine->default_state,
> +				engine->pinned_default_state);
>   		engine->pinned_default_state = NULL;
>   	}
>   
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> index cfe4feaee982..483d8ff39a0d 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> @@ -339,7 +339,7 @@ struct intel_engine_cs {
>   
>   	unsigned long wakeref_serial;
>   	struct intel_wakeref wakeref;
> -	struct drm_i915_gem_object *default_state;
> +	struct file *default_state;
>   	void *pinned_default_state;
>   
>   	struct {
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
> index d9cf8194c997..c3f25a2a5706 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt.c
> +++ b/drivers/gpu/drm/i915/gt/intel_gt.c
> @@ -16,6 +16,7 @@
>   #include "intel_rps.h"
>   #include "intel_uncore.h"
>   #include "intel_pm.h"
> +#include "shmem_utils.h"
>   
>   void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915)
>   {
> @@ -501,7 +502,8 @@ static int __engines_record_defaults(struct intel_gt *gt)
>   			goto out;
>   		}
>   
> -		rq->engine->default_state = i915_gem_object_get(state->obj);
> +		rq->engine->default_state =
> +			shmem_create_from_object(state->obj);
>   		i915_gem_object_unpin_map(state->obj);
>   	}
>   
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index 90acc3d2440b..7fc4081c34fe 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -147,6 +147,7 @@
>   #include "intel_reset.h"
>   #include "intel_ring.h"
>   #include "intel_workarounds.h"
> +#include "shmem_utils.h"
>   
>   #define RING_EXECLIST_QFULL		(1 << 0x2)
>   #define RING_EXECLIST1_VALID		(1 << 0x3)
> @@ -5083,30 +5084,18 @@ populate_lr_context(struct intel_context *ce,
>   {
>   	bool inhibit = true;
>   	void *vaddr;
> -	int ret;
>   
>   	vaddr = i915_gem_object_pin_map(ctx_obj, I915_MAP_WB);
>   	if (IS_ERR(vaddr)) {
> -		ret = PTR_ERR(vaddr);
> -		drm_dbg(&engine->i915->drm,
> -			"Could not map object pages! (%d)\n", ret);
> -		return ret;
> +		drm_dbg(&engine->i915->drm, "Could not map object pages!\n");
> +		return PTR_ERR(vaddr);
>   	}
>   
>   	set_redzone(vaddr, engine);
>   
>   	if (engine->default_state) {
> -		void *defaults;
> -
> -		defaults = i915_gem_object_pin_map(engine->default_state,
> -						   I915_MAP_WB);
> -		if (IS_ERR(defaults)) {
> -			ret = PTR_ERR(defaults);
> -			goto err_unpin_ctx;
> -		}
> -
> -		memcpy(vaddr, defaults, engine->context_size);
> -		i915_gem_object_unpin_map(engine->default_state);
> +		shmem_read(engine->default_state, 0,
> +			   vaddr, engine->context_size);
>   		__set_bit(CONTEXT_VALID_BIT, &ce->flags);
>   		inhibit = false;
>   	}
> @@ -5121,11 +5110,9 @@ populate_lr_context(struct intel_context *ce,
>   	execlists_init_reg_state(vaddr + LRC_STATE_OFFSET,
>   				 ce, engine, ring, inhibit);
>   
> -	ret = 0;
> -err_unpin_ctx:
>   	__i915_gem_object_flush_map(ctx_obj, 0, engine->context_size);
>   	i915_gem_object_unpin_map(ctx_obj);
> -	return ret;
> +	return 0;
>   }
>   
>   static int __execlists_context_alloc(struct intel_context *ce,
> diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
> index d015f7b8b28e..ca7286e58409 100644
> --- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c
> +++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
> @@ -42,6 +42,7 @@
>   #include "intel_reset.h"
>   #include "intel_ring.h"
>   #include "intel_workarounds.h"
> +#include "shmem_utils.h"
>   
>   /* Rough estimate of the typical request size, performing a flush,
>    * set-context and then emitting the batch.
> @@ -1241,7 +1242,7 @@ alloc_context_vma(struct intel_engine_cs *engine)
>   		i915_gem_object_set_cache_coherency(obj, I915_CACHE_L3_LLC);
>   
>   	if (engine->default_state) {
> -		void *defaults, *vaddr;
> +		void *vaddr;
>   
>   		vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
>   		if (IS_ERR(vaddr)) {
> @@ -1249,15 +1250,8 @@ alloc_context_vma(struct intel_engine_cs *engine)
>   			goto err_obj;
>   		}
>   
> -		defaults = i915_gem_object_pin_map(engine->default_state,
> -						   I915_MAP_WB);
> -		if (IS_ERR(defaults)) {
> -			err = PTR_ERR(defaults);
> -			goto err_map;
> -		}
> -
> -		memcpy(vaddr, defaults, engine->context_size);
> -		i915_gem_object_unpin_map(engine->default_state);
> +		shmem_read(engine->default_state, 0,
> +			   vaddr, engine->context_size);
>   
>   		i915_gem_object_flush_map(obj);
>   		i915_gem_object_unpin_map(obj);
> @@ -1271,8 +1265,6 @@ alloc_context_vma(struct intel_engine_cs *engine)
>   
>   	return vma;
>   
> -err_map:
> -	i915_gem_object_unpin_map(obj);
>   err_obj:
>   	i915_gem_object_put(obj);
>   	return ERR_PTR(err);
> diff --git a/drivers/gpu/drm/i915/gt/selftest_context.c b/drivers/gpu/drm/i915/gt/selftest_context.c
> index e874dfaa5316..b8ed3cbe1277 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_context.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_context.c
> @@ -155,7 +155,7 @@ static int live_context_size(void *arg)
>   
>   	for_each_engine(engine, gt, id) {
>   		struct {
> -			struct drm_i915_gem_object *state;
> +			struct file *state;
>   			void *pinned;
>   		} saved;
>   
> diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
> index d3fa91aed7de..d900bbccd5db 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
> @@ -4452,8 +4452,7 @@ static int live_lrc_layout(void *arg)
>   		if (!engine->default_state)
>   			continue;
>   
> -		hw = i915_gem_object_pin_map(engine->default_state,
> -					     I915_MAP_WB);
> +		hw = shmem_pin_map(engine->default_state);
>   		if (IS_ERR(hw)) {
>   			err = PTR_ERR(hw);
>   			break;
> @@ -4525,7 +4524,7 @@ static int live_lrc_layout(void *arg)
>   			hexdump(lrc, PAGE_SIZE);
>   		}
>   
> -		i915_gem_object_unpin_map(engine->default_state);
> +		shmem_unpin_map(engine->default_state, hw);
>   		if (err)
>   			break;
>   	}
> @@ -4630,8 +4629,7 @@ static int live_lrc_fixed(void *arg)
>   		if (!engine->default_state)
>   			continue;
>   
> -		hw = i915_gem_object_pin_map(engine->default_state,
> -					     I915_MAP_WB);
> +		hw = shmem_pin_map(engine->default_state);
>   		if (IS_ERR(hw)) {
>   			err = PTR_ERR(hw);
>   			break;
> @@ -4652,7 +4650,7 @@ static int live_lrc_fixed(void *arg)
>   			}
>   		}
>   
> -		i915_gem_object_unpin_map(engine->default_state);
> +		shmem_unpin_map(engine->default_state, hw);
>   	}
>   
>   	return err;
> diff --git a/drivers/gpu/drm/i915/gt/shmem_utils.c b/drivers/gpu/drm/i915/gt/shmem_utils.c
> new file mode 100644
> index 000000000000..8cfdfddba643
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/gt/shmem_utils.c
> @@ -0,0 +1,176 @@
> +// SPDX-License-Identifier: MIT
> +/*
> + * Copyright © 2020 Intel Corporation
> + */
> +
> +#include <linux/mm.h>
> +#include <linux/pagemap.h>
> +#include <linux/shmem_fs.h>
> +
> +#include "gem/i915_gem_object.h"
> +#include "shmem_utils.h"
> +
> +struct file *shmem_create_from_data(const char *name, void *data, size_t len)
> +{
> +	struct file *file;
> +	unsigned long pfn;
> +	unsigned long sz;
> +	int err;
> +
> +	sz = PAGE_ALIGN(len);
> +	file = shmem_file_setup(name, sz, VM_NORESERVE);
> +	if (IS_ERR(file))
> +		return file;
> +
> +	for (pfn = 0; pfn < sz >> PAGE_SHIFT; pfn++) {
> +		unsigned int this = min_t(size_t, PAGE_SIZE, len);
> +		struct page *page;
> +		void *vaddr;
> +
> +		page = shmem_read_mapping_page_gfp(file->f_mapping, pfn,
> +						   GFP_KERNEL);
> +		if (IS_ERR(page)) {
> +			err = PTR_ERR(page);
> +			goto err_file;
> +		}
> +
> +		vaddr = kmap(page);
> +		memcpy(vaddr, data, this);
> +		kunmap(page);
> +		put_page(page);
> +
> +		len -= this;
> +		data += this;
> +	}
> +
> +	return file;
> +
> +err_file:
> +	fput(file);
> +	return ERR_PTR(err);
> +}
> +
> +struct file *shmem_create_from_object(struct drm_i915_gem_object *obj)
> +{
> +	struct file *file;
> +	void *ptr;
> +
> +	if (obj->ops == &i915_gem_shmem_ops) {
> +		file = obj->base.filp;
> +		atomic_long_inc(&file->f_count);
> +		return file;
> +	}
> +
> +	ptr = i915_gem_object_pin_map(obj, I915_MAP_WB);
> +	if (IS_ERR(ptr))
> +		return ERR_CAST(ptr);
> +
> +	file = shmem_create_from_data("", ptr, obj->base.size);
> +	i915_gem_object_unpin_map(obj);
> +
> +	return file;
> +}
> +
> +static size_t shmem_npte(struct file *file)
> +{
> +	return file->f_mapping->host->i_size >> PAGE_SHIFT;
> +}
> +
> +static void __shmem_unpin_map(struct file *file, void *ptr, size_t n_pte)
> +{
> +	unsigned long pfn;
> +
> +	vunmap(ptr);
> +
> +	for (pfn = 0; pfn < n_pte; pfn++) {
> +		struct page *page;
> +
> +		page = shmem_read_mapping_page_gfp(file->f_mapping, pfn,
> +						   GFP_KERNEL);
> +		if (!WARN_ON(IS_ERR(page))) {
> +			put_page(page);
> +			put_page(page);
> +		}
> +	}
> +}
> +
> +void *shmem_pin_map(struct file *file)
> +{
> +	const size_t n_pte = shmem_npte(file);
> +	pte_t *stack[32], **ptes, **mem;
> +	struct vm_struct *area;
> +	unsigned long pfn;
> +
> +	mem = stack;
> +	if (n_pte > ARRAY_SIZE(stack)) {
> +		mem = kvmalloc_array(n_pte, sizeof(*mem), GFP_KERNEL);
> +		if (!mem)
> +			return NULL;
> +	}
> +
> +	area = alloc_vm_area(n_pte << PAGE_SHIFT, mem);
> +	if (!area) {
> +		if (mem != stack)
> +			kvfree(mem);
> +		return NULL;
> +	}
> +
> +	ptes = mem;
> +	for (pfn = 0; pfn < n_pte; pfn++) {
> +		struct page *page;
> +
> +		page = shmem_read_mapping_page_gfp(file->f_mapping, pfn,
> +						   GFP_KERNEL);
> +		if (IS_ERR(page))
> +			goto err_page;
> +
> +		**ptes++ = mk_pte(page,  PAGE_KERNEL);
> +	}
> +
> +	if (mem != stack)
> +		kvfree(mem);
> +
> +	mapping_set_unevictable(file->f_mapping);
> +	return area->addr;
> +
> +err_page:
> +	if (mem != stack)
> +		kvfree(mem);
> +
> +	__shmem_unpin_map(file, area->addr, pfn);
> +	return NULL;
> +}
> +
> +void shmem_unpin_map(struct file *file, void *ptr)
> +{
> +	mapping_clear_unevictable(file->f_mapping);
> +	__shmem_unpin_map(file, ptr, shmem_npte(file));
> +}
> +
> +int shmem_read(struct file *file, loff_t off, void *dst, size_t len)
> +{
> +	unsigned long pfn;
> +
> +	for (pfn = off >> PAGE_SHIFT; len; pfn++) {
> +		unsigned int this =
> +			min_t(size_t, PAGE_SIZE - offset_in_page(off), len);
> +		struct page *page;
> +		void *vaddr;
> +
> +		page = shmem_read_mapping_page_gfp(file->f_mapping, pfn,
> +						   GFP_KERNEL);
> +		if (IS_ERR(page))
> +			return PTR_ERR(page);
> +
> +		vaddr = kmap(page);
> +		memcpy(dst, vaddr + offset_in_page(off), this);
> +		kunmap(page);
> +		put_page(page);
> +
> +		len -= this;
> +		dst += this;
> +		off = 0;
> +	}
> +
> +	return 0;
> +}
> diff --git a/drivers/gpu/drm/i915/gt/shmem_utils.h b/drivers/gpu/drm/i915/gt/shmem_utils.h
> new file mode 100644
> index 000000000000..213fae8a943a
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/gt/shmem_utils.h
> @@ -0,0 +1,22 @@
> +/* SPDX-License-Identifier: MIT */
> +/*
> + * Copyright © 2020 Intel Corporation
> + */
> +
> +#ifndef SHMEM_UTILS_H
> +#define SHMEM_UTILS_H
> +
> +#include <linux/types.h>
> +
> +struct drm_i915_gem_object;
> +struct file;
> +
> +struct file *shmem_create_from_data(const char *name, void *data, size_t len);
> +struct file *shmem_create_from_object(struct drm_i915_gem_object *obj);
> +
> +void *shmem_pin_map(struct file *file);
> +void shmem_unpin_map(struct file *file, void *ptr);
> +
> +int shmem_read(struct file *file, loff_t off, void *dst, size_t len);
> +
> +#endif /* SHMEM_UTILS_H */
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
> index 4d54dba35302..8506ca43eca6 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.c
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> @@ -1318,26 +1318,6 @@ capture_user(struct intel_engine_capture_vma *capture,
>   	return capture;
>   }
>   
> -static struct i915_vma_coredump *
> -capture_object(const struct intel_gt *gt,
> -	       struct drm_i915_gem_object *obj,
> -	       const char *name,
> -	       struct i915_vma_compress *compress)
> -{
> -	if (obj && i915_gem_object_has_pages(obj)) {
> -		struct i915_vma fake = {
> -			.node = { .start = U64_MAX, .size = obj->base.size },
> -			.size = obj->base.size,
> -			.pages = obj->mm.pages,
> -			.obj = obj,
> -		};
> -
> -		return i915_vma_coredump_create(gt, &fake, name, compress);
> -	} else {
> -		return NULL;
> -	}
> -}
> -
>   static void add_vma(struct intel_engine_coredump *ee,
>   		    struct i915_vma_coredump *vma)
>   {
> @@ -1426,12 +1406,6 @@ intel_engine_coredump_add_vma(struct intel_engine_coredump *ee,
>   					 engine->wa_ctx.vma,
>   					 "WA context",
>   					 compress));
> -
> -	add_vma(ee,
> -		capture_object(engine->gt,
> -			       engine->default_state,
> -			       "NULL context",
> -			       compress));
>   }
>   
>   static struct intel_engine_coredump *
> 


More information about the Intel-gfx mailing list