[Intel-gfx] [PATCH] drm/i915: Fix a bug calling sleep function in atomic context
Jani Nikula
jani.nikula at linux.intel.com
Mon Nov 18 15:59:24 UTC 2019
On Wed, 13 Nov 2019, Bruce Chang <yu.bruce.chang at intel.com> wrote:
> Below is the call trace when this issue is hit:
>
> <3> [113.316247] BUG: sleeping function called from invalid context at mm/page_alloc.c:4653
> <3> [113.318190] in_atomic(): 1, irqs_disabled(): 0, pid: 678, name: debugfs_test
> <4> [113.319900] no locks held by debugfs_test/678.
> <3> [113.321002] Preemption disabled at:
> <4> [113.321130] [<ffffffffa02506d4>] i915_error_object_create+0x494/0x610 [i915]
> <4> [113.327259] Call Trace:
> <4> [113.327871] dump_stack+0x67/0x9b
> <4> [113.328683] ___might_sleep+0x167/0x250
> <4> [113.329618] __alloc_pages_nodemask+0x26b/0x1110
> <4> [113.330731] ? ___slab_alloc.constprop.34+0x21c/0x380
> <4> [113.331943] ? ___slab_alloc.constprop.34+0x21c/0x380
> <4> [113.333169] ? __slab_alloc.isra.28.constprop.33+0x4d/0x70
> <4> [113.334614] pool_alloc.constprop.19+0x14/0x60 [i915]
> <4> [113.335951] compress_page+0x7c/0x100 [i915]
> <4> [113.337110] i915_error_object_create+0x4bd/0x610 [i915]
> <4> [113.338515] i915_capture_gpu_state+0x384/0x1680 [i915]
> <4> [113.339771] ? __lock_acquire+0x4ac/0x1e90
> <4> [113.340785] ? _raw_spin_lock_irqsave_nested+0x1/0x50
> <4> [113.342127] i915_gpu_info_open+0x44/0x70 [i915]
> <4> [113.343243] full_proxy_open+0x139/0x1b0
> <4> [113.344196] ? open_proxy_open+0xc0/0xc0
> <4> [113.345149] do_dentry_open+0x1ce/0x3a0
> <4> [113.346084] path_openat+0x4c9/0xac0
> <4> [113.346967] do_filp_open+0x96/0x110
> <4> [113.347848] ? __alloc_fd+0xe0/0x1f0
> <4> [113.348736] ? do_sys_open+0x1b8/0x250
> <4> [113.349647] do_sys_open+0x1b8/0x250
> <4> [113.350526] do_syscall_64+0x55/0x1c0
> <4> [113.351418] entry_SYSCALL_64_after_hwframe+0x49/0xbe
>
> After io_mapping_map_atomic_wc()/kmap_atomic(), the kernel is in atomic
> context, but compress_page() then calls pool_alloc() with the GFP_KERNEL
> flag, which can sleep. Sleeping is not allowed in atomic context, which is
> why this bug is triggered.
>
> To fix this issue, we can either
> 1) avoid entering atomic context, i.e. use the non-atomic versions of these
> functions (io_mapping_map_wc/kmap),
> or
> 2) make compress_page() safe to run in atomic context.
>
> Since running the slow compression in atomic context is not a good idea,
> option 1) is preferred and is what this patch implements.
>
> Signed-off-by: Bruce Chang <yu.bruce.chang at intel.com>
> Reviewed-by: Brian Welty <brian.welty at intel.com>
> Fixes: 895d8ebeaa924 ("drm/i915: error capture with no ggtt slot")
> ---
> drivers/gpu/drm/i915/i915_gpu_error.c | 8 ++++----
> 1 file changed, 4 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
> index 1f2f266f26af..7118ecb7f144 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.c
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> @@ -1007,67 +1007,67 @@ i915_error_object_create(struct drm_i915_private *i915,
> compress->wc = i915_gem_object_is_lmem(vma->obj) ||
> drm_mm_node_allocated(&ggtt->error_capture);
>
> ret = -EINVAL;
> if (drm_mm_node_allocated(&ggtt->error_capture)) {
> void __iomem *s;
> dma_addr_t dma;
>
> for_each_sgt_daddr(dma, iter, vma->pages) {
> ggtt->vm.insert_page(&ggtt->vm, dma, slot,
> I915_CACHE_NONE, 0);
>
> s = io_mapping_map_wc(&ggtt->iomap, slot, PAGE_SIZE);
> ret = compress_page(compress, (void __force *)s, dst);
> io_mapping_unmap(s);
> if (ret)
> break;
> }
> } else if (i915_gem_object_is_lmem(vma->obj)) {
> struct intel_memory_region *mem = vma->obj->mm.region;
> dma_addr_t dma;
>
> for_each_sgt_daddr(dma, iter, vma->pages) {
> void __iomem *s;
>
> - s = io_mapping_map_atomic_wc(&mem->iomap, dma);
> + s = io_mapping_map_wc(&mem->iomap, dma, PAGE_SIZE);
> ret = compress_page(compress, (void __force *)s, dst);
> - io_mapping_unmap_atomic(s);
> + io_mapping_unmap(s);
> if (ret)
> break;
> }
> } else {
> struct page *page;
>
> for_each_sgt_page(page, iter, vma->pages) {
> void *s;
>
> drm_clflush_pages(&page, 1);
>
> - s = kmap_atomic(page);
> + s = kmap(page);
> ret = compress_page(compress, s, dst);
> - kunmap_atomic(s);
> + kunmap(page);
>
> drm_clflush_pages(&page, 1);
>
> if (ret)
> break;
> }
> }
>
> if (ret || compress_flush(compress, dst)) {
> while (dst->page_count--)
> pool_free(&compress->pool, dst->pages[dst->page_count]);
> kfree(dst);
> dst = NULL;
> }
> compress_finish(compress);
>
> return dst;
> }
>
> /*
> * Generate a semi-unique error code. The code is not meant to have meaning, The
> * code's only purpose is to try to prevent false duplicated bug reports by
> * grossly estimating a GPU error state.
> *
> * TODO Ideally, hashing the batchbuffer would be a very nice way to determine
For future reference, please just use the default patch context size.
BR,
Jani.
--
Jani Nikula, Intel Open Source Graphics Center