[Intel-gfx] [PATCH v4] drm/i915/guc: Clean up locks in GuC

Fri Jan 22 02:54:42 PST 2016

On 03/12/15 00:56, yu.dai at intel.com wrote:
> From: Alex Dai <yu.dai at intel.com>
> 
> For now, remove the spinlocks that protected the GuC's
> statistics block and work queue; they are only accessed
> by code that already holds the global struct_mutex, and
> so are redundant (until the big struct_mutex rewrite!).
> 
> The specific problem that the spinlocks caused was that
> if the work queue was full, the driver would try to
> spinwait for one jiffy, but with interrupts disabled the
> jiffy count would not advance, leading to a system hang.
> The issue was found using test case igt/gem_close_race.
> 
> The new version will usleep() instead, still holding
> the struct_mutex but without any spinlocks.
> 
> v4: Reorganize commit message (Dave Gordon)
> v3: Remove unnecessary whitespace churn
> v2: Clean up wq_lock too
> v1: Clean up host2guc lock as well
> 
> Signed-off-by: Alex Dai <yu.dai at intel.com>
> ---
>   drivers/gpu/drm/i915/i915_debugfs.c        | 12 ++++++------
>   drivers/gpu/drm/i915/i915_guc_submission.c | 31 ++++++------------------------
>   drivers/gpu/drm/i915/intel_guc.h           |  4 ----
>   3 files changed, 12 insertions(+), 35 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index a728ff1..d6b7817 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -2473,15 +2473,15 @@ static int i915_guc_info(struct seq_file *m, void *data)
>   	if (!HAS_GUC_SCHED(dev_priv->dev))
>   		return 0;
>   
> +	if (mutex_lock_interruptible(&dev->struct_mutex))
> +		return 0;
> +
>   	/* Take a local copy of the GuC data, so we can dump it at leisure */
> -	spin_lock(&dev_priv->guc.host2guc_lock);
>   	guc = dev_priv->guc;
> -	if (guc.execbuf_client) {
> -		spin_lock(&guc.execbuf_client->wq_lock);
> +	if (guc.execbuf_client)
>   		client = *guc.execbuf_client;
> -		spin_unlock(&guc.execbuf_client->wq_lock);
> -	}
> -	spin_unlock(&dev_priv->guc.host2guc_lock);
> +
> +	mutex_unlock(&dev->struct_mutex);
>   
>   	seq_printf(m, "GuC total action count: %llu\n", guc.action_count);
>   	seq_printf(m, "GuC action failure count: %u\n", guc.action_fail);
> diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
> index ed9f100..a7f9785 100644
> --- a/drivers/gpu/drm/i915/i915_guc_submission.c
> +++ b/drivers/gpu/drm/i915/i915_guc_submission.c
> @@ -86,7 +86,6 @@ static int host2guc_action(struct intel_guc *guc, u32 *data, u32 len)
>   		return -EINVAL;
>   
>   	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
> -	spin_lock(&dev_priv->guc.host2guc_lock);
>   
>   	dev_priv->guc.action_count += 1;
>   	dev_priv->guc.action_cmd = data[0];
> @@ -119,7 +118,6 @@ static int host2guc_action(struct intel_guc *guc, u32 *data, u32 len)
>   	}
>   	dev_priv->guc.action_status = status;
>   
> -	spin_unlock(&dev_priv->guc.host2guc_lock);
>   	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
>   
>   	return ret;
> @@ -292,16 +290,12 @@ static uint32_t select_doorbell_cacheline(struct intel_guc *guc)
>   	const uint32_t cacheline_size = cache_line_size();
>   	uint32_t offset;
>   
> -	spin_lock(&guc->host2guc_lock);
> -
>   	/* Doorbell uses a single cache line within a page */
>   	offset = offset_in_page(guc->db_cacheline);
>   
>   	/* Moving to next cache line to reduce contention */
>   	guc->db_cacheline += cacheline_size;
>   
> -	spin_unlock(&guc->host2guc_lock);
> -
>   	DRM_DEBUG_DRIVER("selected doorbell cacheline 0x%x, next 0x%x, linesize %u\n",
>   			offset, guc->db_cacheline, cacheline_size);
>   
> @@ -322,13 +316,11 @@ static uint16_t assign_doorbell(struct intel_guc *guc, uint32_t priority)
>   	const uint16_t end = start + half;
>   	uint16_t id;
>   
> -	spin_lock(&guc->host2guc_lock);
>   	id = find_next_zero_bit(guc->doorbell_bitmap, end, start);
>   	if (id == end)
>   		id = GUC_INVALID_DOORBELL_ID;
>   	else
>   		bitmap_set(guc->doorbell_bitmap, id, 1);
> -	spin_unlock(&guc->host2guc_lock);
>   
>   	DRM_DEBUG_DRIVER("assigned %s priority doorbell id 0x%x\n",
>   			hi_pri ? "high" : "normal", id);
> @@ -338,9 +330,7 @@ static uint16_t assign_doorbell(struct intel_guc *guc, uint32_t priority)
>   
>   static void release_doorbell(struct intel_guc *guc, uint16_t id)
>   {
> -	spin_lock(&guc->host2guc_lock);
>   	bitmap_clear(guc->doorbell_bitmap, id, 1);
> -	spin_unlock(&guc->host2guc_lock);
>   }
>   
>   /*
> @@ -487,16 +477,13 @@ static int guc_get_workqueue_space(struct i915_guc_client *gc, u32 *offset)
>   	struct guc_process_desc *desc;
>   	void *base;
>   	u32 size = sizeof(struct guc_wq_item);
> -	int ret = 0, timeout_counter = 200;
> +	int ret = -ETIMEDOUT, timeout_counter = 200;
>   
>   	base = kmap_atomic(i915_gem_object_get_page(gc->client_obj, 0));
>   	desc = base + gc->proc_desc_offset;
>   
>   	while (timeout_counter-- > 0) {
> -		ret = wait_for_atomic(CIRC_SPACE(gc->wq_tail, desc->head,
> -				gc->wq_size) >= size, 1);
> -
> -		if (!ret) {
> +		if (CIRC_SPACE(gc->wq_tail, desc->head, gc->wq_size) >= size) {
>   			*offset = gc->wq_tail;
>   
>   			/* advance the tail for next workqueue item */
> @@ -505,7 +492,11 @@ static int guc_get_workqueue_space(struct i915_guc_client *gc, u32 *offset)
>   
>   			/* this will break the loop */
>   			timeout_counter = 0;
> +			ret = 0;
>   		}
> +
> +		if (timeout_counter)
> +			usleep_range(1000, 2000);

Sleeping is illegal while holding kmap_atomic mapping:

[   34.098798] BUG: scheduling while atomic: gem_close_race/1941/0x00000002
[   34.098822] Modules linked in: hid_generic usbhid i915 asix usbnet libphy mii i2c_algo_bit drm_kms_helper cfbfillrect syscopyarea cfbimgblt sysfillrect sysimgblt fb_sys_fops cfbcopyarea drm coretemp i2c_hid hid video pinctrl_sunrisepoint pinctrl_intel acpi_pad nls_iso8859_1 e1000e ptp psmouse pps_core ahci libahci
[   34.098824] CPU: 0 PID: 1941 Comm: gem_close_race Tainted: G     U          4.4.0-160121+ #123
[   34.098824] Hardware name: Intel Corporation Skylake Client platform/Skylake AIO DDR3L RVP10, BIOS SKLSE2R1.R00.X100.B01.1509220551 09/22/2015
[   34.098825]  0000000000013e40 ffff880166c27a78 ffffffff81280d02 ffff880172c13e40
[   34.098826]  ffff880166c27a88 ffffffff810c203a ffff880166c27ac8 ffffffff814ec808
[   34.098827]  ffff88016b7c6000 ffff880166c28000 00000000000f4240 0000000000000001
[   34.098827] Call Trace:
[   34.098831]  [<ffffffff81280d02>] dump_stack+0x4b/0x79
[   34.098833]  [<ffffffff810c203a>] __schedule_bug+0x41/0x4f
[   34.098834]  [<ffffffff814ec808>] __schedule+0x5a8/0x690
[   34.098835]  [<ffffffff814ec927>] schedule+0x37/0x80
[   34.098836]  [<ffffffff814ef3fd>] schedule_hrtimeout_range_clock+0xad/0x130
[   34.098837]  [<ffffffff81090be0>] ? hrtimer_init+0x10/0x10
[   34.098838]  [<ffffffff814ef3f1>] ? schedule_hrtimeout_range_clock+0xa1/0x130
[   34.098839]  [<ffffffff814ef48e>] schedule_hrtimeout_range+0xe/0x10
[   34.098840]  [<ffffffff814eef9b>] usleep_range+0x3b/0x40
[   34.098853]  [<ffffffffa01ec109>] i915_guc_wq_check_space+0x119/0x210 [i915]
[   34.098861]  [<ffffffffa01da47c>] intel_logical_ring_alloc_request_extras+0x5c/0x70 [i915]
[   34.098869]  [<ffffffffa01cdbf1>] i915_gem_request_alloc+0x91/0x170 [i915]
[   34.098875]  [<ffffffffa01c1c07>] i915_gem_do_execbuffer.isra.25+0xbc7/0x12a0 [i915]
[   34.098882]  [<ffffffffa01cb785>] ? i915_gem_object_get_pages_gtt+0x225/0x3c0 [i915]
[   34.098889]  [<ffffffffa01d1fb6>] ? i915_gem_pwrite_ioctl+0xd6/0x9f0 [i915]
[   34.098895]  [<ffffffffa01c2e68>] i915_gem_execbuffer2+0xa8/0x250 [i915]
[   34.098900]  [<ffffffffa00f65d8>] drm_ioctl+0x258/0x4f0 [drm]
[   34.098906]  [<ffffffffa01c2dc0>] ? i915_gem_execbuffer+0x340/0x340 [i915]
[   34.098908]  [<ffffffff8111590d>] do_vfs_ioctl+0x2cd/0x4a0
[   34.098909]  [<ffffffff8111eac2>] ? __fget+0x72/0xb0
[   34.098910]  [<ffffffff81115b1c>] SyS_ioctl+0x3c/0x70
[   34.098911]  [<ffffffff814effd7>] entry_SYSCALL_64_fastpath+0x12/0x6a
[   34.100208] ------------[ cut here ]------------

I don't know the code well enough to suggest a suitable fix.

mdelay certainly seems out of the question. So maybe move the kmap so it holds
the mapping across the lifetime of the page it is mapping, or whethever it is
convenient?

Regards,

Tvrtko