[Intel-gfx] [01/11 v3] drm/i915/guc: Sanitize GuC client initialization

Daniele Ceraolo Spurio daniele.ceraolospurio at intel.com
Mon Mar 13 20:19:00 UTC 2017



On 10/03/17 08:28, Oscar Mateo wrote:
> Started adding proper teardown to guc_client_alloc, ended up removing
> quite a few dead ends where errors communicating with the GuC were
> silently ignored. There also seemed to be quite a few erronous
> teardown actions performed in case of an error (ordering wrong).
>
> v2:
>   - Increase function symmetry/proximity (Michal/Daniele)
>   - Fix __reserve_doorbell accounting for high priority (Daniele)
>   - Call __update_doorbell_desc! (Daniele)
>   - Isolate __guc_{,de}allocate_doorbell (Michal/Daniele)
>
> v3:
>   - "Select" a cacheline is a more accurate verb than "reserve" (Daniele).
>   - We cannot update & create the doorbell without reserving it first, so
>     move the whole doorbell creation for execbuf_client to the submission
>     enable (Oscar).i
>   - Add a fixme for ignoring possible doorbell destroy errors.
>
> Signed-off-by: Joonas Lahtinen <joonas.lahtinen at linux.intel.com>
> Cc: Michal Wajdeczko <michal.wajdeczko at intel.com>
> Cc: Arkadiusz Hiler <arkadiusz.hiler at intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> Cc: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio at intel.com>
> Signed-off-by: Oscar Mateo <oscar.mateo at intel.com>
> ---
>  drivers/gpu/drm/i915/i915_debugfs.c        |   4 +-
>  drivers/gpu/drm/i915/i915_guc_submission.c | 391 ++++++++++++++++-------------
>  drivers/gpu/drm/i915/intel_guc_fwif.h      |   4 +-
>  drivers/gpu/drm/i915/intel_uc.h            |  11 +-
>  4 files changed, 229 insertions(+), 181 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 56674df..c395ccf 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -2469,7 +2469,7 @@ static void i915_guc_client_info(struct seq_file *m,
>
>  	seq_printf(m, "\tPriority %d, GuC ctx index: %u, PD offset 0x%x\n",
>  		client->priority, client->ctx_index, client->proc_desc_offset);
> -	seq_printf(m, "\tDoorbell id %d, offset: 0x%x, cookie 0x%x\n",
> +	seq_printf(m, "\tDoorbell id %d, offset: 0x%lx, cookie 0x%x\n",
>  		client->doorbell_id, client->doorbell_offset, client->doorbell_cookie);
>  	seq_printf(m, "\tWQ size %d, offset: 0x%x, tail %d\n",
>  		client->wq_size, client->wq_offset, client->wq_tail);
> @@ -2504,7 +2504,7 @@ static int i915_guc_info(struct seq_file *m, void *data)
>  	}
>
>  	seq_printf(m, "Doorbell map:\n");
> -	seq_printf(m, "\t%*pb\n", GUC_MAX_DOORBELLS, guc->doorbell_bitmap);
> +	seq_printf(m, "\t%*pb\n", GUC_NUM_DOORBELLS, guc->doorbell_bitmap);
>  	seq_printf(m, "Doorbell next cacheline: 0x%x\n\n", guc->db_cacheline);
>
>  	seq_printf(m, "GuC total action count: %llu\n", guc->action_count);
> diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
> index 41f2dd8..ceeb1fa 100644
> --- a/drivers/gpu/drm/i915/i915_guc_submission.c
> +++ b/drivers/gpu/drm/i915/i915_guc_submission.c
> @@ -62,27 +62,73 @@
>   *
>   */
>
> +static inline bool is_high_priority(struct i915_guc_client* client)
> +{
> +	return client->priority <= GUC_CTX_PRIORITY_HIGH;
> +}
> +
> +static int __reserve_doorbell(struct i915_guc_client *client)
> +{
> +	unsigned long offset;
> +	unsigned long end;
> +	u16 id;
> +
> +	GEM_BUG_ON(client->doorbell_id != GUC_DOORBELL_INVALID);
> +
> +	/*
> +	 * The bitmap tracks which doorbell registers are currently in use.
> +	 * It is split into two halves; the first half is used for normal
> +	 * priority contexts, the second half for high-priority ones.
> +	 * Note that logically higher priorities are numerically less than
> +	 * normal ones, so the test below means "is it high-priority?"

The comment about the test doesn't apply anymore so we can either remove 
it or move it in the is_high_priority() function.

> +	 */
> +
> +	offset = 0;
> +	end = GUC_NUM_DOORBELLS/2;
> +	if (is_high_priority(client)) {
> +		offset = end;
> +		end += offset;
> +        }
> +
> +	id = find_next_zero_bit(client->guc->doorbell_bitmap, offset, end);
> +	if (id == end)
> +		return -ENOSPC;
> +
> +	__set_bit(id, client->guc->doorbell_bitmap);
> +	client->doorbell_id = id;
> +	DRM_DEBUG_DRIVER("client %u (high prio=%s) reserved doorbell %d: \n",

"%d:" -> ": %d" ?

> +			 client->ctx_index, yesno(is_high_priority(client)),
> +			 id);
> +	return 0;
> +}
> +
> +static void __unreserve_doorbell(struct i915_guc_client *client)
> +{
> +	GEM_BUG_ON(client->doorbell_id == GUC_DOORBELL_INVALID);
> +
> +	__clear_bit(client->doorbell_id, client->guc->doorbell_bitmap);
> +	client->doorbell_id = GUC_DOORBELL_INVALID;
> +}
> +
>  /*
>   * Tell the GuC to allocate or deallocate a specific doorbell
>   */
>
> -static int guc_allocate_doorbell(struct intel_guc *guc,
> -				 struct i915_guc_client *client)
> +static int __guc_allocate_doorbell(struct intel_guc *guc, u32 ctx_index)
>  {
>  	u32 action[] = {
>  		INTEL_GUC_ACTION_ALLOCATE_DOORBELL,
> -		client->ctx_index
> +		ctx_index
>  	};
>
>  	return intel_guc_send(guc, action, ARRAY_SIZE(action));
>  }
>
> -static int guc_release_doorbell(struct intel_guc *guc,
> -				struct i915_guc_client *client)
> +static int __guc_deallocate_doorbell(struct intel_guc *guc, u32 ctx_index)
>  {
>  	u32 action[] = {
>  		INTEL_GUC_ACTION_DEALLOCATE_DOORBELL,
> -		client->ctx_index
> +		ctx_index
>  	};
>
>  	return intel_guc_send(guc, action, ARRAY_SIZE(action));
> @@ -95,104 +141,100 @@ static int guc_release_doorbell(struct intel_guc *guc,
>   * client object which contains the page being used for the doorbell
>   */
>
> -static int guc_update_doorbell_id(struct intel_guc *guc,
> -				  struct i915_guc_client *client,
> -				  u16 new_id)
> +static int __update_doorbell_desc(struct i915_guc_client *client, u16 new_id)
>  {
> -	struct sg_table *sg = guc->ctx_pool_vma->pages;
> -	void *doorbell_bitmap = guc->doorbell_bitmap;
> -	struct guc_doorbell_info *doorbell;
> +	struct sg_table *sg = client->guc->ctx_pool_vma->pages;
>  	struct guc_context_desc desc;
>  	size_t len;
>
> -	doorbell = client->vaddr + client->doorbell_offset;
> -
> -	if (client->doorbell_id != GUC_INVALID_DOORBELL_ID &&
> -	    test_bit(client->doorbell_id, doorbell_bitmap)) {
> -		/* Deactivate the old doorbell */
> -		doorbell->db_status = GUC_DOORBELL_DISABLED;
> -		(void)guc_release_doorbell(guc, client);
> -		__clear_bit(client->doorbell_id, doorbell_bitmap);
> -	}
> -
>  	/* Update the GuC's idea of the doorbell ID */
>  	len = sg_pcopy_to_buffer(sg->sgl, sg->nents, &desc, sizeof(desc),
> -			     sizeof(desc) * client->ctx_index);
> +				 sizeof(desc) * client->ctx_index);
>  	if (len != sizeof(desc))
>  		return -EFAULT;
> +
>  	desc.db_id = new_id;
>  	len = sg_pcopy_from_buffer(sg->sgl, sg->nents, &desc, sizeof(desc),
> -			     sizeof(desc) * client->ctx_index);
> +				   sizeof(desc) * client->ctx_index);
>  	if (len != sizeof(desc))
>  		return -EFAULT;
>
> -	client->doorbell_id = new_id;
> -	if (new_id == GUC_INVALID_DOORBELL_ID)
> -		return 0;
> +	return 0;
> +}
>
> -	/* Activate the new doorbell */
> -	__set_bit(new_id, doorbell_bitmap);
> +static struct guc_doorbell_info *__get_doorbell(struct i915_guc_client *client)

from what I can see this could also be used from guc_ring_doorbell and 
possibly from guc_ctx_desc_init

> +{
> +	return client->vaddr + client->doorbell_offset;

Not introduced by this patch, but arithmetic on void pointers doesn't 
really look clean IMO. Can we cast the pointer to uintptr_t or char* or 
whatever else is appropriate before incrementing it? I'm happy for it to 
be done in a follow up.

> +}
> +
> +static bool has_doorbell(struct i915_guc_client *client)
> +{
> +	if (client->doorbell_id == GUC_DOORBELL_INVALID)
> +		return false;
> +
> +	return test_bit(client->doorbell_id, client->guc->doorbell_bitmap);
> +}
> +
> +static int __create_doorbell(struct i915_guc_client *client)
> +{
> +	struct guc_doorbell_info *doorbell;
> +	int err;
> +
> +	doorbell = __get_doorbell(client);
>  	doorbell->db_status = GUC_DOORBELL_ENABLED;
>  	doorbell->cookie = client->doorbell_cookie;
> -	return guc_allocate_doorbell(guc, client);
> +
> +	err = __guc_allocate_doorbell(client->guc, client->ctx_index);
> +	if (err) {
> +		doorbell->db_status = GUC_DOORBELL_DISABLED;
> +		doorbell->cookie = 0;
> +	}
> +	return err;
>  }
>
> -static void guc_disable_doorbell(struct intel_guc *guc,
> -				 struct i915_guc_client *client)
> +static int __destroy_doorbell(struct i915_guc_client *client)
>  {
> -	(void)guc_update_doorbell_id(guc, client, GUC_INVALID_DOORBELL_ID);
> +	struct guc_doorbell_info *doorbell;
>
> -	/* XXX: wait for any interrupts */
> -	/* XXX: wait for workqueue to drain */
> +	doorbell = __get_doorbell(client);
> +	doorbell->db_status = GUC_DOORBELL_DISABLED;
> +	doorbell->cookie = 0;
> +
> +	return __guc_deallocate_doorbell(client->guc, client->ctx_index);
>  }
>
> -static uint16_t
> -select_doorbell_register(struct intel_guc *guc, uint32_t priority)
> +static int destroy_doorbell(struct i915_guc_client *client)
>  {
> -	/*
> -	 * The bitmap tracks which doorbell registers are currently in use.
> -	 * It is split into two halves; the first half is used for normal
> -	 * priority contexts, the second half for high-priority ones.
> -	 * Note that logically higher priorities are numerically less than
> -	 * normal ones, so the test below means "is it high-priority?"
> -	 */
> -	const bool hi_pri = (priority <= GUC_CTX_PRIORITY_HIGH);
> -	const uint16_t half = GUC_MAX_DOORBELLS / 2;
> -	const uint16_t start = hi_pri ? half : 0;
> -	const uint16_t end = start + half;
> -	uint16_t id;
> +	int err;
>
> -	id = find_next_zero_bit(guc->doorbell_bitmap, end, start);
> -	if (id == end)
> -		id = GUC_INVALID_DOORBELL_ID;
> +	GEM_BUG_ON(!has_doorbell(client));
>
> -	DRM_DEBUG_DRIVER("assigned %s priority doorbell id 0x%x\n",
> -			hi_pri ? "high" : "normal", id);
> +	/* XXX: wait for any interrupts */
> +	/* XXX: wait for workqueue to drain */
>
> -	return id;
> -}
> +	err = __destroy_doorbell(client);
> +	if (err)
> +		return err;
>
> -/*
> - * Select, assign and relase doorbell cachelines
> - *
> - * These functions track which doorbell cachelines are in use.
> - * The data they manipulate is protected by the intel_guc_send lock.
> - */
> +	__update_doorbell_desc(client, GUC_DOORBELL_INVALID);
> +
> +	__unreserve_doorbell(client);
>
> -static uint32_t select_doorbell_cacheline(struct intel_guc *guc)
> +	return 0;
> +}
> +
> +static unsigned long __select_cacheline(struct intel_guc* guc)
>  {
> -	const uint32_t cacheline_size = cache_line_size();
> -	uint32_t offset;
> +	unsigned long offset;
>
>  	/* Doorbell uses a single cache line within a page */
>  	offset = offset_in_page(guc->db_cacheline);
>
>  	/* Moving to next cache line to reduce contention */
> -	guc->db_cacheline += cacheline_size;
> -
> -	DRM_DEBUG_DRIVER("selected doorbell cacheline 0x%x, next 0x%x, linesize %u\n",
> -			offset, guc->db_cacheline, cacheline_size);
> +	guc->db_cacheline += cache_line_size();
>
> +	DRM_DEBUG_DRIVER("reserved cacheline 0x%lx, next 0x%x, linesize %u\n",
> +			offset, guc->db_cacheline, cache_line_size());
>  	return offset;
>  }
>
> @@ -594,93 +636,96 @@ struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 size)
>  	return vma;
>  }
>
> -static void
> -guc_client_free(struct drm_i915_private *dev_priv,
> -		struct i915_guc_client *client)
> +static void guc_client_free(struct i915_guc_client *client)
>  {
> -	struct intel_guc *guc = &dev_priv->guc;
> -
> -	if (!client)
> -		return;
> -
>  	/*
>  	 * XXX: wait for any outstanding submissions before freeing memory.
>  	 * Be sure to drop any locks
>  	 */
> -
> -	if (client->vaddr) {
> -		/*
> -		 * If we got as far as setting up a doorbell, make sure we
> -		 * shut it down before unmapping & deallocating the memory.
> -		 */
> -		guc_disable_doorbell(guc, client);
> -
> -		i915_gem_object_unpin_map(client->vma->obj);
> -	}
> -
> +	guc_ctx_desc_fini(client->guc, client);
> +	i915_gem_object_unpin_map(client->vma->obj);
>  	i915_vma_unpin_and_release(&client->vma);
> -
> -	if (client->ctx_index != GUC_INVALID_CTX_ID) {
> -		guc_ctx_desc_fini(guc, client);
> -		ida_simple_remove(&guc->ctx_ids, client->ctx_index);
> -	}
> -
> +	ida_simple_remove(&client->guc->ctx_ids, client->ctx_index);
>  	kfree(client);
>  }
>
>  /* Check that a doorbell register is in the expected state */
> -static bool guc_doorbell_check(struct intel_guc *guc, uint16_t db_id)
> +static bool doorbell_ok(struct intel_guc *guc, u16 db_id)
>  {
>  	struct drm_i915_private *dev_priv = guc_to_i915(guc);
> -	i915_reg_t drbreg = GEN8_DRBREGL(db_id);
> -	uint32_t value = I915_READ(drbreg);
> -	bool enabled = (value & GUC_DOORBELL_ENABLED) != 0;
> -	bool expected = test_bit(db_id, guc->doorbell_bitmap);
> +	u32 drbregl;
> +	bool valid;
>
> -	if (enabled == expected)
> +	GEM_BUG_ON(db_id >= GUC_DOORBELL_INVALID);
> +
> +	drbregl = I915_READ(GEN8_DRBREGL(db_id));
> +	valid = drbregl & GEN8_DRB_VALID;
> +
> +	if (test_bit(db_id, guc->doorbell_bitmap) == valid)
>  		return true;
>
> -	DRM_DEBUG_DRIVER("Doorbell %d (reg 0x%x) 0x%x, should be %s\n",
> -			 db_id, drbreg.reg, value,
> -			 expected ? "active" : "inactive");
> +	DRM_DEBUG_DRIVER("Doorbell %d has unexpected state (0x%x): valid=%s\n",
> +			 db_id, drbregl, yesno(valid));
>
>  	return false;
>  }
>
> +static int __reset_doorbell(struct i915_guc_client* client, u16 db_id)

This reset only works if the GuC FW thinks that the doorbell is 
unassigned, otherwise the ALLOCATE_DOORBELL action will fail. We 
currently only use it to reset the doorbell HW status after resetting 
GuC and reloading the FW so it is ok, but we could use a comment to 
explain the limitation of this reset.

The patch looks functionally ok, so with the minor comments addressed:

Reviewed-by: Daniele Ceraolo Spurio <daniele.ceraolospurio at intel.com>



More information about the Intel-gfx mailing list