[Intel-gfx] [01/11 v3] drm/i915/guc: Sanitize GuC client initialization
Daniele Ceraolo Spurio
daniele.ceraolospurio at intel.com
Mon Mar 13 20:19:00 UTC 2017
On 10/03/17 08:28, Oscar Mateo wrote:
> Started adding proper teardown to guc_client_alloc, ended up removing
> quite a few dead ends where errors communicating with the GuC were
> silently ignored. There also seemed to be quite a few erronous
> teardown actions performed in case of an error (ordering wrong).
>
> v2:
> - Increase function symmetry/proximity (Michal/Daniele)
> - Fix __reserve_doorbell accounting for high priority (Daniele)
> - Call __update_doorbell_desc! (Daniele)
> - Isolate __guc_{,de}allocate_doorbell (Michal/Daniele)
>
> v3:
> - "Select" a cacheline is a more accurate verb than "reserve" (Daniele).
> - We cannot update & create the doorbell without reserving it first, so
> move the whole doorbell creation for execbuf_client to the submission
> enable (Oscar).i
> - Add a fixme for ignoring possible doorbell destroy errors.
>
> Signed-off-by: Joonas Lahtinen <joonas.lahtinen at linux.intel.com>
> Cc: Michal Wajdeczko <michal.wajdeczko at intel.com>
> Cc: Arkadiusz Hiler <arkadiusz.hiler at intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> Cc: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio at intel.com>
> Signed-off-by: Oscar Mateo <oscar.mateo at intel.com>
> ---
> drivers/gpu/drm/i915/i915_debugfs.c | 4 +-
> drivers/gpu/drm/i915/i915_guc_submission.c | 391 ++++++++++++++++-------------
> drivers/gpu/drm/i915/intel_guc_fwif.h | 4 +-
> drivers/gpu/drm/i915/intel_uc.h | 11 +-
> 4 files changed, 229 insertions(+), 181 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 56674df..c395ccf 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -2469,7 +2469,7 @@ static void i915_guc_client_info(struct seq_file *m,
>
> seq_printf(m, "\tPriority %d, GuC ctx index: %u, PD offset 0x%x\n",
> client->priority, client->ctx_index, client->proc_desc_offset);
> - seq_printf(m, "\tDoorbell id %d, offset: 0x%x, cookie 0x%x\n",
> + seq_printf(m, "\tDoorbell id %d, offset: 0x%lx, cookie 0x%x\n",
> client->doorbell_id, client->doorbell_offset, client->doorbell_cookie);
> seq_printf(m, "\tWQ size %d, offset: 0x%x, tail %d\n",
> client->wq_size, client->wq_offset, client->wq_tail);
> @@ -2504,7 +2504,7 @@ static int i915_guc_info(struct seq_file *m, void *data)
> }
>
> seq_printf(m, "Doorbell map:\n");
> - seq_printf(m, "\t%*pb\n", GUC_MAX_DOORBELLS, guc->doorbell_bitmap);
> + seq_printf(m, "\t%*pb\n", GUC_NUM_DOORBELLS, guc->doorbell_bitmap);
> seq_printf(m, "Doorbell next cacheline: 0x%x\n\n", guc->db_cacheline);
>
> seq_printf(m, "GuC total action count: %llu\n", guc->action_count);
> diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
> index 41f2dd8..ceeb1fa 100644
> --- a/drivers/gpu/drm/i915/i915_guc_submission.c
> +++ b/drivers/gpu/drm/i915/i915_guc_submission.c
> @@ -62,27 +62,73 @@
> *
> */
>
> +static inline bool is_high_priority(struct i915_guc_client* client)
> +{
> + return client->priority <= GUC_CTX_PRIORITY_HIGH;
> +}
> +
> +static int __reserve_doorbell(struct i915_guc_client *client)
> +{
> + unsigned long offset;
> + unsigned long end;
> + u16 id;
> +
> + GEM_BUG_ON(client->doorbell_id != GUC_DOORBELL_INVALID);
> +
> + /*
> + * The bitmap tracks which doorbell registers are currently in use.
> + * It is split into two halves; the first half is used for normal
> + * priority contexts, the second half for high-priority ones.
> + * Note that logically higher priorities are numerically less than
> + * normal ones, so the test below means "is it high-priority?"
The comment about the test doesn't apply anymore so we can either remove
it or move it in the is_high_priority() function.
> + */
> +
> + offset = 0;
> + end = GUC_NUM_DOORBELLS/2;
> + if (is_high_priority(client)) {
> + offset = end;
> + end += offset;
> + }
> +
> + id = find_next_zero_bit(client->guc->doorbell_bitmap, offset, end);
> + if (id == end)
> + return -ENOSPC;
> +
> + __set_bit(id, client->guc->doorbell_bitmap);
> + client->doorbell_id = id;
> + DRM_DEBUG_DRIVER("client %u (high prio=%s) reserved doorbell %d: \n",
"%d:" -> ": %d" ?
> + client->ctx_index, yesno(is_high_priority(client)),
> + id);
> + return 0;
> +}
> +
> +static void __unreserve_doorbell(struct i915_guc_client *client)
> +{
> + GEM_BUG_ON(client->doorbell_id == GUC_DOORBELL_INVALID);
> +
> + __clear_bit(client->doorbell_id, client->guc->doorbell_bitmap);
> + client->doorbell_id = GUC_DOORBELL_INVALID;
> +}
> +
> /*
> * Tell the GuC to allocate or deallocate a specific doorbell
> */
>
> -static int guc_allocate_doorbell(struct intel_guc *guc,
> - struct i915_guc_client *client)
> +static int __guc_allocate_doorbell(struct intel_guc *guc, u32 ctx_index)
> {
> u32 action[] = {
> INTEL_GUC_ACTION_ALLOCATE_DOORBELL,
> - client->ctx_index
> + ctx_index
> };
>
> return intel_guc_send(guc, action, ARRAY_SIZE(action));
> }
>
> -static int guc_release_doorbell(struct intel_guc *guc,
> - struct i915_guc_client *client)
> +static int __guc_deallocate_doorbell(struct intel_guc *guc, u32 ctx_index)
> {
> u32 action[] = {
> INTEL_GUC_ACTION_DEALLOCATE_DOORBELL,
> - client->ctx_index
> + ctx_index
> };
>
> return intel_guc_send(guc, action, ARRAY_SIZE(action));
> @@ -95,104 +141,100 @@ static int guc_release_doorbell(struct intel_guc *guc,
> * client object which contains the page being used for the doorbell
> */
>
> -static int guc_update_doorbell_id(struct intel_guc *guc,
> - struct i915_guc_client *client,
> - u16 new_id)
> +static int __update_doorbell_desc(struct i915_guc_client *client, u16 new_id)
> {
> - struct sg_table *sg = guc->ctx_pool_vma->pages;
> - void *doorbell_bitmap = guc->doorbell_bitmap;
> - struct guc_doorbell_info *doorbell;
> + struct sg_table *sg = client->guc->ctx_pool_vma->pages;
> struct guc_context_desc desc;
> size_t len;
>
> - doorbell = client->vaddr + client->doorbell_offset;
> -
> - if (client->doorbell_id != GUC_INVALID_DOORBELL_ID &&
> - test_bit(client->doorbell_id, doorbell_bitmap)) {
> - /* Deactivate the old doorbell */
> - doorbell->db_status = GUC_DOORBELL_DISABLED;
> - (void)guc_release_doorbell(guc, client);
> - __clear_bit(client->doorbell_id, doorbell_bitmap);
> - }
> -
> /* Update the GuC's idea of the doorbell ID */
> len = sg_pcopy_to_buffer(sg->sgl, sg->nents, &desc, sizeof(desc),
> - sizeof(desc) * client->ctx_index);
> + sizeof(desc) * client->ctx_index);
> if (len != sizeof(desc))
> return -EFAULT;
> +
> desc.db_id = new_id;
> len = sg_pcopy_from_buffer(sg->sgl, sg->nents, &desc, sizeof(desc),
> - sizeof(desc) * client->ctx_index);
> + sizeof(desc) * client->ctx_index);
> if (len != sizeof(desc))
> return -EFAULT;
>
> - client->doorbell_id = new_id;
> - if (new_id == GUC_INVALID_DOORBELL_ID)
> - return 0;
> + return 0;
> +}
>
> - /* Activate the new doorbell */
> - __set_bit(new_id, doorbell_bitmap);
> +static struct guc_doorbell_info *__get_doorbell(struct i915_guc_client *client)
from what I can see this could also be used from guc_ring_doorbell and
possibly from guc_ctx_desc_init
> +{
> + return client->vaddr + client->doorbell_offset;
Not introduced by this patch, but arithmetic on void pointers doesn't
really look clean IMO. Can we cast the pointer to uintptr_t or char* or
whatever else is appropriate before incrementing it? I'm happy for it to
be done in a follow up.
> +}
> +
> +static bool has_doorbell(struct i915_guc_client *client)
> +{
> + if (client->doorbell_id == GUC_DOORBELL_INVALID)
> + return false;
> +
> + return test_bit(client->doorbell_id, client->guc->doorbell_bitmap);
> +}
> +
> +static int __create_doorbell(struct i915_guc_client *client)
> +{
> + struct guc_doorbell_info *doorbell;
> + int err;
> +
> + doorbell = __get_doorbell(client);
> doorbell->db_status = GUC_DOORBELL_ENABLED;
> doorbell->cookie = client->doorbell_cookie;
> - return guc_allocate_doorbell(guc, client);
> +
> + err = __guc_allocate_doorbell(client->guc, client->ctx_index);
> + if (err) {
> + doorbell->db_status = GUC_DOORBELL_DISABLED;
> + doorbell->cookie = 0;
> + }
> + return err;
> }
>
> -static void guc_disable_doorbell(struct intel_guc *guc,
> - struct i915_guc_client *client)
> +static int __destroy_doorbell(struct i915_guc_client *client)
> {
> - (void)guc_update_doorbell_id(guc, client, GUC_INVALID_DOORBELL_ID);
> + struct guc_doorbell_info *doorbell;
>
> - /* XXX: wait for any interrupts */
> - /* XXX: wait for workqueue to drain */
> + doorbell = __get_doorbell(client);
> + doorbell->db_status = GUC_DOORBELL_DISABLED;
> + doorbell->cookie = 0;
> +
> + return __guc_deallocate_doorbell(client->guc, client->ctx_index);
> }
>
> -static uint16_t
> -select_doorbell_register(struct intel_guc *guc, uint32_t priority)
> +static int destroy_doorbell(struct i915_guc_client *client)
> {
> - /*
> - * The bitmap tracks which doorbell registers are currently in use.
> - * It is split into two halves; the first half is used for normal
> - * priority contexts, the second half for high-priority ones.
> - * Note that logically higher priorities are numerically less than
> - * normal ones, so the test below means "is it high-priority?"
> - */
> - const bool hi_pri = (priority <= GUC_CTX_PRIORITY_HIGH);
> - const uint16_t half = GUC_MAX_DOORBELLS / 2;
> - const uint16_t start = hi_pri ? half : 0;
> - const uint16_t end = start + half;
> - uint16_t id;
> + int err;
>
> - id = find_next_zero_bit(guc->doorbell_bitmap, end, start);
> - if (id == end)
> - id = GUC_INVALID_DOORBELL_ID;
> + GEM_BUG_ON(!has_doorbell(client));
>
> - DRM_DEBUG_DRIVER("assigned %s priority doorbell id 0x%x\n",
> - hi_pri ? "high" : "normal", id);
> + /* XXX: wait for any interrupts */
> + /* XXX: wait for workqueue to drain */
>
> - return id;
> -}
> + err = __destroy_doorbell(client);
> + if (err)
> + return err;
>
> -/*
> - * Select, assign and relase doorbell cachelines
> - *
> - * These functions track which doorbell cachelines are in use.
> - * The data they manipulate is protected by the intel_guc_send lock.
> - */
> + __update_doorbell_desc(client, GUC_DOORBELL_INVALID);
> +
> + __unreserve_doorbell(client);
>
> -static uint32_t select_doorbell_cacheline(struct intel_guc *guc)
> + return 0;
> +}
> +
> +static unsigned long __select_cacheline(struct intel_guc* guc)
> {
> - const uint32_t cacheline_size = cache_line_size();
> - uint32_t offset;
> + unsigned long offset;
>
> /* Doorbell uses a single cache line within a page */
> offset = offset_in_page(guc->db_cacheline);
>
> /* Moving to next cache line to reduce contention */
> - guc->db_cacheline += cacheline_size;
> -
> - DRM_DEBUG_DRIVER("selected doorbell cacheline 0x%x, next 0x%x, linesize %u\n",
> - offset, guc->db_cacheline, cacheline_size);
> + guc->db_cacheline += cache_line_size();
>
> + DRM_DEBUG_DRIVER("reserved cacheline 0x%lx, next 0x%x, linesize %u\n",
> + offset, guc->db_cacheline, cache_line_size());
> return offset;
> }
>
> @@ -594,93 +636,96 @@ struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 size)
> return vma;
> }
>
> -static void
> -guc_client_free(struct drm_i915_private *dev_priv,
> - struct i915_guc_client *client)
> +static void guc_client_free(struct i915_guc_client *client)
> {
> - struct intel_guc *guc = &dev_priv->guc;
> -
> - if (!client)
> - return;
> -
> /*
> * XXX: wait for any outstanding submissions before freeing memory.
> * Be sure to drop any locks
> */
> -
> - if (client->vaddr) {
> - /*
> - * If we got as far as setting up a doorbell, make sure we
> - * shut it down before unmapping & deallocating the memory.
> - */
> - guc_disable_doorbell(guc, client);
> -
> - i915_gem_object_unpin_map(client->vma->obj);
> - }
> -
> + guc_ctx_desc_fini(client->guc, client);
> + i915_gem_object_unpin_map(client->vma->obj);
> i915_vma_unpin_and_release(&client->vma);
> -
> - if (client->ctx_index != GUC_INVALID_CTX_ID) {
> - guc_ctx_desc_fini(guc, client);
> - ida_simple_remove(&guc->ctx_ids, client->ctx_index);
> - }
> -
> + ida_simple_remove(&client->guc->ctx_ids, client->ctx_index);
> kfree(client);
> }
>
> /* Check that a doorbell register is in the expected state */
> -static bool guc_doorbell_check(struct intel_guc *guc, uint16_t db_id)
> +static bool doorbell_ok(struct intel_guc *guc, u16 db_id)
> {
> struct drm_i915_private *dev_priv = guc_to_i915(guc);
> - i915_reg_t drbreg = GEN8_DRBREGL(db_id);
> - uint32_t value = I915_READ(drbreg);
> - bool enabled = (value & GUC_DOORBELL_ENABLED) != 0;
> - bool expected = test_bit(db_id, guc->doorbell_bitmap);
> + u32 drbregl;
> + bool valid;
>
> - if (enabled == expected)
> + GEM_BUG_ON(db_id >= GUC_DOORBELL_INVALID);
> +
> + drbregl = I915_READ(GEN8_DRBREGL(db_id));
> + valid = drbregl & GEN8_DRB_VALID;
> +
> + if (test_bit(db_id, guc->doorbell_bitmap) == valid)
> return true;
>
> - DRM_DEBUG_DRIVER("Doorbell %d (reg 0x%x) 0x%x, should be %s\n",
> - db_id, drbreg.reg, value,
> - expected ? "active" : "inactive");
> + DRM_DEBUG_DRIVER("Doorbell %d has unexpected state (0x%x): valid=%s\n",
> + db_id, drbregl, yesno(valid));
>
> return false;
> }
>
> +static int __reset_doorbell(struct i915_guc_client* client, u16 db_id)
This reset only works if the GuC FW thinks that the doorbell is
unassigned, otherwise the ALLOCATE_DOORBELL action will fail. We
currently only use it to reset the doorbell HW status after resetting
GuC and reloading the FW so it is ok, but we could use a comment to
explain the limitation of this reset.
The patch looks functionally ok, so with the minor comments addressed:
Reviewed-by: Daniele Ceraolo Spurio <daniele.ceraolospurio at intel.com>
More information about the Intel-gfx
mailing list