[Intel-gfx] [PATCH 5/8] drm/i915/guc: Extract doorbell creation from client allocation
Michel Thierry
michel.thierry at intel.com
Wed Dec 13 19:03:55 UTC 2017
On 13/12/17 04:50, Winiarski, Michal wrote:
> Full GPU reset causes GuC to be reset. This means that every time we're
> doing a reset, we need to talk to GuC and tell it about doorbells.
> Let's separate the communication part (create_doorbell) from our
> internal bookkeeping (reserve_doorbell) so that we can cleanly separate
> the initialization done at module load from reinitialization done at
> reset in the following patch.
> While I'm here, let's also add a proper (although slightly asymetric)
> cleanup that doesn't try to communicate with GuC after it's already
> gone, getting rid of "expected" warnings caused by GuC action failures
> on module unload.
>
> Note that I've also removed one of the tests (bitmap out of sync), since
> it doesn't make much sense anymore - bitmaps are now not expected to
> change during the lifetime of a client.
>
> Signed-off-by: MichaĆ Winiarski <michal.winiarski at intel.com>
> Cc: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Joonas Lahtinen <joonas.lahtinen at linux.intel.com>
> Cc: Michal Wajdeczko <michal.wajdeczko at intel.com>
> Cc: Michel Thierry <michel.thierry at intel.com>
> ---
> drivers/gpu/drm/i915/intel_guc_submission.c | 151 ++++++++--------------------
> drivers/gpu/drm/i915/selftests/intel_guc.c | 110 +++++++++-----------
> 2 files changed, 88 insertions(+), 173 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
> index 8f4b274d66a7..c74e78b6ba41 100644
> --- a/drivers/gpu/drm/i915/intel_guc_submission.c
> +++ b/drivers/gpu/drm/i915/intel_guc_submission.c
> @@ -88,7 +88,7 @@ static inline bool is_high_priority(struct intel_guc_client *client)
> client->priority == GUC_CLIENT_PRIORITY_HIGH);
> }
>
> -static int __reserve_doorbell(struct intel_guc_client *client)
> +static int reserve_doorbell(struct intel_guc_client *client)
> {
> unsigned long offset;
> unsigned long end;
> @@ -120,7 +120,7 @@ static int __reserve_doorbell(struct intel_guc_client *client)
> return 0;
> }
>
> -static void __unreserve_doorbell(struct intel_guc_client *client)
> +static void unreserve_doorbell(struct intel_guc_client *client)
> {
> GEM_BUG_ON(client->doorbell_id == GUC_DOORBELL_INVALID);
>
> @@ -188,32 +188,21 @@ static bool has_doorbell(struct intel_guc_client *client)
> return test_bit(client->doorbell_id, client->guc->doorbell_bitmap);
> }
>
> -static int __create_doorbell(struct intel_guc_client *client)
> +static void __create_doorbell(struct intel_guc_client *client)
> {
> struct guc_doorbell_info *doorbell;
> - int err;
>
> doorbell = __get_doorbell(client);
> doorbell->db_status = GUC_DOORBELL_ENABLED;
> doorbell->cookie = 0;
> -
> - err = __guc_allocate_doorbell(client->guc, client->stage_id);
> - if (err) {
> - doorbell->db_status = GUC_DOORBELL_DISABLED;
> - DRM_ERROR("Couldn't create client %u doorbell: %d\n",
> - client->stage_id, err);
> - }
> -
> - return err;
> }
__create_doorbell isn't creating anything, and now it is just changing
the db status, but that has nothing to do with this patch.
Reviewed-by: Michel Thierry <michel.thierry at intel.com>
>
> -static int __destroy_doorbell(struct intel_guc_client *client)
> +static void __destroy_doorbell(struct intel_guc_client *client)
> {
> struct drm_i915_private *dev_priv = guc_to_i915(client->guc);
> struct guc_doorbell_info *doorbell;
> u16 db_id = client->doorbell_id;
>
> - GEM_BUG_ON(db_id >= GUC_DOORBELL_INVALID);
>
> doorbell = __get_doorbell(client);
> doorbell->db_status = GUC_DOORBELL_DISABLED;
> @@ -225,50 +214,42 @@ static int __destroy_doorbell(struct intel_guc_client *client)
> */
> if (wait_for_us(!(I915_READ(GEN8_DRBREGL(db_id)) & GEN8_DRB_VALID), 10))
> WARN_ONCE(true, "Doorbell never became invalid after disable\n");
> -
> - return __guc_deallocate_doorbell(client->guc, client->stage_id);
> }
>
> static int create_doorbell(struct intel_guc_client *client)
> {
> int ret;
>
> - ret = __reserve_doorbell(client);
> - if (ret)
> - return ret;
> -
> __update_doorbell_desc(client, client->doorbell_id);
> + __create_doorbell(client);
>
> - ret = __create_doorbell(client);
> - if (ret)
> - goto err;
> + ret = __guc_allocate_doorbell(client->guc, client->stage_id);
> + if (ret) {
> + __destroy_doorbell(client);
> + __update_doorbell_desc(client, GUC_DOORBELL_INVALID);
> + DRM_ERROR("Couldn't create client %u doorbell: %d\n",
> + client->stage_id, ret);
> + return ret;
> + }
>
> return 0;
> -
> -err:
> - __update_doorbell_desc(client, GUC_DOORBELL_INVALID);
> - __unreserve_doorbell(client);
> - return ret;
> }
>
> static int destroy_doorbell(struct intel_guc_client *client)
> {
> - int err;
> + int ret;
>
> GEM_BUG_ON(!has_doorbell(client));
>
> - /* XXX: wait for any interrupts */
> - /* XXX: wait for workqueue to drain */
> -
> - err = __destroy_doorbell(client);
> - if (err)
> - return err;
> + __destroy_doorbell(client);
> + ret = __guc_deallocate_doorbell(client->guc, client->stage_id);
> + if (ret)
> + DRM_ERROR("Couldn't destroy client %u doorbell: %d\n",
> + client->stage_id, ret);
>
> __update_doorbell_desc(client, GUC_DOORBELL_INVALID);
>
> - __unreserve_doorbell(client);
> -
> - return 0;
> + return ret;
> }
>
> static unsigned long __select_cacheline(struct intel_guc *guc)
> @@ -839,73 +820,18 @@ static bool doorbell_ok(struct intel_guc *guc, u16 db_id)
> return false;
> }
>
> -/*
> - * If the GuC thinks that the doorbell is unassigned (e.g. because we reset and
> - * reloaded the GuC FW) we can use this function to tell the GuC to reassign the
> - * doorbell to the rightful owner.
> - */
> -static int __reset_doorbell(struct intel_guc_client *client, u16 db_id)
> -{
> - int err;
> -
> - __update_doorbell_desc(client, db_id);
> - err = __create_doorbell(client);
> - if (!err)
> - err = __destroy_doorbell(client);
> -
> - return err;
> -}
> -
> -/*
> - * Set up & tear down each unused doorbell in turn, to ensure that all doorbell
> - * HW is (re)initialised. For that end, we might have to borrow the first
> - * client. Also, tell GuC about all the doorbells in use by all clients.
> - * We do this because the KMD, the GuC and the doorbell HW can easily go out of
> - * sync (e.g. we can reset the GuC, but not the doorbel HW).
> - */
> -static int guc_init_doorbell_hw(struct intel_guc *guc)
> +static int guc_clients_doorbell_init(struct intel_guc *guc)
> {
> - struct intel_guc_client *client = guc->execbuf_client;
> - bool recreate_first_client = false;
> u16 db_id;
> int ret;
>
> - /* For unused doorbells, make sure they are disabled */
> - for_each_clear_bit(db_id, guc->doorbell_bitmap, GUC_NUM_DOORBELLS) {
> - if (doorbell_ok(guc, db_id))
> - continue;
> -
> - if (has_doorbell(client)) {
> - /* Borrow execbuf_client (we will recreate it later) */
> - destroy_doorbell(client);
> - recreate_first_client = true;
> - }
> -
> - ret = __reset_doorbell(client, db_id);
> - WARN(ret, "Doorbell %u reset failed, err %d\n", db_id, ret);
> - }
> -
> - if (recreate_first_client) {
> - ret = __reserve_doorbell(client);
> - if (unlikely(ret)) {
> - DRM_ERROR("Couldn't re-reserve first client db: %d\n",
> - ret);
> - return ret;
> - }
> -
> - __update_doorbell_desc(client, client->doorbell_id);
> - }
> -
> - /* Now for every client (and not only execbuf_client) make sure their
> - * doorbells are known by the GuC
> - */
> - ret = __create_doorbell(guc->execbuf_client);
> + ret = create_doorbell(guc->execbuf_client);
> if (ret)
> return ret;
>
> - ret = __create_doorbell(guc->preempt_client);
> + ret = create_doorbell(guc->preempt_client);
> if (ret) {
> - __destroy_doorbell(guc->execbuf_client);
> + destroy_doorbell(guc->execbuf_client);
> return ret;
> }
>
> @@ -916,6 +842,19 @@ static int guc_init_doorbell_hw(struct intel_guc *guc)
> return 0;
> }
>
> +static void guc_clients_doorbell_fini(struct intel_guc *guc)
> +{
> + /*
> + * By the time we're here, GuC has already been reset.
> + * Instead of trying (in vain) to communicate with it, let's just
> + * cleanup the doorbell HW and our internal state.
> + */
> + __destroy_doorbell(guc->preempt_client);
> + __update_doorbell_desc(guc->preempt_client, GUC_DOORBELL_INVALID);
> + __destroy_doorbell(guc->execbuf_client);
> + __update_doorbell_desc(guc->execbuf_client, GUC_DOORBELL_INVALID);
> +}
> +
> /**
> * guc_client_alloc() - Allocate an intel_guc_client
> * @dev_priv: driver private data structure
> @@ -991,7 +930,7 @@ guc_client_alloc(struct drm_i915_private *dev_priv,
> guc_proc_desc_init(guc, client);
> guc_stage_desc_init(guc, client);
>
> - ret = create_doorbell(client);
> + ret = reserve_doorbell(client);
> if (ret)
> goto err_vaddr;
>
> @@ -1015,16 +954,7 @@ guc_client_alloc(struct drm_i915_private *dev_priv,
>
> static void guc_client_free(struct intel_guc_client *client)
> {
> - /*
> - * XXX: wait for any outstanding submissions before freeing memory.
> - * Be sure to drop any locks
> - */
> -
> - /* FIXME: in many cases, by the time we get here the GuC has been
> - * reset, so we cannot destroy the doorbell properly. Ignore the
> - * error message for now
> - */
> - destroy_doorbell(client);
> + unreserve_doorbell(client);
> guc_stage_desc_fini(client->guc, client);
> i915_gem_object_unpin_map(client->vma->obj);
> i915_vma_unpin_and_release(&client->vma);
> @@ -1366,7 +1296,7 @@ int intel_guc_submission_enable(struct intel_guc *guc)
> if (err)
> goto err_free_clients;
>
> - err = guc_init_doorbell_hw(guc);
> + err = guc_clients_doorbell_init(guc);
> if (err)
> goto err_free_clients;
>
> @@ -1398,6 +1328,7 @@ void intel_guc_submission_disable(struct intel_guc *guc)
> GEM_BUG_ON(dev_priv->gt.awake); /* GT should be parked first */
>
> guc_interrupts_release(dev_priv);
> + guc_clients_doorbell_fini(guc);
>
> /* Revert back to manual ELSP submission */
> intel_engines_reset_default_submission(dev_priv);
> diff --git a/drivers/gpu/drm/i915/selftests/intel_guc.c b/drivers/gpu/drm/i915/selftests/intel_guc.c
> index 68d6a69c738f..3f9016466dea 100644
> --- a/drivers/gpu/drm/i915/selftests/intel_guc.c
> +++ b/drivers/gpu/drm/i915/selftests/intel_guc.c
> @@ -85,21 +85,26 @@ static int validate_client(struct intel_guc_client *client,
> return 0;
> }
>
> +static bool client_doorbell_in_sync(struct intel_guc_client *client)
> +{
> + return doorbell_ok(client->guc, client->doorbell_id);
> +}
> +
> /*
> - * Check that guc_init_doorbell_hw is doing what it should.
> + * Check that we're able to synchronize guc_clients with their doorbells
> *
> - * During GuC submission enable, we create GuC clients and their doorbells,
> - * but after resetting the microcontroller (resume & gpu reset), these
> - * GuC clients are still around, but the status of their doorbells may be
> - * incorrect. This is the reason behind validating that the doorbells status
> - * expected by the driver matches what the GuC/HW have.
> + * We're creating clients and reserving doorbells once, at module load. During
> + * module lifetime, GuC, doorbell HW, and i915 state may go out of sync due to
> + * GuC being reset. In other words - GuC clients are still around, but the
> + * status of their doorbells may be incorrect. This is the reason behind
> + * validating that the doorbells status expected by the driver matches what the
> + * GuC/HW have.
> */
> -static int igt_guc_init_doorbell_hw(void *args)
> +static int igt_guc_clients(void *args)
> {
> struct drm_i915_private *dev_priv = args;
> struct intel_guc *guc;
> - DECLARE_BITMAP(db_bitmap_bk, GUC_NUM_DOORBELLS);
> - int i, err = 0;
> + int err = 0;
>
> GEM_BUG_ON(!HAS_GUC(dev_priv));
> mutex_lock(&dev_priv->drm.struct_mutex);
> @@ -148,10 +153,21 @@ static int igt_guc_init_doorbell_hw(void *args)
> goto out;
> }
>
> - /* each client should have received a doorbell during alloc */
> + /* each client should now have reserved a doorbell */
> if (!has_doorbell(guc->execbuf_client) ||
> !has_doorbell(guc->preempt_client)) {
> - pr_err("guc_clients_create didn't create doorbells\n");
> + pr_err("guc_clients_create didn't reserve doorbells\n");
> + err = -EINVAL;
> + goto out;
> + }
> +
> + /* Now create the doorbells */
> + guc_clients_doorbell_init(guc);
> +
> + /* each client should now have received a doorbell */
> + if (!client_doorbell_in_sync(guc->execbuf_client) ||
> + !client_doorbell_in_sync(guc->preempt_client)) {
> + pr_err("failed to initialize the doorbells\n");
> err = -EINVAL;
> goto out;
> }
> @@ -160,25 +176,26 @@ static int igt_guc_init_doorbell_hw(void *args)
> * Basic test - an attempt to reallocate a valid doorbell to the
> * client it is currently assigned should not cause a failure.
> */
> - err = guc_init_doorbell_hw(guc);
> + err = guc_clients_doorbell_init(guc);
> if (err)
> goto out;
>
> /*
> * Negative test - a client with no doorbell (invalid db id).
> - * Each client gets a doorbell when it is created, after destroying
> - * the doorbell, the db id is changed to GUC_DOORBELL_INVALID and the
> - * firmware will reject any attempt to allocate a doorbell with an
> - * invalid id (db has to be reserved before allocation).
> + * After destroying the doorbell, the db id is changed to
> + * GUC_DOORBELL_INVALID and the firmware will reject any attempt to
> + * allocate a doorbell with an invalid id (db has to be reserved before
> + * allocation).
> */
> destroy_doorbell(guc->execbuf_client);
> - if (has_doorbell(guc->execbuf_client)) {
> + if (client_doorbell_in_sync(guc->execbuf_client)) {
> pr_err("destroy db did not work\n");
> err = -EINVAL;
> goto out;
> }
>
> - err = guc_init_doorbell_hw(guc);
> + unreserve_doorbell(guc->execbuf_client);
> + err = guc_clients_doorbell_init(guc);
> if (err != -EIO) {
> pr_err("unexpected (err = %d)", err);
> goto out;
> @@ -191,33 +208,13 @@ static int igt_guc_init_doorbell_hw(void *args)
> }
>
> /* clean after test */
> - err = create_doorbell(guc->execbuf_client);
> - if (err) {
> - pr_err("recreate doorbell failed\n");
> - goto out;
> - }
> -
> - /*
> - * Negative test - doorbell_bitmap out of sync, will trigger a few of
> - * WARN_ON(!doorbell_ok(guc, db_id)) but that's ok as long as the
> - * doorbells from our clients don't fail.
> - */
> - bitmap_copy(db_bitmap_bk, guc->doorbell_bitmap, GUC_NUM_DOORBELLS);
> - for (i = 0; i < GUC_NUM_DOORBELLS; i++)
> - if (i % 2)
> - test_and_change_bit(i, guc->doorbell_bitmap);
> -
> - err = guc_init_doorbell_hw(guc);
> + err = reserve_doorbell(guc->execbuf_client);
> if (err) {
> - pr_err("out of sync doorbell caused an error\n");
> - goto out;
> + pr_err("failed to reserve back the doorbell back\n");
> }
> -
> - /* restore 'correct' db bitmap */
> - bitmap_copy(guc->doorbell_bitmap, db_bitmap_bk, GUC_NUM_DOORBELLS);
> - err = guc_init_doorbell_hw(guc);
> + err = create_doorbell(guc->execbuf_client);
> if (err) {
> - pr_err("restored doorbell caused an error\n");
> + pr_err("recreate doorbell failed\n");
> goto out;
> }
>
> @@ -226,8 +223,11 @@ static int igt_guc_init_doorbell_hw(void *args)
> * Leave clean state for other test, plus the driver always destroy the
> * clients during unload.
> */
> + destroy_doorbell(guc->execbuf_client);
> + destroy_doorbell(guc->preempt_client);
> guc_clients_destroy(guc);
> guc_clients_create(guc);
> + guc_clients_doorbell_init(guc);
> unlock:
> mutex_unlock(&dev_priv->drm.struct_mutex);
> return err;
> @@ -309,25 +309,7 @@ static int igt_guc_doorbells(void *arg)
>
> db_id = clients[i]->doorbell_id;
>
> - /*
> - * Client alloc gives us a doorbell, but we want to exercise
> - * this ourselves (this resembles guc_init_doorbell_hw)
> - */
> - destroy_doorbell(clients[i]);
> - if (clients[i]->doorbell_id != GUC_DOORBELL_INVALID) {
> - pr_err("[%d] destroy db did not work!\n", i);
> - err = -EINVAL;
> - goto out;
> - }
> -
> - err = __reserve_doorbell(clients[i]);
> - if (err) {
> - pr_err("[%d] Failed to reserve a doorbell\n", i);
> - goto out;
> - }
> -
> - __update_doorbell_desc(clients[i], clients[i]->doorbell_id);
> - err = __create_doorbell(clients[i]);
> + err = create_doorbell(clients[i]);
> if (err) {
> pr_err("[%d] Failed to create a doorbell\n", i);
> goto out;
> @@ -348,8 +330,10 @@ static int igt_guc_doorbells(void *arg)
>
> out:
> for (i = 0; i < ATTEMPTS; i++)
> - if (!IS_ERR_OR_NULL(clients[i]))
> + if (!IS_ERR_OR_NULL(clients[i])) {
> + destroy_doorbell(clients[i]);
> guc_client_free(clients[i]);
> + }
> unlock:
> mutex_unlock(&dev_priv->drm.struct_mutex);
> return err;
> @@ -358,7 +342,7 @@ static int igt_guc_doorbells(void *arg)
> int intel_guc_live_selftest(struct drm_i915_private *dev_priv)
> {
> static const struct i915_subtest tests[] = {
> - SUBTEST(igt_guc_init_doorbell_hw),
> + SUBTEST(igt_guc_clients),
> SUBTEST(igt_guc_doorbells),
> };
>
> --
> 2.14.3
>
More information about the Intel-gfx
mailing list