[Intel-gfx] [PATCH 15/33] drm/i915: Track pinned vma inside guc

Thu Aug 11 16:19:43 UTC 2016

On 07/08/16 15:45, Chris Wilson wrote:
> Since the guc allocates and pins an object into the GGTT for its usage,
> it is more natural to use that pinned VMA as our resource cookie.

Well it isn't really any more natural, as we hardly ever care about the 
mapping, whereas we more frequently work with the object. So it just 
seems to introduce an unnecessary extra level of indirection as we go 
from vma to object to whatever we really want.

> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_debugfs.c        |  10 +--
>  drivers/gpu/drm/i915/i915_guc_submission.c | 131 ++++++++++++++---------------
>  drivers/gpu/drm/i915/intel_guc.h           |   9 +-
>  drivers/gpu/drm/i915/intel_guc_loader.c    |   7 +-
>  4 files changed, 77 insertions(+), 80 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index b41c05767def..e2a9fc353ef3 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -2524,15 +2524,15 @@ static int i915_guc_log_dump(struct seq_file *m, void *data)
>  	struct drm_info_node *node = m->private;
>  	struct drm_device *dev = node->minor->dev;
>  	struct drm_i915_private *dev_priv = to_i915(dev);
> -	struct drm_i915_gem_object *log_obj = dev_priv->guc.log_obj;
> -	u32 *log;
> +	struct drm_i915_gem_object *obj;

It is completely unnecessary (and undesirable) to rename this local. A 
variable called 'obj' could be any sort of object, but we know that what 
we are dealing with *here* is a *specific* object that holds the pages 
of GuC log data, so it should have a name that tells us so.

>  	int i = 0, pg;
>
> -	if (!log_obj)
> +	if (!dev_priv->guc.log)
>  		return 0;
>
> -	for (pg = 0; pg < log_obj->base.size / PAGE_SIZE; pg++) {
> -		log = kmap_atomic(i915_gem_object_get_page(log_obj, pg));
> +	obj = dev_priv->guc.log->obj;
> +	for (pg = 0; pg < obj->base.size / PAGE_SIZE; pg++) {
> +		u32 *log = kmap_atomic(i915_gem_object_get_page(obj, pg));
>
>  		for (i = 0; i < PAGE_SIZE / sizeof(u32); i += 4)
>  			seq_printf(m, "0x%08x 0x%08x 0x%08x 0x%08x\n",
> diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
> index 03a5cef353eb..f56d68173ae6 100644
> --- a/drivers/gpu/drm/i915/i915_guc_submission.c
> +++ b/drivers/gpu/drm/i915/i915_guc_submission.c
> @@ -183,7 +183,7 @@ static int guc_update_doorbell_id(struct intel_guc *guc,
>  				  struct i915_guc_client *client,
>  				  u16 new_id)
>  {
> -	struct sg_table *sg = guc->ctx_pool_obj->pages;
> +	struct sg_table *sg = guc->ctx_pool->obj->pages;

Hi-ho, hi-ho, it's off to RAM we go.
Notice the extra '->'

>  	void *doorbell_bitmap = guc->doorbell_bitmap;
>  	struct guc_doorbell_info *doorbell;
>  	struct guc_context_desc desc;
> @@ -325,8 +325,8 @@ static void guc_init_proc_desc(struct intel_guc *guc,
>  static void guc_init_ctx_desc(struct intel_guc *guc,
>  			      struct i915_guc_client *client)
>  {
> -	struct drm_i915_gem_object *client_obj = client->client_obj;
>  	struct drm_i915_private *dev_priv = guc_to_i915(guc);
> +	struct drm_i915_gem_object *client_obj = client->client->obj;

*Ugh*

>  	struct intel_engine_cs *engine;
>  	struct i915_gem_context *ctx = client->owner;
>  	struct guc_context_desc desc;
> @@ -380,7 +380,7 @@ static void guc_init_ctx_desc(struct intel_guc *guc,
>  	 * The doorbell, process descriptor, and workqueue are all parts
>  	 * of the client object, which the GuC will reference via the GGTT
>  	 */
> -	gfx_addr = i915_gem_obj_ggtt_offset(client_obj);
> +	gfx_addr = client->client->node.start;

Insufficient abstraction.

If you want VMAs to be the primary handle used even by code that isn't 
primarily about mappings, there should be an abstraction layer (macros 
or trivial inline accessors) to retrieve the things that code cares 
about from the 'VMA'.

	gfx_addr = i915_vma_ggtt_addr(vma);	// Or something

GuC code shouldn't have to mention 'node' or any other of the internals 
of a VMA or the underlying DRM memory-manager structure.

>  	desc.db_trigger_phy = sg_dma_address(client_obj->pages->sgl) +
>  				client->doorbell_offset;
>  	desc.db_trigger_cpu = (uintptr_t)client->client_base +
> @@ -397,7 +397,7 @@ static void guc_init_ctx_desc(struct intel_guc *guc,
>  	desc.desc_private = (uintptr_t)client;
>
>  	/* Pool context is pinned already */
> -	sg = guc->ctx_pool_obj->pages;
> +	sg = guc->ctx_pool->obj->pages;
>  	sg_pcopy_from_buffer(sg->sgl, sg->nents, &desc, sizeof(desc),
>  			     sizeof(desc) * client->ctx_index);
>  }
> @@ -410,7 +410,7 @@ static void guc_fini_ctx_desc(struct intel_guc *guc,
>
>  	memset(&desc, 0, sizeof(desc));
>
> -	sg = guc->ctx_pool_obj->pages;
> +	sg = guc->ctx_pool->obj->pages;
>  	sg_pcopy_from_buffer(sg->sgl, sg->nents, &desc, sizeof(desc),
>  			     sizeof(desc) * client->ctx_index);
>  }
> @@ -492,7 +492,7 @@ static void guc_add_workqueue_item(struct i915_guc_client *gc,
>  	/* WQ starts from the page after doorbell / process_desc */
>  	wq_page = (wq_off + GUC_DB_SIZE) >> PAGE_SHIFT;
>  	wq_off &= PAGE_SIZE - 1;
> -	base = kmap_atomic(i915_gem_object_get_page(gc->client_obj, wq_page));
> +	base = kmap_atomic(i915_gem_object_get_page(gc->client->obj, wq_page));
>  	wqi = (struct guc_wq_item *)((char *)base + wq_off);
>
>  	/* Now fill in the 4-word work queue item */
> @@ -611,8 +611,8 @@ static void i915_guc_submit(struct drm_i915_gem_request *rq)
>   */
>
>  /**
> - * gem_allocate_guc_obj() - Allocate gem object for GuC usage
> - * @dev_priv:	driver private data structure
> + * guc_allocate_vma() - Allocate a GGTT VMA for GuC usage
> + * @guc:	the guc
>   * @size:	size of object
>   *
>   * This is a wrapper to create a gem obj. In order to use it inside GuC, the
> @@ -621,45 +621,49 @@ static void i915_guc_submit(struct drm_i915_gem_request *rq)
>   *
>   * Return:	A drm_i915_gem_object if successful, otherwise NULL.

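This comment is no longer correct: the function now returns a
struct i915_vma * on success and an ERR_PTR()-encoded error on failure,
not a drm_i915_gem_object or NULL.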

>   */
> -static struct drm_i915_gem_object *
> -gem_allocate_guc_obj(struct drm_i915_private *dev_priv, u32 size)
> +static struct i915_vma *guc_allocate_vma(struct intel_guc *guc, u32 size)
>  {
> +	struct drm_i915_private *dev_priv = guc_to_i915(guc);
>  	struct drm_i915_gem_object *obj;
> +	struct i915_vma *vma;
> +	int ret;
>
>  	obj = i915_gem_object_create(&dev_priv->drm, size);
>  	if (IS_ERR(obj))
> -		return NULL;
> +		return ERR_CAST(obj);
>
> -	if (i915_gem_object_get_pages(obj)) {
> -		i915_gem_object_put(obj);
> -		return NULL;
> -	}
> +	vma = i915_vma_create(obj, &dev_priv->ggtt.base, NULL);
> +	if (IS_ERR(vma))
> +		goto err;
>
> -	if (i915_gem_object_ggtt_pin(obj, NULL, 0, PAGE_SIZE,
> -				     PIN_OFFSET_BIAS | GUC_WOPCM_TOP)) {
> -		i915_gem_object_put(obj);
> -		return NULL;
> +	ret = i915_vma_pin(vma, 0, PAGE_SIZE,
> +			   PIN_GLOBAL | PIN_OFFSET_BIAS | GUC_WOPCM_TOP);
> +	if (ret) {
> +		vma = ERR_PTR(ret);
> +		goto err;
>  	}
>
>  	/* Invalidate GuC TLB to let GuC take the latest updates to GTT. */
>  	I915_WRITE(GEN8_GTCR, GEN8_GTCR_INVALIDATE);
>
> -	return obj;
> +	return vma;
> +
> +err:
> +	i915_gem_object_put(obj);
> +	return vma;
>  }
>
>  /**
> - * gem_release_guc_obj() - Release gem object allocated for GuC usage
> - * @obj:	gem obj to be released
> + * guc_release_vma() - Release gem object allocated for GuC usage
> + * @vma:	gem obj to be released
>   */
> -static void gem_release_guc_obj(struct drm_i915_gem_object *obj)
> +static void guc_release_vma(struct i915_vma *vma)
>  {
> -	if (!obj)
> +	if (!vma)
>  		return;
>
> -	if (i915_gem_obj_is_pinned(obj))
> -		i915_gem_object_ggtt_unpin(obj);
> -
> -	i915_gem_object_put(obj);
> +	i915_vma_unpin(vma);
> +	i915_gem_object_put(vma->obj);
>  }
>
>  static void
> @@ -686,7 +690,7 @@ guc_client_free(struct drm_i915_private *dev_priv,
>  		kunmap(kmap_to_page(client->client_base));
>  	}
>
> -	gem_release_guc_obj(client->client_obj);
> +	guc_release_vma(client->client);
>
>  	if (client->ctx_index != GUC_INVALID_CTX_ID) {
>  		guc_fini_ctx_desc(guc, client);
> @@ -757,7 +761,7 @@ guc_client_alloc(struct drm_i915_private *dev_priv,
>  {
>  	struct i915_guc_client *client;
>  	struct intel_guc *guc = &dev_priv->guc;
> -	struct drm_i915_gem_object *obj;
> +	struct i915_vma *vma;
>  	uint16_t db_id;
>
>  	client = kzalloc(sizeof(*client), GFP_KERNEL);
> @@ -777,13 +781,13 @@ guc_client_alloc(struct drm_i915_private *dev_priv,
>  	}
>
>  	/* The first page is doorbell/proc_desc. Two followed pages are wq. */
> -	obj = gem_allocate_guc_obj(dev_priv, GUC_DB_SIZE + GUC_WQ_SIZE);
> -	if (!obj)
> +	vma = guc_allocate_vma(guc, GUC_DB_SIZE + GUC_WQ_SIZE);
> +	if (IS_ERR(vma))
>  		goto err;
>
>  	/* We'll keep just the first (doorbell/proc) page permanently kmap'd. */
> -	client->client_obj = obj;
> -	client->client_base = kmap(i915_gem_object_get_page(obj, 0));
> +	client->client = vma;
> +	client->client_base = kmap(i915_gem_object_get_page(vma->obj, 0));
>  	client->wq_offset = GUC_DB_SIZE;
>  	client->wq_size = GUC_WQ_SIZE;
>
> @@ -825,8 +829,7 @@ err:
>
>  static void guc_create_log(struct intel_guc *guc)
>  {
> -	struct drm_i915_private *dev_priv = guc_to_i915(guc);
> -	struct drm_i915_gem_object *obj;
> +	struct i915_vma *vma;
>  	unsigned long offset;
>  	uint32_t size, flags;
>
> @@ -842,16 +845,16 @@ static void guc_create_log(struct intel_guc *guc)
>  		GUC_LOG_ISR_PAGES + 1 +
>  		GUC_LOG_CRASH_PAGES + 1) << PAGE_SHIFT;
>
> -	obj = guc->log_obj;
> -	if (!obj) {
> -		obj = gem_allocate_guc_obj(dev_priv, size);
> -		if (!obj) {
> +	vma = guc->log;
> +	if (!vma) {
> +		vma = guc_allocate_vma(guc, size);
> +		if (IS_ERR(vma)) {
>  			/* logging will be off */
>  			i915.guc_log_level = -1;
>  			return;
>  		}
>
> -		guc->log_obj = obj;
> +		guc->log = vma;
>  	}
>
>  	/* each allocated unit is a page */
> @@ -860,7 +863,7 @@ static void guc_create_log(struct intel_guc *guc)
>  		(GUC_LOG_ISR_PAGES << GUC_LOG_ISR_SHIFT) |
>  		(GUC_LOG_CRASH_PAGES << GUC_LOG_CRASH_SHIFT);
>
> -	offset = i915_gem_obj_ggtt_offset(obj) >> PAGE_SHIFT; /* in pages */
> +	offset = vma->node.start >> PAGE_SHIFT; /* in pages */
>  	guc->log_flags = (offset << GUC_LOG_BUF_ADDR_SHIFT) | flags;
>  }
>
> @@ -889,7 +892,7 @@ static void init_guc_policies(struct guc_policies *policies)
>  static void guc_create_ads(struct intel_guc *guc)
>  {
>  	struct drm_i915_private *dev_priv = guc_to_i915(guc);
> -	struct drm_i915_gem_object *obj;
> +	struct i915_vma *vma;
>  	struct guc_ads *ads;
>  	struct guc_policies *policies;
>  	struct guc_mmio_reg_state *reg_state;
> @@ -902,16 +905,16 @@ static void guc_create_ads(struct intel_guc *guc)
>  			sizeof(struct guc_mmio_reg_state) +
>  			GUC_S3_SAVE_SPACE_PAGES * PAGE_SIZE;
>
> -	obj = guc->ads_obj;
> -	if (!obj) {
> -		obj = gem_allocate_guc_obj(dev_priv, PAGE_ALIGN(size));
> -		if (!obj)
> +	vma = guc->ads;
> +	if (!vma) {
> +		vma = guc_allocate_vma(guc, PAGE_ALIGN(size));
> +		if (IS_ERR(vma))
>  			return;
>
> -		guc->ads_obj = obj;
> +		guc->ads = vma;
>  	}
>
> -	page = i915_gem_object_get_page(obj, 0);
> +	page = i915_gem_object_get_page(vma->obj, 0);
>  	ads = kmap(page);

Changing the names & types in the top-level structure leads to confusion 
here, as the member 'guc->ads' and the existing local 'ads' now have the 
same name but quite different types.

>  	/*
> @@ -931,8 +934,7 @@ static void guc_create_ads(struct intel_guc *guc)
>  	policies = (void *)ads + sizeof(struct guc_ads);
>  	init_guc_policies(policies);
>
> -	ads->scheduler_policies = i915_gem_obj_ggtt_offset(obj) +
> -			sizeof(struct guc_ads);
> +	ads->scheduler_policies = vma->node.start + sizeof(struct guc_ads);
>
>  	/* MMIO reg state */
>  	reg_state = (void *)policies + sizeof(struct guc_policies);
> @@ -960,10 +962,9 @@ static void guc_create_ads(struct intel_guc *guc)
>   */
>  int i915_guc_submission_init(struct drm_i915_private *dev_priv)
>  {
> -	const size_t ctxsize = sizeof(struct guc_context_desc);
> -	const size_t poolsize = GUC_MAX_GPU_CONTEXTS * ctxsize;
> -	const size_t gemsize = round_up(poolsize, PAGE_SIZE);
>  	struct intel_guc *guc = &dev_priv->guc;
> +	struct i915_vma *vma;
> +	u32 size;
>
>  	/* Wipe bitmap & delete client in case of reinitialisation */
>  	bitmap_clear(guc->doorbell_bitmap, 0, GUC_MAX_DOORBELLS);
> @@ -972,13 +973,15 @@ int i915_guc_submission_init(struct drm_i915_private *dev_priv)
>  	if (!i915.enable_guc_submission)
>  		return 0; /* not enabled  */
>
> -	if (guc->ctx_pool_obj)
> +	if (guc->ctx_pool)
>  		return 0; /* already allocated */
>
> -	guc->ctx_pool_obj = gem_allocate_guc_obj(dev_priv, gemsize);
> -	if (!guc->ctx_pool_obj)
> -		return -ENOMEM;
> +	size = PAGE_ALIGN(GUC_MAX_GPU_CONTEXTS*sizeof(struct guc_context_desc));

What a long ugly line :(

Breaking it into the 'const's at the top of the function made it easier 
to follow the stages of the calculation AND was at least as efficient, 
as the compiler folded the whole calculation into a single constant in 
the [deleted] call to gem_allocate_guc_obj() above.

> +	vma = guc_allocate_vma(guc, size);
> +	if (IS_ERR(vma))
> +		return PTR_ERR(vma);
>
> +	guc->ctx_pool = vma;
>  	ida_init(&guc->ctx_ids);
>  	guc_create_log(guc);
>  	guc_create_ads(guc);
> @@ -1030,16 +1033,12 @@ void i915_guc_submission_fini(struct drm_i915_private *dev_priv)
>  {
>  	struct intel_guc *guc = &dev_priv->guc;
>
> -	gem_release_guc_obj(dev_priv->guc.ads_obj);
> -	guc->ads_obj = NULL;
> -
> -	gem_release_guc_obj(dev_priv->guc.log_obj);
> -	guc->log_obj = NULL;
> +	guc_release_vma(nullify(&guc->ads));
> +	guc_release_vma(nullify(&guc->log));

I think this is a very ugly way of hiding the clearing of the pointers.
If you want to manage references like this, it could *possibly* be a macro:

	guc_release_vma(ZAP_AFTER_USE(guc->log));

*without* the '&' so the argument has to be an lvalue; or it could more 
clearly be done by having the releasing function take a pointer to the 
pointer-to-object, which it would clear after releasing the object and 
before returning to the caller.

	guc_release_vma_ref(&guc->ads);	// also clears guc->ads

But I think even that is not as good as explicitly clearing the pointer 
immediately after the call to release(). If K&R had thought having a way 
to implicitly clear a pointer after using it was a good idea, they'd 
have put it into the language:

	struct foo *new = *saved!;	// Dereference & clear 'saved'

But they didn't, so we probably shouldn't invent one.

> -	if (guc->ctx_pool_obj)
> +	if (guc->ctx_pool)
>  		ida_destroy(&guc->ctx_ids);
> -	gem_release_guc_obj(guc->ctx_pool_obj);
> -	guc->ctx_pool_obj = NULL;
> +	guc_release_vma(nullify(&guc->ctx_pool));
>  }
>
>  /**
> diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h
> index 623cf26cd784..a8da563cadb7 100644
> --- a/drivers/gpu/drm/i915/intel_guc.h
> +++ b/drivers/gpu/drm/i915/intel_guc.h
> @@ -63,7 +63,7 @@ struct drm_i915_gem_request;
>   *   retcode: errno from last guc_submit()
>   */
>  struct i915_guc_client {
> -	struct drm_i915_gem_object *client_obj;
> +	struct i915_vma *client;

We can't call this vma 'client' because that is the name commonly used 
for an instance of the i915_guc_client class. x->client->client->y is 
just horrible. You could call it 'client_vma', I suppose.

>  	void *client_base;		/* first page (only) of above	*/
>  	struct i915_gem_context *owner;
>  	struct intel_guc *guc;
> @@ -125,11 +125,10 @@ struct intel_guc_fw {
>  struct intel_guc {
>  	struct intel_guc_fw guc_fw;
>  	uint32_t log_flags;
> -	struct drm_i915_gem_object *log_obj;
> +	struct i915_vma *log;

Changing the name to 'log_vma' would be better, since I'd expect 
something called just 'log' to actually BE a log -- or at most a pointer 
to a log -- not just a pointer to something containing a pointer to 
another thing that contains a pointer to a list of pages that eventually 
hold the log data.

Ditto for the other names below.

> -	struct drm_i915_gem_object *ads_obj;
> -
> -	struct drm_i915_gem_object *ctx_pool_obj;
> +	struct i915_vma *ads;
> +	struct i915_vma *ctx_pool;
>  	struct ida ctx_ids;
>
>  	struct i915_guc_client *execbuf_client;
> diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c
> index 3763e30cc165..58ef4418a2ef 100644
> --- a/drivers/gpu/drm/i915/intel_guc_loader.c
> +++ b/drivers/gpu/drm/i915/intel_guc_loader.c
> @@ -181,16 +181,15 @@ static void set_guc_init_params(struct drm_i915_private *dev_priv)
>  			i915.guc_log_level << GUC_LOG_VERBOSITY_SHIFT;
>  	}
>
> -	if (guc->ads_obj) {
> -		u32 ads = (u32)i915_gem_obj_ggtt_offset(guc->ads_obj)
> -				>> PAGE_SHIFT;
> +	if (guc->ads) {
> +		u32 ads = (u32)guc->ads->node.start >> PAGE_SHIFT;
>  		params[GUC_CTL_DEBUG] |= ads << GUC_ADS_ADDR_SHIFT;
>  		params[GUC_CTL_DEBUG] |= GUC_ADS_ENABLED;
>  	}
>
>  	/* If GuC submission is enabled, set up additional parameters here */
>  	if (i915.enable_guc_submission) {
> -		u32 pgs = i915_gem_obj_ggtt_offset(dev_priv->guc.ctx_pool_obj);
> +		u32 pgs = dev_priv->guc.ctx_pool->node.start;
>  		u32 ctx_in_16 = GUC_MAX_GPU_CONTEXTS / 16;
>
>  		pgs >>= PAGE_SHIFT;

Summary: I'm not totally opposed to using VMAs more generally, but here 
there just seem to be extra costs with no offsetting advantages; and the 
details of some of the above changes are just plain ugly.

If the naming and abstraction issues were resolved, the remaining 
conversions would not in themselves be too objectionable, because either
* the extra cycles don't matter (in rarely executed code), or
* we can add extra direct pointers or other cached values in the 
top-level data structures to avoid deep memory chains where necessary.

.Dave.