[Intel-gfx] [PATCH 26/33] drm/i915: Track pinned VMA

Joonas Lahtinen joonas.lahtinen at linux.intel.com
Thu Aug 11 12:18:44 UTC 2016


On su, 2016-08-07 at 15:45 +0100, Chris Wilson wrote:
> @@ -1616,7 +1618,7 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
>  
>  /**
>   * i915_gem_fault - fault a page into the GTT
> - * @vma: VMA in question
> + * @mm: VMA in question

should be @vm or whatever the correct name.

>   * @vmf: fault info
>   *
>   * The fault handler is set up by drm_gem_mmap() when a object is GTT mapped
> @@ -1630,20 +1632,21 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
>   * suffer if the GTT working set is large or there are few fence registers
>   * left.
>   */
> -int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
> +int i915_gem_fault(struct vm_area_struct *vm, struct vm_fault *vmf)

'vm' is used elsewhere for the address space, maybe 'kvma'? Or 'area'
(used in linux/mm.h too occasionally)

> @@ -1722,13 +1726,13 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
>  	} else {
>  		if (!obj->fault_mappable) {
>  			unsigned long size = min_t(unsigned long,
> -						   vma->vm_end - vma->vm_start,
> +						   vm->vm_end - vm->vm_start,
>  						   obj->base.size);
>  			int i;
>  
>  			for (i = 0; i < size >> PAGE_SHIFT; i++) {
> -				ret = vm_insert_pfn(vma,
> -						    (unsigned long)vma->vm_start + i * PAGE_SIZE,
> +				ret = vm_insert_pfn(vm,
> +						    (unsigned long)vm->vm_start + i * PAGE_SIZE,

Hmm, vm->vm_start is already unsigned long, so cast could be
eliminated.

>  						    pfn + i);
>  				if (ret)
>  					break;
> @@ -1736,12 +1740,12 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
>  
>  			obj->fault_mappable = true;
>  		} else
> -			ret = vm_insert_pfn(vma,
> +			ret = vm_insert_pfn(vm,
>  					    (unsigned long)vmf->virtual_address,
>  					    pfn + page_offset);
>  	}
>  err_unpin:
> -	i915_gem_object_ggtt_unpin_view(obj, &view);
> +	__i915_vma_unpin(vma);
>  err_unlock:
>  	mutex_unlock(&dev->struct_mutex);
>  err_rpm:
> @@ -3190,7 +3194,7 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
>  					    old_write_domain);
>  
>  	/* And bump the LRU for this access */
> -	vma = i915_gem_obj_to_ggtt(obj);
> +	vma = i915_gem_object_to_ggtt(obj, NULL);
>  	if (vma &&
>  	    drm_mm_node_allocated(&vma->node) &&
>  	    !i915_vma_is_active(vma))
> @@ -3414,11 +3418,12 @@ rpm_put:
>   * Can be called from an uninterruptible phase (modesetting) and allows
>   * any flushes to be pipelined (for pageflips).
>   */
> -int
> +struct i915_vma *
>  i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
>  				     u32 alignment,
>  				     const struct i915_ggtt_view *view)
>  {
> +	struct i915_vma *vma;
>  	u32 old_read_domains, old_write_domain;
>  	int ret;
>  
> @@ -3438,19 +3443,23 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
>  	 */
>  	ret = i915_gem_object_set_cache_level(obj,
>  					      HAS_WT(obj->base.dev) ? I915_CACHE_WT : I915_CACHE_NONE);
> -	if (ret)
> +	if (ret) {
> +		vma = ERR_PTR(ret);
>  		goto err_unpin_display;
> +	}
>  
>  	/* As the user may map the buffer once pinned in the display plane
>  	 * (e.g. libkms for the bootup splash), we have to ensure that we
>  	 * always use map_and_fenceable for all scanout buffers.
>  	 */
> -	ret = i915_gem_object_ggtt_pin(obj, view, 0, alignment,
> +	vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment,
>  				       view->type == I915_GGTT_VIEW_NORMAL ?
>  				       PIN_MAPPABLE : 0);
> -	if (ret)
> +	if (IS_ERR(vma))
>  		goto err_unpin_display;
>  
> +	WARN_ON(obj->pin_display > i915_vma_pin_count(vma));
> +
>  	i915_gem_object_flush_cpu_write_domain(obj);
>  
>  	old_write_domain = obj->base.write_domain;
> @@ -3466,23 +3475,23 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
>  					    old_read_domains,
>  					    old_write_domain);
>  
> -	return 0;
> +	return vma;
>  
>  err_unpin_display:
>  	obj->pin_display--;
> -	return ret;
> +	return vma;
>  }
>  
>  void
> -i915_gem_object_unpin_from_display_plane(struct drm_i915_gem_object *obj,
> -					 const struct i915_ggtt_view *view)
> +i915_gem_object_unpin_from_display_plane(struct i915_vma *vma)
>  {
> -	if (WARN_ON(obj->pin_display == 0))
> +	if (WARN_ON(vma->obj->pin_display == 0))
>  		return;
>  
> -	i915_gem_object_ggtt_unpin_view(obj, view);
> +	vma->obj->pin_display--;
>  
> -	obj->pin_display--;
> +	i915_vma_unpin(vma);
> +	WARN_ON(vma->obj->pin_display > i915_vma_pin_count(vma));
>  }
>  
>  /**
> @@ -3679,27 +3688,25 @@ err:
>  	return ret;
>  }
>  
> -int
> +struct i915_vma *
>  i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
> -			 const struct i915_ggtt_view *view,
> +			 const struct i915_ggtt_view *ggtt_view,

Hmm, why distinctive name compared to other places? This function has
_ggtt_ in the name, so should be implicit.

>  			 u64 size,
>  			 u64 alignment,
>  			 u64 flags)
>  {

<SNIP>

> -/* All the new VM stuff */

Oh noes, we destroy all the new stuff :P

> @@ -349,30 +349,34 @@ relocate_entry_gtt(struct drm_i915_gem_object *obj,
>  		   struct drm_i915_gem_relocation_entry *reloc,
>  		   uint64_t target_offset)
>  {
> -	struct drm_device *dev = obj->base.dev;
> -	struct drm_i915_private *dev_priv = to_i915(dev);
> +	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
>  	struct i915_ggtt *ggtt = &dev_priv->ggtt;
> +	struct i915_vma *vma;
>  	uint64_t delta = relocation_target(reloc, target_offset);
>  	uint64_t offset;
>  	void __iomem *reloc_page;
>  	int ret;
>  
> +	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE);
> +	if (IS_ERR(vma))
> +		return PTR_ERR(vma);
> +
>  	ret = i915_gem_object_set_to_gtt_domain(obj, true);
>  	if (ret)
> -		return ret;
> +		goto unpin;
>  
>  	ret = i915_gem_object_put_fence(obj);
>  	if (ret)
> -		return ret;
> +		goto unpin;
>  
>  	/* Map the page containing the relocation we're going to perform.  */
> -	offset = i915_gem_obj_ggtt_offset(obj);
> +	offset = vma->node.start;
>  	offset += reloc->offset;

Could concatenate to previous line now;

offset = vma->node.start + reloc->offset;

> -static struct i915_vma*
> +static struct i915_vma *
>  i915_gem_execbuffer_parse(struct intel_engine_cs *engine,
>  			  struct drm_i915_gem_exec_object2 *shadow_exec_entry,
>  			  struct drm_i915_gem_object *batch_obj,
> @@ -1305,31 +1311,30 @@ i915_gem_execbuffer_parse(struct intel_engine_cs *engine,
>  				      batch_start_offset,
>  				      batch_len,
>  				      is_master);
> -	if (ret)
> +	if (ret) {
> +		if (ret == -EACCES) /* unhandled chained batch */
> +			vma = NULL;
> +		else
> +			vma = ERR_PTR(ret);
>  		goto err;
> +	}
>  
> -	ret = i915_gem_object_ggtt_pin(shadow_batch_obj, NULL, 0, 0, 0);
> -	if (ret)
> +	vma = i915_gem_object_ggtt_pin(shadow_batch_obj, NULL, 0, 0, 0);
> +	if (IS_ERR(vma)) {
> +		ret = PTR_ERR(vma);

Hmm, 'err' label no longer cares about ret, so this is redundant. Or
should the ret-to-vma translation been kept at the end?

>  		goto err;
> -
> -	i915_gem_object_unpin_pages(shadow_batch_obj);
> +	}
>  
>  	memset(shadow_exec_entry, 0, sizeof(*shadow_exec_entry));
>  
> -	vma = i915_gem_obj_to_ggtt(shadow_batch_obj);
>  	vma->exec_entry = shadow_exec_entry;
>  	vma->exec_entry->flags = __EXEC_OBJECT_HAS_PIN;
>  	i915_gem_object_get(shadow_batch_obj);
>  	list_add_tail(&vma->exec_list, &eb->vmas);
>  
> -	return vma;
> -
>  err:
>  	i915_gem_object_unpin_pages(shadow_batch_obj);
> -	if (ret == -EACCES) /* unhandled chained batch */
> -		return NULL;
> -	else
> -		return ERR_PTR(ret);
> +	return vma;
>  }
>  

<SNIP>

> @@ -432,13 +432,7 @@ bool
>  i915_gem_object_pin_fence(struct drm_i915_gem_object *obj)
>  {
>  	if (obj->fence_reg != I915_FENCE_REG_NONE) {
> -		struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
> -		struct i915_vma *ggtt_vma = i915_gem_obj_to_ggtt(obj);
> -
> -		WARN_ON(!ggtt_vma ||
> -			dev_priv->fence_regs[obj->fence_reg].pin_count >
> -			i915_vma_pin_count(ggtt_vma));

Is this WARN_ON deliberately removed, not worthy GEM_BUG_ON?

> -		dev_priv->fence_regs[obj->fence_reg].pin_count++;
> +		to_i915(obj->base.dev)->fence_regs[obj->fence_reg].pin_count++;

This is not the most readable line of code one can imagine. dev_priv
alias does make readability better occasionally.

> @@ -3351,14 +3351,10 @@ __i915_gem_vma_create(struct drm_i915_gem_object *obj,
>  
>  	GEM_BUG_ON(vm->closed);
>  
> -	if (WARN_ON(i915_is_ggtt(vm) != !!view))
> -		return ERR_PTR(-EINVAL);
> -
>  	vma = kmem_cache_zalloc(to_i915(obj->base.dev)->vmas, GFP_KERNEL);
>  	if (vma == NULL)
>  		return ERR_PTR(-ENOMEM);
>  
> -	INIT_LIST_HEAD(&vma->obj_link);

This disappears completely?

> @@ -3378,56 +3373,76 @@ __i915_gem_vma_create(struct drm_i915_gem_object *obj,
> +static inline bool vma_matches(struct i915_vma *vma,
> +			       struct i915_address_space *vm,
> +			       const struct i915_ggtt_view *view)

Function name is not clearest. But I can not suggest a better one off
the top of my head.
 
>  static struct drm_i915_error_object *
>  i915_error_object_create(struct drm_i915_private *dev_priv,
> -			 struct drm_i915_gem_object *src,
> -			 struct i915_address_space *vm)
> +			 struct i915_vma *vma)
>  {
>  	struct i915_ggtt *ggtt = &dev_priv->ggtt;
> +	struct drm_i915_gem_object *src;
>  	struct drm_i915_error_object *dst;
> -	struct i915_vma *vma = NULL;
>  	int num_pages;
>  	bool use_ggtt;
>  	int i = 0;
>  	u64 reloc_offset;
>  
> -	if (src == NULL || src->pages == NULL)
> +	if (!vma)
> +		return NULL;
> +
> +	src = vma->obj;
> +	if (!src->pages)
>  		return NULL;
>  
>  	num_pages = src->base.size >> PAGE_SHIFT;
>  
>  	dst = kmalloc(sizeof(*dst) + num_pages * sizeof(u32 *), GFP_ATOMIC);
> -	if (dst == NULL)
> +	if (!dst)
>  		return NULL;
>  
> -	if (i915_gem_obj_bound(src, vm))
> -		dst->gtt_offset = i915_gem_obj_offset(src, vm);
> -	else
> -		dst->gtt_offset = -1;

What was the purpose of this line previously, the calculations would
get majestically messed up?

> @@ -1035,11 +1029,8 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv,
>  	struct drm_i915_gem_request *request;
>  	int i, count;
>  
> -	if (dev_priv->semaphore) {
> -		error->semaphore =
> -			i915_error_ggtt_object_create(dev_priv,
> -						      dev_priv->semaphore->obj);
> -	}
> +	error->semaphore =
> +		i915_error_object_create(dev_priv, dev_priv->semaphore);

Not sure if I like hiding the NULL tolerancy inside the function.

> @@ -2240,10 +2240,11 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb,
>  	 */
>  	intel_runtime_pm_get(dev_priv);
>  
> -	ret = i915_gem_object_pin_to_display_plane(obj, alignment,
> -						   &view);
> -	if (ret)
> +	vma = i915_gem_object_pin_to_display_plane(obj, alignment, &view);
> +	if (IS_ERR(vma)) {
> +		ret = PTR_ERR(vma);

Other places there's return vma in the error path too, might be cleaner
here too as there's already translation to -EBUSY in the ret error use.

>  		goto err_pm;
> +	}
>  
>  	/* Install a fence for tiled scan-out. Pre-i965 always needs a
>  	 * fence, whereas 965+ only requires a fence if using
> @@ -2270,19 +2271,20 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb,
>  	}
>  
>  	intel_runtime_pm_put(dev_priv);
> -	return 0;
> +	return vma;
>  
>  err_unpin:
> -	i915_gem_object_unpin_from_display_plane(obj, &view);
> +	i915_gem_object_unpin_from_display_plane(vma);
>  err_pm:
>  	intel_runtime_pm_put(dev_priv);
> -	return ret;
> +	return ERR_PTR(ret);
>  }
>  

<SNIP>

> @@ -2291,7 +2293,8 @@ void intel_unpin_fb_obj(struct drm_framebuffer *fb, unsigned int rotation)
>  	if (view.type == I915_GGTT_VIEW_NORMAL)
>  		i915_gem_object_unpin_fence(obj);
>  
> -	i915_gem_object_unpin_from_display_plane(obj, &view);
> +	vma = i915_gem_object_to_ggtt(obj, &view);

GEM_BUG_ON(!vma)?

> +	i915_gem_object_unpin_from_display_plane(vma);
>  }
>  
>  /*
> @@ -2552,7 +2555,7 @@ intel_find_initial_plane_obj(struct intel_crtc *intel_crtc,
>  			continue;
>  
>  		obj = intel_fb_obj(fb);
> -		if (i915_gem_obj_ggtt_offset(obj) == plane_config->base) {
> +		if (i915_gem_object_ggtt_offset(obj, NULL) == plane_config->base) {
>  			drm_framebuffer_reference(fb);
>  			goto valid_fb;
>  		}
> @@ -2709,11 +2712,11 @@ static void i9xx_update_primary_plane(struct drm_plane *primary,
>  	I915_WRITE(DSPSTRIDE(plane), fb->pitches[0]);
>  	if (INTEL_INFO(dev)->gen >= 4) {
>  		I915_WRITE(DSPSURF(plane),
> -			   i915_gem_obj_ggtt_offset(obj) + intel_crtc->dspaddr_offset);
> +			   i915_gem_object_ggtt_offset(obj, NULL) + intel_crtc->dspaddr_offset);

As discussed in IRC, this function to be removed further down the path.

> @@ -216,17 +215,17 @@ static int intelfb_create(struct drm_fb_helper *helper,
>  		sizes->fb_height = intel_fb->base.height;
>  	}
>  
> -	obj = intel_fb->obj;
> -
>  	mutex_lock(&dev->struct_mutex);
>  
>  	/* Pin the GGTT vma for our access via info->screen_base.
>  	 * This also validates that any existing fb inherited from the
>  	 * BIOS is suitable for own access.
>  	 */
> -	ret = intel_pin_and_fence_fb_obj(&ifbdev->fb->base, BIT(DRM_ROTATE_0));
> -	if (ret)
> +	vma = intel_pin_and_fence_fb_obj(&ifbdev->fb->base, BIT(DRM_ROTATE_0));

Needs rebasing, BIT(DRM_ROTATE_0) is now just DRM_ROTATE_0.

> @@ -1443,8 +1443,8 @@ void intel_setup_overlay(struct drm_i915_private *dev_priv)
>  	return;
>  
>  out_unpin_bo:
> -	if (!OVERLAY_NEEDS_PHYSICAL(dev_priv))
> -		i915_gem_object_ggtt_unpin(reg_bo);
> +	if (vma)
> +		i915_vma_unpin(vma);

This might be the only acceptable use of if () in teardown path.

I hope there is an excellent revision listing in the next iteration.
This was a pain to go through.

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation


More information about the Intel-gfx mailing list