[Intel-gfx] [PATCH 06/11] drm/i915: plumb VM into object operations

Daniel Vetter daniel at ffwll.ch
Fri Jul 12 08:26:07 CEST 2013


On Thu, Jul 11, 2013 at 07:23:08PM -0700, Ben Widawsky wrote:
> On Tue, Jul 09, 2013 at 09:15:01AM +0200, Daniel Vetter wrote:
> > On Mon, Jul 08, 2013 at 11:08:37PM -0700, Ben Widawsky wrote:

[snip]

> > > index 058ad44..21015cd 100644
> > > --- a/drivers/gpu/drm/i915/i915_gem.c
> > > +++ b/drivers/gpu/drm/i915/i915_gem.c
> > > @@ -38,10 +38,12 @@
> > >  
> > >  static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
> > >  static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj);
> > > -static __must_check int i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
> > > -						    unsigned alignment,
> > > -						    bool map_and_fenceable,
> > > -						    bool nonblocking);
> > > +static __must_check int
> > > +i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
> > > +			    struct i915_address_space *vm,
> > > +			    unsigned alignment,
> > > +			    bool map_and_fenceable,
> > > +			    bool nonblocking);
> > >  static int i915_gem_phys_pwrite(struct drm_device *dev,
> > >  				struct drm_i915_gem_object *obj,
> > >  				struct drm_i915_gem_pwrite *args,
> > > @@ -135,7 +137,7 @@ int i915_mutex_lock_interruptible(struct drm_device *dev)
> > >  static inline bool
> > >  i915_gem_object_is_inactive(struct drm_i915_gem_object *obj)
> > >  {
> > > -	return i915_gem_obj_ggtt_bound(obj) && !obj->active;
> > > +	return i915_gem_obj_bound_any(obj) && !obj->active;
> > >  }
> > >  
> > >  int
> > > @@ -422,7 +424,7 @@ i915_gem_shmem_pread(struct drm_device *dev,
> > >  		 * anyway again before the next pread happens. */
> > >  		if (obj->cache_level == I915_CACHE_NONE)
> > >  			needs_clflush = 1;
> > > -		if (i915_gem_obj_ggtt_bound(obj)) {
> > > +		if (i915_gem_obj_bound_any(obj)) {
> > >  			ret = i915_gem_object_set_to_gtt_domain(obj, false);
> > 
> > This is essentially a very convoluted version of "if there's gpu rendering
> > outstanding, please wait for it". Maybe we should switch this to
> > 
> > 	if (obj->active)
> > 		wait_rendering(obj, true);
> > 
> > Same for the shmem_pwrite case below. Would be a separate patch to prep
> > things though. Can I volunteer you for that? The ugly part is to review
> > whether any of the lru list updating that set_domain does in addition to
> > wait_rendering is required, but on a quick read that's not the case.
> 
> Just reading the comment above, it says we need the clflush. I don't
> actually understand why we do that even after reading the comment, but
> meh. You tell me, I don't mind doing this as a prep first.

The comment right above only applies to the needs_clflush = 1 assignment;
the set_to_gtt_domain call afterwards is just there to sync up with the
gpu. The code is confusing and tricky, and the lack of a blank line
between the two things, plus the missing comment explaining that we only
care about the wait_rendering side-effect of set_to_gtt_domain, doesn't
help. If you do the proposed conversion (and add that blank line) it
should go a long way toward unconfusing readers.
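
Roughly what I have in mind for the pread path (completely untested
sketch, assuming the existing wait_rendering(obj, readonly) helper in
i915_gem.c is the right thing to call here):

		if (obj->cache_level == I915_CACHE_NONE)
			needs_clflush = 1;

		/* We only care about the implicit wait for outstanding gpu
		 * writes here, not the domain tracking that
		 * set_to_gtt_domain otherwise does. */
		if (obj->active) {
			ret = i915_gem_object_wait_rendering(obj, true);
			if (ret)
				return ret;
		}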

[snip]

> > > @@ -3333,12 +3376,15 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
> > >  	}
> > >  
> > >  	if (!i915_gem_valid_gtt_space(dev, &vma->node, cache_level)) {
> > > -		ret = i915_gem_object_unbind(obj);
> > > +		ret = i915_gem_object_unbind(obj, vm);
> > >  		if (ret)
> > >  			return ret;
> > >  	}
> > >  
> > > -	if (i915_gem_obj_ggtt_bound(obj)) {
> > > +	list_for_each_entry(vm, &dev_priv->vm_list, global_link) {
> > > +		if (!i915_gem_obj_bound(obj, vm))
> > > +			continue;
> > 
> > Hm, shouldn't we have a per-object list of vmas? Or will that follow later
> > on?
> > 
> > Self-correction: It exists already ... why can't we use this here?
> 
> Yes. That should work, I'll fix it and test it. It looks slightly worse
> IMO in terms of code clarity, but I don't mind the change.

Actually I think it'd gain in clarity: doing pte updates (which is what
set_cache_level does) on the vma instead of on the (obj, vm) pair feels
more natural. And we'd be able to drop a lot of (obj, vm) -> vma lookups
here.
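
Something along these lines maybe (just a sketch of the direction, not
compile-tested; note the _safe walk since unbind destroys the vma):

	struct i915_vma *vma, *next;

	list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) {
		if (!i915_gem_valid_gtt_space(dev, &vma->node, cache_level)) {
			ret = i915_gem_object_unbind(obj, vma->vm);
			if (ret)
				return ret;
			continue;
		}

		/* ... pte update for this binding ... */
		vma->node.color = cache_level;
	}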

> 
> > 
> > > +
> > >  		ret = i915_gem_object_finish_gpu(obj);
> > >  		if (ret)
> > >  			return ret;
> > > @@ -3361,7 +3407,7 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
> > >  			i915_ppgtt_bind_object(dev_priv->mm.aliasing_ppgtt,
> > >  					       obj, cache_level);
> > >  
> > > -		i915_gem_obj_ggtt_set_color(obj, cache_level);
> > > +		i915_gem_obj_set_color(obj, vm, cache_level);
> > >  	}
> > >  
> > >  	if (cache_level == I915_CACHE_NONE) {
> > > @@ -3421,6 +3467,7 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
> > >  			       struct drm_file *file)
> > >  {
> > >  	struct drm_i915_gem_caching *args = data;
> > > +	struct drm_i915_private *dev_priv;
> > >  	struct drm_i915_gem_object *obj;
> > >  	enum i915_cache_level level;
> > >  	int ret;
> > > @@ -3445,8 +3492,10 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
> > >  		ret = -ENOENT;
> > >  		goto unlock;
> > >  	}
> > > +	dev_priv = obj->base.dev->dev_private;
> > >  
> > > -	ret = i915_gem_object_set_cache_level(obj, level);
> > > +	/* FIXME: Add interface for specific VM? */
> > > +	ret = i915_gem_object_set_cache_level(obj, &dev_priv->gtt.base, level);
> > >  
> > >  	drm_gem_object_unreference(&obj->base);
> > >  unlock:
> > > @@ -3464,6 +3513,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
> > >  				     u32 alignment,
> > >  				     struct intel_ring_buffer *pipelined)
> > >  {
> > > +	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
> > >  	u32 old_read_domains, old_write_domain;
> > >  	int ret;
> > >  
> > > @@ -3482,7 +3532,8 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
> > >  	 * of uncaching, which would allow us to flush all the LLC-cached data
> > >  	 * with that bit in the PTE to main memory with just one PIPE_CONTROL.
> > >  	 */
> > > -	ret = i915_gem_object_set_cache_level(obj, I915_CACHE_NONE);
> > > +	ret = i915_gem_object_set_cache_level(obj, &dev_priv->gtt.base,
> > > +					      I915_CACHE_NONE);
> > >  	if (ret)
> > >  		return ret;
> > >  
> > > @@ -3490,7 +3541,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
> > >  	 * (e.g. libkms for the bootup splash), we have to ensure that we
> > >  	 * always use map_and_fenceable for all scanout buffers.
> > >  	 */
> > > -	ret = i915_gem_object_pin(obj, alignment, true, false);
> > > +	ret = i915_gem_ggtt_pin(obj, alignment, true, false);
> > >  	if (ret)
> > >  		return ret;
> > >  
> > > @@ -3633,6 +3684,7 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
> > >  
> > >  int
> > >  i915_gem_object_pin(struct drm_i915_gem_object *obj,
> > > +		    struct i915_address_space *vm,
> > >  		    uint32_t alignment,
> > >  		    bool map_and_fenceable,
> > >  		    bool nonblocking)
> > > @@ -3642,26 +3694,29 @@ i915_gem_object_pin(struct drm_i915_gem_object *obj,
> > >  	if (WARN_ON(obj->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT))
> > >  		return -EBUSY;
> > >  
> > > -	if (i915_gem_obj_ggtt_bound(obj)) {
> > > -		if ((alignment && i915_gem_obj_ggtt_offset(obj) & (alignment - 1)) ||
> > > +	BUG_ON(map_and_fenceable && !i915_is_ggtt(vm));
> > 
> > WARN_ON, since presumably we can keep on going if we get this wrong
> > (albeit with slightly corrupted state, so render corruptions might
> > follow).
> 
> Can we make a deal, can we leave this as BUG_ON with a FIXME to convert
> it at the end of merging?

Adding a FIXME right above it will cause just as many conflicts, so I
don't really see the point ...

> 
> > 
> > > +
> > > +	if (i915_gem_obj_bound(obj, vm)) {
> > > +		if ((alignment &&
> > > +		     i915_gem_obj_offset(obj, vm) & (alignment - 1)) ||
> > >  		    (map_and_fenceable && !obj->map_and_fenceable)) {
> > >  			WARN(obj->pin_count,
> > >  			     "bo is already pinned with incorrect alignment:"
> > >  			     " offset=%lx, req.alignment=%x, req.map_and_fenceable=%d,"
> > >  			     " obj->map_and_fenceable=%d\n",
> > > -			     i915_gem_obj_ggtt_offset(obj), alignment,
> > > +			     i915_gem_obj_offset(obj, vm), alignment,
> > >  			     map_and_fenceable,
> > >  			     obj->map_and_fenceable);
> > > -			ret = i915_gem_object_unbind(obj);
> > > +			ret = i915_gem_object_unbind(obj, vm);
> > >  			if (ret)
> > >  				return ret;
> > >  		}
> > >  	}
> > >  
> > > -	if (!i915_gem_obj_ggtt_bound(obj)) {
> > > +	if (!i915_gem_obj_bound(obj, vm)) {
> > >  		struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
> > >  
> > > -		ret = i915_gem_object_bind_to_gtt(obj, alignment,
> > > +		ret = i915_gem_object_bind_to_gtt(obj, vm, alignment,
> > >  						  map_and_fenceable,
> > >  						  nonblocking);
> > >  		if (ret)
> > > @@ -3684,7 +3739,7 @@ void
> > >  i915_gem_object_unpin(struct drm_i915_gem_object *obj)
> > >  {
> > >  	BUG_ON(obj->pin_count == 0);
> > > -	BUG_ON(!i915_gem_obj_ggtt_bound(obj));
> > > +	BUG_ON(!i915_gem_obj_bound_any(obj));
> > >  
> > >  	if (--obj->pin_count == 0)
> > >  		obj->pin_mappable = false;
> > > @@ -3722,7 +3777,7 @@ i915_gem_pin_ioctl(struct drm_device *dev, void *data,
> > >  	}
> > >  
> > >  	if (obj->user_pin_count == 0) {
> > > -		ret = i915_gem_object_pin(obj, args->alignment, true, false);
> > > +		ret = i915_gem_ggtt_pin(obj, args->alignment, true, false);
> > >  		if (ret)
> > >  			goto out;
> > >  	}
> > > @@ -3957,6 +4012,7 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
> > >  	struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
> > >  	struct drm_device *dev = obj->base.dev;
> > >  	drm_i915_private_t *dev_priv = dev->dev_private;
> > > +	struct i915_vma *vma, *next;
> > >  
> > >  	trace_i915_gem_object_destroy(obj);
> > >  
> > > @@ -3964,15 +4020,21 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
> > >  		i915_gem_detach_phys_object(dev, obj);
> > >  
> > >  	obj->pin_count = 0;
> > > -	if (WARN_ON(i915_gem_object_unbind(obj) == -ERESTARTSYS)) {
> > > -		bool was_interruptible;
> > > +	/* NB: 0 or 1 elements */
> > > +	WARN_ON(!list_empty(&obj->vma_list) &&
> > > +		!list_is_singular(&obj->vma_list));
> > > +	list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) {
> > > +		int ret = i915_gem_object_unbind(obj, vma->vm);
> > > +		if (WARN_ON(ret == -ERESTARTSYS)) {
> > > +			bool was_interruptible;
> > >  
> > > -		was_interruptible = dev_priv->mm.interruptible;
> > > -		dev_priv->mm.interruptible = false;
> > > +			was_interruptible = dev_priv->mm.interruptible;
> > > +			dev_priv->mm.interruptible = false;
> > >  
> > > -		WARN_ON(i915_gem_object_unbind(obj));
> > > +			WARN_ON(i915_gem_object_unbind(obj, vma->vm));
> > >  
> > > -		dev_priv->mm.interruptible = was_interruptible;
> > > +			dev_priv->mm.interruptible = was_interruptible;
> > > +		}
> > >  	}
> > >  
> > >  	/* Stolen objects don't hold a ref, but do hold pin count. Fix that up
> > > @@ -4332,6 +4394,16 @@ init_ring_lists(struct intel_ring_buffer *ring)
> > >  	INIT_LIST_HEAD(&ring->request_list);
> > >  }
> > >  
> > > +static void i915_init_vm(struct drm_i915_private *dev_priv,
> > > +			 struct i915_address_space *vm)
> > > +{
> > > +	vm->dev = dev_priv->dev;
> > > +	INIT_LIST_HEAD(&vm->active_list);
> > > +	INIT_LIST_HEAD(&vm->inactive_list);
> > > +	INIT_LIST_HEAD(&vm->global_link);
> > > +	list_add(&vm->global_link, &dev_priv->vm_list);
> > > +}
> > > +
> > >  void
> > >  i915_gem_load(struct drm_device *dev)
> > >  {
> > > @@ -4344,8 +4416,9 @@ i915_gem_load(struct drm_device *dev)
> > >  				  SLAB_HWCACHE_ALIGN,
> > >  				  NULL);
> > >  
> > > -	INIT_LIST_HEAD(&dev_priv->gtt.base.active_list);
> > > -	INIT_LIST_HEAD(&dev_priv->gtt.base.inactive_list);
> > > +	INIT_LIST_HEAD(&dev_priv->vm_list);
> > > +	i915_init_vm(dev_priv, &dev_priv->gtt.base);
> > > +
> > >  	INIT_LIST_HEAD(&dev_priv->mm.unbound_list);
> > >  	INIT_LIST_HEAD(&dev_priv->mm.bound_list);
> > >  	INIT_LIST_HEAD(&dev_priv->mm.fence_list);
> > > @@ -4616,9 +4689,9 @@ i915_gem_inactive_shrink(struct shrinker *shrinker, struct shrink_control *sc)
> > >  			     struct drm_i915_private,
> > >  			     mm.inactive_shrinker);
> > >  	struct drm_device *dev = dev_priv->dev;
> > > -	struct i915_address_space *vm = &dev_priv->gtt.base;
> > > +	struct i915_address_space *vm;
> > >  	struct drm_i915_gem_object *obj;
> > > -	int nr_to_scan = sc->nr_to_scan;
> > > +	int nr_to_scan;
> > >  	bool unlock = true;
> > >  	int cnt;
> > >  
> > > @@ -4632,6 +4705,7 @@ i915_gem_inactive_shrink(struct shrinker *shrinker, struct shrink_control *sc)
> > >  		unlock = false;
> > >  	}
> > >  
> > > +	nr_to_scan = sc->nr_to_scan;
> > >  	if (nr_to_scan) {
> > >  		nr_to_scan -= i915_gem_purge(dev_priv, nr_to_scan);
> > >  		if (nr_to_scan > 0)
> > > @@ -4645,11 +4719,93 @@ i915_gem_inactive_shrink(struct shrinker *shrinker, struct shrink_control *sc)
> > >  	list_for_each_entry(obj, &dev_priv->mm.unbound_list, global_list)
> > >  		if (obj->pages_pin_count == 0)
> > >  			cnt += obj->base.size >> PAGE_SHIFT;
> > > -	list_for_each_entry(obj, &vm->inactive_list, mm_list)
> > > -		if (obj->pin_count == 0 && obj->pages_pin_count == 0)
> > > -			cnt += obj->base.size >> PAGE_SHIFT;
> > > +
> > > +	list_for_each_entry(vm, &dev_priv->vm_list, global_link)
> > > +		list_for_each_entry(obj, &vm->inactive_list, global_list)
> > > +			if (obj->pin_count == 0 && obj->pages_pin_count == 0)
> > > +				cnt += obj->base.size >> PAGE_SHIFT;
> > 
> > Isn't this now double-counting objects? In the shrinker we only care about
> > how much physical RAM an object occupies, not how much virtual space it
> > occupies. So just walking the bound list of objects here should be good
> > enough ...
> > 
> 
> Maybe I've misunderstood you. My code is wrong, but I think your idea
> requires a prep patch because it changes functionality, right?
> 
> So let me know if I've understood you.

Don't we have both the bound and unbound list? So we could just switch
over to counting the bound objects here ... Otherwise yes, we need a prep
patch to create the bound list first.
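
I.e. keep the unbound_list loop as-is and just replace the per-vm
inactive_list walk with something like this (sketch, assuming every
object with a vma is also on mm.bound_list):

	/* Count the backing storage once per object, not once per vm it
	 * happens to be bound into. */
	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list)
		if (obj->pin_count == 0 && obj->pages_pin_count == 0)
			cnt += obj->base.size >> PAGE_SHIFT;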

> 
> > >  
> > >  	if (unlock)
> > >  		mutex_unlock(&dev->struct_mutex);
> > >  	return cnt;
> > >  }
> > > +
> > > +/* All the new VM stuff */
> > > +unsigned long i915_gem_obj_offset(struct drm_i915_gem_object *o,
> > > +				  struct i915_address_space *vm)
> > > +{
> > > +	struct drm_i915_private *dev_priv = o->base.dev->dev_private;
> > > +	struct i915_vma *vma;
> > > +
> > > +	if (vm == &dev_priv->mm.aliasing_ppgtt->base)
> > > +		vm = &dev_priv->gtt.base;
> > > +
> > > +	BUG_ON(list_empty(&o->vma_list));
> > > +	list_for_each_entry(vma, &o->vma_list, vma_link) {
> > 
> > Imo the vma list walking here and in the other helpers below indicates
> > that we should deal more often in vmas instead of (object, vm) pairs. Or
> > is this again something that'll get fixed later on?
> > 
> > I just want to avoid diff churn, and it also makes reviewing easier if the
> > foreshadowing is correct ;-) So generally I'd vote for more liberal
> > sprinkling of obj_to_vma in callers.
> 
> It's not something I fixed in the whole series. I think it makes sense
> conceptually to keep some things as <obj,vm> and others as direct vma.
> 
> If you want me to change something, you need to be more specific since
> no action specifically comes to mind at this point in the series.

It's just that the (obj, vm) -> vma lookup is a list walk, so imo we
should try to avoid it whenever possible. Since the vma has both an obj
and a vm pointer, the vma is imo strictly better than the (obj, vm) pair.
And the look-up should be pushed down the callchain as much as possible.

So I think generally we want to pass the vma around to functions
everywhere, and the (obj, vm) pair would be the exception (which needs
special justification).
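
E.g. the kind of interfaces I'd expect us to end up with (names purely
made up here, just to illustrate the direction):

	/* Do the (obj, vm) -> vma lookup once, near the top ... */
	struct i915_vma *
	i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj,
					  struct i915_address_space *vm);

	/* ... and hand the vma around instead of the (obj, vm) pair. */
	int i915_vma_bind(struct i915_vma *vma, unsigned alignment,
			  bool map_and_fenceable, bool nonblocking);
	int i915_vma_unbind(struct i915_vma *vma);
	void i915_vma_set_cache_level(struct i915_vma *vma,
				      enum i915_cache_level cache_level);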

> 
> > 
> > > +		if (vma->vm == vm)
> > > +			return vma->node.start;
> > > +
> > > +	}
> > > +	return -1;
> > > +}
> > > +
> > > +bool i915_gem_obj_bound_any(struct drm_i915_gem_object *o)
> > > +{
> > > +	return !list_empty(&o->vma_list);
> > > +}
> > > +
> > > +bool i915_gem_obj_bound(struct drm_i915_gem_object *o,
> > > +			struct i915_address_space *vm)
> > > +{
> > > +	struct i915_vma *vma;
> > > +
> > > +	list_for_each_entry(vma, &o->vma_list, vma_link) {
> > > +		if (vma->vm == vm)
> > > +			return true;
> > > +	}
> > > +	return false;
> > > +}
> > > +
> > > +unsigned long i915_gem_obj_size(struct drm_i915_gem_object *o,
> > > +				struct i915_address_space *vm)
> > > +{
> > > +	struct drm_i915_private *dev_priv = o->base.dev->dev_private;
> > > +	struct i915_vma *vma;
> > > +
> > > +	if (vm == &dev_priv->mm.aliasing_ppgtt->base)
> > > +		vm = &dev_priv->gtt.base;
> > > +	BUG_ON(list_empty(&o->vma_list));
> > > +	list_for_each_entry(vma, &o->vma_list, vma_link) {
> > > +		if (vma->vm == vm)
> > > +			return vma->node.size;
> > > +	}
> > > +
> > > +	return 0;
> > > +}
> > > +
> > > +void i915_gem_obj_set_color(struct drm_i915_gem_object *o,
> > > +			    struct i915_address_space *vm,
> > > +			    enum i915_cache_level color)
> > > +{
> > > +	struct i915_vma *vma;
> > > +	BUG_ON(list_empty(&o->vma_list));
> > > +	list_for_each_entry(vma, &o->vma_list, vma_link) {
> > > +		if (vma->vm == vm) {
> > > +			vma->node.color = color;
> > > +			return;
> > > +		}
> > > +	}
> > > +
> > > +	WARN(1, "Couldn't set color for VM %p\n", vm);
> > > +}
> > > +
> > > +struct i915_vma *i915_gem_obj_to_vma(struct drm_i915_gem_object *obj,
> > > +				     struct i915_address_space *vm)
> > > +{
> > > +	struct i915_vma *vma;
> > > +	list_for_each_entry(vma, &obj->vma_list, vma_link)
> > > +		if (vma->vm == vm)
> > > +			return vma;
> > > +
> > > +	return NULL;
> > > +}
> > > diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> > > index 2074544..c92fd81 100644
> > > --- a/drivers/gpu/drm/i915/i915_gem_context.c
> > > +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> > > @@ -155,6 +155,7 @@ create_hw_context(struct drm_device *dev,
> > >  
> > >  	if (INTEL_INFO(dev)->gen >= 7) {
> > >  		ret = i915_gem_object_set_cache_level(ctx->obj,
> > > +						      &dev_priv->gtt.base,
> > >  						      I915_CACHE_LLC_MLC);
> > >  		/* Failure shouldn't ever happen this early */
> > >  		if (WARN_ON(ret))
> > > @@ -214,7 +215,7 @@ static int create_default_context(struct drm_i915_private *dev_priv)
> > >  	 * default context.
> > >  	 */
> > >  	dev_priv->ring[RCS].default_context = ctx;
> > > -	ret = i915_gem_object_pin(ctx->obj, CONTEXT_ALIGN, false, false);
> > > +	ret = i915_gem_ggtt_pin(ctx->obj, CONTEXT_ALIGN, false, false);
> > >  	if (ret) {
> > >  		DRM_DEBUG_DRIVER("Couldn't pin %d\n", ret);
> > >  		goto err_destroy;
> > > @@ -398,6 +399,7 @@ mi_set_context(struct intel_ring_buffer *ring,
> > >  static int do_switch(struct i915_hw_context *to)
> > >  {
> > >  	struct intel_ring_buffer *ring = to->ring;
> > > +	struct drm_i915_private *dev_priv = ring->dev->dev_private;
> > >  	struct i915_hw_context *from = ring->last_context;
> > >  	u32 hw_flags = 0;
> > >  	int ret;
> > > @@ -407,7 +409,7 @@ static int do_switch(struct i915_hw_context *to)
> > >  	if (from == to)
> > >  		return 0;
> > >  
> > > -	ret = i915_gem_object_pin(to->obj, CONTEXT_ALIGN, false, false);
> > > +	ret = i915_gem_ggtt_pin(to->obj, CONTEXT_ALIGN, false, false);
> > >  	if (ret)
> > >  		return ret;
> > >  
> > > @@ -444,7 +446,8 @@ static int do_switch(struct i915_hw_context *to)
> > >  	 */
> > >  	if (from != NULL) {
> > >  		from->obj->base.read_domains = I915_GEM_DOMAIN_INSTRUCTION;
> > > -		i915_gem_object_move_to_active(from->obj, ring);
> > > +		i915_gem_object_move_to_active(from->obj, &dev_priv->gtt.base,
> > > +					       ring);
> > >  		/* As long as MI_SET_CONTEXT is serializing, ie. it flushes the
> > >  		 * whole damn pipeline, we don't need to explicitly mark the
> > >  		 * object dirty. The only exception is that the context must be
> > > diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
> > > index df61f33..32efdc0 100644
> > > --- a/drivers/gpu/drm/i915/i915_gem_evict.c
> > > +++ b/drivers/gpu/drm/i915/i915_gem_evict.c
> > > @@ -32,24 +32,21 @@
> > >  #include "i915_trace.h"
> > >  
> > >  static bool
> > > -mark_free(struct drm_i915_gem_object *obj, struct list_head *unwind)
> > > +mark_free(struct i915_vma *vma, struct list_head *unwind)
> > >  {
> > > -	struct i915_vma *vma = __i915_gem_obj_to_vma(obj);
> > > -
> > > -	if (obj->pin_count)
> > > +	if (vma->obj->pin_count)
> > >  		return false;
> > >  
> > > -	list_add(&obj->exec_list, unwind);
> > > +	list_add(&vma->obj->exec_list, unwind);
> > >  	return drm_mm_scan_add_block(&vma->node);
> > >  }
> > >  
> > >  int
> > > -i915_gem_evict_something(struct drm_device *dev, int min_size,
> > > -			 unsigned alignment, unsigned cache_level,
> > > +i915_gem_evict_something(struct drm_device *dev, struct i915_address_space *vm,
> > > +			 int min_size, unsigned alignment, unsigned cache_level,
> > >  			 bool mappable, bool nonblocking)
> > >  {
> > >  	drm_i915_private_t *dev_priv = dev->dev_private;
> > > -	struct i915_address_space *vm = &dev_priv->gtt.base;
> > >  	struct list_head eviction_list, unwind_list;
> > >  	struct i915_vma *vma;
> > >  	struct drm_i915_gem_object *obj;
> > > @@ -81,16 +78,18 @@ i915_gem_evict_something(struct drm_device *dev, int min_size,
> > >  	 */
> > >  
> > >  	INIT_LIST_HEAD(&unwind_list);
> > > -	if (mappable)
> > > +	if (mappable) {
> > > +		BUG_ON(!i915_is_ggtt(vm));
> > >  		drm_mm_init_scan_with_range(&vm->mm, min_size,
> > >  					    alignment, cache_level, 0,
> > >  					    dev_priv->gtt.mappable_end);
> > > -	else
> > > +	} else
> > >  		drm_mm_init_scan(&vm->mm, min_size, alignment, cache_level);
> > >  
> > >  	/* First see if there is a large enough contiguous idle region... */
> > >  	list_for_each_entry(obj, &vm->inactive_list, mm_list) {
> > > -		if (mark_free(obj, &unwind_list))
> > > +		struct i915_vma *vma = i915_gem_obj_to_vma(obj, vm);
> > > +		if (mark_free(vma, &unwind_list))
> > >  			goto found;
> > >  	}
> > >  
> > > @@ -99,7 +98,8 @@ i915_gem_evict_something(struct drm_device *dev, int min_size,
> > >  
> > >  	/* Now merge in the soon-to-be-expired objects... */
> > >  	list_for_each_entry(obj, &vm->active_list, mm_list) {
> > > -		if (mark_free(obj, &unwind_list))
> > > +		struct i915_vma *vma = i915_gem_obj_to_vma(obj, vm);
> > > +		if (mark_free(vma, &unwind_list))
> > >  			goto found;
> > >  	}
> > >  
> > > @@ -109,7 +109,7 @@ none:
> > >  		obj = list_first_entry(&unwind_list,
> > >  				       struct drm_i915_gem_object,
> > >  				       exec_list);
> > > -		vma = __i915_gem_obj_to_vma(obj);
> > > +		vma = i915_gem_obj_to_vma(obj, vm);
> > >  		ret = drm_mm_scan_remove_block(&vma->node);
> > >  		BUG_ON(ret);
> > >  
> > > @@ -130,7 +130,7 @@ found:
> > >  		obj = list_first_entry(&unwind_list,
> > >  				       struct drm_i915_gem_object,
> > >  				       exec_list);
> > > -		vma = __i915_gem_obj_to_vma(obj);
> > > +		vma = i915_gem_obj_to_vma(obj, vm);
> > >  		if (drm_mm_scan_remove_block(&vma->node)) {
> > >  			list_move(&obj->exec_list, &eviction_list);
> > >  			drm_gem_object_reference(&obj->base);
> > > @@ -145,7 +145,7 @@ found:
> > >  				       struct drm_i915_gem_object,
> > >  				       exec_list);
> > >  		if (ret == 0)
> > > -			ret = i915_gem_object_unbind(obj);
> > > +			ret = i915_gem_object_unbind(obj, vm);
> > >  
> > >  		list_del_init(&obj->exec_list);
> > >  		drm_gem_object_unreference(&obj->base);
> > > @@ -158,13 +158,18 @@ int
> > >  i915_gem_evict_everything(struct drm_device *dev)
> > 
> > I suspect evict_everything eventually wants a address_space *vm argument
> > for those cases where we only want to evict everything in a given vm. Atm
> > we have two use-cases of this:
> > - Called from the shrinker as a last-ditch effort. For that it should move
> >   _every_ object onto the unbound list.
> > - Called from execbuf for badly-fragmented address spaces to clean up the
> >   mess. For that case we only care about one address space.
> 
> The current thing is more or less a result of Chris' suggestions. A
> non-posted iteration did plumb the vm through, and after reworking it
> along the lines Chris suggested, the vm didn't make much sense anymore.
> 
> For point #1, it requires VM prioritization I think. I don't really see
> any other way to fairly manage it.

The shrinker will rip out objects in lru order by walking first the
unbound and then the bound objects. That's imo as fair as it gets; we
don't need priorities between vms.

> For point #2, I agree it might be useful, but we can easily create a
> new function to do it and not call it "shrinker".

Well, my point was that this function is called
i915_gem_evict_everything(dev, vm) and for the first use case we simply
pass in vm = NULL. But essentially thrashing the vm should be rare enough
that for now we don't need to care.
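
I.e. the interface shape I had in mind (sketch of the proposal only):

	/* Shrinker, last-ditch effort: move everything in every vm onto
	 * the unbound list. */
	ret = i915_gem_evict_everything(dev, NULL);

	/* Execbuf with a badly fragmented address space: only clean out
	 * the vm we're trying to cram things into. */
	ret = i915_gem_evict_everything(dev, vm);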


> 
> 
> > 
> > >  {
> > >  	drm_i915_private_t *dev_priv = dev->dev_private;
> > > -	struct i915_address_space *vm = &dev_priv->gtt.base;
> > > +	struct i915_address_space *vm;
> > >  	struct drm_i915_gem_object *obj, *next;
> > > -	bool lists_empty;
> > > +	bool lists_empty = true;
> > >  	int ret;
> > >  
> > > -	lists_empty = (list_empty(&vm->inactive_list) &&
> > > -		       list_empty(&vm->active_list));
> > > +	list_for_each_entry(vm, &dev_priv->vm_list, global_link) {
> > > +		lists_empty = (list_empty(&vm->inactive_list) &&
> > > +			       list_empty(&vm->active_list));
> > > +		if (!lists_empty)
> > > +			lists_empty = false;
> > > +	}
> > > +
> > >  	if (lists_empty)
> > >  		return -ENOSPC;
> > >  
> > > @@ -181,9 +186,11 @@ i915_gem_evict_everything(struct drm_device *dev)
> > >  	i915_gem_retire_requests(dev);
> > >  
> > >  	/* Having flushed everything, unbind() should never raise an error */
> > > -	list_for_each_entry_safe(obj, next, &vm->inactive_list, mm_list)
> > > -		if (obj->pin_count == 0)
> > > -			WARN_ON(i915_gem_object_unbind(obj));
> > > +	list_for_each_entry(vm, &dev_priv->vm_list, global_link) {
> > > +		list_for_each_entry_safe(obj, next, &vm->inactive_list, mm_list)
> > > +			if (obj->pin_count == 0)
> > > +				WARN_ON(i915_gem_object_unbind(obj, vm));
> > > +	}
> > >  
> > >  	return 0;
> > >  }
> > > diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> > > index 5aeb447..e90182d 100644
> > > --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> > > +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> > > @@ -150,7 +150,7 @@ eb_get_object(struct eb_objects *eb, unsigned long handle)
> > >  }
> > >  
> > >  static void
> > > -eb_destroy(struct eb_objects *eb)
> > > +eb_destroy(struct eb_objects *eb, struct i915_address_space *vm)
> > >  {
> > >  	while (!list_empty(&eb->objects)) {
> > >  		struct drm_i915_gem_object *obj;
> > > @@ -174,7 +174,8 @@ static inline int use_cpu_reloc(struct drm_i915_gem_object *obj)
> > >  static int
> > >  i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
> > >  				   struct eb_objects *eb,
> > > -				   struct drm_i915_gem_relocation_entry *reloc)
> > > +				   struct drm_i915_gem_relocation_entry *reloc,
> > > +				   struct i915_address_space *vm)
> > >  {
> > >  	struct drm_device *dev = obj->base.dev;
> > >  	struct drm_gem_object *target_obj;
> > > @@ -297,7 +298,8 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
> > >  
> > >  static int
> > >  i915_gem_execbuffer_relocate_object(struct drm_i915_gem_object *obj,
> > > -				    struct eb_objects *eb)
> > > +				    struct eb_objects *eb,
> > > +				    struct i915_address_space *vm)
> > >  {
> > >  #define N_RELOC(x) ((x) / sizeof(struct drm_i915_gem_relocation_entry))
> > >  	struct drm_i915_gem_relocation_entry stack_reloc[N_RELOC(512)];
> > > @@ -321,7 +323,8 @@ i915_gem_execbuffer_relocate_object(struct drm_i915_gem_object *obj,
> > >  		do {
> > >  			u64 offset = r->presumed_offset;
> > >  
> > > -			ret = i915_gem_execbuffer_relocate_entry(obj, eb, r);
> > > +			ret = i915_gem_execbuffer_relocate_entry(obj, eb, r,
> > > +								 vm);
> > >  			if (ret)
> > >  				return ret;
> > >  
> > > @@ -344,13 +347,15 @@ i915_gem_execbuffer_relocate_object(struct drm_i915_gem_object *obj,
> > >  static int
> > >  i915_gem_execbuffer_relocate_object_slow(struct drm_i915_gem_object *obj,
> > >  					 struct eb_objects *eb,
> > > -					 struct drm_i915_gem_relocation_entry *relocs)
> > > +					 struct drm_i915_gem_relocation_entry *relocs,
> > > +					 struct i915_address_space *vm)
> > >  {
> > >  	const struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
> > >  	int i, ret;
> > >  
> > >  	for (i = 0; i < entry->relocation_count; i++) {
> > > -		ret = i915_gem_execbuffer_relocate_entry(obj, eb, &relocs[i]);
> > > +		ret = i915_gem_execbuffer_relocate_entry(obj, eb, &relocs[i],
> > > +							 vm);
> > >  		if (ret)
> > >  			return ret;
> > >  	}
> > > @@ -359,7 +364,8 @@ i915_gem_execbuffer_relocate_object_slow(struct drm_i915_gem_object *obj,
> > >  }
> > >  
> > >  static int
> > > -i915_gem_execbuffer_relocate(struct eb_objects *eb)
> > > +i915_gem_execbuffer_relocate(struct eb_objects *eb,
> > > +			     struct i915_address_space *vm)
> > >  {
> > >  	struct drm_i915_gem_object *obj;
> > >  	int ret = 0;
> > > @@ -373,7 +379,7 @@ i915_gem_execbuffer_relocate(struct eb_objects *eb)
> > >  	 */
> > >  	pagefault_disable();
> > >  	list_for_each_entry(obj, &eb->objects, exec_list) {
> > > -		ret = i915_gem_execbuffer_relocate_object(obj, eb);
> > > +		ret = i915_gem_execbuffer_relocate_object(obj, eb, vm);
> > >  		if (ret)
> > >  			break;
> > >  	}
> > > @@ -395,6 +401,7 @@ need_reloc_mappable(struct drm_i915_gem_object *obj)
> > >  static int
> > >  i915_gem_execbuffer_reserve_object(struct drm_i915_gem_object *obj,
> > >  				   struct intel_ring_buffer *ring,
> > > +				   struct i915_address_space *vm,
> > >  				   bool *need_reloc)
> > >  {
> > >  	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
> > > @@ -409,7 +416,8 @@ i915_gem_execbuffer_reserve_object(struct drm_i915_gem_object *obj,
> > >  		obj->tiling_mode != I915_TILING_NONE;
> > >  	need_mappable = need_fence || need_reloc_mappable(obj);
> > >  
> > > -	ret = i915_gem_object_pin(obj, entry->alignment, need_mappable, false);
> > > +	ret = i915_gem_object_pin(obj, vm, entry->alignment, need_mappable,
> > > +				  false);
> > >  	if (ret)
> > >  		return ret;
> > >  
> > > @@ -436,8 +444,8 @@ i915_gem_execbuffer_reserve_object(struct drm_i915_gem_object *obj,
> > >  		obj->has_aliasing_ppgtt_mapping = 1;
> > >  	}
> > >  
> > > -	if (entry->offset != i915_gem_obj_ggtt_offset(obj)) {
> > > -		entry->offset = i915_gem_obj_ggtt_offset(obj);
> > > +	if (entry->offset != i915_gem_obj_offset(obj, vm)) {
> > > +		entry->offset = i915_gem_obj_offset(obj, vm);
> > >  		*need_reloc = true;
> > >  	}
> > >  
> > > @@ -458,7 +466,7 @@ i915_gem_execbuffer_unreserve_object(struct drm_i915_gem_object *obj)
> > >  {
> > >  	struct drm_i915_gem_exec_object2 *entry;
> > >  
> > > -	if (!i915_gem_obj_ggtt_bound(obj))
> > > +	if (!i915_gem_obj_bound_any(obj))
> > >  		return;
> > >  
> > >  	entry = obj->exec_entry;
> > > @@ -475,6 +483,7 @@ i915_gem_execbuffer_unreserve_object(struct drm_i915_gem_object *obj)
> > >  static int
> > >  i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
> > >  			    struct list_head *objects,
> > > +			    struct i915_address_space *vm,
> > >  			    bool *need_relocs)
> > >  {
> > >  	struct drm_i915_gem_object *obj;
> > > @@ -529,32 +538,37 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
> > >  		list_for_each_entry(obj, objects, exec_list) {
> > >  			struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
> > >  			bool need_fence, need_mappable;
> > > +			u32 obj_offset;
> > >  
> > > -			if (!i915_gem_obj_ggtt_bound(obj))
> > > +			if (!i915_gem_obj_bound(obj, vm))
> > >  				continue;
> > 
> > I wonder a bit how we could avoid the multipler (obj, vm) -> vma lookups
> > here ... Maybe we should cache them in some pointer somewhere (either in
> > the eb object or by adding a new pointer to the object struct, e.g.
> > obj->eb_vma, similar to obj->eb_list).
> > 
> 
> I agree, and even did this in one unposted patch too. However, I think
> it's a premature optimization which risks code correctness. So I think
> somewhere a FIXME needs to happen to address that issue. (Or if Chris
> complains bitterly about some perf hit).

If you bring up code correctness I'd vote strongly in favour of using
vmas everywhere - the vma has the (obj, vm) pair locked down, while doing
the lookup all the time risks us mixing them up eventually and creating a
hella lot of confusion ;-)
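
E.g. for execbuf we could cache the vma when we build the eb object list
(obj->eb_vma being a hypothetical field along the lines of
obj->exec_entry):

	/* Once, when looking up the objects for this execbuf: */
	obj->eb_vma = i915_gem_obj_to_vma(obj, vm);

	/* Later in reserve/relocate, no per-access list walk: */
	if (entry->offset != obj->eb_vma->node.start) {
		entry->offset = obj->eb_vma->node.start;
		*need_reloc = true;
	}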

> 
> > >  
> > > +			obj_offset = i915_gem_obj_offset(obj, vm);
> > >  			need_fence =
> > >  				has_fenced_gpu_access &&
> > >  				entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
> > >  				obj->tiling_mode != I915_TILING_NONE;
> > >  			need_mappable = need_fence || need_reloc_mappable(obj);
> > >  
> > > +			BUG_ON((need_mappable || need_fence) &&
> > > +			       !i915_is_ggtt(vm));
> > > +
> > >  			if ((entry->alignment &&
> > > -			     i915_gem_obj_ggtt_offset(obj) & (entry->alignment - 1)) ||
> > > +			     obj_offset & (entry->alignment - 1)) ||
> > >  			    (need_mappable && !obj->map_and_fenceable))
> > > -				ret = i915_gem_object_unbind(obj);
> > > +				ret = i915_gem_object_unbind(obj, vm);
> > >  			else
> > > -				ret = i915_gem_execbuffer_reserve_object(obj, ring, need_relocs);
> > > +				ret = i915_gem_execbuffer_reserve_object(obj, ring, vm, need_relocs);
> > >  			if (ret)
> > >  				goto err;
> > >  		}
> > >  
> > >  		/* Bind fresh objects */
> > >  		list_for_each_entry(obj, objects, exec_list) {
> > > -			if (i915_gem_obj_ggtt_bound(obj))
> > > +			if (i915_gem_obj_bound(obj, vm))
> > >  				continue;
> > >  
> > > -			ret = i915_gem_execbuffer_reserve_object(obj, ring, need_relocs);
> > > +			ret = i915_gem_execbuffer_reserve_object(obj, ring, vm, need_relocs);
> > >  			if (ret)
> > >  				goto err;
> > >  		}
> > > @@ -578,7 +592,8 @@ i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
> > >  				  struct drm_file *file,
> > >  				  struct intel_ring_buffer *ring,
> > >  				  struct eb_objects *eb,
> > > -				  struct drm_i915_gem_exec_object2 *exec)
> > > +				  struct drm_i915_gem_exec_object2 *exec,
> > > +				  struct i915_address_space *vm)
> > >  {
> > >  	struct drm_i915_gem_relocation_entry *reloc;
> > >  	struct drm_i915_gem_object *obj;
> > > @@ -662,14 +677,15 @@ i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
> > >  		goto err;
> > >  
> > >  	need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0;
> > > -	ret = i915_gem_execbuffer_reserve(ring, &eb->objects, &need_relocs);
> > > +	ret = i915_gem_execbuffer_reserve(ring, &eb->objects, vm, &need_relocs);
> > >  	if (ret)
> > >  		goto err;
> > >  
> > >  	list_for_each_entry(obj, &eb->objects, exec_list) {
> > >  		int offset = obj->exec_entry - exec;
> > >  		ret = i915_gem_execbuffer_relocate_object_slow(obj, eb,
> > > -							       reloc + reloc_offset[offset]);
> > > +							       reloc + reloc_offset[offset],
> > > +							       vm);
> > >  		if (ret)
> > >  			goto err;
> > >  	}
> > > @@ -768,6 +784,7 @@ validate_exec_list(struct drm_i915_gem_exec_object2 *exec,
> > >  
> > >  static void
> > >  i915_gem_execbuffer_move_to_active(struct list_head *objects,
> > > +				   struct i915_address_space *vm,
> > >  				   struct intel_ring_buffer *ring)
> > >  {
> > >  	struct drm_i915_gem_object *obj;
> > > @@ -782,7 +799,7 @@ i915_gem_execbuffer_move_to_active(struct list_head *objects,
> > >  		obj->base.read_domains = obj->base.pending_read_domains;
> > >  		obj->fenced_gpu_access = obj->pending_fenced_gpu_access;
> > >  
> > > -		i915_gem_object_move_to_active(obj, ring);
> > > +		i915_gem_object_move_to_active(obj, vm, ring);
> > >  		if (obj->base.write_domain) {
> > >  			obj->dirty = 1;
> > >  			obj->last_write_seqno = intel_ring_get_seqno(ring);
> > > @@ -836,7 +853,8 @@ static int
> > >  i915_gem_do_execbuffer(struct drm_device *dev, void *data,
> > >  		       struct drm_file *file,
> > >  		       struct drm_i915_gem_execbuffer2 *args,
> > > -		       struct drm_i915_gem_exec_object2 *exec)
> > > +		       struct drm_i915_gem_exec_object2 *exec,
> > > +		       struct i915_address_space *vm)
> > >  {
> > >  	drm_i915_private_t *dev_priv = dev->dev_private;
> > >  	struct eb_objects *eb;
> > > @@ -998,17 +1016,17 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
> > >  
> > >  	/* Move the objects en-masse into the GTT, evicting if necessary. */
> > >  	need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0;
> > > -	ret = i915_gem_execbuffer_reserve(ring, &eb->objects, &need_relocs);
> > > +	ret = i915_gem_execbuffer_reserve(ring, &eb->objects, vm, &need_relocs);
> > >  	if (ret)
> > >  		goto err;
> > >  
> > >  	/* The objects are in their final locations, apply the relocations. */
> > >  	if (need_relocs)
> > > -		ret = i915_gem_execbuffer_relocate(eb);
> > > +		ret = i915_gem_execbuffer_relocate(eb, vm);
> > >  	if (ret) {
> > >  		if (ret == -EFAULT) {
> > >  			ret = i915_gem_execbuffer_relocate_slow(dev, args, file, ring,
> > > -								eb, exec);
> > > +								eb, exec, vm);
> > >  			BUG_ON(!mutex_is_locked(&dev->struct_mutex));
> > >  		}
> > >  		if (ret)
> > > @@ -1059,7 +1077,8 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
> > >  			goto err;
> > >  	}
> > >  
> > > -	exec_start = i915_gem_obj_ggtt_offset(batch_obj) + args->batch_start_offset;
> > > +	exec_start = i915_gem_obj_offset(batch_obj, vm) +
> > > +		args->batch_start_offset;
> > >  	exec_len = args->batch_len;
> > >  	if (cliprects) {
> > >  		for (i = 0; i < args->num_cliprects; i++) {
> > > @@ -1084,11 +1103,11 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
> > >  
> > >  	trace_i915_gem_ring_dispatch(ring, intel_ring_get_seqno(ring), flags);
> > >  
> > > -	i915_gem_execbuffer_move_to_active(&eb->objects, ring);
> > > +	i915_gem_execbuffer_move_to_active(&eb->objects, vm, ring);
> > >  	i915_gem_execbuffer_retire_commands(dev, file, ring, batch_obj);
> > >  
> > >  err:
> > > -	eb_destroy(eb);
> > > +	eb_destroy(eb, vm);
> > >  
> > >  	mutex_unlock(&dev->struct_mutex);
> > >  
> > > @@ -1105,6 +1124,7 @@ int
> > >  i915_gem_execbuffer(struct drm_device *dev, void *data,
> > >  		    struct drm_file *file)
> > >  {
> > > +	struct drm_i915_private *dev_priv = dev->dev_private;
> > >  	struct drm_i915_gem_execbuffer *args = data;
> > >  	struct drm_i915_gem_execbuffer2 exec2;
> > >  	struct drm_i915_gem_exec_object *exec_list = NULL;
> > > @@ -1160,7 +1180,8 @@ i915_gem_execbuffer(struct drm_device *dev, void *data,
> > >  	exec2.flags = I915_EXEC_RENDER;
> > >  	i915_execbuffer2_set_context_id(exec2, 0);
> > >  
> > > -	ret = i915_gem_do_execbuffer(dev, data, file, &exec2, exec2_list);
> > > +	ret = i915_gem_do_execbuffer(dev, data, file, &exec2, exec2_list,
> > > +				     &dev_priv->gtt.base);
> > >  	if (!ret) {
> > >  		/* Copy the new buffer offsets back to the user's exec list. */
> > >  		for (i = 0; i < args->buffer_count; i++)
> > > @@ -1186,6 +1207,7 @@ int
> > >  i915_gem_execbuffer2(struct drm_device *dev, void *data,
> > >  		     struct drm_file *file)
> > >  {
> > > +	struct drm_i915_private *dev_priv = dev->dev_private;
> > >  	struct drm_i915_gem_execbuffer2 *args = data;
> > >  	struct drm_i915_gem_exec_object2 *exec2_list = NULL;
> > >  	int ret;
> > > @@ -1216,7 +1238,8 @@ i915_gem_execbuffer2(struct drm_device *dev, void *data,
> > >  		return -EFAULT;
> > >  	}
> > >  
> > > -	ret = i915_gem_do_execbuffer(dev, data, file, args, exec2_list);
> > > +	ret = i915_gem_do_execbuffer(dev, data, file, args, exec2_list,
> > > +				     &dev_priv->gtt.base);
> > >  	if (!ret) {
> > >  		/* Copy the new buffer offsets back to the user's exec list. */
> > >  		ret = copy_to_user(to_user_ptr(args->buffers_ptr),
> > > diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> > > index 298fc42..70ce2f6 100644
> > > --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> > > +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> > > @@ -367,6 +367,8 @@ static int i915_gem_init_aliasing_ppgtt(struct drm_device *dev)
> > >  			    ppgtt->base.total);
> > >  	}
> > >  
> > > +	/* i915_init_vm(dev_priv, &ppgtt->base) */
> > > +
> > >  	return ret;
> > >  }
> > >  
> > > @@ -386,17 +388,22 @@ void i915_ppgtt_bind_object(struct i915_hw_ppgtt *ppgtt,
> > >  			    struct drm_i915_gem_object *obj,
> > >  			    enum i915_cache_level cache_level)
> > >  {
> > > -	ppgtt->base.insert_entries(&ppgtt->base, obj->pages,
> > > -				   i915_gem_obj_ggtt_offset(obj) >> PAGE_SHIFT,
> > > -				   cache_level);
> > > +	struct i915_address_space *vm = &ppgtt->base;
> > > +	unsigned long obj_offset = i915_gem_obj_offset(obj, vm);
> > > +
> > > +	vm->insert_entries(vm, obj->pages,
> > > +			   obj_offset >> PAGE_SHIFT,
> > > +			   cache_level);
> > >  }
> > >  
> > >  void i915_ppgtt_unbind_object(struct i915_hw_ppgtt *ppgtt,
> > >  			      struct drm_i915_gem_object *obj)
> > >  {
> > > -	ppgtt->base.clear_range(&ppgtt->base,
> > > -				i915_gem_obj_ggtt_offset(obj) >> PAGE_SHIFT,
> > > -				obj->base.size >> PAGE_SHIFT);
> > > +	struct i915_address_space *vm = &ppgtt->base;
> > > +	unsigned long obj_offset = i915_gem_obj_offset(obj, vm);
> > > +
> > > +	vm->clear_range(vm, obj_offset >> PAGE_SHIFT,
> > > +			obj->base.size >> PAGE_SHIFT);
> > >  }
> > >  
> > >  extern int intel_iommu_gfx_mapped;
> > > @@ -447,6 +454,9 @@ void i915_gem_restore_gtt_mappings(struct drm_device *dev)
> > >  				       dev_priv->gtt.base.start / PAGE_SIZE,
> > >  				       dev_priv->gtt.base.total / PAGE_SIZE);
> > >  
> > > +	if (dev_priv->mm.aliasing_ppgtt)
> > > +		gen6_write_pdes(dev_priv->mm.aliasing_ppgtt);
> > > +
> > >  	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
> > >  		i915_gem_clflush_object(obj);
> > >  		i915_gem_gtt_bind_object(obj, obj->cache_level);
> > > @@ -625,7 +635,8 @@ void i915_gem_setup_global_gtt(struct drm_device *dev,
> > >  	 * aperture.  One page should be enough to keep any prefetching inside
> > >  	 * of the aperture.
> > >  	 */
> > > -	drm_i915_private_t *dev_priv = dev->dev_private;
> > > +	struct drm_i915_private *dev_priv = dev->dev_private;
> > > +	struct i915_address_space *ggtt_vm = &dev_priv->gtt.base;
> > >  	struct drm_mm_node *entry;
> > >  	struct drm_i915_gem_object *obj;
> > >  	unsigned long hole_start, hole_end;
> > > @@ -633,19 +644,19 @@ void i915_gem_setup_global_gtt(struct drm_device *dev,
> > >  	BUG_ON(mappable_end > end);
> > >  
> > >  	/* Subtract the guard page ... */
> > > -	drm_mm_init(&dev_priv->gtt.base.mm, start, end - start - PAGE_SIZE);
> > > +	drm_mm_init(&ggtt_vm->mm, start, end - start - PAGE_SIZE);
> > >  	if (!HAS_LLC(dev))
> > >  		dev_priv->gtt.base.mm.color_adjust = i915_gtt_color_adjust;
> > >  
> > >  	/* Mark any preallocated objects as occupied */
> > >  	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
> > > -		struct i915_vma *vma = __i915_gem_obj_to_vma(obj);
> > > +		struct i915_vma *vma = i915_gem_obj_to_vma(obj, ggtt_vm);
> > >  		int ret;
> > >  		DRM_DEBUG_KMS("reserving preallocated space: %lx + %zx\n",
> > >  			      i915_gem_obj_ggtt_offset(obj), obj->base.size);
> > >  
> > >  		WARN_ON(i915_gem_obj_ggtt_bound(obj));
> > > -		ret = drm_mm_reserve_node(&dev_priv->gtt.base.mm, &vma->node);
> > > +		ret = drm_mm_reserve_node(&ggtt_vm->mm, &vma->node);
> > >  		if (ret)
> > >  			DRM_DEBUG_KMS("Reservation failed\n");
> > >  		obj->has_global_gtt_mapping = 1;
> > > @@ -656,19 +667,15 @@ void i915_gem_setup_global_gtt(struct drm_device *dev,
> > >  	dev_priv->gtt.base.total = end - start;
> > >  
> > >  	/* Clear any non-preallocated blocks */
> > > -	drm_mm_for_each_hole(entry, &dev_priv->gtt.base.mm,
> > > -			     hole_start, hole_end) {
> > > +	drm_mm_for_each_hole(entry, &ggtt_vm->mm, hole_start, hole_end) {
> > >  		const unsigned long count = (hole_end - hole_start) / PAGE_SIZE;
> > >  		DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n",
> > >  			      hole_start, hole_end);
> > > -		dev_priv->gtt.base.clear_range(&dev_priv->gtt.base,
> > > -					       hole_start / PAGE_SIZE,
> > > -					       count);
> > > +		ggtt_vm->clear_range(ggtt_vm, hole_start / PAGE_SIZE, count);
> > >  	}
> > >  
> > >  	/* And finally clear the reserved guard page */
> > > -	dev_priv->gtt.base.clear_range(&dev_priv->gtt.base,
> > > -				       end / PAGE_SIZE - 1, 1);
> > > +	ggtt_vm->clear_range(ggtt_vm, end / PAGE_SIZE - 1, 1);
> > >  }
> > >  
> > >  static bool
> > > diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c
> > > index 245eb1d..bfe61fa 100644
> > > --- a/drivers/gpu/drm/i915/i915_gem_stolen.c
> > > +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c
> > > @@ -391,7 +391,7 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_device *dev,
> > >  	if (gtt_offset == I915_GTT_OFFSET_NONE)
> > >  		return obj;
> > >  
> > > -	vma = i915_gem_vma_create(obj, &dev_priv->gtt.base);
> > > +	vma = i915_gem_vma_create(obj, vm);
> > >  	if (!vma) {
> > >  		drm_gem_object_unreference(&obj->base);
> > >  		return NULL;
> > > @@ -404,8 +404,8 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_device *dev,
> > >  	 */
> > >  	vma->node.start = gtt_offset;
> > >  	vma->node.size = size;
> > > -	if (drm_mm_initialized(&dev_priv->gtt.base.mm)) {
> > > -		ret = drm_mm_reserve_node(&dev_priv->gtt.base.mm, &vma->node);
> > > +	if (drm_mm_initialized(&vm->mm)) {
> > > +		ret = drm_mm_reserve_node(&vm->mm, &vma->node);
> > 
> > These two hunks here for stolen look fishy - we only ever use the stolen
> > preallocated stuff for objects with mappings in the global gtt. So keeping
> > that explicit is imo the better approach. And tbh I'm confused where the
> > local variable vm is from ...
> 
> If we don't create a vma for it, we potentially have to special case a
> bunch of places, I think. I'm not actually sure of this, but the
> overhead to do it is quite small.
> 
> Anyway, I'll look this over again and see what I think.

I'm not against the vma, I've just wondered why you do the
s/dev_priv->gtt.base/vm/ replacement here since
- it's never gonna be used with another vm than the ggtt
- this patch doesn't add the vm variable, so I'm even more confused where
  this started ;-)

Cheers, Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch


