[Intel-gfx] [PATCH 1/3] drm/i915: Only bind each object rather than for every execbuffer

Wed Jan 1 22:51:57 CET 2014

On Wed, Jan 01, 2014 at 02:00:54PM +0000, Chris Wilson wrote:
> One side-effect of the introduction of ppgtt was that we needed to
> rebind the object into the appropriate vm (and global gtt in some
> peculiar cases). For simplicity this was done twice for every object on
> every call to execbuffer. However, that adds a tremendous amount of CPU
> overhead (rewriting all the PTE for all objects into WC memory) per
> draw. The fix is to push all the decision about which vm to bind into
> and when down into the low-level bind routines through hints rather than
> inside the execbuffer routine.
> 
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=72906
> Tested-by: jianx.zhou at intel.com
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Ben Widawsky <benjamin.widawsky at intel.com>
> Cc: Daniel Vetter <daniel.vetter at ffwll.ch>

My original plan with full PPGTT was to avoid all this deferred bind,
and other complications in the bind logic. My intention had been, if a
VMA exists, it is bound (with global GTT as the special case). It
would have therefore been relatively easy to determine when to bind an
object into a VM. One thing which I didn't plan for was having to change
the caching type. To a large extent, I feel this patch heads towards
that goal, except uses drm_mm_node_allocated where I had expected to
simply check if a VMA exists.

Anyway, no point to this really other than to explain why things are
somewhat messy.

> ---
>  drivers/gpu/drm/i915/i915_drv.h            |  25 +++---
>  drivers/gpu/drm/i915/i915_gem.c            | 119 +++++++++++------------------
>  drivers/gpu/drm/i915/i915_gem_context.c    |   9 +--
>  drivers/gpu/drm/i915/i915_gem_evict.c      |  12 +--
>  drivers/gpu/drm/i915/i915_gem_execbuffer.c |  18 +++--
>  drivers/gpu/drm/i915/i915_gem_gtt.c        |   2 +-
>  drivers/gpu/drm/i915/i915_trace.h          |  20 ++---
>  drivers/gpu/drm/i915/intel_overlay.c       |   2 +-
>  drivers/gpu/drm/i915/intel_pm.c            |   2 +-
>  drivers/gpu/drm/i915/intel_ringbuffer.c    |  19 +++--
>  10 files changed, 103 insertions(+), 125 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index cc8afff878b1..65379474f312 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2033,11 +2033,11 @@ void i915_gem_vma_destroy(struct i915_vma *vma);
>  int __must_check i915_gem_object_pin(struct drm_i915_gem_object *obj,
>  				     struct i915_address_space *vm,
>  				     uint32_t alignment,
> -				     bool map_and_fenceable,
> -				     bool nonblocking);
> -void i915_gem_object_ggtt_unpin(struct drm_i915_gem_object *obj);
> +				     unsigned flags);
> +#define PIN_MAPPABLE 0x1
> +#define PIN_GLOBAL 0x2
> +#define PIN_NONBLOCK 0x4
>  int __must_check i915_vma_unbind(struct i915_vma *vma);
> -int __must_check i915_gem_object_ggtt_unbind(struct drm_i915_gem_object *obj);
>  int i915_gem_object_put_pages(struct drm_i915_gem_object *obj);
>  void i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv);
>  void i915_gem_release_mmap(struct drm_i915_gem_object *obj);
> @@ -2237,13 +2237,19 @@ i915_gem_obj_ggtt_size(struct drm_i915_gem_object *obj)
>  static inline int __must_check
>  i915_gem_obj_ggtt_pin(struct drm_i915_gem_object *obj,
>  		      uint32_t alignment,
> -		      bool map_and_fenceable,
> -		      bool nonblocking)
> +		      unsigned flags)
>  {
> -	return i915_gem_object_pin(obj, obj_to_ggtt(obj), alignment,
> -				   map_and_fenceable, nonblocking);
> +	return i915_gem_object_pin(obj, obj_to_ggtt(obj), alignment, flags | PIN_GLOBAL);
>  }

I'm torn as to whether or not it's okay to not pass PIN_GLOBAL in flags.

>  
> +static inline int
> +i915_gem_object_ggtt_unbind(struct drm_i915_gem_object *obj)
> +{
> +	return i915_vma_unbind(i915_gem_obj_to_ggtt(obj));
> +}
> +
> +void i915_gem_object_ggtt_unpin(struct drm_i915_gem_object *obj);
> +
>  /* i915_gem_context.c */
>  #define ctx_to_ppgtt(ctx) container_of((ctx)->vm, struct i915_hw_ppgtt, base)
>  int __must_check i915_gem_context_init(struct drm_device *dev);
> @@ -2280,8 +2286,7 @@ int __must_check i915_gem_evict_something(struct drm_device *dev,
>  					  int min_size,
>  					  unsigned alignment,
>  					  unsigned cache_level,
> -					  bool mappable,
> -					  bool nonblock);
> +					  unsigned flags);
>  int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle);
>  int i915_gem_evict_everything(struct drm_device *dev);
>  
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 656406deada0..d9acfa78d1a5 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -43,12 +43,6 @@ static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *o
>  static __must_check int
>  i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
>  			       bool readonly);
> -static __must_check int
> -i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
> -			   struct i915_address_space *vm,
> -			   unsigned alignment,
> -			   bool map_and_fenceable,
> -			   bool nonblocking);
>  static int i915_gem_phys_pwrite(struct drm_device *dev,
>  				struct drm_i915_gem_object *obj,
>  				struct drm_i915_gem_pwrite *args,
> @@ -605,7 +599,7 @@ i915_gem_gtt_pwrite_fast(struct drm_device *dev,
>  	char __user *user_data;
>  	int page_offset, page_length, ret;
>  
> -	ret = i915_gem_obj_ggtt_pin(obj, 0, true, true);
> +	ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE | PIN_NONBLOCK);
>  	if (ret)
>  		goto out;
>  
> @@ -1399,7 +1393,7 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
>  	}
>  
>  	/* Now bind it into the GTT if needed */
> -	ret = i915_gem_obj_ggtt_pin(obj,  0, true, false);
> +	ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE);
>  	if (ret)
>  		goto unlock;
>  
> @@ -2767,7 +2761,6 @@ int i915_vma_unbind(struct i915_vma *vma)
>  
>  	if (!drm_mm_node_allocated(&vma->node)) {
>  		i915_gem_vma_destroy(vma);
> -
>  		return 0;
>  	}
>  
> @@ -2819,26 +2812,6 @@ int i915_vma_unbind(struct i915_vma *vma)
>  	return 0;
>  }
>  
> -/**
> - * Unbinds an object from the global GTT aperture.
> - */
> -int
> -i915_gem_object_ggtt_unbind(struct drm_i915_gem_object *obj)
> -{
> -	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
> -	struct i915_address_space *ggtt = &dev_priv->gtt.base;
> -
> -	if (!i915_gem_obj_ggtt_bound(obj))
> -		return 0;
> -
> -	if (i915_gem_obj_to_ggtt(obj)->pin_count)
> -		return -EBUSY;
> -
> -	BUG_ON(obj->pages == NULL);
> -
> -	return i915_vma_unbind(i915_gem_obj_to_vma(obj, ggtt));
> -}
> -

My gut tells me this hunk should be in a different patch.

>  int i915_gpu_idle(struct drm_device *dev)
>  {
>  	drm_i915_private_t *dev_priv = dev->dev_private;
> @@ -3256,18 +3229,17 @@ static void i915_gem_verify_gtt(struct drm_device *dev)
>  /**
>   * Finds free space in the GTT aperture and binds the object there.
>   */
> -static int
> +static struct i915_vma *
>  i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
>  			   struct i915_address_space *vm,
>  			   unsigned alignment,
> -			   bool map_and_fenceable,
> -			   bool nonblocking)
> +			   unsigned flags)
>  {
>  	struct drm_device *dev = obj->base.dev;
>  	drm_i915_private_t *dev_priv = dev->dev_private;
>  	u32 size, fence_size, fence_alignment, unfenced_alignment;
>  	size_t gtt_max =
> -		map_and_fenceable ? dev_priv->gtt.mappable_end : vm->total;
> +		flags & PIN_MAPPABLE ? dev_priv->gtt.mappable_end : vm->total;
>  	struct i915_vma *vma;
>  	int ret;
>  
> @@ -3279,18 +3251,18 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
>  						     obj->tiling_mode, true);
>  	unfenced_alignment =
>  		i915_gem_get_gtt_alignment(dev,
> -						    obj->base.size,
> -						    obj->tiling_mode, false);
> +					   obj->base.size,
> +					   obj->tiling_mode, false);
>  
>  	if (alignment == 0)
> -		alignment = map_and_fenceable ? fence_alignment :
> +		alignment = flags & PIN_MAPPABLE ? fence_alignment :
>  						unfenced_alignment;
> -	if (map_and_fenceable && alignment & (fence_alignment - 1)) {
> +	if (flags & PIN_MAPPABLE && alignment & (fence_alignment - 1)) {
>  		DRM_ERROR("Invalid object alignment requested %u\n", alignment);
> -		return -EINVAL;
> +		return ERR_PTR(-EINVAL);
>  	}
>  
> -	size = map_and_fenceable ? fence_size : obj->base.size;
> +	size = flags & PIN_MAPPABLE ? fence_size : obj->base.size;
>  
>  	/* If the object is bigger than the entire aperture, reject it early
>  	 * before evicting everything in a vain attempt to find space.
> @@ -3298,22 +3270,20 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
>  	if (obj->base.size > gtt_max) {
>  		DRM_ERROR("Attempting to bind an object larger than the aperture: object=%zd > %s aperture=%zu\n",
>  			  obj->base.size,
> -			  map_and_fenceable ? "mappable" : "total",
> +			  flags & PIN_MAPPABLE ? "mappable" : "total",
>  			  gtt_max);
> -		return -E2BIG;
> +		return ERR_PTR(-E2BIG);
>  	}
>  
>  	ret = i915_gem_object_get_pages(obj);
>  	if (ret)
> -		return ret;
> +		return ERR_PTR(ret);
>  
>  	i915_gem_object_pin_pages(obj);
>  
>  	vma = i915_gem_obj_lookup_or_create_vma(obj, vm);
> -	if (IS_ERR(vma)) {
> -		ret = PTR_ERR(vma);
> +	if (IS_ERR(vma))
>  		goto err_unpin;
> -	}
>  
>  search_free:
>  	ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node,
> @@ -3322,9 +3292,7 @@ search_free:
>  						  DRM_MM_SEARCH_DEFAULT);
>  	if (ret) {
>  		ret = i915_gem_evict_something(dev, vm, size, alignment,
> -					       obj->cache_level,
> -					       map_and_fenceable,
> -					       nonblocking);
> +					       obj->cache_level, flags);
>  		if (ret == 0)
>  			goto search_free;
>  
> @@ -3355,19 +3323,23 @@ search_free:
>  		obj->map_and_fenceable = mappable && fenceable;
>  	}
>  
> -	WARN_ON(map_and_fenceable && !obj->map_and_fenceable);
> +	WARN_ON(flags & PIN_MAPPABLE && !obj->map_and_fenceable);
> +
> +	trace_i915_vma_bind(vma, flags);
> +	vma->bind_vma(vma, obj->cache_level,
> +		      flags & (PIN_MAPPABLE | PIN_GLOBAL) ? GLOBAL_BIND : 0);
>  
> -	trace_i915_vma_bind(vma, map_and_fenceable);
>  	i915_gem_verify_gtt(dev);
> -	return 0;
> +	return vma;
>  
>  err_remove_node:
>  	drm_mm_remove_node(&vma->node);
>  err_free_vma:
>  	i915_gem_vma_destroy(vma);
> +	vma = ERR_PTR(ret);
>  err_unpin:
>  	i915_gem_object_unpin_pages(obj);
> -	return ret;
> +	return vma;
>  }
>  
>  bool
> @@ -3559,7 +3531,9 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
>  		}
>  
>  		list_for_each_entry(vma, &obj->vma_list, vma_link)
> -			vma->bind_vma(vma, cache_level, 0);
> +			if (drm_mm_node_allocated(&vma->node))
> +				vma->bind_vma(vma, cache_level,
> +					      obj->has_global_gtt_mapping ? GLOBAL_BIND : 0);

will if (vma->node.color != cache_level) work here?

>  	}
>  
>  	list_for_each_entry(vma, &obj->vma_list, vma_link)
> @@ -3728,7 +3702,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
>  	 * (e.g. libkms for the bootup splash), we have to ensure that we
>  	 * always use map_and_fenceable for all scanout buffers.
>  	 */
> -	ret = i915_gem_obj_ggtt_pin(obj, alignment, true, false);
> +	ret = i915_gem_obj_ggtt_pin(obj, alignment, PIN_MAPPABLE);
>  	if (ret)
>  		goto err_unpin_display;
>  
> @@ -3884,52 +3858,49 @@ int
>  i915_gem_object_pin(struct drm_i915_gem_object *obj,
>  		    struct i915_address_space *vm,
>  		    uint32_t alignment,
> -		    bool map_and_fenceable,
> -		    bool nonblocking)
> +		    unsigned flags)
>  {
> -	const u32 flags = map_and_fenceable ? GLOBAL_BIND : 0;
>  	struct i915_vma *vma;
>  	int ret;
>  
> -	WARN_ON(map_and_fenceable && !i915_is_ggtt(vm));
> +	if (WARN_ON(flags & (PIN_GLOBAL | PIN_MAPPABLE) && !i915_is_ggtt(vm)))
> +		return -EINVAL;
>  
>  	vma = i915_gem_obj_to_vma(obj, vm);
> -
>  	if (vma) {
>  		if (WARN_ON(vma->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT))
>  			return -EBUSY;
>  
>  		if ((alignment &&
>  		     vma->node.start & (alignment - 1)) ||
> -		    (map_and_fenceable && !obj->map_and_fenceable)) {
> +		    (flags & PIN_MAPPABLE && !obj->map_and_fenceable)) {
>  			WARN(vma->pin_count,
>  			     "bo is already pinned with incorrect alignment:"
>  			     " offset=%lx, req.alignment=%x, req.map_and_fenceable=%d,"
>  			     " obj->map_and_fenceable=%d\n",
>  			     i915_gem_obj_offset(obj, vm), alignment,
> -			     map_and_fenceable,
> +			     flags & PIN_MAPPABLE,
>  			     obj->map_and_fenceable);
>  			ret = i915_vma_unbind(vma);
>  			if (ret)
>  				return ret;
> +
> +			vma = NULL;
>  		}
>  	}
>  
> -	if (!i915_gem_obj_bound(obj, vm)) {
> -		ret = i915_gem_object_bind_to_vm(obj, vm, alignment,
> -						 map_and_fenceable,
> -						 nonblocking);
> -		if (ret)
> -			return ret;
> -
> +	if (vma == NULL || !drm_mm_node_allocated(&vma->node)) {
> +		vma = i915_gem_object_bind_to_vm(obj, vm, alignment, flags);
> +		if (IS_ERR(vma))
> +			return PTR_ERR(vma);
>  	}
>  
> -	vma = i915_gem_obj_to_vma(obj, vm);
> -
> -	vma->bind_vma(vma, obj->cache_level, flags);
> +	if (flags & PIN_GLOBAL && !obj->has_global_gtt_mapping)
> +		vma->bind_vma(vma, obj->cache_level, GLOBAL_BIND);

For some reason I remember seeing a regression when I tried this. It
should be safe to do this without most of the other changes - now I
wonder what I had seen.

>  
> -	i915_gem_obj_to_vma(obj, vm)->pin_count++;
> -	obj->pin_mappable |= map_and_fenceable;
> +	vma->pin_count++;
> +	if (flags & PIN_MAPPABLE)
> +		obj->pin_mappable |= true;

just '= true'

>  
>  	return 0;
>  }
> @@ -3987,7 +3958,7 @@ i915_gem_pin_ioctl(struct drm_device *dev, void *data,
>  	}
>  
>  	if (obj->user_pin_count == 0) {
> -		ret = i915_gem_obj_ggtt_pin(obj, args->alignment, true, false);
> +		ret = i915_gem_obj_ggtt_pin(obj, args->alignment, PIN_MAPPABLE);
>  		if (ret)
>  			goto out;
>  	}
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index ebe0f67eac08..b428b101f9d0 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -282,8 +282,7 @@ i915_gem_create_context(struct drm_device *dev,
>  			 * context.
>  			 */
>  			ret = i915_gem_obj_ggtt_pin(ctx->obj,
> -						    get_context_alignment(dev),
> -						    false, false);
> +						    get_context_alignment(dev), 0);
>  			if (ret) {
>  				DRM_DEBUG_DRIVER("Couldn't pin %d\n", ret);
>  				goto err_destroy;
> @@ -336,8 +335,7 @@ void i915_gem_context_reset(struct drm_device *dev)
>  
>  		if (i == RCS) {
>  			WARN_ON(i915_gem_obj_ggtt_pin(dctx->obj,
> -						      get_context_alignment(dev),
> -						      false, false));
> +						      get_context_alignment(dev), 0));
>  			/* Fake a finish/inactive */
>  			dctx->obj->base.write_domain = 0;
>  			dctx->obj->active = 0;
> @@ -600,8 +598,7 @@ static int do_switch(struct intel_ring_buffer *ring,
>  	/* Trying to pin first makes error handling easier. */
>  	if (ring == &dev_priv->ring[RCS]) {
>  		ret = i915_gem_obj_ggtt_pin(to->obj,
> -					    get_context_alignment(ring->dev),
> -					    false, false);
> +					    get_context_alignment(ring->dev), 0);
>  		if (ret)
>  			return ret;
>  	}
> diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
> index 54413ed46a46..fed72a67afd2 100644
> --- a/drivers/gpu/drm/i915/i915_gem_evict.c
> +++ b/drivers/gpu/drm/i915/i915_gem_evict.c
> @@ -48,14 +48,14 @@ mark_free(struct i915_vma *vma, struct list_head *unwind)
>  int
>  i915_gem_evict_something(struct drm_device *dev, struct i915_address_space *vm,
>  			 int min_size, unsigned alignment, unsigned cache_level,
> -			 bool mappable, bool nonblocking)
> +			 unsigned flags)
>  {
>  	drm_i915_private_t *dev_priv = dev->dev_private;
>  	struct list_head eviction_list, unwind_list;
>  	struct i915_vma *vma;
>  	int ret = 0;
>  
> -	trace_i915_gem_evict(dev, min_size, alignment, mappable);
> +	trace_i915_gem_evict(dev, min_size, alignment, flags);
>  
>  	/*
>  	 * The goal is to evict objects and amalgamate space in LRU order.
> @@ -81,7 +81,7 @@ i915_gem_evict_something(struct drm_device *dev, struct i915_address_space *vm,
>  	 */
>  
>  	INIT_LIST_HEAD(&unwind_list);
> -	if (mappable) {
> +	if (flags & PIN_MAPPABLE) {
>  		BUG_ON(!i915_is_ggtt(vm));
>  		drm_mm_init_scan_with_range(&vm->mm, min_size,
>  					    alignment, cache_level, 0,
> @@ -96,7 +96,7 @@ search_again:
>  			goto found;
>  	}
>  
> -	if (nonblocking)
> +	if (flags & PIN_NONBLOCK)
>  		goto none;
>  
>  	/* Now merge in the soon-to-be-expired objects... */
> @@ -120,13 +120,13 @@ none:
>  	/* Can we unpin some objects such as idle hw contents,
>  	 * or pending flips?
>  	 */
> -	ret = nonblocking ? -ENOSPC : i915_gpu_idle(dev);
> +	ret = flags & PIN_NONBLOCK ? -ENOSPC : i915_gpu_idle(dev);
>  	if (ret)
>  		return ret;
>  
>  	/* Only idle the GPU and repeat the search once */
>  	i915_gem_retire_requests(dev);
> -	nonblocking = true;
> +	flags |= PIN_NONBLOCK;
>  	goto search_again;
>  
>  found:
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index bbff8f9b6549..cd09d42f507b 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -544,19 +544,23 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
>  	struct drm_i915_gem_object *obj = vma->obj;
>  	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
>  	bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
> -	bool need_fence, need_mappable;
> -	u32 flags = (entry->flags & EXEC_OBJECT_NEEDS_GTT) &&
> -		!vma->obj->has_global_gtt_mapping ? GLOBAL_BIND : 0;
> +	bool need_fence;
> +	unsigned flags;
>  	int ret;
>  
> +	flags = 0;
> +
>  	need_fence =
>  		has_fenced_gpu_access &&
>  		entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
>  		obj->tiling_mode != I915_TILING_NONE;
> -	need_mappable = need_fence || need_reloc_mappable(vma);
> +	if (need_fence || need_reloc_mappable(vma))
> +		flags |= PIN_MAPPABLE;
>  
> -	ret = i915_gem_object_pin(obj, vma->vm, entry->alignment, need_mappable,
> -				  false);
> +	if (entry->flags & EXEC_OBJECT_NEEDS_GTT)
> +		flags |= PIN_GLOBAL;
> +
> +	ret = i915_gem_object_pin(obj, vma->vm, entry->alignment, flags);
>  	if (ret)
>  		return ret;
>  
> @@ -585,8 +589,6 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
>  		obj->base.pending_write_domain = I915_GEM_DOMAIN_RENDER;
>  	}
>  
> -	vma->bind_vma(vma, obj->cache_level, flags);
> -
>  	return 0;
>  }
>  
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 998f9a0b322a..d113eb5e2f5b 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -865,7 +865,7 @@ alloc:
>  	if (ret == -ENOSPC && !retried) {
>  		ret = i915_gem_evict_something(dev, &dev_priv->gtt.base,
>  					       GEN6_PD_SIZE, GEN6_PD_ALIGN,
> -					       I915_CACHE_NONE, false, true);
> +					       I915_CACHE_NONE, 0);

You sure you want to stick this behavioral change in here?

>  		if (ret)
>  			return ret;
>  
> diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
> index 6e580c98dede..b95a380958db 100644
> --- a/drivers/gpu/drm/i915/i915_trace.h
> +++ b/drivers/gpu/drm/i915/i915_trace.h
> @@ -34,15 +34,15 @@ TRACE_EVENT(i915_gem_object_create,
>  );
>  
>  TRACE_EVENT(i915_vma_bind,
> -	    TP_PROTO(struct i915_vma *vma, bool mappable),
> -	    TP_ARGS(vma, mappable),
> +	    TP_PROTO(struct i915_vma *vma, unsigned flags),
> +	    TP_ARGS(vma, flags),
>  
>  	    TP_STRUCT__entry(
>  			     __field(struct drm_i915_gem_object *, obj)
>  			     __field(struct i915_address_space *, vm)
>  			     __field(u32, offset)
>  			     __field(u32, size)
> -			     __field(bool, mappable)
> +			     __field(unsigned, flags)
>  			     ),
>  
>  	    TP_fast_assign(
> @@ -50,12 +50,12 @@ TRACE_EVENT(i915_vma_bind,
>  			   __entry->vm = vma->vm;
>  			   __entry->offset = vma->node.start;
>  			   __entry->size = vma->node.size;
> -			   __entry->mappable = mappable;
> +			   __entry->flags = flags;
>  			   ),
>  
>  	    TP_printk("obj=%p, offset=%08x size=%x%s vm=%p",
>  		      __entry->obj, __entry->offset, __entry->size,
> -		      __entry->mappable ? ", mappable" : "",
> +		      __entry->flags & PIN_MAPPABLE ? ", mappable" : "",
>  		      __entry->vm)
>  );
>  
> @@ -196,26 +196,26 @@ DEFINE_EVENT(i915_gem_object, i915_gem_object_destroy,
>  );
>  
>  TRACE_EVENT(i915_gem_evict,
> -	    TP_PROTO(struct drm_device *dev, u32 size, u32 align, bool mappable),
> -	    TP_ARGS(dev, size, align, mappable),
> +	    TP_PROTO(struct drm_device *dev, u32 size, u32 align, unsigned flags),
> +	    TP_ARGS(dev, size, align, flags),
>  
>  	    TP_STRUCT__entry(
>  			     __field(u32, dev)
>  			     __field(u32, size)
>  			     __field(u32, align)
> -			     __field(bool, mappable)
> +			     __field(unsigned, flags)
>  			    ),
>  
>  	    TP_fast_assign(
>  			   __entry->dev = dev->primary->index;
>  			   __entry->size = size;
>  			   __entry->align = align;
> -			   __entry->mappable = mappable;
> +			   __entry->flags = flags;
>  			  ),
>  
>  	    TP_printk("dev=%d, size=%d, align=%d %s",
>  		      __entry->dev, __entry->size, __entry->align,
> -		      __entry->mappable ? ", mappable" : "")
> +		      __entry->flags & PIN_MAPPABLE ? ", mappable" : "")
>  );
>  
>  TRACE_EVENT(i915_gem_evict_everything,
> diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c
> index a1397b1646af..1d46608bbd8b 100644
> --- a/drivers/gpu/drm/i915/intel_overlay.c
> +++ b/drivers/gpu/drm/i915/intel_overlay.c
> @@ -1349,7 +1349,7 @@ void intel_setup_overlay(struct drm_device *dev)
>  		}
>  		overlay->flip_addr = reg_bo->phys_obj->handle->busaddr;
>  	} else {
> -		ret = i915_gem_obj_ggtt_pin(reg_bo, PAGE_SIZE, true, false);
> +		ret = i915_gem_obj_ggtt_pin(reg_bo, PAGE_SIZE, PIN_MAPPABLE);
>  		if (ret) {
>  			DRM_ERROR("failed to pin overlay register bo\n");
>  			goto out_free_bo;
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index 469170c11bb4..e29804eda113 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -2753,7 +2753,7 @@ intel_alloc_context_page(struct drm_device *dev)
>  		return NULL;
>  	}
>  
> -	ret = i915_gem_obj_ggtt_pin(ctx, 4096, true, false);
> +	ret = i915_gem_obj_ggtt_pin(ctx, 4096, 0);

and here?

>  	if (ret) {
>  		DRM_ERROR("failed to pin power context: %d\n", ret);
>  		goto err_unref;
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index 442c9a66ebab..5744841669e4 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -531,9 +531,11 @@ init_pipe_control(struct intel_ring_buffer *ring)
>  		goto err;
>  	}
>  
> -	i915_gem_object_set_cache_level(ring->scratch.obj, I915_CACHE_LLC);
> +	ret = i915_gem_object_set_cache_level(ring->scratch.obj, I915_CACHE_LLC);
> +	if (ret)
> +		goto err_unref;
>  
> -	ret = i915_gem_obj_ggtt_pin(ring->scratch.obj, 4096, true, false);
> +	ret = i915_gem_obj_ggtt_pin(ring->scratch.obj, 4096, 0);
>  	if (ret)
>  		goto err_unref;

and here?

>  
> @@ -1269,12 +1271,13 @@ static int init_status_page(struct intel_ring_buffer *ring)
>  		goto err;
>  	}
>  
> -	i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
> +	ret = i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
> +	if (ret)
> +		goto err_unref;
>  
> -	ret = i915_gem_obj_ggtt_pin(obj, 4096, true, false);
> -	if (ret != 0) {
> +	ret = i915_gem_obj_ggtt_pin(obj, 4096, 0);
> +	if (ret)
>  		goto err_unref;
> -	}

and here?

>  
>  	ring->status_page.gfx_addr = i915_gem_obj_ggtt_offset(obj);
>  	ring->status_page.page_addr = kmap(sg_page(obj->pages->sgl));
> @@ -1354,7 +1357,7 @@ static int intel_init_ring_buffer(struct drm_device *dev,
>  
>  	ring->obj = obj;
>  
> -	ret = i915_gem_obj_ggtt_pin(obj, PAGE_SIZE, true, false);
> +	ret = i915_gem_obj_ggtt_pin(obj, PAGE_SIZE, PIN_MAPPABLE);
>  	if (ret)
>  		goto err_unref;
>  
> @@ -1927,7 +1930,7 @@ int intel_init_render_ring_buffer(struct drm_device *dev)
>  			return -ENOMEM;
>  		}
>  
> -		ret = i915_gem_obj_ggtt_pin(obj, 0, true, false);
> +		ret = i915_gem_obj_ggtt_pin(obj, 0, 0);

and here?

>  		if (ret != 0) {
>  			drm_gem_object_unreference(&obj->base);
>  			DRM_ERROR("Failed to ping batch bo\n");
> -- 
> 1.8.5.2
> 

I think it would have been better to do a prep patch with all m&f ->
flags changes - and then the actual fixes. And all the changes for
the mappable/fenceable attributes in a separate patch as well.

Past those relatively minor suggestions, lgtm. I'd like to see the patch
merged and run through as much QA/use as possible.

In its current state:
Acked-by: Ben Widawsky <ben at bwidawsk.net>

If you want to break it up a bit, I'll try to turn it into an r-b

-- 
Ben Widawsky, Intel Open Source Technology Center