[Intel-gfx] [PATCH v3] drm/i915: Split out i915_vma.c
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Fri Nov 11 09:51:54 UTC 2016
On 11/11/2016 08:59, Joonas Lahtinen wrote:
> As a side product, I had to split out two other files:
> - i915_gem_fence_reg.h
> - i915_gem_object.h (only the parts that needed immediate untangling)
>
> I tried to move the code in chunks as big as possible, to make review
> easier. i915_vma_compare was moved to a header temporarily.
>
> v2:
> - Use i915_gem_fence_reg.{c,h}
>
> v3:
> - Rebased
>
> Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> Cc: Chris Wilson <chris at chris-wilson.co.uk>
> Acked-by: Chris Wilson <chris at chris-wilson.co.uk>
> Signed-off-by: Joonas Lahtinen <joonas.lahtinen at linux.intel.com>
> ---
> drivers/gpu/drm/i915/Makefile | 3 +-
> drivers/gpu/drm/i915/i915_drv.h | 385 +---------------
> drivers/gpu/drm/i915/i915_gem.c | 371 ----------------
> drivers/gpu/drm/i915/i915_gem_fence.c | 716 ------------------------------
> drivers/gpu/drm/i915/i915_gem_fence_reg.c | 716 ++++++++++++++++++++++++++++++
> drivers/gpu/drm/i915/i915_gem_fence_reg.h | 51 +++
> drivers/gpu/drm/i915/i915_gem_gtt.c | 277 +-----------
> drivers/gpu/drm/i915/i915_gem_gtt.h | 225 +---------
> drivers/gpu/drm/i915/i915_gem_object.h | 337 ++++++++++++++
> drivers/gpu/drm/i915/i915_gem_request.h | 3 +
> drivers/gpu/drm/i915/i915_vma.c | 650 +++++++++++++++++++++++++++
> drivers/gpu/drm/i915/i915_vma.h | 342 ++++++++++++++
> 12 files changed, 2120 insertions(+), 1956 deletions(-)
> delete mode 100644 drivers/gpu/drm/i915/i915_gem_fence.c
> create mode 100644 drivers/gpu/drm/i915/i915_gem_fence_reg.c
> create mode 100644 drivers/gpu/drm/i915/i915_gem_fence_reg.h
> create mode 100644 drivers/gpu/drm/i915/i915_gem_object.h
> create mode 100644 drivers/gpu/drm/i915/i915_vma.c
> create mode 100644 drivers/gpu/drm/i915/i915_vma.h
>
> diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
> index 0857e50..3dea46a 100644
> --- a/drivers/gpu/drm/i915/Makefile
> +++ b/drivers/gpu/drm/i915/Makefile
> @@ -33,7 +33,7 @@ i915-y += i915_cmd_parser.o \
> i915_gem_dmabuf.o \
> i915_gem_evict.o \
> i915_gem_execbuffer.o \
> - i915_gem_fence.o \
> + i915_gem_fence_reg.o \
> i915_gem_gtt.o \
> i915_gem_internal.o \
> i915_gem.o \
> @@ -45,6 +45,7 @@ i915-y += i915_cmd_parser.o \
> i915_gem_timeline.o \
> i915_gem_userptr.o \
> i915_trace_points.o \
> + i915_vma.o \
> intel_breadcrumbs.o \
> intel_engine_cs.o \
> intel_hangcheck.o \
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 30777de..ccd0361 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -60,11 +60,15 @@
> #include "intel_ringbuffer.h"
>
> #include "i915_gem.h"
> +#include "i915_gem_fence_reg.h"
> +#include "i915_gem_object.h"
> #include "i915_gem_gtt.h"
> #include "i915_gem_render_state.h"
> #include "i915_gem_request.h"
> #include "i915_gem_timeline.h"
>
> +#include "i915_vma.h"
> +
> #include "intel_gvt.h"
>
> /* General customization:
> @@ -459,23 +463,6 @@ struct intel_opregion {
> struct intel_overlay;
> struct intel_overlay_error_state;
>
> -struct drm_i915_fence_reg {
> - struct list_head link;
> - struct drm_i915_private *i915;
> - struct i915_vma *vma;
> - int pin_count;
> - int id;
> - /**
> - * Whether the tiling parameters for the currently
> - * associated fence register have changed. Note that
> - * for the purposes of tracking tiling changes we also
> - * treat the unfenced register, the register slot that
> - * the object occupies whilst it executes a fenced
> - * command (such as BLT on gen2/3), as a "fence".
> - */
> - bool dirty;
> -};
> -
> struct sdvo_device_mapping {
> u8 initialized;
> u8 dvo_port;
> @@ -2179,31 +2166,6 @@ enum hdmi_force_audio {
>
> #define I915_GTT_OFFSET_NONE ((u32)-1)
>
> -struct drm_i915_gem_object_ops {
> - unsigned int flags;
> -#define I915_GEM_OBJECT_HAS_STRUCT_PAGE 0x1
> -#define I915_GEM_OBJECT_IS_SHRINKABLE 0x2
> -
> - /* Interface between the GEM object and its backing storage.
> - * get_pages() is called once prior to the use of the associated set
> - * of pages before to binding them into the GTT, and put_pages() is
> - * called after we no longer need them. As we expect there to be
> - * associated cost with migrating pages between the backing storage
> - * and making them available for the GPU (e.g. clflush), we may hold
> - * onto the pages after they are no longer referenced by the GPU
> - * in case they may be used again shortly (for example migrating the
> - * pages to a different memory domain within the GTT). put_pages()
> - * will therefore most likely be called when the object itself is
> - * being released or under memory pressure (where we attempt to
> - * reap pages for the shrinker).
> - */
> - struct sg_table *(*get_pages)(struct drm_i915_gem_object *);
> - void (*put_pages)(struct drm_i915_gem_object *, struct sg_table *);
> -
> - int (*dmabuf_export)(struct drm_i915_gem_object *);
> - void (*release)(struct drm_i915_gem_object *);
> -};
> -
> /*
> * Frontbuffer tracking bits. Set in obj->frontbuffer_bits while a gem bo is
> * considered to be the frontbuffer for the given plane interface-wise. This
> @@ -2225,292 +2187,6 @@ struct drm_i915_gem_object_ops {
> #define INTEL_FRONTBUFFER_ALL_MASK(pipe) \
> (0xff << (INTEL_FRONTBUFFER_BITS_PER_PIPE * (pipe)))
>
> -struct drm_i915_gem_object {
> - struct drm_gem_object base;
> -
> - const struct drm_i915_gem_object_ops *ops;
> -
> - /** List of VMAs backed by this object */
> - struct list_head vma_list;
> - struct rb_root vma_tree;
> -
> - /** Stolen memory for this object, instead of being backed by shmem. */
> - struct drm_mm_node *stolen;
> - struct list_head global_link;
> - union {
> - struct rcu_head rcu;
> - struct llist_node freed;
> - };
> -
> - /**
> - * Whether the object is currently in the GGTT mmap.
> - */
> - struct list_head userfault_link;
> -
> - /** Used in execbuf to temporarily hold a ref */
> - struct list_head obj_exec_link;
> -
> - struct list_head batch_pool_link;
> -
> - unsigned long flags;
> -
> - /**
> - * Have we taken a reference for the object for incomplete GPU
> - * activity?
> - */
> -#define I915_BO_ACTIVE_REF 0
> -
> - /*
> - * Is the object to be mapped as read-only to the GPU
> - * Only honoured if hardware has relevant pte bit
> - */
> - unsigned long gt_ro:1;
> - unsigned int cache_level:3;
> - unsigned int cache_dirty:1;
> -
> - atomic_t frontbuffer_bits;
> - unsigned int frontbuffer_ggtt_origin; /* write once */
> -
> - /** Current tiling stride for the object, if it's tiled. */
> - unsigned int tiling_and_stride;
> -#define FENCE_MINIMUM_STRIDE 128 /* See i915_tiling_ok() */
> -#define TILING_MASK (FENCE_MINIMUM_STRIDE-1)
> -#define STRIDE_MASK (~TILING_MASK)
> -
> - /** Count of VMA actually bound by this object */
> - unsigned int bind_count;
> - unsigned int active_count;
> - unsigned int pin_display;
> -
> - struct {
> - struct mutex lock; /* protects the pages and their use */
> - atomic_t pages_pin_count;
> -
> - struct sg_table *pages;
> - void *mapping;
> -
> - struct i915_gem_object_page_iter {
> - struct scatterlist *sg_pos;
> - unsigned int sg_idx; /* in pages, but 32bit eek! */
> -
> - struct radix_tree_root radix;
> - struct mutex lock; /* protects this cache */
> - } get_page;
> -
> - /**
> - * Advice: are the backing pages purgeable?
> - */
> - unsigned int madv:2;
> -
> - /**
> - * This is set if the object has been written to since the
> - * pages were last acquired.
> - */
> - bool dirty:1;
> -
> - /**
> - * This is set if the object has been pinned due to unknown
> - * swizzling.
> - */
> - bool quirked:1;
> - } mm;
> -
> - /** Breadcrumb of last rendering to the buffer.
> - * There can only be one writer, but we allow for multiple readers.
> - * If there is a writer that necessarily implies that all other
> - * read requests are complete - but we may only be lazily clearing
> - * the read requests. A read request is naturally the most recent
> - * request on a ring, so we may have two different write and read
> - * requests on one ring where the write request is older than the
> - * read request. This allows for the CPU to read from an active
> - * buffer by only waiting for the write to complete.
> - */
> - struct reservation_object *resv;
> -
> - /** References from framebuffers, locks out tiling changes. */
> - unsigned long framebuffer_references;
> -
> - /** Record of address bit 17 of each page at last unbind. */
> - unsigned long *bit_17;
> -
> - struct i915_gem_userptr {
> - uintptr_t ptr;
> - unsigned read_only :1;
> -
> - struct i915_mm_struct *mm;
> - struct i915_mmu_object *mmu_object;
> - struct work_struct *work;
> - } userptr;
> -
> - /** for phys allocated objects */
> - struct drm_dma_handle *phys_handle;
> -
> - struct reservation_object __builtin_resv;
> -};
> -
> -static inline struct drm_i915_gem_object *
> -to_intel_bo(struct drm_gem_object *gem)
> -{
> - /* Assert that to_intel_bo(NULL) == NULL */
> - BUILD_BUG_ON(offsetof(struct drm_i915_gem_object, base));
> -
> - return container_of(gem, struct drm_i915_gem_object, base);
> -}
> -
> -/**
> - * i915_gem_object_lookup_rcu - look up a temporary GEM object from its handle
> - * @filp: DRM file private date
> - * @handle: userspace handle
> - *
> - * Returns:
> - *
> - * A pointer to the object named by the handle if such exists on @filp, NULL
> - * otherwise. This object is only valid whilst under the RCU read lock, and
> - * note carefully the object may be in the process of being destroyed.
> - */
> -static inline struct drm_i915_gem_object *
> -i915_gem_object_lookup_rcu(struct drm_file *file, u32 handle)
> -{
> -#ifdef CONFIG_LOCKDEP
> - WARN_ON(debug_locks && !lock_is_held(&rcu_lock_map));
> -#endif
> - return idr_find(&file->object_idr, handle);
> -}
> -
> -static inline struct drm_i915_gem_object *
> -i915_gem_object_lookup(struct drm_file *file, u32 handle)
> -{
> - struct drm_i915_gem_object *obj;
> -
> - rcu_read_lock();
> - obj = i915_gem_object_lookup_rcu(file, handle);
> - if (obj && !kref_get_unless_zero(&obj->base.refcount))
> - obj = NULL;
> - rcu_read_unlock();
> -
> - return obj;
> -}
> -
> -__deprecated
> -extern struct drm_gem_object *
> -drm_gem_object_lookup(struct drm_file *file, u32 handle);
> -
> -__attribute__((nonnull))
> -static inline struct drm_i915_gem_object *
> -i915_gem_object_get(struct drm_i915_gem_object *obj)
> -{
> - drm_gem_object_reference(&obj->base);
> - return obj;
> -}
> -
> -__deprecated
> -extern void drm_gem_object_reference(struct drm_gem_object *);
> -
> -__attribute__((nonnull))
> -static inline void
> -i915_gem_object_put(struct drm_i915_gem_object *obj)
> -{
> - __drm_gem_object_unreference(&obj->base);
> -}
> -
> -__deprecated
> -extern void drm_gem_object_unreference(struct drm_gem_object *);
> -
> -__deprecated
> -extern void drm_gem_object_unreference_unlocked(struct drm_gem_object *);
> -
> -static inline bool
> -i915_gem_object_is_dead(const struct drm_i915_gem_object *obj)
> -{
> - return atomic_read(&obj->base.refcount.refcount) == 0;
> -}
> -
> -static inline bool
> -i915_gem_object_has_struct_page(const struct drm_i915_gem_object *obj)
> -{
> - return obj->ops->flags & I915_GEM_OBJECT_HAS_STRUCT_PAGE;
> -}
> -
> -static inline bool
> -i915_gem_object_is_shrinkable(const struct drm_i915_gem_object *obj)
> -{
> - return obj->ops->flags & I915_GEM_OBJECT_IS_SHRINKABLE;
> -}
> -
> -static inline bool
> -i915_gem_object_is_active(const struct drm_i915_gem_object *obj)
> -{
> - return obj->active_count;
> -}
> -
> -static inline bool
> -i915_gem_object_has_active_reference(const struct drm_i915_gem_object *obj)
> -{
> - return test_bit(I915_BO_ACTIVE_REF, &obj->flags);
> -}
> -
> -static inline void
> -i915_gem_object_set_active_reference(struct drm_i915_gem_object *obj)
> -{
> - lockdep_assert_held(&obj->base.dev->struct_mutex);
> - __set_bit(I915_BO_ACTIVE_REF, &obj->flags);
> -}
> -
> -static inline void
> -i915_gem_object_clear_active_reference(struct drm_i915_gem_object *obj)
> -{
> - lockdep_assert_held(&obj->base.dev->struct_mutex);
> - __clear_bit(I915_BO_ACTIVE_REF, &obj->flags);
> -}
> -
> -void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj);
> -
> -static inline unsigned int
> -i915_gem_object_get_tiling(struct drm_i915_gem_object *obj)
> -{
> - return obj->tiling_and_stride & TILING_MASK;
> -}
> -
> -static inline bool
> -i915_gem_object_is_tiled(struct drm_i915_gem_object *obj)
> -{
> - return i915_gem_object_get_tiling(obj) != I915_TILING_NONE;
> -}
> -
> -static inline unsigned int
> -i915_gem_object_get_stride(struct drm_i915_gem_object *obj)
> -{
> - return obj->tiling_and_stride & STRIDE_MASK;
> -}
> -
> -static inline struct intel_engine_cs *
> -i915_gem_object_last_write_engine(struct drm_i915_gem_object *obj)
> -{
> - struct intel_engine_cs *engine = NULL;
> - struct dma_fence *fence;
> -
> - rcu_read_lock();
> - fence = reservation_object_get_excl_rcu(obj->resv);
> - rcu_read_unlock();
> -
> - if (fence && dma_fence_is_i915(fence) && !dma_fence_is_signaled(fence))
> - engine = to_request(fence)->engine;
> - dma_fence_put(fence);
> -
> - return engine;
> -}
> -
> -static inline struct i915_vma *i915_vma_get(struct i915_vma *vma)
> -{
> - i915_gem_object_get(vma->obj);
> - return vma;
> -}
> -
> -static inline void i915_vma_put(struct i915_vma *vma)
> -{
> - i915_gem_object_put(vma->obj);
> -}
> -
> /*
> * Optimised SGL iterator for GEM objects
> */
> @@ -3222,13 +2898,6 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
> u64 alignment,
> u64 flags);
>
> -int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level,
> - u32 flags);
> -void __i915_vma_set_map_and_fenceable(struct i915_vma *vma);
> -int __must_check i915_vma_unbind(struct i915_vma *vma);
> -void i915_vma_close(struct i915_vma *vma);
> -void i915_vma_destroy(struct i915_vma *vma);
> -
> int i915_gem_object_unbind(struct drm_i915_gem_object *obj);
> void i915_gem_release_mmap(struct drm_i915_gem_object *obj);
>
> @@ -3478,54 +3147,10 @@ i915_gem_object_ggtt_offset(struct drm_i915_gem_object *o,
> return i915_ggtt_offset(i915_gem_object_to_ggtt(o, view));
> }
>
> -/* i915_gem_fence.c */
> +/* i915_gem_fence_reg.c */
> int __must_check i915_vma_get_fence(struct i915_vma *vma);
> int __must_check i915_vma_put_fence(struct i915_vma *vma);
>
> -/**
> - * i915_vma_pin_fence - pin fencing state
> - * @vma: vma to pin fencing for
> - *
> - * This pins the fencing state (whether tiled or untiled) to make sure the
> - * vma (and its object) is ready to be used as a scanout target. Fencing
> - * status must be synchronize first by calling i915_vma_get_fence():
> - *
> - * The resulting fence pin reference must be released again with
> - * i915_vma_unpin_fence().
> - *
> - * Returns:
> - *
> - * True if the vma has a fence, false otherwise.
> - */
> -static inline bool
> -i915_vma_pin_fence(struct i915_vma *vma)
> -{
> - lockdep_assert_held(&vma->vm->dev->struct_mutex);
> - if (vma->fence) {
> - vma->fence->pin_count++;
> - return true;
> - } else
> - return false;
> -}
> -
> -/**
> - * i915_vma_unpin_fence - unpin fencing state
> - * @vma: vma to unpin fencing for
> - *
> - * This releases the fence pin reference acquired through
> - * i915_vma_pin_fence. It will handle both objects with and without an
> - * attached fence correctly, callers do not need to distinguish this.
> - */
> -static inline void
> -i915_vma_unpin_fence(struct i915_vma *vma)
> -{
> - lockdep_assert_held(&vma->vm->dev->struct_mutex);
> - if (vma->fence) {
> - GEM_BUG_ON(vma->fence->pin_count <= 0);
> - vma->fence->pin_count--;
> - }
> -}
> -
> void i915_gem_restore_fences(struct drm_device *dev);
>
> void i915_gem_detect_bit_6_swizzle(struct drm_device *dev);
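
For anyone following the header shuffle above: the kernel-doc being moved
spells out how the relocated fence helpers pair up, so here is a minimal usage
sketch (not code from this patch; the helper name is made up, and the caller is
assumed to hold struct_mutex plus a runtime PM wakeref, as the comments
require):

static int example_access_through_fence(struct i915_vma *vma)
{
	int err;

	/* Synchronise fencing state first; may allocate or steal a fence reg. */
	err = i915_vma_get_fence(vma);
	if (err)
		return err;

	/* Pin the fencing state for as long as the fenced range is in use. */
	if (i915_vma_pin_fence(vma)) {
		/* ... detiled access through the GTT mapping goes here ... */
		i915_vma_unpin_fence(vma);
	}

	return 0;
}
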
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 1c20edb..d51fb5d 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -2919,117 +2919,6 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
> return ret;
> }
>
> -static void __i915_vma_iounmap(struct i915_vma *vma)
> -{
> - GEM_BUG_ON(i915_vma_is_pinned(vma));
> -
> - if (vma->iomap == NULL)
> - return;
> -
> - io_mapping_unmap(vma->iomap);
> - vma->iomap = NULL;
> -}
> -
> -int i915_vma_unbind(struct i915_vma *vma)
> -{
> - struct drm_i915_gem_object *obj = vma->obj;
> - unsigned long active;
> - int ret;
> -
> - lockdep_assert_held(&obj->base.dev->struct_mutex);
> -
> - /* First wait upon any activity as retiring the request may
> - * have side-effects such as unpinning or even unbinding this vma.
> - */
> - active = i915_vma_get_active(vma);
> - if (active) {
> - int idx;
> -
> - /* When a closed VMA is retired, it is unbound - eek.
> - * In order to prevent it from being recursively closed,
> - * take a pin on the vma so that the second unbind is
> - * aborted.
> - *
> - * Even more scary is that the retire callback may free
> - * the object (last active vma). To prevent the explosion
> - * we defer the actual object free to a worker that can
> - * only proceed once it acquires the struct_mutex (which
> - * we currently hold, therefore it cannot free this object
> - * before we are finished).
> - */
> - __i915_vma_pin(vma);
> -
> - for_each_active(active, idx) {
> - ret = i915_gem_active_retire(&vma->last_read[idx],
> - &vma->vm->dev->struct_mutex);
> - if (ret)
> - break;
> - }
> -
> - __i915_vma_unpin(vma);
> - if (ret)
> - return ret;
> -
> - GEM_BUG_ON(i915_vma_is_active(vma));
> - }
> -
> - if (i915_vma_is_pinned(vma))
> - return -EBUSY;
> -
> - if (!drm_mm_node_allocated(&vma->node))
> - goto destroy;
> -
> - GEM_BUG_ON(obj->bind_count == 0);
> - GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
> -
> - if (i915_vma_is_map_and_fenceable(vma)) {
> - /* release the fence reg _after_ flushing */
> - ret = i915_vma_put_fence(vma);
> - if (ret)
> - return ret;
> -
> - /* Force a pagefault for domain tracking on next user access */
> - i915_gem_release_mmap(obj);
> -
> - __i915_vma_iounmap(vma);
> - vma->flags &= ~I915_VMA_CAN_FENCE;
> - }
> -
> - if (likely(!vma->vm->closed)) {
> - trace_i915_vma_unbind(vma);
> - vma->vm->unbind_vma(vma);
> - }
> - vma->flags &= ~(I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND);
> -
> - drm_mm_remove_node(&vma->node);
> - list_move_tail(&vma->vm_link, &vma->vm->unbound_list);
> -
> - if (vma->pages != obj->mm.pages) {
> - GEM_BUG_ON(!vma->pages);
> - sg_free_table(vma->pages);
> - kfree(vma->pages);
> - }
> - vma->pages = NULL;
> -
> - /* Since the unbound list is global, only move to that list if
> - * no more VMAs exist. */
> - if (--obj->bind_count == 0)
> - list_move_tail(&obj->global_link,
> - &to_i915(obj->base.dev)->mm.unbound_list);
> -
> - /* And finally now the object is completely decoupled from this vma,
> - * we can drop its hold on the backing storage and allow it to be
> - * reaped by the shrinker.
> - */
> - i915_gem_object_unpin_pages(obj);
> -
> -destroy:
> - if (unlikely(i915_vma_is_closed(vma)))
> - i915_vma_destroy(vma);
> -
> - return 0;
> -}
> -
> static int wait_for_timeline(struct i915_gem_timeline *tl, unsigned int flags)
> {
> int ret, i;
> @@ -3057,172 +2946,6 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915, unsigned int flags)
> return 0;
> }
>
> -static bool i915_gem_valid_gtt_space(struct i915_vma *vma,
> - unsigned long cache_level)
> -{
> - struct drm_mm_node *gtt_space = &vma->node;
> - struct drm_mm_node *other;
> -
> - /*
> - * On some machines we have to be careful when putting differing types
> - * of snoopable memory together to avoid the prefetcher crossing memory
> - * domains and dying. During vm initialisation, we decide whether or not
> - * these constraints apply and set the drm_mm.color_adjust
> - * appropriately.
> - */
> - if (vma->vm->mm.color_adjust == NULL)
> - return true;
> -
> - if (!drm_mm_node_allocated(gtt_space))
> - return true;
> -
> - if (list_empty(&gtt_space->node_list))
> - return true;
> -
> - other = list_entry(gtt_space->node_list.prev, struct drm_mm_node, node_list);
> - if (other->allocated && !other->hole_follows && other->color != cache_level)
> - return false;
> -
> - other = list_entry(gtt_space->node_list.next, struct drm_mm_node, node_list);
> - if (other->allocated && !gtt_space->hole_follows && other->color != cache_level)
> - return false;
> -
> - return true;
> -}
> -
> -/**
> - * i915_vma_insert - finds a slot for the vma in its address space
> - * @vma: the vma
> - * @size: requested size in bytes (can be larger than the VMA)
> - * @alignment: required alignment
> - * @flags: mask of PIN_* flags to use
> - *
> - * First we try to allocate some free space that meets the requirements for
> - * the VMA. Failiing that, if the flags permit, it will evict an old VMA,
> - * preferrably the oldest idle entry to make room for the new VMA.
> - *
> - * Returns:
> - * 0 on success, negative error code otherwise.
> - */
> -static int
> -i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
> -{
> - struct drm_i915_private *dev_priv = to_i915(vma->vm->dev);
> - struct drm_i915_gem_object *obj = vma->obj;
> - u64 start, end;
> - int ret;
> -
> - GEM_BUG_ON(vma->flags & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND));
> - GEM_BUG_ON(drm_mm_node_allocated(&vma->node));
> -
> - size = max(size, vma->size);
> - if (flags & PIN_MAPPABLE)
> - size = i915_gem_get_ggtt_size(dev_priv, size,
> - i915_gem_object_get_tiling(obj));
> -
> - alignment = max(max(alignment, vma->display_alignment),
> - i915_gem_get_ggtt_alignment(dev_priv, size,
> - i915_gem_object_get_tiling(obj),
> - flags & PIN_MAPPABLE));
> -
> - start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0;
> -
> - end = vma->vm->total;
> - if (flags & PIN_MAPPABLE)
> - end = min_t(u64, end, dev_priv->ggtt.mappable_end);
> - if (flags & PIN_ZONE_4G)
> - end = min_t(u64, end, (1ULL << 32) - PAGE_SIZE);
> -
> - /* If binding the object/GGTT view requires more space than the entire
> - * aperture has, reject it early before evicting everything in a vain
> - * attempt to find space.
> - */
> - if (size > end) {
> - DRM_DEBUG("Attempting to bind an object larger than the aperture: request=%llu [object=%zd] > %s aperture=%llu\n",
> - size, obj->base.size,
> - flags & PIN_MAPPABLE ? "mappable" : "total",
> - end);
> - return -E2BIG;
> - }
> -
> - ret = i915_gem_object_pin_pages(obj);
> - if (ret)
> - return ret;
> -
> - if (flags & PIN_OFFSET_FIXED) {
> - u64 offset = flags & PIN_OFFSET_MASK;
> - if (offset & (alignment - 1) || offset > end - size) {
> - ret = -EINVAL;
> - goto err_unpin;
> - }
> -
> - vma->node.start = offset;
> - vma->node.size = size;
> - vma->node.color = obj->cache_level;
> - ret = drm_mm_reserve_node(&vma->vm->mm, &vma->node);
> - if (ret) {
> - ret = i915_gem_evict_for_vma(vma);
> - if (ret == 0)
> - ret = drm_mm_reserve_node(&vma->vm->mm, &vma->node);
> - if (ret)
> - goto err_unpin;
> - }
> - } else {
> - u32 search_flag, alloc_flag;
> -
> - if (flags & PIN_HIGH) {
> - search_flag = DRM_MM_SEARCH_BELOW;
> - alloc_flag = DRM_MM_CREATE_TOP;
> - } else {
> - search_flag = DRM_MM_SEARCH_DEFAULT;
> - alloc_flag = DRM_MM_CREATE_DEFAULT;
> - }
> -
> - /* We only allocate in PAGE_SIZE/GTT_PAGE_SIZE (4096) chunks,
> - * so we know that we always have a minimum alignment of 4096.
> - * The drm_mm range manager is optimised to return results
> - * with zero alignment, so where possible use the optimal
> - * path.
> - */
> - if (alignment <= 4096)
> - alignment = 0;
> -
> -search_free:
> - ret = drm_mm_insert_node_in_range_generic(&vma->vm->mm,
> - &vma->node,
> - size, alignment,
> - obj->cache_level,
> - start, end,
> - search_flag,
> - alloc_flag);
> - if (ret) {
> - ret = i915_gem_evict_something(vma->vm, size, alignment,
> - obj->cache_level,
> - start, end,
> - flags);
> - if (ret == 0)
> - goto search_free;
> -
> - goto err_unpin;
> - }
> -
> - GEM_BUG_ON(vma->node.start < start);
> - GEM_BUG_ON(vma->node.start + vma->node.size > end);
> - }
> - GEM_BUG_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level));
> -
> - list_move_tail(&obj->global_link, &dev_priv->mm.bound_list);
> - list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
> - obj->bind_count++;
> - GEM_BUG_ON(atomic_read(&obj->mm.pages_pin_count) < obj->bind_count);
> -
> - return 0;
> -
> -err_unpin:
> - i915_gem_object_unpin_pages(obj);
> - return ret;
> -}
> -
> void i915_gem_clflush_object(struct drm_i915_gem_object *obj,
> bool force)
> {
> @@ -3818,100 +3541,6 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
> return ret < 0 ? ret : 0;
> }
>
> -static bool
> -i915_vma_misplaced(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
> -{
> - if (!drm_mm_node_allocated(&vma->node))
> - return false;
> -
> - if (vma->node.size < size)
> - return true;
> -
> - if (alignment && vma->node.start & (alignment - 1))
> - return true;
> -
> - if (flags & PIN_MAPPABLE && !i915_vma_is_map_and_fenceable(vma))
> - return true;
> -
> - if (flags & PIN_OFFSET_BIAS &&
> - vma->node.start < (flags & PIN_OFFSET_MASK))
> - return true;
> -
> - if (flags & PIN_OFFSET_FIXED &&
> - vma->node.start != (flags & PIN_OFFSET_MASK))
> - return true;
> -
> - return false;
> -}
> -
> -void __i915_vma_set_map_and_fenceable(struct i915_vma *vma)
> -{
> - struct drm_i915_gem_object *obj = vma->obj;
> - struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
> - bool mappable, fenceable;
> - u32 fence_size, fence_alignment;
> -
> - fence_size = i915_gem_get_ggtt_size(dev_priv,
> - vma->size,
> - i915_gem_object_get_tiling(obj));
> - fence_alignment = i915_gem_get_ggtt_alignment(dev_priv,
> - vma->size,
> - i915_gem_object_get_tiling(obj),
> - true);
> -
> - fenceable = (vma->node.size == fence_size &&
> - (vma->node.start & (fence_alignment - 1)) == 0);
> -
> - mappable = (vma->node.start + fence_size <=
> - dev_priv->ggtt.mappable_end);
> -
> - /*
> - * Explicitly disable for rotated VMA since the display does not
> - * need the fence and the VMA is not accessible to other users.
> - */
> - if (mappable && fenceable &&
> - vma->ggtt_view.type != I915_GGTT_VIEW_ROTATED)
> - vma->flags |= I915_VMA_CAN_FENCE;
> - else
> - vma->flags &= ~I915_VMA_CAN_FENCE;
> -}
> -
> -int __i915_vma_do_pin(struct i915_vma *vma,
> - u64 size, u64 alignment, u64 flags)
> -{
> - unsigned int bound = vma->flags;
> - int ret;
> -
> - lockdep_assert_held(&vma->vm->dev->struct_mutex);
> - GEM_BUG_ON((flags & (PIN_GLOBAL | PIN_USER)) == 0);
> - GEM_BUG_ON((flags & PIN_GLOBAL) && !i915_vma_is_ggtt(vma));
> -
> - if (WARN_ON(bound & I915_VMA_PIN_OVERFLOW)) {
> - ret = -EBUSY;
> - goto err;
> - }
> -
> - if ((bound & I915_VMA_BIND_MASK) == 0) {
> - ret = i915_vma_insert(vma, size, alignment, flags);
> - if (ret)
> - goto err;
> - }
> -
> - ret = i915_vma_bind(vma, vma->obj->cache_level, flags);
> - if (ret)
> - goto err;
> -
> - if ((bound ^ vma->flags) & I915_VMA_GLOBAL_BIND)
> - __i915_vma_set_map_and_fenceable(vma);
> -
> - GEM_BUG_ON(i915_vma_misplaced(vma, size, alignment, flags));
> - return 0;
> -
> -err:
> - __i915_vma_unpin(vma);
> - return ret;
> -}
> -
> struct i915_vma *
> i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
> const struct i915_ggtt_view *view,
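
Similarly, since i915_vma_insert()/__i915_vma_do_pin() now move into
i915_vma.c, here is a sketch of the pin/unbind flow they implement, pieced
together from the code above (hypothetical helper; assumes struct_mutex is
held, that the offset is page aligned, and that i915_vma_unpin() and
i915_vma_unbind() are used as the existing counterparts of the pin):

static int example_pin_fixed(struct drm_i915_gem_object *obj, u64 offset)
{
	struct i915_vma *vma;

	/* Reserve (evicting if needed) a mappable GGTT slot at the offset. */
	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
				       PIN_MAPPABLE | PIN_OFFSET_FIXED | offset);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	/* ... access through the aperture at vma->node.start ... */

	i915_vma_unpin(vma);		/* drop the pin, the binding remains */
	return i915_vma_unbind(vma);	/* and tear the binding down explicitly */
}
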
> diff --git a/drivers/gpu/drm/i915/i915_gem_fence.c b/drivers/gpu/drm/i915/i915_gem_fence.c
> deleted file mode 100644
> index cd59dbc..0000000
> --- a/drivers/gpu/drm/i915/i915_gem_fence.c
> +++ /dev/null
> @@ -1,716 +0,0 @@
> -/*
> - * Copyright © 2008-2015 Intel Corporation
> - *
> - * Permission is hereby granted, free of charge, to any person obtaining a
> - * copy of this software and associated documentation files (the "Software"),
> - * to deal in the Software without restriction, including without limitation
> - * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> - * and/or sell copies of the Software, and to permit persons to whom the
> - * Software is furnished to do so, subject to the following conditions:
> - *
> - * The above copyright notice and this permission notice (including the next
> - * paragraph) shall be included in all copies or substantial portions of the
> - * Software.
> - *
> - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
> - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> - * IN THE SOFTWARE.
> - */
> -
> -#include <drm/drmP.h>
> -#include <drm/i915_drm.h>
> -#include "i915_drv.h"
> -
> -/**
> - * DOC: fence register handling
> - *
> - * Important to avoid confusions: "fences" in the i915 driver are not execution
> - * fences used to track command completion but hardware detiler objects which
> - * wrap a given range of the global GTT. Each platform has only a fairly limited
> - * set of these objects.
> - *
> - * Fences are used to detile GTT memory mappings. They're also connected to the
> - * hardware frontbuffer render tracking and hence interact with frontbuffer
> - * compression. Furthermore on older platforms fences are required for tiled
> - * objects used by the display engine. They can also be used by the render
> - * engine - they're required for blitter commands and are optional for render
> - * commands. But on gen4+ both display (with the exception of fbc) and rendering
> - * have their own tiling state bits and don't need fences.
> - *
> - * Also note that fences only support X and Y tiling and hence can't be used for
> - * the fancier new tiling formats like W, Ys and Yf.
> - *
> - * Finally note that because fences are such a restricted resource they're
> - * dynamically associated with objects. Furthermore fence state is committed to
> - * the hardware lazily to avoid unnecessary stalls on gen2/3. Therefore code must
> - * explicitly call i915_gem_object_get_fence() to synchronize fencing status
> - * for cpu access. Also note that some code wants an unfenced view, for those
> - * cases the fence can be removed forcefully with i915_gem_object_put_fence().
> - *
> - * Internally these functions will synchronize with userspace access by removing
> - * CPU ptes into GTT mmaps (not the GTT ptes themselves) as needed.
> - */
> -
> -#define pipelined 0
> -
> -static void i965_write_fence_reg(struct drm_i915_fence_reg *fence,
> - struct i915_vma *vma)
> -{
> - i915_reg_t fence_reg_lo, fence_reg_hi;
> - int fence_pitch_shift;
> - u64 val;
> -
> - if (INTEL_INFO(fence->i915)->gen >= 6) {
> - fence_reg_lo = FENCE_REG_GEN6_LO(fence->id);
> - fence_reg_hi = FENCE_REG_GEN6_HI(fence->id);
> - fence_pitch_shift = GEN6_FENCE_PITCH_SHIFT;
> -
> - } else {
> - fence_reg_lo = FENCE_REG_965_LO(fence->id);
> - fence_reg_hi = FENCE_REG_965_HI(fence->id);
> - fence_pitch_shift = I965_FENCE_PITCH_SHIFT;
> - }
> -
> - val = 0;
> - if (vma) {
> - unsigned int tiling = i915_gem_object_get_tiling(vma->obj);
> - bool is_y_tiled = tiling == I915_TILING_Y;
> - unsigned int stride = i915_gem_object_get_stride(vma->obj);
> - u32 row_size = stride * (is_y_tiled ? 32 : 8);
> - u32 size = rounddown((u32)vma->node.size, row_size);
> -
> - val = ((vma->node.start + size - 4096) & 0xfffff000) << 32;
> - val |= vma->node.start & 0xfffff000;
> - val |= (u64)((stride / 128) - 1) << fence_pitch_shift;
> - if (is_y_tiled)
> - val |= BIT(I965_FENCE_TILING_Y_SHIFT);
> - val |= I965_FENCE_REG_VALID;
> - }
> -
> - if (!pipelined) {
> - struct drm_i915_private *dev_priv = fence->i915;
> -
> - /* To w/a incoherency with non-atomic 64-bit register updates,
> - * we split the 64-bit update into two 32-bit writes. In order
> - * for a partial fence not to be evaluated between writes, we
> - * precede the update with write to turn off the fence register,
> - * and only enable the fence as the last step.
> - *
> - * For extra levels of paranoia, we make sure each step lands
> - * before applying the next step.
> - */
> - I915_WRITE(fence_reg_lo, 0);
> - POSTING_READ(fence_reg_lo);
> -
> - I915_WRITE(fence_reg_hi, upper_32_bits(val));
> - I915_WRITE(fence_reg_lo, lower_32_bits(val));
> - POSTING_READ(fence_reg_lo);
> - }
> -}
> -
> -static void i915_write_fence_reg(struct drm_i915_fence_reg *fence,
> - struct i915_vma *vma)
> -{
> - u32 val;
> -
> - val = 0;
> - if (vma) {
> - unsigned int tiling = i915_gem_object_get_tiling(vma->obj);
> - bool is_y_tiled = tiling == I915_TILING_Y;
> - unsigned int stride = i915_gem_object_get_stride(vma->obj);
> - int pitch_val;
> - int tile_width;
> -
> - WARN((vma->node.start & ~I915_FENCE_START_MASK) ||
> - !is_power_of_2(vma->node.size) ||
> - (vma->node.start & (vma->node.size - 1)),
> - "object 0x%08llx [fenceable? %d] not 1M or pot-size (0x%08llx) aligned\n",
> - vma->node.start,
> - i915_vma_is_map_and_fenceable(vma),
> - vma->node.size);
> -
> - if (is_y_tiled && HAS_128_BYTE_Y_TILING(fence->i915))
> - tile_width = 128;
> - else
> - tile_width = 512;
> -
> - /* Note: pitch better be a power of two tile widths */
> - pitch_val = stride / tile_width;
> - pitch_val = ffs(pitch_val) - 1;
> -
> - val = vma->node.start;
> - if (is_y_tiled)
> - val |= BIT(I830_FENCE_TILING_Y_SHIFT);
> - val |= I915_FENCE_SIZE_BITS(vma->node.size);
> - val |= pitch_val << I830_FENCE_PITCH_SHIFT;
> - val |= I830_FENCE_REG_VALID;
> - }
> -
> - if (!pipelined) {
> - struct drm_i915_private *dev_priv = fence->i915;
> - i915_reg_t reg = FENCE_REG(fence->id);
> -
> - I915_WRITE(reg, val);
> - POSTING_READ(reg);
> - }
> -}
> -
> -static void i830_write_fence_reg(struct drm_i915_fence_reg *fence,
> - struct i915_vma *vma)
> -{
> - u32 val;
> -
> - val = 0;
> - if (vma) {
> - unsigned int tiling = i915_gem_object_get_tiling(vma->obj);
> - bool is_y_tiled = tiling == I915_TILING_Y;
> - unsigned int stride = i915_gem_object_get_stride(vma->obj);
> - u32 pitch_val;
> -
> - WARN((vma->node.start & ~I830_FENCE_START_MASK) ||
> - !is_power_of_2(vma->node.size) ||
> - (vma->node.start & (vma->node.size - 1)),
> - "object 0x%08llx not 512K or pot-size 0x%08llx aligned\n",
> - vma->node.start, vma->node.size);
> -
> - pitch_val = stride / 128;
> - pitch_val = ffs(pitch_val) - 1;
> -
> - val = vma->node.start;
> - if (is_y_tiled)
> - val |= BIT(I830_FENCE_TILING_Y_SHIFT);
> - val |= I830_FENCE_SIZE_BITS(vma->node.size);
> - val |= pitch_val << I830_FENCE_PITCH_SHIFT;
> - val |= I830_FENCE_REG_VALID;
> - }
> -
> - if (!pipelined) {
> - struct drm_i915_private *dev_priv = fence->i915;
> - i915_reg_t reg = FENCE_REG(fence->id);
> -
> - I915_WRITE(reg, val);
> - POSTING_READ(reg);
> - }
> -}
> -
> -static void fence_write(struct drm_i915_fence_reg *fence,
> - struct i915_vma *vma)
> -{
> - /* Previous access through the fence register is marshalled by
> - * the mb() inside the fault handlers (i915_gem_release_mmaps)
> - * and explicitly managed for internal users.
> - */
> -
> - if (IS_GEN2(fence->i915))
> - i830_write_fence_reg(fence, vma);
> - else if (IS_GEN3(fence->i915))
> - i915_write_fence_reg(fence, vma);
> - else
> - i965_write_fence_reg(fence, vma);
> -
> - /* Access through the fenced region afterwards is
> - * ordered by the posting reads whilst writing the registers.
> - */
> -
> - fence->dirty = false;
> -}
> -
> -static int fence_update(struct drm_i915_fence_reg *fence,
> - struct i915_vma *vma)
> -{
> - int ret;
> -
> - if (vma) {
> - if (!i915_vma_is_map_and_fenceable(vma))
> - return -EINVAL;
> -
> - if (WARN(!i915_gem_object_get_stride(vma->obj) ||
> - !i915_gem_object_get_tiling(vma->obj),
> - "bogus fence setup with stride: 0x%x, tiling mode: %i\n",
> - i915_gem_object_get_stride(vma->obj),
> - i915_gem_object_get_tiling(vma->obj)))
> - return -EINVAL;
> -
> - ret = i915_gem_active_retire(&vma->last_fence,
> - &vma->obj->base.dev->struct_mutex);
> - if (ret)
> - return ret;
> - }
> -
> - if (fence->vma) {
> - ret = i915_gem_active_retire(&fence->vma->last_fence,
> - &fence->vma->obj->base.dev->struct_mutex);
> - if (ret)
> - return ret;
> - }
> -
> - if (fence->vma && fence->vma != vma) {
> - /* Ensure that all userspace CPU access is completed before
> - * stealing the fence.
> - */
> - i915_gem_release_mmap(fence->vma->obj);
> -
> - fence->vma->fence = NULL;
> - fence->vma = NULL;
> -
> - list_move(&fence->link, &fence->i915->mm.fence_list);
> - }
> -
> - fence_write(fence, vma);
> -
> - if (vma) {
> - if (fence->vma != vma) {
> - vma->fence = fence;
> - fence->vma = vma;
> - }
> -
> - list_move_tail(&fence->link, &fence->i915->mm.fence_list);
> - }
> -
> - return 0;
> -}
> -
> -/**
> - * i915_vma_put_fence - force-remove fence for a VMA
> - * @vma: vma to map linearly (not through a fence reg)
> - *
> - * This function force-removes any fence from the given object, which is useful
> - * if the kernel wants to do untiled GTT access.
> - *
> - * Returns:
> - *
> - * 0 on success, negative error code on failure.
> - */
> -int
> -i915_vma_put_fence(struct i915_vma *vma)
> -{
> - struct drm_i915_fence_reg *fence = vma->fence;
> -
> - assert_rpm_wakelock_held(to_i915(vma->vm->dev));
> -
> - if (!fence)
> - return 0;
> -
> - if (fence->pin_count)
> - return -EBUSY;
> -
> - return fence_update(fence, NULL);
> -}
> -
> -static struct drm_i915_fence_reg *fence_find(struct drm_i915_private *dev_priv)
> -{
> - struct drm_i915_fence_reg *fence;
> -
> - list_for_each_entry(fence, &dev_priv->mm.fence_list, link) {
> - if (fence->pin_count)
> - continue;
> -
> - return fence;
> - }
> -
> - /* Wait for completion of pending flips which consume fences */
> - if (intel_has_pending_fb_unpin(&dev_priv->drm))
> - return ERR_PTR(-EAGAIN);
> -
> - return ERR_PTR(-EDEADLK);
> -}
> -
> -/**
> - * i915_vma_get_fence - set up fencing for a vma
> - * @vma: vma to map through a fence reg
> - *
> - * When mapping objects through the GTT, userspace wants to be able to write
> - * to them without having to worry about swizzling if the object is tiled.
> - * This function walks the fence regs looking for a free one for @obj,
> - * stealing one if it can't find any.
> - *
> - * It then sets up the reg based on the object's properties: address, pitch
> - * and tiling format.
> - *
> - * For an untiled surface, this removes any existing fence.
> - *
> - * Returns:
> - *
> - * 0 on success, negative error code on failure.
> - */
> -int
> -i915_vma_get_fence(struct i915_vma *vma)
> -{
> - struct drm_i915_fence_reg *fence;
> - struct i915_vma *set = i915_gem_object_is_tiled(vma->obj) ? vma : NULL;
> -
> - /* Note that we revoke fences on runtime suspend. Therefore the user
> - * must keep the device awake whilst using the fence.
> - */
> - assert_rpm_wakelock_held(to_i915(vma->vm->dev));
> -
> - /* Just update our place in the LRU if our fence is getting reused. */
> - if (vma->fence) {
> - fence = vma->fence;
> - if (!fence->dirty) {
> - list_move_tail(&fence->link,
> - &fence->i915->mm.fence_list);
> - return 0;
> - }
> - } else if (set) {
> - fence = fence_find(to_i915(vma->vm->dev));
> - if (IS_ERR(fence))
> - return PTR_ERR(fence);
> - } else
> - return 0;
> -
> - return fence_update(fence, set);
> -}
> -
> -/**
> - * i915_gem_restore_fences - restore fence state
> - * @dev: DRM device
> - *
> - * Restore the hw fence state to match the software tracking again, to be called
> - * after a gpu reset and on resume. Note that on runtime suspend we only cancel
> - * the fences, to be reacquired by the user later.
> - */
> -void i915_gem_restore_fences(struct drm_device *dev)
> -{
> - struct drm_i915_private *dev_priv = to_i915(dev);
> - int i;
> -
> - for (i = 0; i < dev_priv->num_fence_regs; i++) {
> - struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i];
> - struct i915_vma *vma = reg->vma;
> -
> - /*
> - * Commit delayed tiling changes if we have an object still
> - * attached to the fence, otherwise just clear the fence.
> - */
> - if (vma && !i915_gem_object_is_tiled(vma->obj)) {
> - GEM_BUG_ON(!reg->dirty);
> - GEM_BUG_ON(!list_empty(&vma->obj->userfault_link));
> -
> - list_move(&reg->link, &dev_priv->mm.fence_list);
> - vma->fence = NULL;
> - vma = NULL;
> - }
> -
> - fence_write(reg, vma);
> - reg->vma = vma;
> - }
> -}
> -
> -/**
> - * DOC: tiling swizzling details
> - *
> - * The idea behind tiling is to increase cache hit rates by rearranging
> - * pixel data so that a group of pixel accesses are in the same cacheline.
> - * Performance improvement from doing this on the back/depth buffer are on
> - * the order of 30%.
> - *
> - * Intel architectures make this somewhat more complicated, though, by
> - * adjustments made to addressing of data when the memory is in interleaved
> - * mode (matched pairs of DIMMS) to improve memory bandwidth.
> - * For interleaved memory, the CPU sends every sequential 64 bytes
> - * to an alternate memory channel so it can get the bandwidth from both.
> - *
> - * The GPU also rearranges its accesses for increased bandwidth to interleaved
> - * memory, and it matches what the CPU does for non-tiled. However, when tiled
> - * it does it a little differently, since one walks addresses not just in the
> - * X direction but also Y. So, along with alternating channels when bit
> - * 6 of the address flips, it also alternates when other bits flip -- Bits 9
> - * (every 512 bytes, an X tile scanline) and 10 (every two X tile scanlines)
> - * are common to both the 915 and 965-class hardware.
> - *
> - * The CPU also sometimes XORs in higher bits as well, to improve
> - * bandwidth doing strided access like we do so frequently in graphics. This
> - * is called "Channel XOR Randomization" in the MCH documentation. The result
> - * is that the CPU is XORing in either bit 11 or bit 17 to bit 6 of its address
> - * decode.
> - *
> - * All of this bit 6 XORing has an effect on our memory management,
> - * as we need to make sure that the 3d driver can correctly address object
> - * contents.
> - *
> - * If we don't have interleaved memory, all tiling is safe and no swizzling is
> - * required.
> - *
> - * When bit 17 is XORed in, we simply refuse to tile at all. Bit
> - * 17 is not just a page offset, so as we page an object out and back in,
> - * individual pages in it will have different bit 17 addresses, resulting in
> - * each 64 bytes being swapped with its neighbor!
> - *
> - * Otherwise, if interleaved, we have to tell the 3d driver what the address
> - * swizzling it needs to do is, since it's writing with the CPU to the pages
> - * (bit 6 and potentially bit 11 XORed in), and the GPU is reading from the
> - * pages (bit 6, 9, and 10 XORed in), resulting in a cumulative bit swizzling
> - * required by the CPU of XORing in bit 6, 9, 10, and potentially 11, in order
> - * to match what the GPU expects.
> - */
> -
> -/**
> - * i915_gem_detect_bit_6_swizzle - detect bit 6 swizzling pattern
> - * @dev: DRM device
> - *
> - * Detects bit 6 swizzling of address lookup between IGD access and CPU
> - * access through main memory.
> - */
> -void
> -i915_gem_detect_bit_6_swizzle(struct drm_device *dev)
> -{
> - struct drm_i915_private *dev_priv = to_i915(dev);
> - uint32_t swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN;
> - uint32_t swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN;
> -
> - if (INTEL_GEN(dev_priv) >= 8 || IS_VALLEYVIEW(dev_priv)) {
> - /*
> - * On BDW+, swizzling is not used. We leave the CPU memory
> - * controller in charge of optimizing memory accesses without
> - * the extra address manipulation GPU side.
> - *
> - * VLV and CHV don't have GPU swizzling.
> - */
> - swizzle_x = I915_BIT_6_SWIZZLE_NONE;
> - swizzle_y = I915_BIT_6_SWIZZLE_NONE;
> - } else if (INTEL_INFO(dev)->gen >= 6) {
> - if (dev_priv->preserve_bios_swizzle) {
> - if (I915_READ(DISP_ARB_CTL) &
> - DISP_TILE_SURFACE_SWIZZLING) {
> - swizzle_x = I915_BIT_6_SWIZZLE_9_10;
> - swizzle_y = I915_BIT_6_SWIZZLE_9;
> - } else {
> - swizzle_x = I915_BIT_6_SWIZZLE_NONE;
> - swizzle_y = I915_BIT_6_SWIZZLE_NONE;
> - }
> - } else {
> - uint32_t dimm_c0, dimm_c1;
> - dimm_c0 = I915_READ(MAD_DIMM_C0);
> - dimm_c1 = I915_READ(MAD_DIMM_C1);
> - dimm_c0 &= MAD_DIMM_A_SIZE_MASK | MAD_DIMM_B_SIZE_MASK;
> - dimm_c1 &= MAD_DIMM_A_SIZE_MASK | MAD_DIMM_B_SIZE_MASK;
> - /* Enable swizzling when the channels are populated
> - * with identically sized dimms. We don't need to check
> - * the 3rd channel because no cpu with gpu attached
> - * ships in that configuration. Also, swizzling only
> - * makes sense for 2 channels anyway. */
> - if (dimm_c0 == dimm_c1) {
> - swizzle_x = I915_BIT_6_SWIZZLE_9_10;
> - swizzle_y = I915_BIT_6_SWIZZLE_9;
> - } else {
> - swizzle_x = I915_BIT_6_SWIZZLE_NONE;
> - swizzle_y = I915_BIT_6_SWIZZLE_NONE;
> - }
> - }
> - } else if (IS_GEN5(dev_priv)) {
> - /* On Ironlake whatever DRAM config, GPU always do
> - * same swizzling setup.
> - */
> - swizzle_x = I915_BIT_6_SWIZZLE_9_10;
> - swizzle_y = I915_BIT_6_SWIZZLE_9;
> - } else if (IS_GEN2(dev_priv)) {
> - /* As far as we know, the 865 doesn't have these bit 6
> - * swizzling issues.
> - */
> - swizzle_x = I915_BIT_6_SWIZZLE_NONE;
> - swizzle_y = I915_BIT_6_SWIZZLE_NONE;
> - } else if (IS_MOBILE(dev_priv) || (IS_GEN3(dev_priv) &&
> - !IS_G33(dev_priv))) {
> - uint32_t dcc;
> -
> - /* On 9xx chipsets, channel interleave by the CPU is
> - * determined by DCC. For single-channel, neither the CPU
> - * nor the GPU do swizzling. For dual channel interleaved,
> - * the GPU's interleave is bit 9 and 10 for X tiled, and bit
> - * 9 for Y tiled. The CPU's interleave is independent, and
> - * can be based on either bit 11 (haven't seen this yet) or
> - * bit 17 (common).
> - */
> - dcc = I915_READ(DCC);
> - switch (dcc & DCC_ADDRESSING_MODE_MASK) {
> - case DCC_ADDRESSING_MODE_SINGLE_CHANNEL:
> - case DCC_ADDRESSING_MODE_DUAL_CHANNEL_ASYMMETRIC:
> - swizzle_x = I915_BIT_6_SWIZZLE_NONE;
> - swizzle_y = I915_BIT_6_SWIZZLE_NONE;
> - break;
> - case DCC_ADDRESSING_MODE_DUAL_CHANNEL_INTERLEAVED:
> - if (dcc & DCC_CHANNEL_XOR_DISABLE) {
> - /* This is the base swizzling by the GPU for
> - * tiled buffers.
> - */
> - swizzle_x = I915_BIT_6_SWIZZLE_9_10;
> - swizzle_y = I915_BIT_6_SWIZZLE_9;
> - } else if ((dcc & DCC_CHANNEL_XOR_BIT_17) == 0) {
> - /* Bit 11 swizzling by the CPU in addition. */
> - swizzle_x = I915_BIT_6_SWIZZLE_9_10_11;
> - swizzle_y = I915_BIT_6_SWIZZLE_9_11;
> - } else {
> - /* Bit 17 swizzling by the CPU in addition. */
> - swizzle_x = I915_BIT_6_SWIZZLE_9_10_17;
> - swizzle_y = I915_BIT_6_SWIZZLE_9_17;
> - }
> - break;
> - }
> -
> - /* check for L-shaped memory aka modified enhanced addressing */
> - if (IS_GEN4(dev_priv) &&
> - !(I915_READ(DCC2) & DCC2_MODIFIED_ENHANCED_DISABLE)) {
> - swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN;
> - swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN;
> - }
> -
> - if (dcc == 0xffffffff) {
> - DRM_ERROR("Couldn't read from MCHBAR. "
> - "Disabling tiling.\n");
> - swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN;
> - swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN;
> - }
> - } else {
> - /* The 965, G33, and newer, have a very flexible memory
> - * configuration. It will enable dual-channel mode
> - * (interleaving) on as much memory as it can, and the GPU
> - * will additionally sometimes enable different bit 6
> - * swizzling for tiled objects from the CPU.
> - *
> - * Here's what I found on the G965:
> - * slot fill memory size swizzling
> - * 0A 0B 1A 1B 1-ch 2-ch
> - * 512 0 0 0 512 0 O
> - * 512 0 512 0 16 1008 X
> - * 512 0 0 512 16 1008 X
> - * 0 512 0 512 16 1008 X
> - * 1024 1024 1024 0 2048 1024 O
> - *
> - * We could probably detect this based on either the DRB
> - * matching, which was the case for the swizzling required in
> - * the table above, or from the 1-ch value being less than
> - * the minimum size of a rank.
> - *
> - * Reports indicate that the swizzling actually
> - * varies depending upon page placement inside the
> - * channels, i.e. we see swizzled pages where the
> - * banks of memory are paired and unswizzled on the
> - * uneven portion, so leave that as unknown.
> - */
> - if (I915_READ16(C0DRB3) == I915_READ16(C1DRB3)) {
> - swizzle_x = I915_BIT_6_SWIZZLE_9_10;
> - swizzle_y = I915_BIT_6_SWIZZLE_9;
> - }
> - }
> -
> - if (swizzle_x == I915_BIT_6_SWIZZLE_UNKNOWN ||
> - swizzle_y == I915_BIT_6_SWIZZLE_UNKNOWN) {
> - /* Userspace likes to explode if it sees unknown swizzling,
> - * so lie. We will finish the lie when reporting through
> - * the get-tiling-ioctl by reporting the physical swizzle
> - * mode as unknown instead.
> - *
> - * As we don't strictly know what the swizzling is, it may be
> - * bit17 dependent, and so we need to also prevent the pages
> - * from being moved.
> - */
> - dev_priv->quirks |= QUIRK_PIN_SWIZZLED_PAGES;
> - swizzle_x = I915_BIT_6_SWIZZLE_NONE;
> - swizzle_y = I915_BIT_6_SWIZZLE_NONE;
> - }
> -
> - dev_priv->mm.bit_6_swizzle_x = swizzle_x;
> - dev_priv->mm.bit_6_swizzle_y = swizzle_y;
> -}
> -
> -/*
> - * Swap every 64 bytes of this page around, to account for it having a new
> - * bit 17 of its physical address and therefore being interpreted differently
> - * by the GPU.
> - */
> -static void
> -i915_gem_swizzle_page(struct page *page)
> -{
> - char temp[64];
> - char *vaddr;
> - int i;
> -
> - vaddr = kmap(page);
> -
> - for (i = 0; i < PAGE_SIZE; i += 128) {
> - memcpy(temp, &vaddr[i], 64);
> - memcpy(&vaddr[i], &vaddr[i + 64], 64);
> - memcpy(&vaddr[i + 64], temp, 64);
> - }
> -
> - kunmap(page);
> -}
> -
> -/**
> - * i915_gem_object_do_bit_17_swizzle - fixup bit 17 swizzling
> - * @obj: i915 GEM buffer object
> - * @pages: the scattergather list of physical pages
> - *
> - * This function fixes up the swizzling in case any page frame number for this
> - * object has changed in bit 17 since that state has been saved with
> - * i915_gem_object_save_bit_17_swizzle().
> - *
> - * This is called when pinning backing storage again, since the kernel is free
> - * to move unpinned backing storage around (either by directly moving pages or
> - * by swapping them out and back in again).
> - */
> -void
> -i915_gem_object_do_bit_17_swizzle(struct drm_i915_gem_object *obj,
> - struct sg_table *pages)
> -{
> - struct sgt_iter sgt_iter;
> - struct page *page;
> - int i;
> -
> - if (obj->bit_17 == NULL)
> - return;
> -
> - i = 0;
> - for_each_sgt_page(page, sgt_iter, pages) {
> - char new_bit_17 = page_to_phys(page) >> 17;
> - if ((new_bit_17 & 0x1) != (test_bit(i, obj->bit_17) != 0)) {
> - i915_gem_swizzle_page(page);
> - set_page_dirty(page);
> - }
> - i++;
> - }
> -}
> -
> -/**
> - * i915_gem_object_save_bit_17_swizzle - save bit 17 swizzling
> - * @obj: i915 GEM buffer object
> - * @pages: the scattergather list of physical pages
> - *
> - * This function saves the bit 17 of each page frame number so that swizzling
> - * can be fixed up later on with i915_gem_object_do_bit_17_swizzle(). This must
> - * be called before the backing storage can be unpinned.
> - */
> -void
> -i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj,
> - struct sg_table *pages)
> -{
> - const unsigned int page_count = obj->base.size >> PAGE_SHIFT;
> - struct sgt_iter sgt_iter;
> - struct page *page;
> - int i;
> -
> - if (obj->bit_17 == NULL) {
> - obj->bit_17 = kcalloc(BITS_TO_LONGS(page_count),
> - sizeof(long), GFP_KERNEL);
> - if (obj->bit_17 == NULL) {
> - DRM_ERROR("Failed to allocate memory for bit 17 "
> - "record\n");
> - return;
> - }
> - }
> -
> - i = 0;
> -
> - for_each_sgt_page(page, sgt_iter, pages) {
> - if (page_to_phys(page) & (1 << 17))
> - __set_bit(i, obj->bit_17);
> - else
> - __clear_bit(i, obj->bit_17);
> - i++;
> - }
> -}
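
One aside on the "tiling swizzling details" DOC comment in the file being
moved: the bit 6 swizzling it describes can be pictured as XORing higher
address bits into bit 6. Illustrative only (no such helper exists in the
driver), for the I915_BIT_6_SWIZZLE_9_10 case:

static u32 swizzle_bit6_9_10(u32 offset)
{
	/* Bit 6 of the linear offset is XORed with bits 9 and 10. */
	u32 bit6 = ((offset >> 9) ^ (offset >> 10)) & 1;

	return offset ^ (bit6 << 6);
}
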
> diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.c b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
> new file mode 100644
> index 0000000..cd59dbc
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
> @@ -0,0 +1,716 @@
> +/*
> + * Copyright © 2008-2015 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + */
> +
> +#include <drm/drmP.h>
> +#include <drm/i915_drm.h>
> +#include "i915_drv.h"
> +
> +/**
> + * DOC: fence register handling
> + *
> + * Important to avoid confusions: "fences" in the i915 driver are not execution
> + * fences used to track command completion but hardware detiler objects which
> + * wrap a given range of the global GTT. Each platform has only a fairly limited
> + * set of these objects.
> + *
> + * Fences are used to detile GTT memory mappings. They're also connected to the
> + * hardware frontbuffer render tracking and hence interact with frontbuffer
> + * compression. Furthermore on older platforms fences are required for tiled
> + * objects used by the display engine. They can also be used by the render
> + * engine - they're required for blitter commands and are optional for render
> + * commands. But on gen4+ both display (with the exception of fbc) and rendering
> + * have their own tiling state bits and don't need fences.
> + *
> + * Also note that fences only support X and Y tiling and hence can't be used for
> + * the fancier new tiling formats like W, Ys and Yf.
> + *
> + * Finally note that because fences are such a restricted resource they're
> + * dynamically associated with objects. Furthermore fence state is committed to
> + * the hardware lazily to avoid unnecessary stalls on gen2/3. Therefore code must
> + * explicitly call i915_gem_object_get_fence() to synchronize fencing status
> + * for cpu access. Also note that some code wants an unfenced view, for those
> + * cases the fence can be removed forcefully with i915_gem_object_put_fence().
> + *
> + * Internally these functions will synchronize with userspace access by removing
> + * CPU ptes into GTT mmaps (not the GTT ptes themselves) as needed.
> + */
> +
> +#define pipelined 0
> +
> +static void i965_write_fence_reg(struct drm_i915_fence_reg *fence,
> + struct i915_vma *vma)
> +{
> + i915_reg_t fence_reg_lo, fence_reg_hi;
> + int fence_pitch_shift;
> + u64 val;
> +
> + if (INTEL_INFO(fence->i915)->gen >= 6) {
> + fence_reg_lo = FENCE_REG_GEN6_LO(fence->id);
> + fence_reg_hi = FENCE_REG_GEN6_HI(fence->id);
> + fence_pitch_shift = GEN6_FENCE_PITCH_SHIFT;
> +
> + } else {
> + fence_reg_lo = FENCE_REG_965_LO(fence->id);
> + fence_reg_hi = FENCE_REG_965_HI(fence->id);
> + fence_pitch_shift = I965_FENCE_PITCH_SHIFT;
> + }
> +
> + val = 0;
> + if (vma) {
> + unsigned int tiling = i915_gem_object_get_tiling(vma->obj);
> + bool is_y_tiled = tiling == I915_TILING_Y;
> + unsigned int stride = i915_gem_object_get_stride(vma->obj);
> + u32 row_size = stride * (is_y_tiled ? 32 : 8);
> + u32 size = rounddown((u32)vma->node.size, row_size);
> +
> + val = ((vma->node.start + size - 4096) & 0xfffff000) << 32;
> + val |= vma->node.start & 0xfffff000;
> + val |= (u64)((stride / 128) - 1) << fence_pitch_shift;
> + if (is_y_tiled)
> + val |= BIT(I965_FENCE_TILING_Y_SHIFT);
> + val |= I965_FENCE_REG_VALID;
> + }
> +
> + if (!pipelined) {
> + struct drm_i915_private *dev_priv = fence->i915;
> +
> + /* To w/a incoherency with non-atomic 64-bit register updates,
> + * we split the 64-bit update into two 32-bit writes. In order
> + * for a partial fence not to be evaluated between writes, we
> + * precede the update with write to turn off the fence register,
> + * and only enable the fence as the last step.
> + *
> + * For extra levels of paranoia, we make sure each step lands
> + * before applying the next step.
> + */
> + I915_WRITE(fence_reg_lo, 0);
> + POSTING_READ(fence_reg_lo);
> +
> + I915_WRITE(fence_reg_hi, upper_32_bits(val));
> + I915_WRITE(fence_reg_lo, lower_32_bits(val));
> + POSTING_READ(fence_reg_lo);
> + }
> +}
> +
> +static void i915_write_fence_reg(struct drm_i915_fence_reg *fence,
> + struct i915_vma *vma)
> +{
> + u32 val;
> +
> + val = 0;
> + if (vma) {
> + unsigned int tiling = i915_gem_object_get_tiling(vma->obj);
> + bool is_y_tiled = tiling == I915_TILING_Y;
> + unsigned int stride = i915_gem_object_get_stride(vma->obj);
> + int pitch_val;
> + int tile_width;
> +
> + WARN((vma->node.start & ~I915_FENCE_START_MASK) ||
> + !is_power_of_2(vma->node.size) ||
> + (vma->node.start & (vma->node.size - 1)),
> + "object 0x%08llx [fenceable? %d] not 1M or pot-size (0x%08llx) aligned\n",
> + vma->node.start,
> + i915_vma_is_map_and_fenceable(vma),
> + vma->node.size);
> +
> + if (is_y_tiled && HAS_128_BYTE_Y_TILING(fence->i915))
> + tile_width = 128;
> + else
> + tile_width = 512;
> +
> + /* Note: pitch better be a power of two tile widths */
> + pitch_val = stride / tile_width;
> + pitch_val = ffs(pitch_val) - 1;
> +
> + val = vma->node.start;
> + if (is_y_tiled)
> + val |= BIT(I830_FENCE_TILING_Y_SHIFT);
> + val |= I915_FENCE_SIZE_BITS(vma->node.size);
> + val |= pitch_val << I830_FENCE_PITCH_SHIFT;
> + val |= I830_FENCE_REG_VALID;
> + }
> +
> + if (!pipelined) {
> + struct drm_i915_private *dev_priv = fence->i915;
> + i915_reg_t reg = FENCE_REG(fence->id);
> +
> + I915_WRITE(reg, val);
> + POSTING_READ(reg);
> + }
> +}
> +
> +static void i830_write_fence_reg(struct drm_i915_fence_reg *fence,
> + struct i915_vma *vma)
> +{
> + u32 val;
> +
> + val = 0;
> + if (vma) {
> + unsigned int tiling = i915_gem_object_get_tiling(vma->obj);
> + bool is_y_tiled = tiling == I915_TILING_Y;
> + unsigned int stride = i915_gem_object_get_stride(vma->obj);
> + u32 pitch_val;
> +
> + WARN((vma->node.start & ~I830_FENCE_START_MASK) ||
> + !is_power_of_2(vma->node.size) ||
> + (vma->node.start & (vma->node.size - 1)),
> + "object 0x%08llx not 512K or pot-size 0x%08llx aligned\n",
> + vma->node.start, vma->node.size);
> +
> + pitch_val = stride / 128;
> + pitch_val = ffs(pitch_val) - 1;
> +
> + val = vma->node.start;
> + if (is_y_tiled)
> + val |= BIT(I830_FENCE_TILING_Y_SHIFT);
> + val |= I830_FENCE_SIZE_BITS(vma->node.size);
> + val |= pitch_val << I830_FENCE_PITCH_SHIFT;
> + val |= I830_FENCE_REG_VALID;
> + }
> +
> + if (!pipelined) {
> + struct drm_i915_private *dev_priv = fence->i915;
> + i915_reg_t reg = FENCE_REG(fence->id);
> +
> + I915_WRITE(reg, val);
> + POSTING_READ(reg);
> + }
> +}
> +
> +static void fence_write(struct drm_i915_fence_reg *fence,
> + struct i915_vma *vma)
> +{
> + /* Previous access through the fence register is marshalled by
> + * the mb() inside the fault handlers (i915_gem_release_mmap())
> + * and explicitly managed for internal users.
> + */
> +
> + if (IS_GEN2(fence->i915))
> + i830_write_fence_reg(fence, vma);
> + else if (IS_GEN3(fence->i915))
> + i915_write_fence_reg(fence, vma);
> + else
> + i965_write_fence_reg(fence, vma);
> +
> + /* Access through the fenced region afterwards is
> + * ordered by the posting reads whilst writing the registers.
> + */
> +
> + fence->dirty = false;
> +}
> +
> +static int fence_update(struct drm_i915_fence_reg *fence,
> + struct i915_vma *vma)
> +{
> + int ret;
> +
> + if (vma) {
> + if (!i915_vma_is_map_and_fenceable(vma))
> + return -EINVAL;
> +
> + if (WARN(!i915_gem_object_get_stride(vma->obj) ||
> + !i915_gem_object_get_tiling(vma->obj),
> + "bogus fence setup with stride: 0x%x, tiling mode: %i\n",
> + i915_gem_object_get_stride(vma->obj),
> + i915_gem_object_get_tiling(vma->obj)))
> + return -EINVAL;
> +
> + ret = i915_gem_active_retire(&vma->last_fence,
> + &vma->obj->base.dev->struct_mutex);
> + if (ret)
> + return ret;
> + }
> +
> + if (fence->vma) {
> + ret = i915_gem_active_retire(&fence->vma->last_fence,
> + &fence->vma->obj->base.dev->struct_mutex);
> + if (ret)
> + return ret;
> + }
> +
> + if (fence->vma && fence->vma != vma) {
> + /* Ensure that all userspace CPU access is completed before
> + * stealing the fence.
> + */
> + i915_gem_release_mmap(fence->vma->obj);
> +
> + fence->vma->fence = NULL;
> + fence->vma = NULL;
> +
> + list_move(&fence->link, &fence->i915->mm.fence_list);
> + }
> +
> + fence_write(fence, vma);
> +
> + if (vma) {
> + if (fence->vma != vma) {
> + vma->fence = fence;
> + fence->vma = vma;
> + }
> +
> + list_move_tail(&fence->link, &fence->i915->mm.fence_list);
> + }
> +
> + return 0;
> +}
> +
> +/**
> + * i915_vma_put_fence - force-remove fence for a VMA
> + * @vma: vma to map linearly (not through a fence reg)
> + *
> + * This function force-removes any fence from the given object, which is useful
> + * if the kernel wants to do untiled GTT access.
> + *
> + * Returns:
> + *
> + * 0 on success, negative error code on failure.
> + */
> +int
> +i915_vma_put_fence(struct i915_vma *vma)
> +{
> + struct drm_i915_fence_reg *fence = vma->fence;
> +
> + assert_rpm_wakelock_held(to_i915(vma->vm->dev));
> +
> + if (!fence)
> + return 0;
> +
> + if (fence->pin_count)
> + return -EBUSY;
> +
> + return fence_update(fence, NULL);
> +}
> +
> +static struct drm_i915_fence_reg *fence_find(struct drm_i915_private *dev_priv)
> +{
> + struct drm_i915_fence_reg *fence;
> +
> + list_for_each_entry(fence, &dev_priv->mm.fence_list, link) {
> + if (fence->pin_count)
> + continue;
> +
> + return fence;
> + }
> +
> + /* Wait for completion of pending flips which consume fences */
> + if (intel_has_pending_fb_unpin(&dev_priv->drm))
> + return ERR_PTR(-EAGAIN);
> +
> + return ERR_PTR(-EDEADLK);
> +}
> +
> +/**
> + * i915_vma_get_fence - set up fencing for a vma
> + * @vma: vma to map through a fence reg
> + *
> + * When mapping objects through the GTT, userspace wants to be able to write
> + * to them without having to worry about swizzling if the object is tiled.
> + * This function walks the fence regs looking for a free one for @vma,
> + * stealing one if it can't find any.
> + *
> + * It then sets up the reg based on the object's properties: address, pitch
> + * and tiling format.
> + *
> + * For an untiled surface, this removes any existing fence.
> + *
> + * Returns:
> + *
> + * 0 on success, negative error code on failure.
> + */
> +int
> +i915_vma_get_fence(struct i915_vma *vma)
> +{
> + struct drm_i915_fence_reg *fence;
> + struct i915_vma *set = i915_gem_object_is_tiled(vma->obj) ? vma : NULL;
> +
> + /* Note that we revoke fences on runtime suspend. Therefore the user
> + * must keep the device awake whilst using the fence.
> + */
> + assert_rpm_wakelock_held(to_i915(vma->vm->dev));
> +
> + /* Just update our place in the LRU if our fence is getting reused. */
> + if (vma->fence) {
> + fence = vma->fence;
> + if (!fence->dirty) {
> + list_move_tail(&fence->link,
> + &fence->i915->mm.fence_list);
> + return 0;
> + }
> + } else if (set) {
> + fence = fence_find(to_i915(vma->vm->dev));
> + if (IS_ERR(fence))
> + return PTR_ERR(fence);
> + } else
> + return 0;
> +
> + return fence_update(fence, set);
> +}
> +
> +/**
> + * i915_gem_restore_fences - restore fence state
> + * @dev: DRM device
> + *
> + * Restore the hw fence state to match the software tracking again, to be called
> + * after a gpu reset and on resume. Note that on runtime suspend we only cancel
> + * the fences, to be reacquired by the user later.
> + */
> +void i915_gem_restore_fences(struct drm_device *dev)
> +{
> + struct drm_i915_private *dev_priv = to_i915(dev);
> + int i;
> +
> + for (i = 0; i < dev_priv->num_fence_regs; i++) {
> + struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i];
> + struct i915_vma *vma = reg->vma;
> +
> + /*
> + * Commit delayed tiling changes if we have an object still
> + * attached to the fence, otherwise just clear the fence.
> + */
> + if (vma && !i915_gem_object_is_tiled(vma->obj)) {
> + GEM_BUG_ON(!reg->dirty);
> + GEM_BUG_ON(!list_empty(&vma->obj->userfault_link));
> +
> + list_move(&reg->link, &dev_priv->mm.fence_list);
> + vma->fence = NULL;
> + vma = NULL;
> + }
> +
> + fence_write(reg, vma);
> + reg->vma = vma;
> + }
> +}
> +
> +/**
> + * DOC: tiling swizzling details
> + *
> + * The idea behind tiling is to increase cache hit rates by rearranging
> + * pixel data so that a group of pixel accesses are in the same cacheline.
> + * Performance improvements from doing this on the back/depth buffer are on
> + * the order of 30%.
> + *
> + * Intel architectures make this somewhat more complicated, though, by
> + * adjustments made to addressing of data when the memory is in interleaved
> + * mode (matched pairs of DIMMS) to improve memory bandwidth.
> + * For interleaved memory, the CPU sends every sequential 64 bytes
> + * to an alternate memory channel so it can get the bandwidth from both.
> + *
> + * The GPU also rearranges its accesses for increased bandwidth to interleaved
> + * memory, and it matches what the CPU does for non-tiled. However, when tiled
> + * it does it a little differently, since one walks addresses not just in the
> + * X direction but also Y. So, along with alternating channels when bit
> + * 6 of the address flips, it also alternates when other bits flip -- Bits 9
> + * (every 512 bytes, an X tile scanline) and 10 (every two X tile scanlines)
> + * are common to both the 915 and 965-class hardware.
> + *
> + * The CPU also sometimes XORs in higher bits as well, to improve
> + * bandwidth doing strided access like we do so frequently in graphics. This
> + * is called "Channel XOR Randomization" in the MCH documentation. The result
> + * is that the CPU is XORing in either bit 11 or bit 17 to bit 6 of its address
> + * decode.
> + *
> + * All of this bit 6 XORing has an effect on our memory management,
> + * as we need to make sure that the 3d driver can correctly address object
> + * contents.
> + *
> + * If we don't have interleaved memory, all tiling is safe and no swizzling is
> + * required.
> + *
> + * When bit 17 is XORed in, we simply refuse to tile at all. Bit
> + * 17 is not just a page offset, so as we page an object out and back in,
> + * individual pages in it will have different bit 17 addresses, resulting in
> + * each 64 bytes being swapped with its neighbor!
> + *
> + * Otherwise, if interleaved, we have to tell the 3d driver what the address
> + * swizzling it needs to do is, since it's writing with the CPU to the pages
> + * (bit 6 and potentially bit 11 XORed in), and the GPU is reading from the
> + * pages (bit 6, 9, and 10 XORed in), resulting in a cumulative bit swizzling
> + * required by the CPU of XORing in bit 6, 9, 10, and potentially 11, in order
> + * to match what the GPU expects.
> + */
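To make the cumulative XOR above concrete: for the common I915_BIT_6_SWIZZLE_9_10
mode, the CPU has to flip bit 6 of a linear offset whenever bits 9 and 10 differ.
Illustration only, not part of the patch (the helper name is made up):

	/* Swizzle a linear CPU offset for I915_BIT_6_SWIZZLE_9_10:
	 * bit 6 is XORed with bits 9 and 10 of the address.
	 */
	static u32 swizzle_offset_9_10(u32 offset)
	{
		u32 bit9  = (offset >> 9) & 1;
		u32 bit10 = (offset >> 10) & 1;

		return offset ^ ((bit9 ^ bit10) << 6);
	}
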
> +
> +/**
> + * i915_gem_detect_bit_6_swizzle - detect bit 6 swizzling pattern
> + * @dev: DRM device
> + *
> + * Detects bit 6 swizzling of address lookup between IGD access and CPU
> + * access through main memory.
> + */
> +void
> +i915_gem_detect_bit_6_swizzle(struct drm_device *dev)
> +{
> + struct drm_i915_private *dev_priv = to_i915(dev);
> + uint32_t swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN;
> + uint32_t swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN;
> +
> + if (INTEL_GEN(dev_priv) >= 8 || IS_VALLEYVIEW(dev_priv)) {
> + /*
> + * On BDW+, swizzling is not used. We leave the CPU memory
> + * controller in charge of optimizing memory accesses without
> + * the extra address manipulation GPU side.
> + *
> + * VLV and CHV don't have GPU swizzling.
> + */
> + swizzle_x = I915_BIT_6_SWIZZLE_NONE;
> + swizzle_y = I915_BIT_6_SWIZZLE_NONE;
> + } else if (INTEL_INFO(dev)->gen >= 6) {
> + if (dev_priv->preserve_bios_swizzle) {
> + if (I915_READ(DISP_ARB_CTL) &
> + DISP_TILE_SURFACE_SWIZZLING) {
> + swizzle_x = I915_BIT_6_SWIZZLE_9_10;
> + swizzle_y = I915_BIT_6_SWIZZLE_9;
> + } else {
> + swizzle_x = I915_BIT_6_SWIZZLE_NONE;
> + swizzle_y = I915_BIT_6_SWIZZLE_NONE;
> + }
> + } else {
> + uint32_t dimm_c0, dimm_c1;
> + dimm_c0 = I915_READ(MAD_DIMM_C0);
> + dimm_c1 = I915_READ(MAD_DIMM_C1);
> + dimm_c0 &= MAD_DIMM_A_SIZE_MASK | MAD_DIMM_B_SIZE_MASK;
> + dimm_c1 &= MAD_DIMM_A_SIZE_MASK | MAD_DIMM_B_SIZE_MASK;
> + /* Enable swizzling when the channels are populated
> + * with identically sized dimms. We don't need to check
> + * the 3rd channel because no cpu with gpu attached
> + * ships in that configuration. Also, swizzling only
> + * makes sense for 2 channels anyway. */
> + if (dimm_c0 == dimm_c1) {
> + swizzle_x = I915_BIT_6_SWIZZLE_9_10;
> + swizzle_y = I915_BIT_6_SWIZZLE_9;
> + } else {
> + swizzle_x = I915_BIT_6_SWIZZLE_NONE;
> + swizzle_y = I915_BIT_6_SWIZZLE_NONE;
> + }
> + }
> + } else if (IS_GEN5(dev_priv)) {
> + /* On Ironlake, whatever the DRAM config, the GPU always does
> + * the same swizzling setup.
> + */
> + swizzle_x = I915_BIT_6_SWIZZLE_9_10;
> + swizzle_y = I915_BIT_6_SWIZZLE_9;
> + } else if (IS_GEN2(dev_priv)) {
> + /* As far as we know, the 865 doesn't have these bit 6
> + * swizzling issues.
> + */
> + swizzle_x = I915_BIT_6_SWIZZLE_NONE;
> + swizzle_y = I915_BIT_6_SWIZZLE_NONE;
> + } else if (IS_MOBILE(dev_priv) || (IS_GEN3(dev_priv) &&
> + !IS_G33(dev_priv))) {
> + uint32_t dcc;
> +
> + /* On 9xx chipsets, channel interleave by the CPU is
> + * determined by DCC. For single-channel, neither the CPU
> + * nor the GPU do swizzling. For dual channel interleaved,
> + * the GPU's interleave is bit 9 and 10 for X tiled, and bit
> + * 9 for Y tiled. The CPU's interleave is independent, and
> + * can be based on either bit 11 (haven't seen this yet) or
> + * bit 17 (common).
> + */
> + dcc = I915_READ(DCC);
> + switch (dcc & DCC_ADDRESSING_MODE_MASK) {
> + case DCC_ADDRESSING_MODE_SINGLE_CHANNEL:
> + case DCC_ADDRESSING_MODE_DUAL_CHANNEL_ASYMMETRIC:
> + swizzle_x = I915_BIT_6_SWIZZLE_NONE;
> + swizzle_y = I915_BIT_6_SWIZZLE_NONE;
> + break;
> + case DCC_ADDRESSING_MODE_DUAL_CHANNEL_INTERLEAVED:
> + if (dcc & DCC_CHANNEL_XOR_DISABLE) {
> + /* This is the base swizzling by the GPU for
> + * tiled buffers.
> + */
> + swizzle_x = I915_BIT_6_SWIZZLE_9_10;
> + swizzle_y = I915_BIT_6_SWIZZLE_9;
> + } else if ((dcc & DCC_CHANNEL_XOR_BIT_17) == 0) {
> + /* Bit 11 swizzling by the CPU in addition. */
> + swizzle_x = I915_BIT_6_SWIZZLE_9_10_11;
> + swizzle_y = I915_BIT_6_SWIZZLE_9_11;
> + } else {
> + /* Bit 17 swizzling by the CPU in addition. */
> + swizzle_x = I915_BIT_6_SWIZZLE_9_10_17;
> + swizzle_y = I915_BIT_6_SWIZZLE_9_17;
> + }
> + break;
> + }
> +
> + /* check for L-shaped memory aka modified enhanced addressing */
> + if (IS_GEN4(dev_priv) &&
> + !(I915_READ(DCC2) & DCC2_MODIFIED_ENHANCED_DISABLE)) {
> + swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN;
> + swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN;
> + }
> +
> + if (dcc == 0xffffffff) {
> + DRM_ERROR("Couldn't read from MCHBAR. "
> + "Disabling tiling.\n");
> + swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN;
> + swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN;
> + }
> + } else {
> + /* The 965, G33, and newer, have a very flexible memory
> + * configuration. It will enable dual-channel mode
> + * (interleaving) on as much memory as it can, and the GPU
> + * will additionally sometimes enable different bit 6
> + * swizzling for tiled objects from the CPU.
> + *
> + * Here's what I found on the G965:
> + *    slot fill         memory size      swizzling
> + * 0A   0B   1A   1B    1-ch   2-ch
> + * 512  0    0    0     512    0     O
> + * 512  0    512  0     16     1008  X
> + * 512  0    0    512   16     1008  X
> + * 0    512  0    512   16     1008  X
> + * 1024 1024 1024 0     2048   1024  O
> + *
> + * We could probably detect this based on either the DRB
> + * matching, which was the case for the swizzling required in
> + * the table above, or from the 1-ch value being less than
> + * the minimum size of a rank.
> + *
> + * Reports indicate that the swizzling actually
> + * varies depending upon page placement inside the
> + * channels, i.e. we see swizzled pages where the
> + * banks of memory are paired and unswizzled on the
> + * uneven portion, so leave that as unknown.
> + */
> + if (I915_READ16(C0DRB3) == I915_READ16(C1DRB3)) {
> + swizzle_x = I915_BIT_6_SWIZZLE_9_10;
> + swizzle_y = I915_BIT_6_SWIZZLE_9;
> + }
> + }
> +
> + if (swizzle_x == I915_BIT_6_SWIZZLE_UNKNOWN ||
> + swizzle_y == I915_BIT_6_SWIZZLE_UNKNOWN) {
> + /* Userspace likes to explode if it sees unknown swizzling,
> + * so lie. We will finish the lie when reporting through
> + * the get-tiling-ioctl by reporting the physical swizzle
> + * mode as unknown instead.
> + *
> + * As we don't strictly know what the swizzling is, it may be
> + * bit17 dependent, and so we need to also prevent the pages
> + * from being moved.
> + */
> + dev_priv->quirks |= QUIRK_PIN_SWIZZLED_PAGES;
> + swizzle_x = I915_BIT_6_SWIZZLE_NONE;
> + swizzle_y = I915_BIT_6_SWIZZLE_NONE;
> + }
> +
> + dev_priv->mm.bit_6_swizzle_x = swizzle_x;
> + dev_priv->mm.bit_6_swizzle_y = swizzle_y;
> +}
> +
> +/*
> + * Swap every 64 bytes of this page around, to account for it having a new
> + * bit 17 of its physical address and therefore being interpreted differently
> + * by the GPU.
> + */
> +static void
> +i915_gem_swizzle_page(struct page *page)
> +{
> + char temp[64];
> + char *vaddr;
> + int i;
> +
> + vaddr = kmap(page);
> +
> + for (i = 0; i < PAGE_SIZE; i += 128) {
> + memcpy(temp, &vaddr[i], 64);
> + memcpy(&vaddr[i], &vaddr[i + 64], 64);
> + memcpy(&vaddr[i + 64], temp, 64);
> + }
> +
> + kunmap(page);
> +}
> +
> +/**
> + * i915_gem_object_do_bit_17_swizzle - fixup bit 17 swizzling
> + * @obj: i915 GEM buffer object
> + * @pages: the scattergather list of physical pages
> + *
> + * This function fixes up the swizzling in case any page frame number for this
> + * object has changed in bit 17 since that state has been saved with
> + * i915_gem_object_save_bit_17_swizzle().
> + *
> + * This is called when pinning backing storage again, since the kernel is free
> + * to move unpinned backing storage around (either by directly moving pages or
> + * by swapping them out and back in again).
> + */
> +void
> +i915_gem_object_do_bit_17_swizzle(struct drm_i915_gem_object *obj,
> + struct sg_table *pages)
> +{
> + struct sgt_iter sgt_iter;
> + struct page *page;
> + int i;
> +
> + if (obj->bit_17 == NULL)
> + return;
> +
> + i = 0;
> + for_each_sgt_page(page, sgt_iter, pages) {
> + char new_bit_17 = page_to_phys(page) >> 17;
> + if ((new_bit_17 & 0x1) != (test_bit(i, obj->bit_17) != 0)) {
> + i915_gem_swizzle_page(page);
> + set_page_dirty(page);
> + }
> + i++;
> + }
> +}
> +
> +/**
> + * i915_gem_object_save_bit_17_swizzle - save bit 17 swizzling
> + * @obj: i915 GEM buffer object
> + * @pages: the scattergather list of physical pages
> + *
> + * This function saves the bit 17 of each page frame number so that swizzling
> + * can be fixed up later on with i915_gem_object_do_bit_17_swizzle(). This must
> + * be called before the backing storage can be unpinned.
> + */
> +void
> +i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj,
> + struct sg_table *pages)
> +{
> + const unsigned int page_count = obj->base.size >> PAGE_SHIFT;
> + struct sgt_iter sgt_iter;
> + struct page *page;
> + int i;
> +
> + if (obj->bit_17 == NULL) {
> + obj->bit_17 = kcalloc(BITS_TO_LONGS(page_count),
> + sizeof(long), GFP_KERNEL);
> + if (obj->bit_17 == NULL) {
> + DRM_ERROR("Failed to allocate memory for bit 17 "
> + "record\n");
> + return;
> + }
> + }
> +
> + i = 0;
> +
> + for_each_sgt_page(page, sgt_iter, pages) {
> + if (page_to_phys(page) & (1 << 17))
> + __set_bit(i, obj->bit_17);
> + else
> + __clear_bit(i, obj->bit_17);
> + i++;
> + }
> +}
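The pairing of these two is easy to lose track of when shuffling files, so perhaps
worth a cross-reference: they are meant to bracket pinning/unpinning of the shmem
backing pages, roughly as below (sketch only, assuming the
i915_gem_object_needs_bit17_swizzle() predicate from i915_drv.h):

	/* in the get_pages path, after the pages are (re)acquired */
	if (i915_gem_object_needs_bit17_swizzle(obj))
		i915_gem_object_do_bit_17_swizzle(obj, pages);

	/* ... */

	/* in the put_pages path, before the pages are released */
	if (i915_gem_object_needs_bit17_swizzle(obj))
		i915_gem_object_save_bit_17_swizzle(obj, pages);
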
> diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.h b/drivers/gpu/drm/i915/i915_gem_fence_reg.h
> new file mode 100644
> index 0000000..22c4a2d
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.h
> @@ -0,0 +1,51 @@
> +/*
> + * Copyright © 2016 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + *
> + */
> +
> +#ifndef __I915_FENCE_REG_H__
> +#define __I915_FENCE_REG_H__
> +
> +#include <linux/list.h>
> +
> +struct drm_i915_private;
> +struct i915_vma;
> +
> +struct drm_i915_fence_reg {
> + struct list_head link;
> + struct drm_i915_private *i915;
> + struct i915_vma *vma;
> + int pin_count;
> + int id;
> + /**
> + * Whether the tiling parameters for the currently
> + * associated fence register have changed. Note that
> + * for the purposes of tracking tiling changes we also
> + * treat the unfenced register, the register slot that
> + * the object occupies whilst it executes a fenced
> + * command (such as BLT on gen2/3), as a "fence".
> + */
> + bool dirty;
> +};
> +
> +#endif
> +
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index a5fafa3..f60e5a7 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -96,13 +96,6 @@
> *
> */
>
> -static inline struct i915_ggtt *
> -i915_vm_to_ggtt(struct i915_address_space *vm)
> -{
> - GEM_BUG_ON(!i915_is_ggtt(vm));
> - return container_of(vm, struct i915_ggtt, base);
> -}
> -
> static int
> i915_get_ggtt_vma_pages(struct i915_vma *vma);
>
> @@ -3348,176 +3341,6 @@ void i915_gem_restore_gtt_mappings(struct drm_device *dev)
> i915_ggtt_flush(dev_priv);
> }
>
> -static void
> -i915_vma_retire(struct i915_gem_active *active,
> - struct drm_i915_gem_request *rq)
> -{
> - const unsigned int idx = rq->engine->id;
> - struct i915_vma *vma =
> - container_of(active, struct i915_vma, last_read[idx]);
> - struct drm_i915_gem_object *obj = vma->obj;
> -
> - GEM_BUG_ON(!i915_vma_has_active_engine(vma, idx));
> -
> - i915_vma_clear_active(vma, idx);
> - if (i915_vma_is_active(vma))
> - return;
> -
> - list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
> - if (unlikely(i915_vma_is_closed(vma) && !i915_vma_is_pinned(vma)))
> - WARN_ON(i915_vma_unbind(vma));
> -
> - GEM_BUG_ON(!i915_gem_object_is_active(obj));
> - if (--obj->active_count)
> - return;
> -
> - /* Bump our place on the bound list to keep it roughly in LRU order
> - * so that we don't steal from recently used but inactive objects
> - * (unless we are forced to ofc!)
> - */
> - if (obj->bind_count)
> - list_move_tail(&obj->global_link, &rq->i915->mm.bound_list);
> -
> - obj->mm.dirty = true; /* be paranoid */
> -
> - if (i915_gem_object_has_active_reference(obj)) {
> - i915_gem_object_clear_active_reference(obj);
> - i915_gem_object_put(obj);
> - }
> -}
> -
> -static void
> -i915_ggtt_retire__write(struct i915_gem_active *active,
> - struct drm_i915_gem_request *request)
> -{
> - struct i915_vma *vma =
> - container_of(active, struct i915_vma, last_write);
> -
> - intel_fb_obj_flush(vma->obj, true, ORIGIN_CS);
> -}
> -
> -void i915_vma_destroy(struct i915_vma *vma)
> -{
> - GEM_BUG_ON(vma->node.allocated);
> - GEM_BUG_ON(i915_vma_is_active(vma));
> - GEM_BUG_ON(!i915_vma_is_closed(vma));
> - GEM_BUG_ON(vma->fence);
> -
> - list_del(&vma->vm_link);
> - if (!i915_vma_is_ggtt(vma))
> - i915_ppgtt_put(i915_vm_to_ppgtt(vma->vm));
> -
> - kmem_cache_free(to_i915(vma->obj->base.dev)->vmas, vma);
> -}
> -
> -void i915_vma_close(struct i915_vma *vma)
> -{
> - GEM_BUG_ON(i915_vma_is_closed(vma));
> - vma->flags |= I915_VMA_CLOSED;
> -
> - list_del(&vma->obj_link);
> - rb_erase(&vma->obj_node, &vma->obj->vma_tree);
> -
> - if (!i915_vma_is_active(vma) && !i915_vma_is_pinned(vma))
> - WARN_ON(i915_vma_unbind(vma));
> -}
> -
> -static inline long vma_compare(struct i915_vma *vma,
> - struct i915_address_space *vm,
> - const struct i915_ggtt_view *view)
> -{
> - GEM_BUG_ON(view && !i915_is_ggtt(vm));
> -
> - if (vma->vm != vm)
> - return vma->vm - vm;
> -
> - if (!view)
> - return vma->ggtt_view.type;
> -
> - if (vma->ggtt_view.type != view->type)
> - return vma->ggtt_view.type - view->type;
> -
> - return memcmp(&vma->ggtt_view.params,
> - &view->params,
> - sizeof(view->params));
> -}
> -
> -static struct i915_vma *
> -__i915_vma_create(struct drm_i915_gem_object *obj,
> - struct i915_address_space *vm,
> - const struct i915_ggtt_view *view)
> -{
> - struct i915_vma *vma;
> - struct rb_node *rb, **p;
> - int i;
> -
> - GEM_BUG_ON(vm->closed);
> -
> - vma = kmem_cache_zalloc(to_i915(obj->base.dev)->vmas, GFP_KERNEL);
> - if (vma == NULL)
> - return ERR_PTR(-ENOMEM);
> -
> - INIT_LIST_HEAD(&vma->exec_list);
> - for (i = 0; i < ARRAY_SIZE(vma->last_read); i++)
> - init_request_active(&vma->last_read[i], i915_vma_retire);
> - init_request_active(&vma->last_write,
> - i915_is_ggtt(vm) ? i915_ggtt_retire__write : NULL);
> - init_request_active(&vma->last_fence, NULL);
> - list_add(&vma->vm_link, &vm->unbound_list);
> - vma->vm = vm;
> - vma->obj = obj;
> - vma->size = obj->base.size;
> -
> - if (view) {
> - vma->ggtt_view = *view;
> - if (view->type == I915_GGTT_VIEW_PARTIAL) {
> - vma->size = view->params.partial.size;
> - vma->size <<= PAGE_SHIFT;
> - } else if (view->type == I915_GGTT_VIEW_ROTATED) {
> - vma->size =
> - intel_rotation_info_size(&view->params.rotated);
> - vma->size <<= PAGE_SHIFT;
> - }
> - }
> -
> - if (i915_is_ggtt(vm)) {
> - vma->flags |= I915_VMA_GGTT;
> - list_add(&vma->obj_link, &obj->vma_list);
> - } else {
> - i915_ppgtt_get(i915_vm_to_ppgtt(vm));
> - list_add_tail(&vma->obj_link, &obj->vma_list);
> - }
> -
> - rb = NULL;
> - p = &obj->vma_tree.rb_node;
> - while (*p) {
> - struct i915_vma *pos;
> -
> - rb = *p;
> - pos = rb_entry(rb, struct i915_vma, obj_node);
> - if (vma_compare(pos, vm, view) < 0)
> - p = &rb->rb_right;
> - else
> - p = &rb->rb_left;
> - }
> - rb_link_node(&vma->obj_node, rb, p);
> - rb_insert_color(&vma->obj_node, &obj->vma_tree);
> -
> - return vma;
> -}
> -
> -struct i915_vma *
> -i915_vma_create(struct drm_i915_gem_object *obj,
> - struct i915_address_space *vm,
> - const struct i915_ggtt_view *view)
> -{
> - lockdep_assert_held(&obj->base.dev->struct_mutex);
> - GEM_BUG_ON(view && !i915_is_ggtt(vm));
> - GEM_BUG_ON(i915_gem_obj_to_vma(obj, vm, view));
> -
> - return __i915_vma_create(obj, vm, view);
> -}
> -
> struct i915_vma *
> i915_gem_obj_to_vma(struct drm_i915_gem_object *obj,
> struct i915_address_space *vm,
> @@ -3530,7 +3353,7 @@ i915_gem_obj_to_vma(struct drm_i915_gem_object *obj,
> struct i915_vma *vma = rb_entry(rb, struct i915_vma, obj_node);
> long cmp;
>
> - cmp = vma_compare(vma, vm, view);
> + cmp = i915_vma_compare(vma, vm, view);
> if (cmp == 0)
> return vma;
>
> @@ -3555,7 +3378,7 @@ i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj,
>
> vma = i915_gem_obj_to_vma(obj, vm, view);
> if (!vma) {
> - vma = __i915_vma_create(obj, vm, view);
> + vma = i915_vma_create(obj, vm, view);
> GEM_BUG_ON(vma != i915_gem_obj_to_vma(obj, vm, view));
> }
>
> @@ -3747,99 +3570,3 @@ i915_get_ggtt_vma_pages(struct i915_vma *vma)
> return ret;
> }
>
> -/**
> - * i915_vma_bind - Sets up PTEs for an VMA in it's corresponding address space.
> - * @vma: VMA to map
> - * @cache_level: mapping cache level
> - * @flags: flags like global or local mapping
> - *
> - * DMA addresses are taken from the scatter-gather table of this object (or of
> - * this VMA in case of non-default GGTT views) and PTE entries set up.
> - * Note that DMA addresses are also the only part of the SG table we care about.
> - */
> -int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level,
> - u32 flags)
> -{
> - u32 bind_flags;
> - u32 vma_flags;
> - int ret;
> -
> - if (WARN_ON(flags == 0))
> - return -EINVAL;
> -
> - bind_flags = 0;
> - if (flags & PIN_GLOBAL)
> - bind_flags |= I915_VMA_GLOBAL_BIND;
> - if (flags & PIN_USER)
> - bind_flags |= I915_VMA_LOCAL_BIND;
> -
> - vma_flags = vma->flags & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND);
> - if (flags & PIN_UPDATE)
> - bind_flags |= vma_flags;
> - else
> - bind_flags &= ~vma_flags;
> - if (bind_flags == 0)
> - return 0;
> -
> - if (vma_flags == 0 && vma->vm->allocate_va_range) {
> - trace_i915_va_alloc(vma);
> - ret = vma->vm->allocate_va_range(vma->vm,
> - vma->node.start,
> - vma->node.size);
> - if (ret)
> - return ret;
> - }
> -
> - ret = vma->vm->bind_vma(vma, cache_level, bind_flags);
> - if (ret)
> - return ret;
> -
> - vma->flags |= bind_flags;
> - return 0;
> -}
> -
> -void __iomem *i915_vma_pin_iomap(struct i915_vma *vma)
> -{
> - void __iomem *ptr;
> -
> - /* Access through the GTT requires the device to be awake. */
> - assert_rpm_wakelock_held(to_i915(vma->vm->dev));
> -
> - lockdep_assert_held(&vma->vm->dev->struct_mutex);
> - if (WARN_ON(!i915_vma_is_map_and_fenceable(vma)))
> - return IO_ERR_PTR(-ENODEV);
> -
> - GEM_BUG_ON(!i915_vma_is_ggtt(vma));
> - GEM_BUG_ON((vma->flags & I915_VMA_GLOBAL_BIND) == 0);
> -
> - ptr = vma->iomap;
> - if (ptr == NULL) {
> - ptr = io_mapping_map_wc(&i915_vm_to_ggtt(vma->vm)->mappable,
> - vma->node.start,
> - vma->node.size);
> - if (ptr == NULL)
> - return IO_ERR_PTR(-ENOMEM);
> -
> - vma->iomap = ptr;
> - }
> -
> - __i915_vma_pin(vma);
> - return ptr;
> -}
> -
> -void i915_vma_unpin_and_release(struct i915_vma **p_vma)
> -{
> - struct i915_vma *vma;
> - struct drm_i915_gem_object *obj;
> -
> - vma = fetch_and_zero(p_vma);
> - if (!vma)
> - return;
> -
> - obj = vma->obj;
> -
> - i915_vma_unpin(vma);
> - i915_vma_close(vma);
> -
> - __i915_gem_object_release_unless_active(obj);
> -}
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
> index c23ef9d..57b5849 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.h
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
> @@ -35,7 +35,9 @@
> #define __I915_GEM_GTT_H__
>
> #include <linux/io-mapping.h>
> +#include <linux/mm.h>
>
> +#include "i915_gem_timeline.h"
> #include "i915_gem_request.h"
>
> #define I915_FENCE_REG_NONE -1
> @@ -138,6 +140,8 @@ typedef uint64_t gen8_ppgtt_pml4e_t;
> #define GEN8_PPAT_ELLC_OVERRIDE (0<<2)
> #define GEN8_PPAT(i, x) ((uint64_t) (x) << ((i) * 8))
>
> +struct sg_table;
> +
> enum i915_ggtt_view_type {
> I915_GGTT_VIEW_NORMAL = 0,
> I915_GGTT_VIEW_ROTATED,
> @@ -168,135 +172,7 @@ extern const struct i915_ggtt_view i915_ggtt_view_rotated;
>
> enum i915_cache_level;
>
> -/**
> - * A VMA represents a GEM BO that is bound into an address space. Therefore, a
> - * VMA's presence cannot be guaranteed before binding, or after unbinding the
> - * object into/from the address space.
> - *
> - * To make things as simple as possible (ie. no refcounting), a VMA's lifetime
> - * will always be <= an objects lifetime. So object refcounting should cover us.
> - */
> -struct i915_vma {
> - struct drm_mm_node node;
> - struct drm_i915_gem_object *obj;
> - struct i915_address_space *vm;
> - struct drm_i915_fence_reg *fence;
> - struct sg_table *pages;
> - void __iomem *iomap;
> - u64 size;
> - u64 display_alignment;
> -
> - unsigned int flags;
> - /**
> - * How many users have pinned this object in GTT space. The following
> - * users can each hold at most one reference: pwrite/pread, execbuffer
> - * (objects are not allowed multiple times for the same batchbuffer),
> - * and the framebuffer code. When switching/pageflipping, the
> - * framebuffer code has at most two buffers pinned per crtc.
> - *
> - * In the worst case this is 1 + 1 + 1 + 2*2 = 7. That would fit into 3
> - * bits with absolutely no headroom. So use 4 bits.
> - */
> -#define I915_VMA_PIN_MASK 0xf
> -#define I915_VMA_PIN_OVERFLOW BIT(5)
> -
> - /** Flags and address space this VMA is bound to */
> -#define I915_VMA_GLOBAL_BIND BIT(6)
> -#define I915_VMA_LOCAL_BIND BIT(7)
> -#define I915_VMA_BIND_MASK (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND | I915_VMA_PIN_OVERFLOW)
> -
> -#define I915_VMA_GGTT BIT(8)
> -#define I915_VMA_CAN_FENCE BIT(9)
> -#define I915_VMA_CLOSED BIT(10)
> -
> - unsigned int active;
> - struct i915_gem_active last_read[I915_NUM_ENGINES];
> - struct i915_gem_active last_write;
> - struct i915_gem_active last_fence;
> -
> - /**
> - * Support different GGTT views into the same object.
> - * This means there can be multiple VMA mappings per object and per VM.
> - * i915_ggtt_view_type is used to distinguish between those entries.
> - * The default one of zero (I915_GGTT_VIEW_NORMAL) is default and also
> - * assumed in GEM functions which take no ggtt view parameter.
> - */
> - struct i915_ggtt_view ggtt_view;
> -
> - /** This object's place on the active/inactive lists */
> - struct list_head vm_link;
> -
> - struct list_head obj_link; /* Link in the object's VMA list */
> - struct rb_node obj_node;
> -
> - /** This vma's place in the batchbuffer or on the eviction list */
> - struct list_head exec_list;
> -
> - /**
> - * Used for performing relocations during execbuffer insertion.
> - */
> - struct hlist_node exec_node;
> - unsigned long exec_handle;
> - struct drm_i915_gem_exec_object2 *exec_entry;
> -};
> -
> -struct i915_vma *
> -i915_vma_create(struct drm_i915_gem_object *obj,
> - struct i915_address_space *vm,
> - const struct i915_ggtt_view *view);
> -void i915_vma_unpin_and_release(struct i915_vma **p_vma);
> -
> -static inline bool i915_vma_is_ggtt(const struct i915_vma *vma)
> -{
> - return vma->flags & I915_VMA_GGTT;
> -}
> -
> -static inline bool i915_vma_is_map_and_fenceable(const struct i915_vma *vma)
> -{
> - return vma->flags & I915_VMA_CAN_FENCE;
> -}
> -
> -static inline bool i915_vma_is_closed(const struct i915_vma *vma)
> -{
> - return vma->flags & I915_VMA_CLOSED;
> -}
> -
> -static inline unsigned int i915_vma_get_active(const struct i915_vma *vma)
> -{
> - return vma->active;
> -}
> -
> -static inline bool i915_vma_is_active(const struct i915_vma *vma)
> -{
> - return i915_vma_get_active(vma);
> -}
> -
> -static inline void i915_vma_set_active(struct i915_vma *vma,
> - unsigned int engine)
> -{
> - vma->active |= BIT(engine);
> -}
> -
> -static inline void i915_vma_clear_active(struct i915_vma *vma,
> - unsigned int engine)
> -{
> - vma->active &= ~BIT(engine);
> -}
> -
> -static inline bool i915_vma_has_active_engine(const struct i915_vma *vma,
> - unsigned int engine)
> -{
> - return vma->active & BIT(engine);
> -}
> -
> -static inline u32 i915_ggtt_offset(const struct i915_vma *vma)
> -{
> - GEM_BUG_ON(!i915_vma_is_ggtt(vma));
> - GEM_BUG_ON(!vma->node.allocated);
> - GEM_BUG_ON(upper_32_bits(vma->node.start));
> - GEM_BUG_ON(upper_32_bits(vma->node.start + vma->node.size - 1));
> - return lower_32_bits(vma->node.start);
> -}
> +struct i915_vma;
>
> struct i915_page_dma {
> struct page *page;
> @@ -606,6 +482,13 @@ i915_page_dir_dma_addr(const struct i915_hw_ppgtt *ppgtt, const unsigned n)
> px_dma(ppgtt->base.scratch_pd);
> }
>
> +static inline struct i915_ggtt *
> +i915_vm_to_ggtt(struct i915_address_space *vm)
> +{
> + GEM_BUG_ON(!i915_is_ggtt(vm));
> + return container_of(vm, struct i915_ggtt, base);
> +}
> +
> int i915_ggtt_probe_hw(struct drm_i915_private *dev_priv);
> int i915_ggtt_init_hw(struct drm_i915_private *dev_priv);
> int i915_ggtt_enable_hw(struct drm_i915_private *dev_priv);
> @@ -653,88 +536,4 @@ void i915_gem_gtt_finish_pages(struct drm_i915_gem_object *obj,
> #define PIN_OFFSET_FIXED BIT(11)
> #define PIN_OFFSET_MASK (~4095)
>
> -int __i915_vma_do_pin(struct i915_vma *vma,
> - u64 size, u64 alignment, u64 flags);
> -static inline int __must_check
> -i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
> -{
> - BUILD_BUG_ON(PIN_MBZ != I915_VMA_PIN_OVERFLOW);
> - BUILD_BUG_ON(PIN_GLOBAL != I915_VMA_GLOBAL_BIND);
> - BUILD_BUG_ON(PIN_USER != I915_VMA_LOCAL_BIND);
> -
> - /* Pin early to prevent the shrinker/eviction logic from destroying
> - * our vma as we insert and bind.
> - */
> - if (likely(((++vma->flags ^ flags) & I915_VMA_BIND_MASK) == 0))
> - return 0;
> -
> - return __i915_vma_do_pin(vma, size, alignment, flags);
> -}
> -
> -static inline int i915_vma_pin_count(const struct i915_vma *vma)
> -{
> - return vma->flags & I915_VMA_PIN_MASK;
> -}
> -
> -static inline bool i915_vma_is_pinned(const struct i915_vma *vma)
> -{
> - return i915_vma_pin_count(vma);
> -}
> -
> -static inline void __i915_vma_pin(struct i915_vma *vma)
> -{
> - vma->flags++;
> - GEM_BUG_ON(vma->flags & I915_VMA_PIN_OVERFLOW);
> -}
> -
> -static inline void __i915_vma_unpin(struct i915_vma *vma)
> -{
> - GEM_BUG_ON(!i915_vma_is_pinned(vma));
> - vma->flags--;
> -}
> -
> -static inline void i915_vma_unpin(struct i915_vma *vma)
> -{
> - GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
> - __i915_vma_unpin(vma);
> -}
> -
> -/**
> - * i915_vma_pin_iomap - calls ioremap_wc to map the GGTT VMA via the aperture
> - * @vma: VMA to iomap
> - *
> - * The passed in VMA has to be pinned in the global GTT mappable region.
> - * An extra pinning of the VMA is acquired for the return iomapping,
> - * the caller must call i915_vma_unpin_iomap to relinquish the pinning
> - * after the iomapping is no longer required.
> - *
> - * Callers must hold the struct_mutex.
> - *
> - * Returns a valid iomapped pointer or ERR_PTR.
> - */
> -void __iomem *i915_vma_pin_iomap(struct i915_vma *vma);
> -#define IO_ERR_PTR(x) ((void __iomem *)ERR_PTR(x))
> -
> -/**
> - * i915_vma_unpin_iomap - unpins the mapping returned from i915_vma_iomap
> - * @vma: VMA to unpin
> - *
> - * Unpins the previously iomapped VMA from i915_vma_pin_iomap().
> - *
> - * Callers must hold the struct_mutex. This function is only valid to be
> - * called on a VMA previously iomapped by the caller with i915_vma_pin_iomap().
> - */
> -static inline void i915_vma_unpin_iomap(struct i915_vma *vma)
> -{
> - lockdep_assert_held(&vma->vm->dev->struct_mutex);
> - GEM_BUG_ON(vma->iomap == NULL);
> - i915_vma_unpin(vma);
> -}
> -
> -static inline struct page *i915_vma_first_page(struct i915_vma *vma)
> -{
> - GEM_BUG_ON(!vma->pages);
> - return sg_page(vma->pages->sgl);
> -}
> -
> #endif
> diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h
> new file mode 100644
> index 0000000..014f803
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/i915_gem_object.h
> @@ -0,0 +1,337 @@
> +/*
> + * Copyright © 2016 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + *
> + */
> +
> +#ifndef __I915_GEM_OBJECT_H__
> +#define __I915_GEM_OBJECT_H__
> +
> +#include <linux/reservation.h>
> +
> +#include <drm/drm_vma_manager.h>
> +#include <drm/drm_gem.h>
> +#include <drm/drmP.h>
> +
> +#include <drm/i915_drm.h>
> +
> +struct drm_i915_gem_object_ops {
> + unsigned int flags;
> +#define I915_GEM_OBJECT_HAS_STRUCT_PAGE 0x1
> +#define I915_GEM_OBJECT_IS_SHRINKABLE 0x2
> +
> + /* Interface between the GEM object and its backing storage.
> + * get_pages() is called once prior to the use of the associated set
> + * of pages (before binding them into the GTT), and put_pages() is
> + * called after we no longer need them. As we expect there to be
> + * associated cost with migrating pages between the backing storage
> + * and making them available for the GPU (e.g. clflush), we may hold
> + * onto the pages after they are no longer referenced by the GPU
> + * in case they may be used again shortly (for example migrating the
> + * pages to a different memory domain within the GTT). put_pages()
> + * will therefore most likely be called when the object itself is
> + * being released or under memory pressure (where we attempt to
> + * reap pages for the shrinker).
> + */
> + struct sg_table *(*get_pages)(struct drm_i915_gem_object *);
> + void (*put_pages)(struct drm_i915_gem_object *, struct sg_table *);
> +
> + int (*dmabuf_export)(struct drm_i915_gem_object *);
> + void (*release)(struct drm_i915_gem_object *);
> +};
> +
> +struct drm_i915_gem_object {
> + struct drm_gem_object base;
> +
> + const struct drm_i915_gem_object_ops *ops;
> +
> + /** List of VMAs backed by this object */
> + struct list_head vma_list;
> + struct rb_root vma_tree;
> +
> + /** Stolen memory for this object, instead of being backed by shmem. */
> + struct drm_mm_node *stolen;
> + struct list_head global_link;
> + union {
> + struct rcu_head rcu;
> + struct llist_node freed;
> + };
> +
> + /**
> + * Whether the object is currently in the GGTT mmap.
> + */
> + struct list_head userfault_link;
> +
> + /** Used in execbuf to temporarily hold a ref */
> + struct list_head obj_exec_link;
> +
> + struct list_head batch_pool_link;
> +
> + unsigned long flags;
> +
> + /**
> + * Have we taken a reference for the object for incomplete GPU
> + * activity?
> + */
> +#define I915_BO_ACTIVE_REF 0
> +
> + /*
> + * Is the object to be mapped as read-only to the GPU
> + * Only honoured if hardware has relevant pte bit
> + */
> + unsigned long gt_ro:1;
> + unsigned int cache_level:3;
> + unsigned int cache_dirty:1;
> +
> + atomic_t frontbuffer_bits;
> + unsigned int frontbuffer_ggtt_origin; /* write once */
> +
> + /** Current tiling stride for the object, if it's tiled. */
> + unsigned int tiling_and_stride;
> +#define FENCE_MINIMUM_STRIDE 128 /* See i915_tiling_ok() */
> +#define TILING_MASK (FENCE_MINIMUM_STRIDE-1)
> +#define STRIDE_MASK (~TILING_MASK)
> +
> + /** Count of VMA actually bound by this object */
> + unsigned int bind_count;
> + unsigned int active_count;
> + unsigned int pin_display;
> +
> + struct {
> + struct mutex lock; /* protects the pages and their use */
> + atomic_t pages_pin_count;
> +
> + struct sg_table *pages;
> + void *mapping;
> +
> + struct i915_gem_object_page_iter {
> + struct scatterlist *sg_pos;
> + unsigned int sg_idx; /* in pages, but 32bit eek! */
> +
> + struct radix_tree_root radix;
> + struct mutex lock; /* protects this cache */
> + } get_page;
> +
> + /**
> + * Advice: are the backing pages purgeable?
> + */
> + unsigned int madv:2;
> +
> + /**
> + * This is set if the object has been written to since the
> + * pages were last acquired.
> + */
> + bool dirty:1;
> +
> + /**
> + * This is set if the object has been pinned due to unknown
> + * swizzling.
> + */
> + bool quirked:1;
> + } mm;
> +
> + /** Breadcrumb of last rendering to the buffer.
> + * There can only be one writer, but we allow for multiple readers.
> + * If there is a writer that necessarily implies that all other
> + * read requests are complete - but we may only be lazily clearing
> + * the read requests. A read request is naturally the most recent
> + * request on a ring, so we may have two different write and read
> + * requests on one ring where the write request is older than the
> + * read request. This allows for the CPU to read from an active
> + * buffer by only waiting for the write to complete.
> + */
> + struct reservation_object *resv;
> +
> + /** References from framebuffers, locks out tiling changes. */
> + unsigned long framebuffer_references;
> +
> + /** Record of address bit 17 of each page at last unbind. */
> + unsigned long *bit_17;
> +
> + struct i915_gem_userptr {
> + uintptr_t ptr;
> + unsigned read_only :1;
> +
> + struct i915_mm_struct *mm;
> + struct i915_mmu_object *mmu_object;
> + struct work_struct *work;
> + } userptr;
> +
> + /** for phys allocated objects */
> + struct drm_dma_handle *phys_handle;
> +
> + struct reservation_object __builtin_resv;
> +};
> +
> +static inline struct drm_i915_gem_object *
> +to_intel_bo(struct drm_gem_object *gem)
> +{
> + /* Assert that to_intel_bo(NULL) == NULL */
> + BUILD_BUG_ON(offsetof(struct drm_i915_gem_object, base));
> +
> + return container_of(gem, struct drm_i915_gem_object, base);
> +}
> +
> +/**
> + * i915_gem_object_lookup_rcu - look up a temporary GEM object from its handle
> + * @file: DRM file private data
> + * @handle: userspace handle
> + *
> + * Returns:
> + *
> + * A pointer to the object named by the handle if such exists on @filp, NULL
> + * otherwise. This object is only valid whilst under the RCU read lock, and
> + * note carefully the object may be in the process of being destroyed.
> + */
> +static inline struct drm_i915_gem_object *
> +i915_gem_object_lookup_rcu(struct drm_file *file, u32 handle)
> +{
> +#ifdef CONFIG_LOCKDEP
> + WARN_ON(debug_locks && !lock_is_held(&rcu_lock_map));
> +#endif
> + return idr_find(&file->object_idr, handle);
> +}
> +
> +static inline struct drm_i915_gem_object *
> +i915_gem_object_lookup(struct drm_file *file, u32 handle)
> +{
> + struct drm_i915_gem_object *obj;
> +
> + rcu_read_lock();
> + obj = i915_gem_object_lookup_rcu(file, handle);
> + if (obj && !kref_get_unless_zero(&obj->base.refcount))
> + obj = NULL;
> + rcu_read_unlock();
> +
> + return obj;
> +}
> +
> +__deprecated
> +extern struct drm_gem_object *
> +drm_gem_object_lookup(struct drm_file *file, u32 handle);
> +
> +__attribute__((nonnull))
> +static inline struct drm_i915_gem_object *
> +i915_gem_object_get(struct drm_i915_gem_object *obj)
> +{
> + drm_gem_object_reference(&obj->base);
> + return obj;
> +}
> +
> +__deprecated
> +extern void drm_gem_object_reference(struct drm_gem_object *);
> +
> +__attribute__((nonnull))
> +static inline void
> +i915_gem_object_put(struct drm_i915_gem_object *obj)
> +{
> + __drm_gem_object_unreference(&obj->base);
> +}
> +
> +__deprecated
> +extern void drm_gem_object_unreference(struct drm_gem_object *);
> +
> +__deprecated
> +extern void drm_gem_object_unreference_unlocked(struct drm_gem_object *);
> +
> +static inline bool
> +i915_gem_object_is_dead(const struct drm_i915_gem_object *obj)
> +{
> + return atomic_read(&obj->base.refcount.refcount) == 0;
> +}
> +
> +static inline bool
> +i915_gem_object_has_struct_page(const struct drm_i915_gem_object *obj)
> +{
> + return obj->ops->flags & I915_GEM_OBJECT_HAS_STRUCT_PAGE;
> +}
> +
> +static inline bool
> +i915_gem_object_is_shrinkable(const struct drm_i915_gem_object *obj)
> +{
> + return obj->ops->flags & I915_GEM_OBJECT_IS_SHRINKABLE;
> +}
> +
> +static inline bool
> +i915_gem_object_is_active(const struct drm_i915_gem_object *obj)
> +{
> + return obj->active_count;
> +}
> +
> +static inline bool
> +i915_gem_object_has_active_reference(const struct drm_i915_gem_object *obj)
> +{
> + return test_bit(I915_BO_ACTIVE_REF, &obj->flags);
> +}
> +
> +static inline void
> +i915_gem_object_set_active_reference(struct drm_i915_gem_object *obj)
> +{
> + lockdep_assert_held(&obj->base.dev->struct_mutex);
> + __set_bit(I915_BO_ACTIVE_REF, &obj->flags);
> +}
> +
> +static inline void
> +i915_gem_object_clear_active_reference(struct drm_i915_gem_object *obj)
> +{
> + lockdep_assert_held(&obj->base.dev->struct_mutex);
> + __clear_bit(I915_BO_ACTIVE_REF, &obj->flags);
> +}
> +
> +void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj);
> +
> +static inline unsigned int
> +i915_gem_object_get_tiling(struct drm_i915_gem_object *obj)
> +{
> + return obj->tiling_and_stride & TILING_MASK;
> +}
> +
> +static inline bool
> +i915_gem_object_is_tiled(struct drm_i915_gem_object *obj)
> +{
> + return i915_gem_object_get_tiling(obj) != I915_TILING_NONE;
> +}
> +
> +static inline unsigned int
> +i915_gem_object_get_stride(struct drm_i915_gem_object *obj)
> +{
> + return obj->tiling_and_stride & STRIDE_MASK;
> +}
> +
> +static inline struct intel_engine_cs *
> +i915_gem_object_last_write_engine(struct drm_i915_gem_object *obj)
> +{
> + struct intel_engine_cs *engine = NULL;
> + struct dma_fence *fence;
> +
> + rcu_read_lock();
> + fence = reservation_object_get_excl_rcu(obj->resv);
> + rcu_read_unlock();
> +
> + if (fence && dma_fence_is_i915(fence) && !dma_fence_is_signaled(fence))
> + engine = to_request(fence)->engine;
> + dma_fence_put(fence);
> +
> + return engine;
> +}
> +
> +#endif
> +
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
> index 0f69fad..a56559e 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.h
> +++ b/drivers/gpu/drm/i915/i915_gem_request.h
> @@ -30,6 +30,9 @@
> #include "i915_gem.h"
> #include "i915_sw_fence.h"
>
> +struct drm_file;
> +struct drm_i915_gem_object;
> +
> struct intel_wait {
> struct rb_node node;
> struct task_struct *tsk;
> diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
> new file mode 100644
> index 0000000..738ff3a
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/i915_vma.c
> @@ -0,0 +1,650 @@
> +/*
> + * Copyright © 2016 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + *
> + */
> +
> +#include "i915_vma.h"
> +
> +#include "i915_drv.h"
> +#include "intel_ringbuffer.h"
> +#include "intel_frontbuffer.h"
> +
> +#include <drm/drm_gem.h>
> +
> +static void
> +i915_vma_retire(struct i915_gem_active *active,
> + struct drm_i915_gem_request *rq)
> +{
> + const unsigned int idx = rq->engine->id;
> + struct i915_vma *vma =
> + container_of(active, struct i915_vma, last_read[idx]);
> + struct drm_i915_gem_object *obj = vma->obj;
> +
> + GEM_BUG_ON(!i915_vma_has_active_engine(vma, idx));
> +
> + i915_vma_clear_active(vma, idx);
> + if (i915_vma_is_active(vma))
> + return;
> +
> + list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
> + if (unlikely(i915_vma_is_closed(vma) && !i915_vma_is_pinned(vma)))
> + WARN_ON(i915_vma_unbind(vma));
> +
> + GEM_BUG_ON(!i915_gem_object_is_active(obj));
> + if (--obj->active_count)
> + return;
> +
> + /* Bump our place on the bound list to keep it roughly in LRU order
> + * so that we don't steal from recently used but inactive objects
> + * (unless we are forced to ofc!)
> + */
> + if (obj->bind_count)
> + list_move_tail(&obj->global_link, &rq->i915->mm.bound_list);
> +
> + obj->mm.dirty = true; /* be paranoid */
> +
> + if (i915_gem_object_has_active_reference(obj)) {
> + i915_gem_object_clear_active_reference(obj);
> + i915_gem_object_put(obj);
> + }
> +}
> +
> +static void
> +i915_ggtt_retire__write(struct i915_gem_active *active,
> + struct drm_i915_gem_request *request)
> +{
> + struct i915_vma *vma =
> + container_of(active, struct i915_vma, last_write);
> +
> + intel_fb_obj_flush(vma->obj, true, ORIGIN_CS);
> +}
Wouldn't this fit and work better in i915_gem_request.c?
> +
> +static struct i915_vma *
> +__i915_vma_create(struct drm_i915_gem_object *obj,
> + struct i915_address_space *vm,
> + const struct i915_ggtt_view *view)
> +{
> + struct i915_vma *vma;
> + struct rb_node *rb, **p;
> + int i;
> +
> + GEM_BUG_ON(vm->closed);
> +
> + vma = kmem_cache_zalloc(to_i915(obj->base.dev)->vmas, GFP_KERNEL);
> + if (vma == NULL)
> + return ERR_PTR(-ENOMEM);
> +
> + INIT_LIST_HEAD(&vma->exec_list);
> + for (i = 0; i < ARRAY_SIZE(vma->last_read); i++)
> + init_request_active(&vma->last_read[i], i915_vma_retire);
> + init_request_active(&vma->last_write,
> + i915_is_ggtt(vm) ? i915_ggtt_retire__write : NULL);
> + init_request_active(&vma->last_fence, NULL);
> + list_add(&vma->vm_link, &vm->unbound_list);
> + vma->vm = vm;
> + vma->obj = obj;
> + vma->size = obj->base.size;
> +
> + if (view) {
> + vma->ggtt_view = *view;
> + if (view->type == I915_GGTT_VIEW_PARTIAL) {
> + vma->size = view->params.partial.size;
> + vma->size <<= PAGE_SHIFT;
> + } else if (view->type == I915_GGTT_VIEW_ROTATED) {
> + vma->size =
> + intel_rotation_info_size(&view->params.rotated);
> + vma->size <<= PAGE_SHIFT;
> + }
> + }
> +
> + if (i915_is_ggtt(vm)) {
> + vma->flags |= I915_VMA_GGTT;
> + list_add(&vma->obj_link, &obj->vma_list);
> + } else {
> + i915_ppgtt_get(i915_vm_to_ppgtt(vm));
> + list_add_tail(&vma->obj_link, &obj->vma_list);
> + }
> +
> + rb = NULL;
> + p = &obj->vma_tree.rb_node;
> + while (*p) {
> + struct i915_vma *pos;
> +
> + rb = *p;
> + pos = rb_entry(rb, struct i915_vma, obj_node);
> + if (i915_vma_compare(pos, vm, view) < 0)
> + p = &rb->rb_right;
> + else
> + p = &rb->rb_left;
> + }
> + rb_link_node(&vma->obj_node, rb, p);
> + rb_insert_color(&vma->obj_node, &obj->vma_tree);
> +
> + return vma;
> +}
> +
> +struct i915_vma *
> +i915_vma_create(struct drm_i915_gem_object *obj,
> + struct i915_address_space *vm,
> + const struct i915_ggtt_view *view)
> +{
> + lockdep_assert_held(&obj->base.dev->struct_mutex);
> + GEM_BUG_ON(view && !i915_is_ggtt(vm));
> + GEM_BUG_ON(i915_gem_obj_to_vma(obj, vm, view));
> +
> + return __i915_vma_create(obj, vm, view);
> +}
> +
> +/**
> + * i915_vma_bind - Sets up PTEs for a VMA in its corresponding address space.
> + * @vma: VMA to map
> + * @cache_level: mapping cache level
> + * @flags: flags like global or local mapping
> + *
> + * DMA addresses are taken from the scatter-gather table of this object (or of
> + * this VMA in case of non-default GGTT views) and PTE entries set up.
> + * Note that DMA addresses are also the only part of the SG table we care about.
> + */
> +int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level,
> + u32 flags)
> +{
> + u32 bind_flags;
> + u32 vma_flags;
> + int ret;
> +
> + if (WARN_ON(flags == 0))
> + return -EINVAL;
> +
> + bind_flags = 0;
> + if (flags & PIN_GLOBAL)
> + bind_flags |= I915_VMA_GLOBAL_BIND;
> + if (flags & PIN_USER)
> + bind_flags |= I915_VMA_LOCAL_BIND;
> +
> + vma_flags = vma->flags & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND);
> + if (flags & PIN_UPDATE)
> + bind_flags |= vma_flags;
> + else
> + bind_flags &= ~vma_flags;
> + if (bind_flags == 0)
> + return 0;
> +
> + if (vma_flags == 0 && vma->vm->allocate_va_range) {
> + trace_i915_va_alloc(vma);
> + ret = vma->vm->allocate_va_range(vma->vm,
> + vma->node.start,
> + vma->node.size);
> + if (ret)
> + return ret;
> + }
> +
> + ret = vma->vm->bind_vma(vma, cache_level, bind_flags);
> + if (ret)
> + return ret;
> +
> + vma->flags |= bind_flags;
> + return 0;
> +}
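Worth remembering that callers do not normally call i915_vma_bind() directly; they
go through i915_vma_pin(), which reaches this via __i915_vma_do_pin(). A minimal
sketch (the PIN_GLOBAL flag here is just an example):

	/* bind the vma into the global GTT and keep it pinned there */
	ret = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
	if (ret)
		return ret;

	/* ... use the mapping ... */

	i915_vma_unpin(vma);
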
> +
> +void __iomem *i915_vma_pin_iomap(struct i915_vma *vma)
> +{
> + void __iomem *ptr;
> +
> + /* Access through the GTT requires the device to be awake. */
> + assert_rpm_wakelock_held(to_i915(vma->vm->dev));
> +
> + lockdep_assert_held(&vma->vm->dev->struct_mutex);
> + if (WARN_ON(!i915_vma_is_map_and_fenceable(vma)))
> + return IO_ERR_PTR(-ENODEV);
> +
> + GEM_BUG_ON(!i915_vma_is_ggtt(vma));
> + GEM_BUG_ON((vma->flags & I915_VMA_GLOBAL_BIND) == 0);
> +
> + ptr = vma->iomap;
> + if (ptr == NULL) {
> + ptr = io_mapping_map_wc(&i915_vm_to_ggtt(vma->vm)->mappable,
> + vma->node.start,
> + vma->node.size);
> + if (ptr == NULL)
> + return IO_ERR_PTR(-ENOMEM);
> +
> + vma->iomap = ptr;
> + }
> +
> + __i915_vma_pin(vma);
> + return ptr;
> +}
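A minimal caller sketch for the iomap pair, assuming the vma is already bound into
the mappable GGTT and struct_mutex is held (value and offset are made up):

	void __iomem *ptr;

	ptr = i915_vma_pin_iomap(vma);
	if (IS_ERR(ptr))
		return PTR_ERR(ptr);

	/* CPU access through the GTT aperture */
	writel(value, ptr + offset);

	i915_vma_unpin_iomap(vma);
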
> +
> +void i915_vma_unpin_and_release(struct i915_vma **p_vma)
> +{
> + struct i915_vma *vma;
> + struct drm_i915_gem_object *obj;
> +
> + vma = fetch_and_zero(p_vma);
> + if (!vma)
> + return;
> +
> + obj = vma->obj;
> +
> + i915_vma_unpin(vma);
> + i915_vma_close(vma);
> +
> + __i915_gem_object_release_unless_active(obj);
> +}
> +
> +bool
> +i915_vma_misplaced(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
> +{
> + if (!drm_mm_node_allocated(&vma->node))
> + return false;
> +
> + if (vma->node.size < size)
> + return true;
> +
> + if (alignment && vma->node.start & (alignment - 1))
> + return true;
> +
> + if (flags & PIN_MAPPABLE && !i915_vma_is_map_and_fenceable(vma))
> + return true;
> +
> + if (flags & PIN_OFFSET_BIAS &&
> + vma->node.start < (flags & PIN_OFFSET_MASK))
> + return true;
> +
> + if (flags & PIN_OFFSET_FIXED &&
> + vma->node.start != (flags & PIN_OFFSET_MASK))
> + return true;
> +
> + return false;
> +}
> +
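Worth spelling out the expected caller pattern here (a sketch, not part of this
patch): check for misplacement, unbind, then re-pin with the new constraints:

    if (i915_vma_misplaced(vma, size, alignment, flags)) {
            ret = i915_vma_unbind(vma);
            if (ret)
                    return ret;
    }

    ret = i915_vma_pin(vma, size, alignment, flags);
    if (ret)
            return ret;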
> +void __i915_vma_set_map_and_fenceable(struct i915_vma *vma)
> +{
> + struct drm_i915_gem_object *obj = vma->obj;
> + struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
> + bool mappable, fenceable;
> + u32 fence_size, fence_alignment;
> +
> + fence_size = i915_gem_get_ggtt_size(dev_priv,
> + vma->size,
> + i915_gem_object_get_tiling(obj));
> + fence_alignment = i915_gem_get_ggtt_alignment(dev_priv,
> + vma->size,
> + i915_gem_object_get_tiling(obj),
> + true);
> +
> + fenceable = (vma->node.size == fence_size &&
> + (vma->node.start & (fence_alignment - 1)) == 0);
> +
> + mappable = (vma->node.start + fence_size <=
> + dev_priv->ggtt.mappable_end);
> +
> + /*
> + * Explicitly disable for rotated VMA since the display does not
> + * need the fence and the VMA is not accessible to other users.
> + */
> + if (mappable && fenceable &&
> + vma->ggtt_view.type != I915_GGTT_VIEW_ROTATED)
> + vma->flags |= I915_VMA_CAN_FENCE;
> + else
> + vma->flags &= ~I915_VMA_CAN_FENCE;
> +}
> +
> +bool i915_gem_valid_gtt_space(struct i915_vma *vma,
> + unsigned long cache_level)
> +{
> + struct drm_mm_node *gtt_space = &vma->node;
> + struct drm_mm_node *other;
> +
> + /*
> + * On some machines we have to be careful when putting differing types
> + * of snoopable memory together to avoid the prefetcher crossing memory
> + * domains and dying. During vm initialisation, we decide whether or not
> + * these constraints apply and set the drm_mm.color_adjust
> + * appropriately.
> + */
> + if (vma->vm->mm.color_adjust == NULL)
> + return true;
> +
> + if (!drm_mm_node_allocated(gtt_space))
> + return true;
> +
> + if (list_empty(&gtt_space->node_list))
> + return true;
> +
> + other = list_entry(gtt_space->node_list.prev, struct drm_mm_node, node_list);
> + if (other->allocated && !other->hole_follows && other->color != cache_level)
> + return false;
> +
> + other = list_entry(gtt_space->node_list.next, struct drm_mm_node, node_list);
> + if (other->allocated && !gtt_space->hole_follows && other->color != cache_level)
> + return false;
> +
> + return true;
> +}
> +
> +/**
> + * i915_vma_insert - finds a slot for the vma in its address space
> + * @vma: the vma
> + * @size: requested size in bytes (can be larger than the VMA)
> + * @alignment: required alignment
> + * @flags: mask of PIN_* flags to use
> + *
> + * First we try to allocate some free space that meets the requirements for
> + * the VMA. Failing that, if the flags permit, it will evict an old VMA,
> + * preferably the oldest idle entry, to make room for the new VMA.
> + *
> + * Returns:
> + * 0 on success, negative error code otherwise.
> + */
> +static int
> +i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
> +{
> + struct drm_i915_private *dev_priv = to_i915(vma->vm->dev);
> + struct drm_i915_gem_object *obj = vma->obj;
> + u64 start, end;
> + int ret;
> +
> + GEM_BUG_ON(vma->flags & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND));
> + GEM_BUG_ON(drm_mm_node_allocated(&vma->node));
> +
> + size = max(size, vma->size);
> + if (flags & PIN_MAPPABLE)
> + size = i915_gem_get_ggtt_size(dev_priv, size,
> + i915_gem_object_get_tiling(obj));
> +
> + alignment = max(max(alignment, vma->display_alignment),
> + i915_gem_get_ggtt_alignment(dev_priv, size,
> + i915_gem_object_get_tiling(obj),
> + flags & PIN_MAPPABLE));
> +
> + start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0;
> +
> + end = vma->vm->total;
> + if (flags & PIN_MAPPABLE)
> + end = min_t(u64, end, dev_priv->ggtt.mappable_end);
> + if (flags & PIN_ZONE_4G)
> + end = min_t(u64, end, (1ULL << 32) - PAGE_SIZE);
> +
> + /* If binding the object/GGTT view requires more space than the entire
> + * aperture has, reject it early before evicting everything in a vain
> + * attempt to find space.
> + */
> + if (size > end) {
> + DRM_DEBUG("Attempting to bind an object larger than the aperture: request=%llu [object=%zd] > %s aperture=%llu\n",
> + size, obj->base.size,
> + flags & PIN_MAPPABLE ? "mappable" : "total",
> + end);
> + return -E2BIG;
> + }
> +
> + ret = i915_gem_object_pin_pages(obj);
> + if (ret)
> + return ret;
> +
> + if (flags & PIN_OFFSET_FIXED) {
> + u64 offset = flags & PIN_OFFSET_MASK;
> + if (offset & (alignment - 1) || offset > end - size) {
> + ret = -EINVAL;
> + goto err_unpin;
> + }
> +
> + vma->node.start = offset;
> + vma->node.size = size;
> + vma->node.color = obj->cache_level;
> + ret = drm_mm_reserve_node(&vma->vm->mm, &vma->node);
> + if (ret) {
> + ret = i915_gem_evict_for_vma(vma);
> + if (ret == 0)
> + ret = drm_mm_reserve_node(&vma->vm->mm, &vma->node);
> + if (ret)
> + goto err_unpin;
> + }
> + } else {
> + u32 search_flag, alloc_flag;
> +
> + if (flags & PIN_HIGH) {
> + search_flag = DRM_MM_SEARCH_BELOW;
> + alloc_flag = DRM_MM_CREATE_TOP;
> + } else {
> + search_flag = DRM_MM_SEARCH_DEFAULT;
> + alloc_flag = DRM_MM_CREATE_DEFAULT;
> + }
> +
> + /* We only allocate in PAGE_SIZE/GTT_PAGE_SIZE (4096) chunks,
> + * so we know that we always have a minimum alignment of 4096.
> + * The drm_mm range manager is optimised to return results
> + * with zero alignment, so where possible use the optimal
> + * path.
> + */
> + if (alignment <= 4096)
> + alignment = 0;
> +
> +search_free:
> + ret = drm_mm_insert_node_in_range_generic(&vma->vm->mm,
> + &vma->node,
> + size, alignment,
> + obj->cache_level,
> + start, end,
> + search_flag,
> + alloc_flag);
> + if (ret) {
> + ret = i915_gem_evict_something(vma->vm, size, alignment,
> + obj->cache_level,
> + start, end,
> + flags);
> + if (ret == 0)
> + goto search_free;
> +
> + goto err_unpin;
> + }
> +
> + GEM_BUG_ON(vma->node.start < start);
> + GEM_BUG_ON(vma->node.start + vma->node.size > end);
> + }
> + GEM_BUG_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level));
> +
> + list_move_tail(&obj->global_link, &dev_priv->mm.bound_list);
> + list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
> + obj->bind_count++;
> + GEM_BUG_ON(atomic_read(&obj->mm.pages_pin_count) < obj->bind_count);
> +
> + return 0;
> +
> +err_unpin:
> + i915_gem_object_unpin_pages(obj);
> + return ret;
> +}
> +
> +int __i915_vma_do_pin(struct i915_vma *vma,
> + u64 size, u64 alignment, u64 flags)
> +{
> + unsigned int bound = vma->flags;
> + int ret;
> +
> + lockdep_assert_held(&vma->vm->dev->struct_mutex);
> + GEM_BUG_ON((flags & (PIN_GLOBAL | PIN_USER)) == 0);
> + GEM_BUG_ON((flags & PIN_GLOBAL) && !i915_vma_is_ggtt(vma));
> +
> + if (WARN_ON(bound & I915_VMA_PIN_OVERFLOW)) {
> + ret = -EBUSY;
> + goto err;
> + }
> +
> + if ((bound & I915_VMA_BIND_MASK) == 0) {
> + ret = i915_vma_insert(vma, size, alignment, flags);
> + if (ret)
> + goto err;
> + }
> +
> + ret = i915_vma_bind(vma, vma->obj->cache_level, flags);
> + if (ret)
> + goto err;
> +
> + if ((bound ^ vma->flags) & I915_VMA_GLOBAL_BIND)
> + __i915_vma_set_map_and_fenceable(vma);
> +
> + GEM_BUG_ON(i915_vma_misplaced(vma, size, alignment, flags));
> + return 0;
> +
> +err:
> + __i915_vma_unpin(vma);
> + return ret;
> +}
> +
> +void i915_vma_destroy(struct i915_vma *vma)
> +{
> + GEM_BUG_ON(vma->node.allocated);
> + GEM_BUG_ON(i915_vma_is_active(vma));
> + GEM_BUG_ON(!i915_vma_is_closed(vma));
> + GEM_BUG_ON(vma->fence);
> +
> + list_del(&vma->vm_link);
> + if (!i915_vma_is_ggtt(vma))
> + i915_ppgtt_put(i915_vm_to_ppgtt(vma->vm));
> +
> + kmem_cache_free(to_i915(vma->obj->base.dev)->vmas, vma);
> +}
> +
> +void i915_vma_close(struct i915_vma *vma)
> +{
> + GEM_BUG_ON(i915_vma_is_closed(vma));
> + vma->flags |= I915_VMA_CLOSED;
> +
> + list_del(&vma->obj_link);
> + rb_erase(&vma->obj_node, &vma->obj->vma_tree);
> +
> + if (!i915_vma_is_active(vma) && !i915_vma_is_pinned(vma))
> + WARN_ON(i915_vma_unbind(vma));
> +}
> +
> +static void __i915_vma_iounmap(struct i915_vma *vma)
> +{
> + GEM_BUG_ON(i915_vma_is_pinned(vma));
> +
> + if (vma->iomap == NULL)
> + return;
> +
> + io_mapping_unmap(vma->iomap);
> + vma->iomap = NULL;
> +}
> +
> +int i915_vma_unbind(struct i915_vma *vma)
> +{
> + struct drm_i915_gem_object *obj = vma->obj;
> + unsigned long active;
> + int ret;
> +
> + lockdep_assert_held(&obj->base.dev->struct_mutex);
> +
> + /* First wait upon any activity as retiring the request may
> + * have side-effects such as unpinning or even unbinding this vma.
> + */
> + active = i915_vma_get_active(vma);
> + if (active) {
> + int idx;
> +
> + /* When a closed VMA is retired, it is unbound - eek.
> + * In order to prevent it from being recursively closed,
> + * take a pin on the vma so that the second unbind is
> + * aborted.
> + *
> + * Even more scary is that the retire callback may free
> + * the object (last active vma). To prevent the explosion
> + * we defer the actual object free to a worker that can
> + * only proceed once it acquires the struct_mutex (which
> + * we currently hold, therefore it cannot free this object
> + * before we are finished).
> + */
> + __i915_vma_pin(vma);
> +
> + for_each_active(active, idx) {
> + ret = i915_gem_active_retire(&vma->last_read[idx],
> + &vma->vm->dev->struct_mutex);
> + if (ret)
> + break;
> + }
> +
> + __i915_vma_unpin(vma);
> + if (ret)
> + return ret;
> +
> + GEM_BUG_ON(i915_vma_is_active(vma));
> + }
> +
> + if (i915_vma_is_pinned(vma))
> + return -EBUSY;
> +
> + if (!drm_mm_node_allocated(&vma->node))
> + goto destroy;
> +
> + GEM_BUG_ON(obj->bind_count == 0);
> + GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
> +
> + if (i915_vma_is_map_and_fenceable(vma)) {
> + /* release the fence reg _after_ flushing */
> + ret = i915_vma_put_fence(vma);
> + if (ret)
> + return ret;
> +
> + /* Force a pagefault for domain tracking on next user access */
> + i915_gem_release_mmap(obj);
> +
> + __i915_vma_iounmap(vma);
> + vma->flags &= ~I915_VMA_CAN_FENCE;
> + }
> +
> + if (likely(!vma->vm->closed)) {
> + trace_i915_vma_unbind(vma);
> + vma->vm->unbind_vma(vma);
> + }
> + vma->flags &= ~(I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND);
> +
> + drm_mm_remove_node(&vma->node);
> + list_move_tail(&vma->vm_link, &vma->vm->unbound_list);
> +
> + if (vma->pages != obj->mm.pages) {
> + GEM_BUG_ON(!vma->pages);
> + sg_free_table(vma->pages);
> + kfree(vma->pages);
> + }
> + vma->pages = NULL;
> +
> + /* Since the unbound list is global, only move to that list if
> + * no more VMAs exist. */
> + if (--obj->bind_count == 0)
> + list_move_tail(&obj->global_link,
> + &to_i915(obj->base.dev)->mm.unbound_list);
> +
> + /* And finally now the object is completely decoupled from this vma,
> + * we can drop its hold on the backing storage and allow it to be
> + * reaped by the shrinker.
> + */
> + i915_gem_object_unpin_pages(obj);
> +
> +destroy:
> + if (unlikely(i915_vma_is_closed(vma)))
> + i915_vma_destroy(vma);
> +
> + return 0;
> +}
> +
> diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
> new file mode 100644
> index 0000000..d358b30
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/i915_vma.h
> @@ -0,0 +1,342 @@
> +/*
> + * Copyright © 2016 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + *
> + */
> +
> +#ifndef __I915_VMA_H__
> +#define __I915_VMA_H__
> +
> +#include <linux/io-mapping.h>
> +
> +#include <drm/drm_mm.h>
> +
> +#include "i915_gem_gtt.h"
> +#include "i915_gem_fence_reg.h"
> +#include "i915_gem_object.h"
> +#include "i915_gem_request.h"
> +
> +
> +enum i915_cache_level;
> +
> +/**
> + * A VMA represents a GEM BO that is bound into an address space. Therefore, a
> + * VMA's presence cannot be guaranteed before binding, or after unbinding the
> + * object into/from the address space.
> + *
> + * To make things as simple as possible (i.e. no refcounting), a VMA's lifetime
> + * will always be <= an object's lifetime. So object refcounting should cover us.
> + */
> +struct i915_vma {
> + struct drm_mm_node node;
> + struct drm_i915_gem_object *obj;
> + struct i915_address_space *vm;
> + struct drm_i915_fence_reg *fence;
> + struct sg_table *pages;
> + void __iomem *iomap;
> + u64 size;
> + u64 display_alignment;
> +
> + unsigned int flags;
> + /**
> + * How many users have pinned this object in GTT space. The following
> + * users can each hold at most one reference: pwrite/pread, execbuffer
> + * (objects are not allowed multiple times for the same batchbuffer),
> + * and the framebuffer code. When switching/pageflipping, the
> + * framebuffer code has at most two buffers pinned per crtc.
> + *
> + * In the worst case this is 1 + 1 + 1 + 2*2 = 7. That would fit into 3
> + * bits with absolutely no headroom. So use 4 bits.
> + */
> +#define I915_VMA_PIN_MASK 0xf
> +#define I915_VMA_PIN_OVERFLOW BIT(5)
> +
> + /** Flags and address space this VMA is bound to */
> +#define I915_VMA_GLOBAL_BIND BIT(6)
> +#define I915_VMA_LOCAL_BIND BIT(7)
> +#define I915_VMA_BIND_MASK (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND | I915_VMA_PIN_OVERFLOW)
> +
> +#define I915_VMA_GGTT BIT(8)
> +#define I915_VMA_CAN_FENCE BIT(9)
> +#define I915_VMA_CLOSED BIT(10)
> +
> + unsigned int active;
> + struct i915_gem_active last_read[I915_NUM_ENGINES];
> + struct i915_gem_active last_write;
> + struct i915_gem_active last_fence;
> +
> + /**
> + * Support different GGTT views into the same object.
> + * This means there can be multiple VMA mappings per object and per VM.
> + * i915_ggtt_view_type is used to distinguish between those entries.
> + * The default of zero (I915_GGTT_VIEW_NORMAL) is also assumed in
> + * GEM functions which take no ggtt view parameter.
> + */
> + struct i915_ggtt_view ggtt_view;
> +
> + /** This object's place on the active/inactive lists */
> + struct list_head vm_link;
> +
> + struct list_head obj_link; /* Link in the object's VMA list */
> + struct rb_node obj_node;
> +
> + /** This vma's place in the batchbuffer or on the eviction list */
> + struct list_head exec_list;
> +
> + /**
> + * Used for performing relocations during execbuffer insertion.
> + */
> + struct hlist_node exec_node;
> + unsigned long exec_handle;
> + struct drm_i915_gem_exec_object2 *exec_entry;
> +};
> +
> +struct i915_vma *
> +i915_vma_create(struct drm_i915_gem_object *obj,
> + struct i915_address_space *vm,
> + const struct i915_ggtt_view *view);
> +
> +static inline long
> +i915_vma_compare(struct i915_vma *vma,
> + struct i915_address_space *vm,
> + const struct i915_ggtt_view *view)
> +{
> + GEM_BUG_ON(view && !i915_vma_is_ggtt(vma));
> +
> + if (vma->vm != vm)
> + return vma->vm - vm;
> +
> + if (!view)
> + return vma->ggtt_view.type;
> +
> + if (vma->ggtt_view.type != view->type)
> + return vma->ggtt_view.type - view->type;
> +
> + return memcmp(&vma->ggtt_view.params,
> + &view->params,
> + sizeof(view->params));
> +}
> +
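Since i915_vma_compare() now lives here, a lookup that mirrors the rbtree
insertion in __i915_vma_create() would look roughly like the sketch below
(helper name made up):

    /* Hypothetical lookup, walking obj->vma_tree in the same order as the
     * insertion loop in __i915_vma_create().
     */
    static struct i915_vma *
    example_lookup_vma(struct drm_i915_gem_object *obj,
                       struct i915_address_space *vm,
                       const struct i915_ggtt_view *view)
    {
            struct rb_node *rb = obj->vma_tree.rb_node;

            while (rb) {
                    struct i915_vma *vma = rb_entry(rb, struct i915_vma, obj_node);
                    long cmp = i915_vma_compare(vma, vm, view);

                    if (cmp == 0)
                            return vma;
                    if (cmp < 0)
                            rb = rb->rb_right;
                    else
                            rb = rb->rb_left;
            }

            return NULL;
    }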
> +void i915_vma_unpin_and_release(struct i915_vma **p_vma);
> +
> +static inline bool i915_vma_is_ggtt(const struct i915_vma *vma)
> +{
> + return vma->flags & I915_VMA_GGTT;
> +}
> +
> +static inline bool i915_vma_is_map_and_fenceable(const struct i915_vma *vma)
> +{
> + return vma->flags & I915_VMA_CAN_FENCE;
> +}
> +
> +static inline bool i915_vma_is_closed(const struct i915_vma *vma)
> +{
> + return vma->flags & I915_VMA_CLOSED;
> +}
> +
> +static inline unsigned int i915_vma_get_active(const struct i915_vma *vma)
> +{
> + return vma->active;
> +}
> +
> +static inline bool i915_vma_is_active(const struct i915_vma *vma)
> +{
> + return i915_vma_get_active(vma);
> +}
> +
> +static inline void i915_vma_set_active(struct i915_vma *vma,
> + unsigned int engine)
> +{
> + vma->active |= BIT(engine);
> +}
> +
> +static inline void i915_vma_clear_active(struct i915_vma *vma,
> + unsigned int engine)
> +{
> + vma->active &= ~BIT(engine);
> +}
> +
> +static inline bool i915_vma_has_active_engine(const struct i915_vma *vma,
> + unsigned int engine)
> +{
> + return vma->active & BIT(engine);
> +}
> +
> +static inline u32 i915_ggtt_offset(const struct i915_vma *vma)
> +{
> + GEM_BUG_ON(!i915_vma_is_ggtt(vma));
> + GEM_BUG_ON(!vma->node.allocated);
> + GEM_BUG_ON(upper_32_bits(vma->node.start));
> + GEM_BUG_ON(upper_32_bits(vma->node.start + vma->node.size - 1));
> + return lower_32_bits(vma->node.start);
> +}
> +
> +static inline struct i915_vma *i915_vma_get(struct i915_vma *vma)
> +{
> + i915_gem_object_get(vma->obj);
> + return vma;
> +}
> +
> +static inline void i915_vma_put(struct i915_vma *vma)
> +{
> + i915_gem_object_put(vma->obj);
> +}
> +
> +int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level,
> + u32 flags);
> +bool i915_gem_valid_gtt_space(struct i915_vma *vma, unsigned long cache_level);
> +bool
> +i915_vma_misplaced(struct i915_vma *vma, u64 size, u64 alignment, u64 flags);
> +void __i915_vma_set_map_and_fenceable(struct i915_vma *vma);
> +int __must_check i915_vma_unbind(struct i915_vma *vma);
> +void i915_vma_close(struct i915_vma *vma);
> +void i915_vma_destroy(struct i915_vma *vma);
> +
> +int __i915_vma_do_pin(struct i915_vma *vma,
> + u64 size, u64 alignment, u64 flags);
> +static inline int __must_check
> +i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
> +{
> + BUILD_BUG_ON(PIN_MBZ != I915_VMA_PIN_OVERFLOW);
> + BUILD_BUG_ON(PIN_GLOBAL != I915_VMA_GLOBAL_BIND);
> + BUILD_BUG_ON(PIN_USER != I915_VMA_LOCAL_BIND);
> +
> + /* Pin early to prevent the shrinker/eviction logic from destroying
> + * our vma as we insert and bind.
> + */
> + if (likely(((++vma->flags ^ flags) & I915_VMA_BIND_MASK) == 0))
> + return 0;
> +
> + return __i915_vma_do_pin(vma, size, alignment, flags);
> +}
> +
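For reference, the fast path above means callers simply bracket use of the vma
with pin/unpin, something like this sketch (assuming struct_mutex is held and a
GGTT binding is wanted):

    ret = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
    if (ret)
            return ret;

    /* ... use i915_ggtt_offset(vma), submit work, etc. ... */

    i915_vma_unpin(vma);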
> +static inline int i915_vma_pin_count(const struct i915_vma *vma)
> +{
> + return vma->flags & I915_VMA_PIN_MASK;
> +}
> +
> +static inline bool i915_vma_is_pinned(const struct i915_vma *vma)
> +{
> + return i915_vma_pin_count(vma);
> +}
> +
> +static inline void __i915_vma_pin(struct i915_vma *vma)
> +{
> + vma->flags++;
> + GEM_BUG_ON(vma->flags & I915_VMA_PIN_OVERFLOW);
> +}
> +
> +static inline void __i915_vma_unpin(struct i915_vma *vma)
> +{
> + GEM_BUG_ON(!i915_vma_is_pinned(vma));
> + vma->flags--;
> +}
> +
> +static inline void i915_vma_unpin(struct i915_vma *vma)
> +{
> + GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
> + __i915_vma_unpin(vma);
> +}
> +
> +/**
> + * i915_vma_pin_iomap - maps the GGTT VMA via the aperture using io_mapping_map_wc
> + * @vma: VMA to iomap
> + *
> + * The passed in VMA has to be pinned in the global GTT mappable region.
> + * An extra pinning of the VMA is acquired for the return iomapping,
> + * the caller must call i915_vma_unpin_iomap to relinquish the pinning
> + * after the iomapping is no longer required.
> + *
> + * Callers must hold the struct_mutex.
> + *
> + * Returns a valid iomapped pointer or ERR_PTR.
> + */
> +void __iomem *i915_vma_pin_iomap(struct i915_vma *vma);
> +#define IO_ERR_PTR(x) ((void __iomem *)ERR_PTR(x))
> +
> +/**
> + * i915_vma_unpin_iomap - unpins the mapping returned from i915_vma_pin_iomap
> + * @vma: VMA to unpin
> + *
> + * Unpins the previously iomapped VMA from i915_vma_pin_iomap().
> + *
> + * Callers must hold the struct_mutex. This function is only valid to be
> + * called on a VMA previously iomapped by the caller with i915_vma_pin_iomap().
> + */
> +static inline void i915_vma_unpin_iomap(struct i915_vma *vma)
> +{
> + lockdep_assert_held(&vma->vm->dev->struct_mutex);
> + GEM_BUG_ON(vma->iomap == NULL);
> + i915_vma_unpin(vma);
> +}
> +
> +static inline struct page *i915_vma_first_page(struct i915_vma *vma)
> +{
> + GEM_BUG_ON(!vma->pages);
> + return sg_page(vma->pages->sgl);
> +}
> +
> +/**
> + * i915_vma_pin_fence - pin fencing state
> + * @vma: vma to pin fencing for
> + *
> + * This pins the fencing state (whether tiled or untiled) to make sure the
> + * vma (and its object) is ready to be used as a scanout target. Fencing
> + * status must be synchronized first by calling i915_vma_get_fence().
> + *
> + * The resulting fence pin reference must be released again with
> + * i915_vma_unpin_fence().
> + *
> + * Returns:
> + *
> + * True if the vma has a fence, false otherwise.
> + */
> +static inline bool
> +i915_vma_pin_fence(struct i915_vma *vma)
> +{
> + lockdep_assert_held(&vma->vm->dev->struct_mutex);
> + if (vma->fence) {
> + vma->fence->pin_count++;
> + return true;
> + } else
> + return false;
> +}
> +
> +/**
> + * i915_vma_unpin_fence - unpin fencing state
> + * @vma: vma to unpin fencing for
> + *
> + * This releases the fence pin reference acquired through
> + * i915_vma_pin_fence. It will handle both objects with and without an
> + * attached fence correctly, callers do not need to distinguish this.
> + */
> +static inline void
> +i915_vma_unpin_fence(struct i915_vma *vma)
> +{
> + lockdep_assert_held(&vma->vm->dev->struct_mutex);
> + if (vma->fence) {
> + GEM_BUG_ON(vma->fence->pin_count <= 0);
> + vma->fence->pin_count--;
> + }
> +}
> +
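And a sketch of how the fence helpers are meant to pair up for a scanout user,
assuming i915_vma_get_fence() keeps its current signature:

    ret = i915_vma_get_fence(vma);
    if (ret)
            return ret;

    if (i915_vma_pin_fence(vma)) {
            /* ... set up the display plane using the fenced GGTT mapping ... */
            i915_vma_unpin_fence(vma);
    }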
> +#endif
> +
>
Looks like code movement to me, and I like the idea of more separation, so
based on that:
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
Regards,
Tvrtko