[Intel-gfx] [PATCH v2] drm/i915: Split out i915_vma.c
Joonas Lahtinen
joonas.lahtinen at linux.intel.com
Thu Nov 10 10:05:04 UTC 2016
As a side product, two other files had to be split out:
- i915_gem_fence_reg.h
- i915_gem_object.h (only the parts that needed immediate untangling)

I tried to move the code in as large chunks as possible to make
review easier. i915_vma_compare was moved to a header.
v2:
- Use i915_gem_fence_reg.{c,h}
Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
Cc: Chris Wilson <chris at chris-wilson.co.uk>
Signed-off-by: Joonas Lahtinen <joonas.lahtinen at linux.intel.com>
---
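For reference, the comparator moved into i915_vma.h looks roughly like
this (an approximation only; the authoritative version is in the
i915_vma.h hunk of the full diff, and the i915_ggtt_view layout is
assumed from the current tree):

static inline long
i915_vma_compare(struct i915_vma *vma,
		 struct i915_address_space *vm,
		 const struct i915_ggtt_view *view)
{
	GEM_BUG_ON(view && !i915_vma_is_ggtt(vma));

	/* Order first by address space, then by GGTT view. */
	if (vma->vm != vm)
		return vma->vm - vm;

	if (!view)
		return vma->ggtt_view.type;

	if (vma->ggtt_view.type != view->type)
		return vma->ggtt_view.type - view->type;

	return memcmp(&vma->ggtt_view.params, &view->params,
		      sizeof(view->params));
}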
drivers/gpu/drm/i915/Makefile | 3 +-
drivers/gpu/drm/i915/i915_drv.h | 385 +---------------
drivers/gpu/drm/i915/i915_gem.c | 370 ---------------
drivers/gpu/drm/i915/i915_gem_fence.c | 722 ------------------------------
drivers/gpu/drm/i915/i915_gem_fence_reg.c | 722 ++++++++++++++++++++++++++++++
drivers/gpu/drm/i915/i915_gem_fence_reg.h | 51 +++
drivers/gpu/drm/i915/i915_gem_gtt.c | 276 +-----------
drivers/gpu/drm/i915/i915_gem_gtt.h | 225 +---------
drivers/gpu/drm/i915/i915_gem_object.h | 337 ++++++++++++++
drivers/gpu/drm/i915/i915_gem_request.h | 3 +
drivers/gpu/drm/i915/i915_vma.c | 648 +++++++++++++++++++++++++++
drivers/gpu/drm/i915/i915_vma.h | 342 ++++++++++++++
12 files changed, 2124 insertions(+), 1960 deletions(-)
delete mode 100644 drivers/gpu/drm/i915/i915_gem_fence.c
create mode 100644 drivers/gpu/drm/i915/i915_gem_fence_reg.c
create mode 100644 drivers/gpu/drm/i915/i915_gem_fence_reg.h
create mode 100644 drivers/gpu/drm/i915/i915_gem_object.h
create mode 100644 drivers/gpu/drm/i915/i915_vma.c
create mode 100644 drivers/gpu/drm/i915/i915_vma.h
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 0857e50..3dea46a 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -33,7 +33,7 @@ i915-y += i915_cmd_parser.o \
i915_gem_dmabuf.o \
i915_gem_evict.o \
i915_gem_execbuffer.o \
- i915_gem_fence.o \
+ i915_gem_fence_reg.o \
i915_gem_gtt.o \
i915_gem_internal.o \
i915_gem.o \
@@ -45,6 +45,7 @@ i915-y += i915_cmd_parser.o \
i915_gem_timeline.o \
i915_gem_userptr.o \
i915_trace_points.o \
+ i915_vma.o \
intel_breadcrumbs.o \
intel_engine_cs.o \
intel_hangcheck.o \
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index d868979..414a5b0 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -60,11 +60,15 @@
#include "intel_ringbuffer.h"
#include "i915_gem.h"
+#include "i915_gem_fence_reg.h"
+#include "i915_gem_object.h"
#include "i915_gem_gtt.h"
#include "i915_gem_render_state.h"
#include "i915_gem_request.h"
#include "i915_gem_timeline.h"
+#include "i915_vma.h"
+
#include "intel_gvt.h"
/* General customization:
@@ -459,23 +463,6 @@ struct intel_opregion {
struct intel_overlay;
struct intel_overlay_error_state;
-struct drm_i915_fence_reg {
- struct list_head link;
- struct drm_i915_private *i915;
- struct i915_vma *vma;
- int pin_count;
- int id;
- /**
- * Whether the tiling parameters for the currently
- * associated fence register have changed. Note that
- * for the purposes of tracking tiling changes we also
- * treat the unfenced register, the register slot that
- * the object occupies whilst it executes a fenced
- * command (such as BLT on gen2/3), as a "fence".
- */
- bool dirty;
-};
-
struct sdvo_device_mapping {
u8 initialized;
u8 dvo_port;
@@ -2178,31 +2165,6 @@ enum hdmi_force_audio {
#define I915_GTT_OFFSET_NONE ((u32)-1)
-struct drm_i915_gem_object_ops {
- unsigned int flags;
-#define I915_GEM_OBJECT_HAS_STRUCT_PAGE 0x1
-#define I915_GEM_OBJECT_IS_SHRINKABLE 0x2
-
- /* Interface between the GEM object and its backing storage.
- * get_pages() is called once prior to the use of the associated set
- * of pages before to binding them into the GTT, and put_pages() is
- * called after we no longer need them. As we expect there to be
- * associated cost with migrating pages between the backing storage
- * and making them available for the GPU (e.g. clflush), we may hold
- * onto the pages after they are no longer referenced by the GPU
- * in case they may be used again shortly (for example migrating the
- * pages to a different memory domain within the GTT). put_pages()
- * will therefore most likely be called when the object itself is
- * being released or under memory pressure (where we attempt to
- * reap pages for the shrinker).
- */
- struct sg_table *(*get_pages)(struct drm_i915_gem_object *);
- void (*put_pages)(struct drm_i915_gem_object *, struct sg_table *);
-
- int (*dmabuf_export)(struct drm_i915_gem_object *);
- void (*release)(struct drm_i915_gem_object *);
-};
-
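To illustrate the get_pages()/put_pages() contract described in the
comment above, a minimal hypothetical backend would pair the two hooks
like this (the example_* names are invented for illustration; real
implementations live in the shmem, userptr and internal backends):

static struct sg_table *example_get_pages(struct drm_i915_gem_object *obj)
{
	/* Acquire the backing pages and return them as an sg_table.
	 * Called once before the pages are first bound into the GTT. */
	return ERR_PTR(-ENODEV);
}

static void example_put_pages(struct drm_i915_gem_object *obj,
			      struct sg_table *pages)
{
	/* Release the pages; typically when the object is freed or
	 * when the shrinker reaps them under memory pressure. */
}

static const struct drm_i915_gem_object_ops example_ops = {
	.flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE |
		 I915_GEM_OBJECT_IS_SHRINKABLE,
	.get_pages = example_get_pages,
	.put_pages = example_put_pages,
};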
/*
* Frontbuffer tracking bits. Set in obj->frontbuffer_bits while a gem bo is
* considered to be the frontbuffer for the given plane interface-wise. This
@@ -2224,292 +2186,6 @@ struct drm_i915_gem_object_ops {
#define INTEL_FRONTBUFFER_ALL_MASK(pipe) \
(0xff << (INTEL_FRONTBUFFER_BITS_PER_PIPE * (pipe)))
-struct drm_i915_gem_object {
- struct drm_gem_object base;
-
- const struct drm_i915_gem_object_ops *ops;
-
- /** List of VMAs backed by this object */
- struct list_head vma_list;
- struct rb_root vma_tree;
-
- /** Stolen memory for this object, instead of being backed by shmem. */
- struct drm_mm_node *stolen;
- struct list_head global_list;
- union {
- struct rcu_head rcu;
- struct llist_node freed;
- };
-
- /**
- * Whether the object is currently in the GGTT mmap.
- */
- struct list_head userfault_link;
-
- /** Used in execbuf to temporarily hold a ref */
- struct list_head obj_exec_link;
-
- struct list_head batch_pool_link;
-
- unsigned long flags;
-
- /**
- * Have we taken a reference for the object for incomplete GPU
- * activity?
- */
-#define I915_BO_ACTIVE_REF 0
-
- /*
- * Is the object to be mapped as read-only to the GPU
- * Only honoured if hardware has relevant pte bit
- */
- unsigned long gt_ro:1;
- unsigned int cache_level:3;
- unsigned int cache_dirty:1;
-
- atomic_t frontbuffer_bits;
- unsigned int frontbuffer_ggtt_origin; /* write once */
-
- /** Current tiling stride for the object, if it's tiled. */
- unsigned int tiling_and_stride;
-#define FENCE_MINIMUM_STRIDE 128 /* See i915_tiling_ok() */
-#define TILING_MASK (FENCE_MINIMUM_STRIDE-1)
-#define STRIDE_MASK (~TILING_MASK)
-
- /** Count of VMA actually bound by this object */
- unsigned int bind_count;
- unsigned int active_count;
- unsigned int pin_display;
-
- struct {
- struct mutex lock; /* protects the pages and their use */
- atomic_t pages_pin_count;
-
- struct sg_table *pages;
- void *mapping;
-
- struct i915_gem_object_page_iter {
- struct scatterlist *sg_pos;
- unsigned int sg_idx; /* in pages, but 32bit eek! */
-
- struct radix_tree_root radix;
- struct mutex lock; /* protects this cache */
- } get_page;
-
- /**
- * Advice: are the backing pages purgeable?
- */
- unsigned int madv:2;
-
- /**
- * This is set if the object has been written to since the
- * pages were last acquired.
- */
- bool dirty:1;
-
- /**
- * This is set if the object has been pinned due to unknown
- * swizzling.
- */
- bool quirked:1;
- } mm;
-
- /** Breadcrumb of last rendering to the buffer.
- * There can only be one writer, but we allow for multiple readers.
- * If there is a writer that necessarily implies that all other
- * read requests are complete - but we may only be lazily clearing
- * the read requests. A read request is naturally the most recent
- * request on a ring, so we may have two different write and read
- * requests on one ring where the write request is older than the
- * read request. This allows for the CPU to read from an active
- * buffer by only waiting for the write to complete.
- */
- struct reservation_object *resv;
-
- /** References from framebuffers, locks out tiling changes. */
- unsigned long framebuffer_references;
-
- /** Record of address bit 17 of each page at last unbind. */
- unsigned long *bit_17;
-
- struct i915_gem_userptr {
- uintptr_t ptr;
- unsigned read_only :1;
-
- struct i915_mm_struct *mm;
- struct i915_mmu_object *mmu_object;
- struct work_struct *work;
- } userptr;
-
- /** for phys allocated objects */
- struct drm_dma_handle *phys_handle;
-
- struct reservation_object __builtin_resv;
-};
-
-static inline struct drm_i915_gem_object *
-to_intel_bo(struct drm_gem_object *gem)
-{
- /* Assert that to_intel_bo(NULL) == NULL */
- BUILD_BUG_ON(offsetof(struct drm_i915_gem_object, base));
-
- return container_of(gem, struct drm_i915_gem_object, base);
-}
-
-/**
- * i915_gem_object_lookup_rcu - look up a temporary GEM object from its handle
- * @filp: DRM file private date
- * @handle: userspace handle
- *
- * Returns:
- *
- * A pointer to the object named by the handle if such exists on @filp, NULL
- * otherwise. This object is only valid whilst under the RCU read lock, and
- * note carefully the object may be in the process of being destroyed.
- */
-static inline struct drm_i915_gem_object *
-i915_gem_object_lookup_rcu(struct drm_file *file, u32 handle)
-{
-#ifdef CONFIG_LOCKDEP
- WARN_ON(debug_locks && !lock_is_held(&rcu_lock_map));
-#endif
- return idr_find(&file->object_idr, handle);
-}
-
-static inline struct drm_i915_gem_object *
-i915_gem_object_lookup(struct drm_file *file, u32 handle)
-{
- struct drm_i915_gem_object *obj;
-
- rcu_read_lock();
- obj = i915_gem_object_lookup_rcu(file, handle);
- if (obj && !kref_get_unless_zero(&obj->base.refcount))
- obj = NULL;
- rcu_read_unlock();
-
- return obj;
-}
-
-__deprecated
-extern struct drm_gem_object *
-drm_gem_object_lookup(struct drm_file *file, u32 handle);
-
-__attribute__((nonnull))
-static inline struct drm_i915_gem_object *
-i915_gem_object_get(struct drm_i915_gem_object *obj)
-{
- drm_gem_object_reference(&obj->base);
- return obj;
-}
-
-__deprecated
-extern void drm_gem_object_reference(struct drm_gem_object *);
-
-__attribute__((nonnull))
-static inline void
-i915_gem_object_put(struct drm_i915_gem_object *obj)
-{
- __drm_gem_object_unreference(&obj->base);
-}
-
-__deprecated
-extern void drm_gem_object_unreference(struct drm_gem_object *);
-
-__deprecated
-extern void drm_gem_object_unreference_unlocked(struct drm_gem_object *);
-
-static inline bool
-i915_gem_object_is_dead(const struct drm_i915_gem_object *obj)
-{
- return atomic_read(&obj->base.refcount.refcount) == 0;
-}
-
-static inline bool
-i915_gem_object_has_struct_page(const struct drm_i915_gem_object *obj)
-{
- return obj->ops->flags & I915_GEM_OBJECT_HAS_STRUCT_PAGE;
-}
-
-static inline bool
-i915_gem_object_is_shrinkable(const struct drm_i915_gem_object *obj)
-{
- return obj->ops->flags & I915_GEM_OBJECT_IS_SHRINKABLE;
-}
-
-static inline bool
-i915_gem_object_is_active(const struct drm_i915_gem_object *obj)
-{
- return obj->active_count;
-}
-
-static inline bool
-i915_gem_object_has_active_reference(const struct drm_i915_gem_object *obj)
-{
- return test_bit(I915_BO_ACTIVE_REF, &obj->flags);
-}
-
-static inline void
-i915_gem_object_set_active_reference(struct drm_i915_gem_object *obj)
-{
- lockdep_assert_held(&obj->base.dev->struct_mutex);
- __set_bit(I915_BO_ACTIVE_REF, &obj->flags);
-}
-
-static inline void
-i915_gem_object_clear_active_reference(struct drm_i915_gem_object *obj)
-{
- lockdep_assert_held(&obj->base.dev->struct_mutex);
- __clear_bit(I915_BO_ACTIVE_REF, &obj->flags);
-}
-
-void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj);
-
-static inline unsigned int
-i915_gem_object_get_tiling(struct drm_i915_gem_object *obj)
-{
- return obj->tiling_and_stride & TILING_MASK;
-}
-
-static inline bool
-i915_gem_object_is_tiled(struct drm_i915_gem_object *obj)
-{
- return i915_gem_object_get_tiling(obj) != I915_TILING_NONE;
-}
-
-static inline unsigned int
-i915_gem_object_get_stride(struct drm_i915_gem_object *obj)
-{
- return obj->tiling_and_stride & STRIDE_MASK;
-}
-
-static inline struct intel_engine_cs *
-i915_gem_object_last_write_engine(struct drm_i915_gem_object *obj)
-{
- struct intel_engine_cs *engine = NULL;
- struct dma_fence *fence;
-
- rcu_read_lock();
- fence = reservation_object_get_excl_rcu(obj->resv);
- rcu_read_unlock();
-
- if (fence && dma_fence_is_i915(fence) && !dma_fence_is_signaled(fence))
- engine = to_request(fence)->engine;
- dma_fence_put(fence);
-
- return engine;
-}
-
-static inline struct i915_vma *i915_vma_get(struct i915_vma *vma)
-{
- i915_gem_object_get(vma->obj);
- return vma;
-}
-
-static inline void i915_vma_put(struct i915_vma *vma)
-{
- i915_gem_object_put(vma->obj);
-}
-
/*
* Optimised SGL iterator for GEM objects
*/
@@ -3217,13 +2893,6 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
u64 alignment,
u64 flags);
-int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level,
- u32 flags);
-void __i915_vma_set_map_and_fenceable(struct i915_vma *vma);
-int __must_check i915_vma_unbind(struct i915_vma *vma);
-void i915_vma_close(struct i915_vma *vma);
-void i915_vma_destroy(struct i915_vma *vma);
-
int i915_gem_object_unbind(struct drm_i915_gem_object *obj);
void i915_gem_release_mmap(struct drm_i915_gem_object *obj);
@@ -3473,54 +3142,10 @@ i915_gem_object_ggtt_offset(struct drm_i915_gem_object *o,
return i915_ggtt_offset(i915_gem_object_to_ggtt(o, view));
}
-/* i915_gem_fence.c */
+/* i915_gem_fence_reg.c */
int __must_check i915_vma_get_fence(struct i915_vma *vma);
int __must_check i915_vma_put_fence(struct i915_vma *vma);
-/**
- * i915_vma_pin_fence - pin fencing state
- * @vma: vma to pin fencing for
- *
- * This pins the fencing state (whether tiled or untiled) to make sure the
- * vma (and its object) is ready to be used as a scanout target. Fencing
- * status must be synchronized first by calling i915_vma_get_fence().
- *
- * The resulting fence pin reference must be released again with
- * i915_vma_unpin_fence().
- *
- * Returns:
- *
- * True if the vma has a fence, false otherwise.
- */
-static inline bool
-i915_vma_pin_fence(struct i915_vma *vma)
-{
- lockdep_assert_held(&vma->vm->dev->struct_mutex);
- if (vma->fence) {
- vma->fence->pin_count++;
- return true;
- } else
- return false;
-}
-
-/**
- * i915_vma_unpin_fence - unpin fencing state
- * @vma: vma to unpin fencing for
- *
- * This releases the fence pin reference acquired through
- * i915_vma_pin_fence. It will handle both objects with and without an
- * attached fence correctly, callers do not need to distinguish this.
- */
-static inline void
-i915_vma_unpin_fence(struct i915_vma *vma)
-{
- lockdep_assert_held(&vma->vm->dev->struct_mutex);
- if (vma->fence) {
- GEM_BUG_ON(vma->fence->pin_count <= 0);
- vma->fence->pin_count--;
- }
-}
-
void i915_gem_restore_fences(struct drm_device *dev);
void i915_gem_detect_bit_6_swizzle(struct drm_device *dev);
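Usage-wise nothing changes for callers of the relocated fence API; the
pattern remains: synchronize the fence state, then pin it around the
access. A minimal sketch, assuming struct_mutex and an rpm wakeref are
held as the kerneldoc above requires:

	int ret;

	ret = i915_vma_get_fence(vma);
	if (ret)
		return ret;

	if (i915_vma_pin_fence(vma)) {
		/* GTT access here is detiled by the hardware fence */
		i915_vma_unpin_fence(vma);
	}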
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index a97fdfa..657a72b 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2872,117 +2872,6 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
return ret;
}
-static void __i915_vma_iounmap(struct i915_vma *vma)
-{
- GEM_BUG_ON(i915_vma_is_pinned(vma));
-
- if (vma->iomap == NULL)
- return;
-
- io_mapping_unmap(vma->iomap);
- vma->iomap = NULL;
-}
-
-int i915_vma_unbind(struct i915_vma *vma)
-{
- struct drm_i915_gem_object *obj = vma->obj;
- unsigned long active;
- int ret;
-
- lockdep_assert_held(&obj->base.dev->struct_mutex);
-
- /* First wait upon any activity as retiring the request may
- * have side-effects such as unpinning or even unbinding this vma.
- */
- active = i915_vma_get_active(vma);
- if (active) {
- int idx;
-
- /* When a closed VMA is retired, it is unbound - eek.
- * In order to prevent it from being recursively closed,
- * take a pin on the vma so that the second unbind is
- * aborted.
- *
- * Even more scary is that the retire callback may free
- * the object (last active vma). To prevent the explosion
- * we defer the actual object free to a worker that can
- * only proceed once it acquires the struct_mutex (which
- * we currently hold, therefore it cannot free this object
- * before we are finished).
- */
- __i915_vma_pin(vma);
-
- for_each_active(active, idx) {
- ret = i915_gem_active_retire(&vma->last_read[idx],
- &vma->vm->dev->struct_mutex);
- if (ret)
- break;
- }
-
- __i915_vma_unpin(vma);
- if (ret)
- return ret;
-
- GEM_BUG_ON(i915_vma_is_active(vma));
- }
-
- if (i915_vma_is_pinned(vma))
- return -EBUSY;
-
- if (!drm_mm_node_allocated(&vma->node))
- goto destroy;
-
- GEM_BUG_ON(obj->bind_count == 0);
- GEM_BUG_ON(!obj->mm.pages);
-
- if (i915_vma_is_map_and_fenceable(vma)) {
- /* release the fence reg _after_ flushing */
- ret = i915_vma_put_fence(vma);
- if (ret)
- return ret;
-
- /* Force a pagefault for domain tracking on next user access */
- i915_gem_release_mmap(obj);
-
- __i915_vma_iounmap(vma);
- vma->flags &= ~I915_VMA_CAN_FENCE;
- }
-
- if (likely(!vma->vm->closed)) {
- trace_i915_vma_unbind(vma);
- vma->vm->unbind_vma(vma);
- }
- vma->flags &= ~(I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND);
-
- drm_mm_remove_node(&vma->node);
- list_move_tail(&vma->vm_link, &vma->vm->unbound_list);
-
- if (vma->pages != obj->mm.pages) {
- GEM_BUG_ON(!vma->pages);
- sg_free_table(vma->pages);
- kfree(vma->pages);
- }
- vma->pages = NULL;
-
- /* Since the unbound list is global, only move to that list if
- * no more VMAs exist. */
- if (--obj->bind_count == 0)
- list_move_tail(&obj->global_list,
- &to_i915(obj->base.dev)->mm.unbound_list);
-
- /* And finally now the object is completely decoupled from this vma,
- * we can drop its hold on the backing storage and allow it to be
- * reaped by the shrinker.
- */
- i915_gem_object_unpin_pages(obj);
-
-destroy:
- if (unlikely(i915_vma_is_closed(vma)))
- i915_vma_destroy(vma);
-
- return 0;
-}
-
static int wait_for_timeline(struct i915_gem_timeline *tl, unsigned int flags)
{
int ret, i;
@@ -3010,171 +2899,6 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915, unsigned int flags)
return 0;
}
-static bool i915_gem_valid_gtt_space(struct i915_vma *vma,
- unsigned long cache_level)
-{
- struct drm_mm_node *gtt_space = &vma->node;
- struct drm_mm_node *other;
-
- /*
- * On some machines we have to be careful when putting differing types
- * of snoopable memory together to avoid the prefetcher crossing memory
- * domains and dying. During vm initialisation, we decide whether or not
- * these constraints apply and set the drm_mm.color_adjust
- * appropriately.
- */
- if (vma->vm->mm.color_adjust == NULL)
- return true;
-
- if (!drm_mm_node_allocated(gtt_space))
- return true;
-
- if (list_empty(&gtt_space->node_list))
- return true;
-
- other = list_entry(gtt_space->node_list.prev, struct drm_mm_node, node_list);
- if (other->allocated && !other->hole_follows && other->color != cache_level)
- return false;
-
- other = list_entry(gtt_space->node_list.next, struct drm_mm_node, node_list);
- if (other->allocated && !gtt_space->hole_follows && other->color != cache_level)
- return false;
-
- return true;
-}
-
-/**
- * i915_vma_insert - finds a slot for the vma in its address space
- * @vma: the vma
- * @size: requested size in bytes (can be larger than the VMA)
- * @alignment: required alignment
- * @flags: mask of PIN_* flags to use
- *
- * First we try to allocate some free space that meets the requirements for
- * the VMA. Failing that, if the flags permit, it will evict an old VMA,
- * preferably the oldest idle entry to make room for the new VMA.
- *
- * Returns:
- * 0 on success, negative error code otherwise.
- */
-static int
-i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
-{
- struct drm_i915_private *dev_priv = to_i915(vma->vm->dev);
- struct drm_i915_gem_object *obj = vma->obj;
- u64 start, end;
- int ret;
-
- GEM_BUG_ON(vma->flags & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND));
- GEM_BUG_ON(drm_mm_node_allocated(&vma->node));
-
- size = max(size, vma->size);
- if (flags & PIN_MAPPABLE)
- size = i915_gem_get_ggtt_size(dev_priv, size,
- i915_gem_object_get_tiling(obj));
-
- alignment = max(max(alignment, vma->display_alignment),
- i915_gem_get_ggtt_alignment(dev_priv, size,
- i915_gem_object_get_tiling(obj),
- flags & PIN_MAPPABLE));
-
- start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0;
-
- end = vma->vm->total;
- if (flags & PIN_MAPPABLE)
- end = min_t(u64, end, dev_priv->ggtt.mappable_end);
- if (flags & PIN_ZONE_4G)
- end = min_t(u64, end, (1ULL << 32) - PAGE_SIZE);
-
- /* If binding the object/GGTT view requires more space than the entire
- * aperture has, reject it early before evicting everything in a vain
- * attempt to find space.
- */
- if (size > end) {
- DRM_DEBUG("Attempting to bind an object larger than the aperture: request=%llu [object=%zd] > %s aperture=%llu\n",
- size, obj->base.size,
- flags & PIN_MAPPABLE ? "mappable" : "total",
- end);
- return -E2BIG;
- }
-
- ret = i915_gem_object_pin_pages(obj);
- if (ret)
- return ret;
-
- if (flags & PIN_OFFSET_FIXED) {
- u64 offset = flags & PIN_OFFSET_MASK;
- if (offset & (alignment - 1) || offset > end - size) {
- ret = -EINVAL;
- goto err_unpin;
- }
-
- vma->node.start = offset;
- vma->node.size = size;
- vma->node.color = obj->cache_level;
- ret = drm_mm_reserve_node(&vma->vm->mm, &vma->node);
- if (ret) {
- ret = i915_gem_evict_for_vma(vma);
- if (ret == 0)
- ret = drm_mm_reserve_node(&vma->vm->mm, &vma->node);
- if (ret)
- goto err_unpin;
- }
- } else {
- u32 search_flag, alloc_flag;
-
- if (flags & PIN_HIGH) {
- search_flag = DRM_MM_SEARCH_BELOW;
- alloc_flag = DRM_MM_CREATE_TOP;
- } else {
- search_flag = DRM_MM_SEARCH_DEFAULT;
- alloc_flag = DRM_MM_CREATE_DEFAULT;
- }
-
- /* We only allocate in PAGE_SIZE/GTT_PAGE_SIZE (4096) chunks,
- * so we know that we always have a minimum alignment of 4096.
- * The drm_mm range manager is optimised to return results
- * with zero alignment, so where possible use the optimal
- * path.
- */
- if (alignment <= 4096)
- alignment = 0;
-
-search_free:
- ret = drm_mm_insert_node_in_range_generic(&vma->vm->mm,
- &vma->node,
- size, alignment,
- obj->cache_level,
- start, end,
- search_flag,
- alloc_flag);
- if (ret) {
- ret = i915_gem_evict_something(vma->vm, size, alignment,
- obj->cache_level,
- start, end,
- flags);
- if (ret == 0)
- goto search_free;
-
- goto err_unpin;
- }
-
- GEM_BUG_ON(vma->node.start < start);
- GEM_BUG_ON(vma->node.start + vma->node.size > end);
- }
- GEM_BUG_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level));
-
- list_move_tail(&obj->global_list, &dev_priv->mm.bound_list);
- list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
- obj->bind_count++;
-
- return 0;
-
-err_unpin:
- i915_gem_object_unpin_pages(obj);
- return ret;
-}
-
bool
i915_gem_clflush_object(struct drm_i915_gem_object *obj,
bool force)
@@ -3763,100 +3487,6 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
return ret < 0 ? ret : 0;
}
-static bool
-i915_vma_misplaced(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
-{
- if (!drm_mm_node_allocated(&vma->node))
- return false;
-
- if (vma->node.size < size)
- return true;
-
- if (alignment && vma->node.start & (alignment - 1))
- return true;
-
- if (flags & PIN_MAPPABLE && !i915_vma_is_map_and_fenceable(vma))
- return true;
-
- if (flags & PIN_OFFSET_BIAS &&
- vma->node.start < (flags & PIN_OFFSET_MASK))
- return true;
-
- if (flags & PIN_OFFSET_FIXED &&
- vma->node.start != (flags & PIN_OFFSET_MASK))
- return true;
-
- return false;
-}
-
-void __i915_vma_set_map_and_fenceable(struct i915_vma *vma)
-{
- struct drm_i915_gem_object *obj = vma->obj;
- struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
- bool mappable, fenceable;
- u32 fence_size, fence_alignment;
-
- fence_size = i915_gem_get_ggtt_size(dev_priv,
- vma->size,
- i915_gem_object_get_tiling(obj));
- fence_alignment = i915_gem_get_ggtt_alignment(dev_priv,
- vma->size,
- i915_gem_object_get_tiling(obj),
- true);
-
- fenceable = (vma->node.size == fence_size &&
- (vma->node.start & (fence_alignment - 1)) == 0);
-
- mappable = (vma->node.start + fence_size <=
- dev_priv->ggtt.mappable_end);
-
- /*
- * Explicitly disable for rotated VMA since the display does not
- * need the fence and the VMA is not accessible to other users.
- */
- if (mappable && fenceable &&
- vma->ggtt_view.type != I915_GGTT_VIEW_ROTATED)
- vma->flags |= I915_VMA_CAN_FENCE;
- else
- vma->flags &= ~I915_VMA_CAN_FENCE;
-}
-
-int __i915_vma_do_pin(struct i915_vma *vma,
- u64 size, u64 alignment, u64 flags)
-{
- unsigned int bound = vma->flags;
- int ret;
-
- lockdep_assert_held(&vma->vm->dev->struct_mutex);
- GEM_BUG_ON((flags & (PIN_GLOBAL | PIN_USER)) == 0);
- GEM_BUG_ON((flags & PIN_GLOBAL) && !i915_vma_is_ggtt(vma));
-
- if (WARN_ON(bound & I915_VMA_PIN_OVERFLOW)) {
- ret = -EBUSY;
- goto err;
- }
-
- if ((bound & I915_VMA_BIND_MASK) == 0) {
- ret = i915_vma_insert(vma, size, alignment, flags);
- if (ret)
- goto err;
- }
-
- ret = i915_vma_bind(vma, vma->obj->cache_level, flags);
- if (ret)
- goto err;
-
- if ((bound ^ vma->flags) & I915_VMA_GLOBAL_BIND)
- __i915_vma_set_map_and_fenceable(vma);
-
- GEM_BUG_ON(i915_vma_misplaced(vma, size, alignment, flags));
- return 0;
-
-err:
- __i915_vma_unpin(vma);
- return ret;
-}
-
struct i915_vma *
i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
const struct i915_ggtt_view *view,
diff --git a/drivers/gpu/drm/i915/i915_gem_fence.c b/drivers/gpu/drm/i915/i915_gem_fence.c
deleted file mode 100644
index b820584..0000000
--- a/drivers/gpu/drm/i915/i915_gem_fence.c
+++ /dev/null
@@ -1,722 +0,0 @@
-/*
- * Copyright © 2008-2015 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-#include <drm/drmP.h>
-#include <drm/i915_drm.h>
-#include "i915_drv.h"
-
-/**
- * DOC: fence register handling
- *
- * Important to avoid confusion: "fences" in the i915 driver are not execution
- * fences used to track command completion but hardware detiler objects which
- * wrap a given range of the global GTT. Each platform has only a fairly limited
- * set of these objects.
- *
- * Fences are used to detile GTT memory mappings. They're also connected to the
- * hardware frontbuffer render tracking and hence interact with frontbuffer
- * compression. Furthermore on older platforms fences are required for tiled
- * objects used by the display engine. They can also be used by the render
- * engine - they're required for blitter commands and are optional for render
- * commands. But on gen4+ both display (with the exception of fbc) and rendering
- * have their own tiling state bits and don't need fences.
- *
- * Also note that fences only support X and Y tiling and hence can't be used for
- * the fancier new tiling formats like W, Ys and Yf.
- *
- * Finally note that because fences are such a restricted resource they're
- * dynamically associated with objects. Furthermore fence state is committed to
- * the hardware lazily to avoid unnecessary stalls on gen2/3. Therefore code must
- * explicitly call i915_gem_object_get_fence() to synchronize fencing status
- * for cpu access. Also note that some code wants an unfenced view, for those
- * cases the fence can be removed forcefully with i915_gem_object_put_fence().
- *
- * Internally these functions will synchronize with userspace access by removing
- * CPU ptes into GTT mmaps (not the GTT ptes themselves) as needed.
- */
-
-#define pipelined 0
-
-static void i965_write_fence_reg(struct drm_i915_fence_reg *fence,
- struct i915_vma *vma)
-{
- i915_reg_t fence_reg_lo, fence_reg_hi;
- int fence_pitch_shift;
- u64 val;
-
- if (INTEL_INFO(fence->i915)->gen >= 6) {
- fence_reg_lo = FENCE_REG_GEN6_LO(fence->id);
- fence_reg_hi = FENCE_REG_GEN6_HI(fence->id);
- fence_pitch_shift = GEN6_FENCE_PITCH_SHIFT;
-
- } else {
- fence_reg_lo = FENCE_REG_965_LO(fence->id);
- fence_reg_hi = FENCE_REG_965_HI(fence->id);
- fence_pitch_shift = I965_FENCE_PITCH_SHIFT;
- }
-
- val = 0;
- if (vma) {
- unsigned int tiling = i915_gem_object_get_tiling(vma->obj);
- bool is_y_tiled = tiling == I915_TILING_Y;
- unsigned int stride = i915_gem_object_get_stride(vma->obj);
- u32 row_size = stride * (is_y_tiled ? 32 : 8);
- u32 size = rounddown((u32)vma->node.size, row_size);
-
- val = ((vma->node.start + size - 4096) & 0xfffff000) << 32;
- val |= vma->node.start & 0xfffff000;
- val |= (u64)((stride / 128) - 1) << fence_pitch_shift;
- if (is_y_tiled)
- val |= BIT(I965_FENCE_TILING_Y_SHIFT);
- val |= I965_FENCE_REG_VALID;
- }
-
- if (!pipelined) {
- struct drm_i915_private *dev_priv = fence->i915;
-
- /* To w/a incoherency with non-atomic 64-bit register updates,
- * we split the 64-bit update into two 32-bit writes. In order
- * for a partial fence not to be evaluated between writes, we
- * precede the update with write to turn off the fence register,
- * and only enable the fence as the last step.
- *
- * For extra levels of paranoia, we make sure each step lands
- * before applying the next step.
- */
- I915_WRITE(fence_reg_lo, 0);
- POSTING_READ(fence_reg_lo);
-
- I915_WRITE(fence_reg_hi, upper_32_bits(val));
- I915_WRITE(fence_reg_lo, lower_32_bits(val));
- POSTING_READ(fence_reg_lo);
- }
-}
-
-static void i915_write_fence_reg(struct drm_i915_fence_reg *fence,
- struct i915_vma *vma)
-{
- u32 val;
-
- val = 0;
- if (vma) {
- unsigned int tiling = i915_gem_object_get_tiling(vma->obj);
- bool is_y_tiled = tiling == I915_TILING_Y;
- unsigned int stride = i915_gem_object_get_stride(vma->obj);
- int pitch_val;
- int tile_width;
-
- WARN((vma->node.start & ~I915_FENCE_START_MASK) ||
- !is_power_of_2(vma->node.size) ||
- (vma->node.start & (vma->node.size - 1)),
- "object 0x%08llx [fenceable? %d] not 1M or pot-size (0x%08llx) aligned\n",
- vma->node.start,
- i915_vma_is_map_and_fenceable(vma),
- vma->node.size);
-
- if (is_y_tiled && HAS_128_BYTE_Y_TILING(fence->i915))
- tile_width = 128;
- else
- tile_width = 512;
-
- /* Note: pitch better be a power of two tile widths */
- pitch_val = stride / tile_width;
- pitch_val = ffs(pitch_val) - 1;
-
- val = vma->node.start;
- if (is_y_tiled)
- val |= BIT(I830_FENCE_TILING_Y_SHIFT);
- val |= I915_FENCE_SIZE_BITS(vma->node.size);
- val |= pitch_val << I830_FENCE_PITCH_SHIFT;
- val |= I830_FENCE_REG_VALID;
- }
-
- if (!pipelined) {
- struct drm_i915_private *dev_priv = fence->i915;
- i915_reg_t reg = FENCE_REG(fence->id);
-
- I915_WRITE(reg, val);
- POSTING_READ(reg);
- }
-}
-
-static void i830_write_fence_reg(struct drm_i915_fence_reg *fence,
- struct i915_vma *vma)
-{
- u32 val;
-
- val = 0;
- if (vma) {
- unsigned int tiling = i915_gem_object_get_tiling(vma->obj);
- bool is_y_tiled = tiling == I915_TILING_Y;
- unsigned int stride = i915_gem_object_get_stride(vma->obj);
- u32 pitch_val;
-
- WARN((vma->node.start & ~I830_FENCE_START_MASK) ||
- !is_power_of_2(vma->node.size) ||
- (vma->node.start & (vma->node.size - 1)),
- "object 0x%08llx not 512K or pot-size 0x%08llx aligned\n",
- vma->node.start, vma->node.size);
-
- pitch_val = stride / 128;
- pitch_val = ffs(pitch_val) - 1;
-
- val = vma->node.start;
- if (is_y_tiled)
- val |= BIT(I830_FENCE_TILING_Y_SHIFT);
- val |= I830_FENCE_SIZE_BITS(vma->node.size);
- val |= pitch_val << I830_FENCE_PITCH_SHIFT;
- val |= I830_FENCE_REG_VALID;
- }
-
- if (!pipelined) {
- struct drm_i915_private *dev_priv = fence->i915;
- i915_reg_t reg = FENCE_REG(fence->id);
-
- I915_WRITE(reg, val);
- POSTING_READ(reg);
- }
-}
-
-static void fence_write(struct drm_i915_fence_reg *fence,
- struct i915_vma *vma)
-{
- /* Previous access through the fence register is marshalled by
- * the mb() inside the fault handlers (i915_gem_release_mmaps)
- * and explicitly managed for internal users.
- */
-
- if (IS_GEN2(fence->i915))
- i830_write_fence_reg(fence, vma);
- else if (IS_GEN3(fence->i915))
- i915_write_fence_reg(fence, vma);
- else
- i965_write_fence_reg(fence, vma);
-
- /* Access through the fenced region afterwards is
- * ordered by the posting reads whilst writing the registers.
- */
-
- fence->dirty = false;
-}
-
-static int fence_update(struct drm_i915_fence_reg *fence,
- struct i915_vma *vma)
-{
- int ret;
-
- if (vma) {
- if (!i915_vma_is_map_and_fenceable(vma))
- return -EINVAL;
-
- if (WARN(!i915_gem_object_get_stride(vma->obj) ||
- !i915_gem_object_get_tiling(vma->obj),
- "bogus fence setup with stride: 0x%x, tiling mode: %i\n",
- i915_gem_object_get_stride(vma->obj),
- i915_gem_object_get_tiling(vma->obj)))
- return -EINVAL;
-
- ret = i915_gem_active_retire(&vma->last_fence,
- &vma->obj->base.dev->struct_mutex);
- if (ret)
- return ret;
- }
-
- if (fence->vma) {
- ret = i915_gem_active_retire(&fence->vma->last_fence,
- &fence->vma->obj->base.dev->struct_mutex);
- if (ret)
- return ret;
- }
-
- if (fence->vma && fence->vma != vma) {
- /* Ensure that all userspace CPU access is completed before
- * stealing the fence.
- */
- i915_gem_release_mmap(fence->vma->obj);
-
- fence->vma->fence = NULL;
- fence->vma = NULL;
-
- list_move(&fence->link, &fence->i915->mm.fence_list);
- }
-
- fence_write(fence, vma);
-
- if (vma) {
- if (fence->vma != vma) {
- vma->fence = fence;
- fence->vma = vma;
- }
-
- list_move_tail(&fence->link, &fence->i915->mm.fence_list);
- }
-
- return 0;
-}
-
-/**
- * i915_vma_put_fence - force-remove fence for a VMA
- * @vma: vma to map linearly (not through a fence reg)
- *
- * This function force-removes any fence from the given object, which is useful
- * if the kernel wants to do untiled GTT access.
- *
- * Returns:
- *
- * 0 on success, negative error code on failure.
- */
-int
-i915_vma_put_fence(struct i915_vma *vma)
-{
- struct drm_i915_fence_reg *fence = vma->fence;
-
- assert_rpm_wakelock_held(to_i915(vma->vm->dev));
-
- if (!fence)
- return 0;
-
- if (fence->pin_count)
- return -EBUSY;
-
- return fence_update(fence, NULL);
-}
-
-static struct drm_i915_fence_reg *fence_find(struct drm_i915_private *dev_priv)
-{
- struct drm_i915_fence_reg *fence;
-
- list_for_each_entry(fence, &dev_priv->mm.fence_list, link) {
- if (fence->pin_count)
- continue;
-
- return fence;
- }
-
- /* Wait for completion of pending flips which consume fences */
- if (intel_has_pending_fb_unpin(&dev_priv->drm))
- return ERR_PTR(-EAGAIN);
-
- return ERR_PTR(-EDEADLK);
-}
-
-/**
- * i915_vma_get_fence - set up fencing for a vma
- * @vma: vma to map through a fence reg
- *
- * When mapping objects through the GTT, userspace wants to be able to write
- * to them without having to worry about swizzling if the object is tiled.
- * This function walks the fence regs looking for a free one for @obj,
- * stealing one if it can't find any.
- *
- * It then sets up the reg based on the object's properties: address, pitch
- * and tiling format.
- *
- * For an untiled surface, this removes any existing fence.
- *
- * Returns:
- *
- * 0 on success, negative error code on failure.
- */
-int
-i915_vma_get_fence(struct i915_vma *vma)
-{
- struct drm_i915_fence_reg *fence;
- struct i915_vma *set = i915_gem_object_is_tiled(vma->obj) ? vma : NULL;
-
- /* Note that we revoke fences on runtime suspend. Therefore the user
- * must keep the device awake whilst using the fence.
- */
- assert_rpm_wakelock_held(to_i915(vma->vm->dev));
-
- /* Just update our place in the LRU if our fence is getting reused. */
- if (vma->fence) {
- fence = vma->fence;
- if (!fence->dirty) {
- list_move_tail(&fence->link,
- &fence->i915->mm.fence_list);
- return 0;
- }
- } else if (set) {
- fence = fence_find(to_i915(vma->vm->dev));
- if (IS_ERR(fence))
- return PTR_ERR(fence);
- } else
- return 0;
-
- return fence_update(fence, set);
-}
-
-/**
- * i915_gem_restore_fences - restore fence state
- * @dev: DRM device
- *
- * Restore the hw fence state to match the software tracking again, to be called
- * after a gpu reset and on resume. Note that on runtime suspend we only cancel
- * the fences, to be reacquired by the user later.
- */
-void i915_gem_restore_fences(struct drm_device *dev)
-{
- struct drm_i915_private *dev_priv = to_i915(dev);
- int i;
-
- /* Note that this may be called outside of struct_mutex, by
- * runtime suspend/resume. The barrier we require is enforced by
- * rpm itself - all access to fences/GTT are only within an rpm
- * wakeref, and to acquire that wakeref you must pass through here.
- */
-
- for (i = 0; i < dev_priv->num_fence_regs; i++) {
- struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i];
- struct i915_vma *vma = reg->vma;
-
- /*
- * Commit delayed tiling changes if we have an object still
- * attached to the fence, otherwise just clear the fence.
- */
- if (vma && !i915_gem_object_is_tiled(vma->obj)) {
- GEM_BUG_ON(!reg->dirty);
- GEM_BUG_ON(!list_empty(&vma->obj->userfault_link));
-
- list_move(&reg->link, &dev_priv->mm.fence_list);
- vma->fence = NULL;
- vma = NULL;
- }
-
- fence_write(reg, vma);
- reg->vma = vma;
- }
-}
-
-/**
- * DOC: tiling swizzling details
- *
- * The idea behind tiling is to increase cache hit rates by rearranging
- * pixel data so that a group of pixel accesses are in the same cacheline.
- * The performance improvement from doing this on the back/depth buffer is on
- * the order of 30%.
- *
- * Intel architectures make this somewhat more complicated, though, by
- * adjustments made to addressing of data when the memory is in interleaved
- * mode (matched pairs of DIMMS) to improve memory bandwidth.
- * For interleaved memory, the CPU sends every sequential 64 bytes
- * to an alternate memory channel so it can get the bandwidth from both.
- *
- * The GPU also rearranges its accesses for increased bandwidth to interleaved
- * memory, and it matches what the CPU does for non-tiled. However, when tiled
- * it does it a little differently, since one walks addresses not just in the
- * X direction but also Y. So, along with alternating channels when bit
- * 6 of the address flips, it also alternates when other bits flip -- Bits 9
- * (every 512 bytes, an X tile scanline) and 10 (every two X tile scanlines)
- * are common to both the 915 and 965-class hardware.
- *
- * The CPU also sometimes XORs in higher bits as well, to improve
- * bandwidth doing strided access like we do so frequently in graphics. This
- * is called "Channel XOR Randomization" in the MCH documentation. The result
- * is that the CPU is XORing in either bit 11 or bit 17 to bit 6 of its address
- * decode.
- *
- * All of this bit 6 XORing has an effect on our memory management,
- * as we need to make sure that the 3d driver can correctly address object
- * contents.
- *
- * If we don't have interleaved memory, all tiling is safe and no swizzling is
- * required.
- *
- * When bit 17 is XORed in, we simply refuse to tile at all. Bit
- * 17 is not just a page offset, so as we page an object out and back in,
- * individual pages in it will have different bit 17 addresses, resulting in
- * each 64 bytes being swapped with its neighbor!
- *
- * Otherwise, if interleaved, we have to tell the 3d driver what the address
- * swizzling it needs to do is, since it's writing with the CPU to the pages
- * (bit 6 and potentially bit 11 XORed in), and the GPU is reading from the
- * pages (bit 6, 9, and 10 XORed in), resulting in a cumulative bit swizzling
- * required by the CPU of XORing in bit 6, 9, 10, and potentially 11, in order
- * to match what the GPU expects.
- */
-
-/**
- * i915_gem_detect_bit_6_swizzle - detect bit 6 swizzling pattern
- * @dev: DRM device
- *
- * Detects bit 6 swizzling of address lookup between IGD access and CPU
- * access through main memory.
- */
-void
-i915_gem_detect_bit_6_swizzle(struct drm_device *dev)
-{
- struct drm_i915_private *dev_priv = to_i915(dev);
- uint32_t swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN;
- uint32_t swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN;
-
- if (INTEL_GEN(dev_priv) >= 8 || IS_VALLEYVIEW(dev_priv)) {
- /*
- * On BDW+, swizzling is not used. We leave the CPU memory
- * controller in charge of optimizing memory accesses without
- * the extra address manipulation GPU side.
- *
- * VLV and CHV don't have GPU swizzling.
- */
- swizzle_x = I915_BIT_6_SWIZZLE_NONE;
- swizzle_y = I915_BIT_6_SWIZZLE_NONE;
- } else if (INTEL_INFO(dev)->gen >= 6) {
- if (dev_priv->preserve_bios_swizzle) {
- if (I915_READ(DISP_ARB_CTL) &
- DISP_TILE_SURFACE_SWIZZLING) {
- swizzle_x = I915_BIT_6_SWIZZLE_9_10;
- swizzle_y = I915_BIT_6_SWIZZLE_9;
- } else {
- swizzle_x = I915_BIT_6_SWIZZLE_NONE;
- swizzle_y = I915_BIT_6_SWIZZLE_NONE;
- }
- } else {
- uint32_t dimm_c0, dimm_c1;
- dimm_c0 = I915_READ(MAD_DIMM_C0);
- dimm_c1 = I915_READ(MAD_DIMM_C1);
- dimm_c0 &= MAD_DIMM_A_SIZE_MASK | MAD_DIMM_B_SIZE_MASK;
- dimm_c1 &= MAD_DIMM_A_SIZE_MASK | MAD_DIMM_B_SIZE_MASK;
- /* Enable swizzling when the channels are populated
- * with identically sized dimms. We don't need to check
- * the 3rd channel because no cpu with gpu attached
- * ships in that configuration. Also, swizzling only
- * makes sense for 2 channels anyway. */
- if (dimm_c0 == dimm_c1) {
- swizzle_x = I915_BIT_6_SWIZZLE_9_10;
- swizzle_y = I915_BIT_6_SWIZZLE_9;
- } else {
- swizzle_x = I915_BIT_6_SWIZZLE_NONE;
- swizzle_y = I915_BIT_6_SWIZZLE_NONE;
- }
- }
- } else if (IS_GEN5(dev_priv)) {
- /* On Ironlake, whatever the DRAM config, the GPU always
- * does the same swizzling setup.
- */
- swizzle_x = I915_BIT_6_SWIZZLE_9_10;
- swizzle_y = I915_BIT_6_SWIZZLE_9;
- } else if (IS_GEN2(dev_priv)) {
- /* As far as we know, the 865 doesn't have these bit 6
- * swizzling issues.
- */
- swizzle_x = I915_BIT_6_SWIZZLE_NONE;
- swizzle_y = I915_BIT_6_SWIZZLE_NONE;
- } else if (IS_MOBILE(dev_priv) || (IS_GEN3(dev_priv) &&
- !IS_G33(dev_priv))) {
- uint32_t dcc;
-
- /* On 9xx chipsets, channel interleave by the CPU is
- * determined by DCC. For single-channel, neither the CPU
- * nor the GPU do swizzling. For dual channel interleaved,
- * the GPU's interleave is bit 9 and 10 for X tiled, and bit
- * 9 for Y tiled. The CPU's interleave is independent, and
- * can be based on either bit 11 (haven't seen this yet) or
- * bit 17 (common).
- */
- dcc = I915_READ(DCC);
- switch (dcc & DCC_ADDRESSING_MODE_MASK) {
- case DCC_ADDRESSING_MODE_SINGLE_CHANNEL:
- case DCC_ADDRESSING_MODE_DUAL_CHANNEL_ASYMMETRIC:
- swizzle_x = I915_BIT_6_SWIZZLE_NONE;
- swizzle_y = I915_BIT_6_SWIZZLE_NONE;
- break;
- case DCC_ADDRESSING_MODE_DUAL_CHANNEL_INTERLEAVED:
- if (dcc & DCC_CHANNEL_XOR_DISABLE) {
- /* This is the base swizzling by the GPU for
- * tiled buffers.
- */
- swizzle_x = I915_BIT_6_SWIZZLE_9_10;
- swizzle_y = I915_BIT_6_SWIZZLE_9;
- } else if ((dcc & DCC_CHANNEL_XOR_BIT_17) == 0) {
- /* Bit 11 swizzling by the CPU in addition. */
- swizzle_x = I915_BIT_6_SWIZZLE_9_10_11;
- swizzle_y = I915_BIT_6_SWIZZLE_9_11;
- } else {
- /* Bit 17 swizzling by the CPU in addition. */
- swizzle_x = I915_BIT_6_SWIZZLE_9_10_17;
- swizzle_y = I915_BIT_6_SWIZZLE_9_17;
- }
- break;
- }
-
- /* check for L-shaped memory aka modified enhanced addressing */
- if (IS_GEN4(dev_priv) &&
- !(I915_READ(DCC2) & DCC2_MODIFIED_ENHANCED_DISABLE)) {
- swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN;
- swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN;
- }
-
- if (dcc == 0xffffffff) {
- DRM_ERROR("Couldn't read from MCHBAR. "
- "Disabling tiling.\n");
- swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN;
- swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN;
- }
- } else {
- /* The 965, G33, and newer, have a very flexible memory
- * configuration. It will enable dual-channel mode
- * (interleaving) on as much memory as it can, and the GPU
- * will additionally sometimes enable different bit 6
- * swizzling for tiled objects from the CPU.
- *
- * Here's what I found on the G965:
- *    slot fill         memory size  swizzling
- * 0A   0B   1A   1B    1-ch   2-ch
- * 512  0    0    0     512    0     O
- * 512  0    512  0     16     1008  X
- * 512  0    0    512   16     1008  X
- * 0    512  0    512   16     1008  X
- * 1024 1024 1024 0     2048   1024  O
- *
- * We could probably detect this based on either the DRB
- * matching, which was the case for the swizzling required in
- * the table above, or from the 1-ch value being less than
- * the minimum size of a rank.
- *
- * Reports indicate that the swizzling actually
- * varies depending upon page placement inside the
- * channels, i.e. we see swizzled pages where the
- * banks of memory are paired and unswizzled on the
- * uneven portion, so leave that as unknown.
- */
- if (I915_READ16(C0DRB3) == I915_READ16(C1DRB3)) {
- swizzle_x = I915_BIT_6_SWIZZLE_9_10;
- swizzle_y = I915_BIT_6_SWIZZLE_9;
- }
- }
-
- if (swizzle_x == I915_BIT_6_SWIZZLE_UNKNOWN ||
- swizzle_y == I915_BIT_6_SWIZZLE_UNKNOWN) {
- /* Userspace likes to explode if it sees unknown swizzling,
- * so lie. We will finish the lie when reporting through
- * the get-tiling-ioctl by reporting the physical swizzle
- * mode as unknown instead.
- *
- * As we don't strictly know what the swizzling is, it may be
- * bit17 dependent, and so we need to also prevent the pages
- * from being moved.
- */
- dev_priv->quirks |= QUIRK_PIN_SWIZZLED_PAGES;
- swizzle_x = I915_BIT_6_SWIZZLE_NONE;
- swizzle_y = I915_BIT_6_SWIZZLE_NONE;
- }
-
- dev_priv->mm.bit_6_swizzle_x = swizzle_x;
- dev_priv->mm.bit_6_swizzle_y = swizzle_y;
-}
-
-/*
- * Swap every 64 bytes of this page around, to account for it having a new
- * bit 17 of its physical address and therefore being interpreted differently
- * by the GPU.
- */
-static void
-i915_gem_swizzle_page(struct page *page)
-{
- char temp[64];
- char *vaddr;
- int i;
-
- vaddr = kmap(page);
-
- for (i = 0; i < PAGE_SIZE; i += 128) {
- memcpy(temp, &vaddr[i], 64);
- memcpy(&vaddr[i], &vaddr[i + 64], 64);
- memcpy(&vaddr[i + 64], temp, 64);
- }
-
- kunmap(page);
-}
-
-/**
- * i915_gem_object_do_bit_17_swizzle - fixup bit 17 swizzling
- * @obj: i915 GEM buffer object
- * @pages: the scattergather list of physical pages
- *
- * This function fixes up the swizzling in case any page frame number for this
- * object has changed in bit 17 since that state has been saved with
- * i915_gem_object_save_bit_17_swizzle().
- *
- * This is called when pinning backing storage again, since the kernel is free
- * to move unpinned backing storage around (either by directly moving pages or
- * by swapping them out and back in again).
- */
-void
-i915_gem_object_do_bit_17_swizzle(struct drm_i915_gem_object *obj,
- struct sg_table *pages)
-{
- struct sgt_iter sgt_iter;
- struct page *page;
- int i;
-
- if (obj->bit_17 == NULL)
- return;
-
- i = 0;
- for_each_sgt_page(page, sgt_iter, pages) {
- char new_bit_17 = page_to_phys(page) >> 17;
- if ((new_bit_17 & 0x1) != (test_bit(i, obj->bit_17) != 0)) {
- i915_gem_swizzle_page(page);
- set_page_dirty(page);
- }
- i++;
- }
-}
-
-/**
- * i915_gem_object_save_bit_17_swizzle - save bit 17 swizzling
- * @obj: i915 GEM buffer object
- * @pages: the scattergather list of physical pages
- *
- * This function saves the bit 17 of each page frame number so that swizzling
- * can be fixed up later on with i915_gem_object_do_bit_17_swizzle(). This must
- * be called before the backing storage can be unpinned.
- */
-void
-i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj,
- struct sg_table *pages)
-{
- const unsigned int page_count = obj->base.size >> PAGE_SHIFT;
- struct sgt_iter sgt_iter;
- struct page *page;
- int i;
-
- if (obj->bit_17 == NULL) {
- obj->bit_17 = kcalloc(BITS_TO_LONGS(page_count),
- sizeof(long), GFP_KERNEL);
- if (obj->bit_17 == NULL) {
- DRM_ERROR("Failed to allocate memory for bit 17 "
- "record\n");
- return;
- }
- }
-
- i = 0;
-
- for_each_sgt_page(page, sgt_iter, pages) {
- if (page_to_phys(page) & (1 << 17))
- __set_bit(i, obj->bit_17);
- else
- __clear_bit(i, obj->bit_17);
- i++;
- }
-}
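To make the swizzle modes in the DOC comment above concrete: with
I915_BIT_6_SWIZZLE_9_10, the effective bit 6 the GPU uses is
bit6 ^ bit9 ^ bit10, so CPU code touching tiled pages must apply the
same transform. An illustrative helper (the name is invented and not
part of this patch):

static inline u32 swizzle_bit6_9_10(u32 addr)
{
	/* (addr >> 3) and (addr >> 4) bring bits 9 and 10 down to
	 * position 6; XORing them in flips bit 6 accordingly. */
	return addr ^ (((addr >> 3) ^ (addr >> 4)) & (1u << 6));
}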
diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.c b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
new file mode 100644
index 0000000..b820584
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
@@ -0,0 +1,722 @@
+/*
+ * Copyright © 2008-2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <drm/drmP.h>
+#include <drm/i915_drm.h>
+#include "i915_drv.h"
+
+/**
+ * DOC: fence register handling
+ *
+ * Important to avoid confusion: "fences" in the i915 driver are not execution
+ * fences used to track command completion but hardware detiler objects which
+ * wrap a given range of the global GTT. Each platform has only a fairly limited
+ * set of these objects.
+ *
+ * Fences are used to detile GTT memory mappings. They're also connected to the
+ * hardware frontbuffer render tracking and hence interact with frontbuffer
+ * compression. Furthermore on older platforms fences are required for tiled
+ * objects used by the display engine. They can also be used by the render
+ * engine - they're required for blitter commands and are optional for render
+ * commands. But on gen4+ both display (with the exception of fbc) and rendering
+ * have their own tiling state bits and don't need fences.
+ *
+ * Also note that fences only support X and Y tiling and hence can't be used for
+ * the fancier new tiling formats like W, Ys and Yf.
+ *
+ * Finally note that because fences are such a restricted resource they're
+ * dynamically associated with objects. Furthermore fence state is committed to
+ * the hardware lazily to avoid unnecessary stalls on gen2/3. Therefore code must
+ * explicitly call i915_gem_object_get_fence() to synchronize fencing status
+ * for cpu access. Also note that some code wants an unfenced view, for those
+ * cases the fence can be removed forcefully with i915_gem_object_put_fence().
+ *
+ * Internally these functions will synchronize with userspace access by removing
+ * CPU ptes into GTT mmaps (not the GTT ptes themselves) as needed.
+ */
+
+#define pipelined 0
+
+static void i965_write_fence_reg(struct drm_i915_fence_reg *fence,
+ struct i915_vma *vma)
+{
+ i915_reg_t fence_reg_lo, fence_reg_hi;
+ int fence_pitch_shift;
+ u64 val;
+
+ if (INTEL_INFO(fence->i915)->gen >= 6) {
+ fence_reg_lo = FENCE_REG_GEN6_LO(fence->id);
+ fence_reg_hi = FENCE_REG_GEN6_HI(fence->id);
+ fence_pitch_shift = GEN6_FENCE_PITCH_SHIFT;
+
+ } else {
+ fence_reg_lo = FENCE_REG_965_LO(fence->id);
+ fence_reg_hi = FENCE_REG_965_HI(fence->id);
+ fence_pitch_shift = I965_FENCE_PITCH_SHIFT;
+ }
+
+ val = 0;
+ if (vma) {
+ unsigned int tiling = i915_gem_object_get_tiling(vma->obj);
+ bool is_y_tiled = tiling == I915_TILING_Y;
+ unsigned int stride = i915_gem_object_get_stride(vma->obj);
+ u32 row_size = stride * (is_y_tiled ? 32 : 8);
+ u32 size = rounddown((u32)vma->node.size, row_size);
+
+ val = ((vma->node.start + size - 4096) & 0xfffff000) << 32;
+ val |= vma->node.start & 0xfffff000;
+ val |= (u64)((stride / 128) - 1) << fence_pitch_shift;
+ if (is_y_tiled)
+ val |= BIT(I965_FENCE_TILING_Y_SHIFT);
+ val |= I965_FENCE_REG_VALID;
+ }
+
+ if (!pipelined) {
+ struct drm_i915_private *dev_priv = fence->i915;
+
+ /* To w/a incoherency with non-atomic 64-bit register updates,
+ * we split the 64-bit update into two 32-bit writes. In order
+ * for a partial fence not to be evaluated between writes, we
+ * precede the update with write to turn off the fence register,
+ * and only enable the fence as the last step.
+ *
+ * For extra levels of paranoia, we make sure each step lands
+ * before applying the next step.
+ */
+ I915_WRITE(fence_reg_lo, 0);
+ POSTING_READ(fence_reg_lo);
+
+ I915_WRITE(fence_reg_hi, upper_32_bits(val));
+ I915_WRITE(fence_reg_lo, lower_32_bits(val));
+ POSTING_READ(fence_reg_lo);
+ }
+}
+
+static void i915_write_fence_reg(struct drm_i915_fence_reg *fence,
+ struct i915_vma *vma)
+{
+ u32 val;
+
+ val = 0;
+ if (vma) {
+ unsigned int tiling = i915_gem_object_get_tiling(vma->obj);
+ bool is_y_tiled = tiling == I915_TILING_Y;
+ unsigned int stride = i915_gem_object_get_stride(vma->obj);
+ int pitch_val;
+ int tile_width;
+
+ WARN((vma->node.start & ~I915_FENCE_START_MASK) ||
+ !is_power_of_2(vma->node.size) ||
+ (vma->node.start & (vma->node.size - 1)),
+ "object 0x%08llx [fenceable? %d] not 1M or pot-size (0x%08llx) aligned\n",
+ vma->node.start,
+ i915_vma_is_map_and_fenceable(vma),
+ vma->node.size);
+
+ if (is_y_tiled && HAS_128_BYTE_Y_TILING(fence->i915))
+ tile_width = 128;
+ else
+ tile_width = 512;
+
+ /* Note: pitch better be a power of two tile widths */
+ pitch_val = stride / tile_width;
+ pitch_val = ffs(pitch_val) - 1;
+
+ val = vma->node.start;
+ if (is_y_tiled)
+ val |= BIT(I830_FENCE_TILING_Y_SHIFT);
+ val |= I915_FENCE_SIZE_BITS(vma->node.size);
+ val |= pitch_val << I830_FENCE_PITCH_SHIFT;
+ val |= I830_FENCE_REG_VALID;
+ }
+
+ if (!pipelined) {
+ struct drm_i915_private *dev_priv = fence->i915;
+ i915_reg_t reg = FENCE_REG(fence->id);
+
+ I915_WRITE(reg, val);
+ POSTING_READ(reg);
+ }
+}
+
+static void i830_write_fence_reg(struct drm_i915_fence_reg *fence,
+ struct i915_vma *vma)
+{
+ u32 val;
+
+ val = 0;
+ if (vma) {
+ unsigned int tiling = i915_gem_object_get_tiling(vma->obj);
+ bool is_y_tiled = tiling == I915_TILING_Y;
+ unsigned int stride = i915_gem_object_get_stride(vma->obj);
+ u32 pitch_val;
+
+ WARN((vma->node.start & ~I830_FENCE_START_MASK) ||
+ !is_power_of_2(vma->node.size) ||
+ (vma->node.start & (vma->node.size - 1)),
+ "object 0x%08llx not 512K or pot-size 0x%08llx aligned\n",
+ vma->node.start, vma->node.size);
+
+ pitch_val = stride / 128;
+ pitch_val = ffs(pitch_val) - 1;
+
+ val = vma->node.start;
+ if (is_y_tiled)
+ val |= BIT(I830_FENCE_TILING_Y_SHIFT);
+ val |= I830_FENCE_SIZE_BITS(vma->node.size);
+ val |= pitch_val << I830_FENCE_PITCH_SHIFT;
+ val |= I830_FENCE_REG_VALID;
+ }
+
+ if (!pipelined) {
+ struct drm_i915_private *dev_priv = fence->i915;
+ i915_reg_t reg = FENCE_REG(fence->id);
+
+ I915_WRITE(reg, val);
+ POSTING_READ(reg);
+ }
+}
+
+static void fence_write(struct drm_i915_fence_reg *fence,
+ struct i915_vma *vma)
+{
+ /* Previous access through the fence register is marshalled by
+ * the mb() inside the fault handlers (i915_gem_release_mmaps)
+ * and explicitly managed for internal users.
+ */
+
+ if (IS_GEN2(fence->i915))
+ i830_write_fence_reg(fence, vma);
+ else if (IS_GEN3(fence->i915))
+ i915_write_fence_reg(fence, vma);
+ else
+ i965_write_fence_reg(fence, vma);
+
+ /* Access through the fenced region afterwards is
+ * ordered by the posting reads whilst writing the registers.
+ */
+
+ fence->dirty = false;
+}
+
+static int fence_update(struct drm_i915_fence_reg *fence,
+ struct i915_vma *vma)
+{
+ int ret;
+
+ if (vma) {
+ if (!i915_vma_is_map_and_fenceable(vma))
+ return -EINVAL;
+
+ if (WARN(!i915_gem_object_get_stride(vma->obj) ||
+ !i915_gem_object_get_tiling(vma->obj),
+ "bogus fence setup with stride: 0x%x, tiling mode: %i\n",
+ i915_gem_object_get_stride(vma->obj),
+ i915_gem_object_get_tiling(vma->obj)))
+ return -EINVAL;
+
+ ret = i915_gem_active_retire(&vma->last_fence,
+ &vma->obj->base.dev->struct_mutex);
+ if (ret)
+ return ret;
+ }
+
+ if (fence->vma) {
+ ret = i915_gem_active_retire(&fence->vma->last_fence,
+ &fence->vma->obj->base.dev->struct_mutex);
+ if (ret)
+ return ret;
+ }
+
+ if (fence->vma && fence->vma != vma) {
+ /* Ensure that all userspace CPU access is completed before
+ * stealing the fence.
+ */
+ i915_gem_release_mmap(fence->vma->obj);
+
+ fence->vma->fence = NULL;
+ fence->vma = NULL;
+
+ list_move(&fence->link, &fence->i915->mm.fence_list);
+ }
+
+ fence_write(fence, vma);
+
+ if (vma) {
+ if (fence->vma != vma) {
+ vma->fence = fence;
+ fence->vma = vma;
+ }
+
+ list_move_tail(&fence->link, &fence->i915->mm.fence_list);
+ }
+
+ return 0;
+}
+
+/**
+ * i915_vma_put_fence - force-remove fence for a VMA
+ * @vma: vma to map linearly (not through a fence reg)
+ *
+ * This function force-removes any fence from the given object, which is useful
+ * if the kernel wants to do untiled GTT access.
+ *
+ * Returns:
+ *
+ * 0 on success, negative error code on failure.
+ */
+int
+i915_vma_put_fence(struct i915_vma *vma)
+{
+ struct drm_i915_fence_reg *fence = vma->fence;
+
+ assert_rpm_wakelock_held(to_i915(vma->vm->dev));
+
+ if (!fence)
+ return 0;
+
+ if (fence->pin_count)
+ return -EBUSY;
+
+ return fence_update(fence, NULL);
+}
+
+static struct drm_i915_fence_reg *fence_find(struct drm_i915_private *dev_priv)
+{
+ struct drm_i915_fence_reg *fence;
+
+ list_for_each_entry(fence, &dev_priv->mm.fence_list, link) {
+ if (fence->pin_count)
+ continue;
+
+ return fence;
+ }
+
+ /* Wait for completion of pending flips which consume fences */
+ if (intel_has_pending_fb_unpin(&dev_priv->drm))
+ return ERR_PTR(-EAGAIN);
+
+ return ERR_PTR(-EDEADLK);
+}
+
+/**
+ * i915_vma_get_fence - set up fencing for a vma
+ * @vma: vma to map through a fence reg
+ *
+ * When mapping objects through the GTT, userspace wants to be able to write
+ * to them without having to worry about swizzling if the object is tiled.
+ * This function walks the fence regs looking for a free one for @obj,
+ * stealing one if it can't find any.
+ *
+ * It then sets up the reg based on the object's properties: address, pitch
+ * and tiling format.
+ *
+ * For an untiled surface, this removes any existing fence.
+ *
+ * Returns:
+ *
+ * 0 on success, negative error code on failure.
+ */
+int
+i915_vma_get_fence(struct i915_vma *vma)
+{
+ struct drm_i915_fence_reg *fence;
+ struct i915_vma *set = i915_gem_object_is_tiled(vma->obj) ? vma : NULL;
+
+ /* Note that we revoke fences on runtime suspend. Therefore the user
+ * must keep the device awake whilst using the fence.
+ */
+ assert_rpm_wakelock_held(to_i915(vma->vm->dev));
+
+ /* Just update our place in the LRU if our fence is getting reused. */
+ if (vma->fence) {
+ fence = vma->fence;
+ if (!fence->dirty) {
+ list_move_tail(&fence->link,
+ &fence->i915->mm.fence_list);
+ return 0;
+ }
+ } else if (set) {
+ fence = fence_find(to_i915(vma->vm->dev));
+ if (IS_ERR(fence))
+ return PTR_ERR(fence);
+ } else
+ return 0;
+
+ return fence_update(fence, set);
+}
+
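For illustration, a sketch (not part of this patch) of how a caller with an
already pinned vma would pair the two entry points above:

	intel_runtime_pm_get(dev_priv);	/* fences are revoked on runtime suspend */

	err = i915_vma_get_fence(vma);
	if (err == 0) {
		/* tiled, fenced access through the mappable aperture */
	}

	err = i915_vma_put_fence(vma);	/* -EBUSY while the fence is pinned */

	intel_runtime_pm_put(dev_priv);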
+/**
+ * i915_gem_restore_fences - restore fence state
+ * @dev: DRM device
+ *
+ * Restore the hw fence state to match the software tracking again, to be called
+ * after a gpu reset and on resume. Note that on runtime suspend we only cancel
+ * the fences, to be reacquired by the user later.
+ */
+void i915_gem_restore_fences(struct drm_device *dev)
+{
+ struct drm_i915_private *dev_priv = to_i915(dev);
+ int i;
+
+ /* Note that this may be called outside of struct_mutex, by
+ * runtime suspend/resume. The barrier we require is enforced by
+ * rpm itself - all access to fences/GTT are only within an rpm
+ * wakeref, and to acquire that wakeref you must pass through here.
+ */
+
+ for (i = 0; i < dev_priv->num_fence_regs; i++) {
+ struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i];
+ struct i915_vma *vma = reg->vma;
+
+ /*
+ * Commit delayed tiling changes if we have an object still
+ * attached to the fence, otherwise just clear the fence.
+ */
+ if (vma && !i915_gem_object_is_tiled(vma->obj)) {
+ GEM_BUG_ON(!reg->dirty);
+ GEM_BUG_ON(!list_empty(&vma->obj->userfault_link));
+
+			list_move(&reg->link, &dev_priv->mm.fence_list);
+ vma->fence = NULL;
+ vma = NULL;
+ }
+
+ fence_write(reg, vma);
+ reg->vma = vma;
+ }
+}
+
+/**
+ * DOC: tiling swizzling details
+ *
+ * The idea behind tiling is to increase cache hit rates by rearranging
+ * pixel data so that a group of pixel accesses are in the same cacheline.
+ * Performance improvement from doing this on the back/depth buffer are on
+ * the order of 30%.
+ *
+ * Intel architectures make this somewhat more complicated, though, by
+ * adjustments made to addressing of data when the memory is in interleaved
+ * mode (matched pairs of DIMMS) to improve memory bandwidth.
+ * For interleaved memory, the CPU sends every sequential 64 bytes
+ * to an alternate memory channel so it can get the bandwidth from both.
+ *
+ * The GPU also rearranges its accesses for increased bandwidth to interleaved
+ * memory, and it matches what the CPU does for non-tiled. However, when tiled
+ * it does it a little differently, since one walks addresses not just in the
+ * X direction but also Y. So, along with alternating channels when bit
+ * 6 of the address flips, it also alternates when other bits flip -- Bits 9
+ * (every 512 bytes, an X tile scanline) and 10 (every two X tile scanlines)
+ * are common to both the 915 and 965-class hardware.
+ *
+ * The CPU also sometimes XORs in higher bits as well, to improve
+ * bandwidth doing strided access like we do so frequently in graphics. This
+ * is called "Channel XOR Randomization" in the MCH documentation. The result
+ * is that the CPU is XORing in either bit 11 or bit 17 to bit 6 of its address
+ * decode.
+ *
+ * All of this bit 6 XORing has an effect on our memory management,
+ * as we need to make sure that the 3d driver can correctly address object
+ * contents.
+ *
+ * If we don't have interleaved memory, all tiling is safe and no swizzling is
+ * required.
+ *
+ * When bit 17 is XORed in, we simply refuse to tile at all. Bit
+ * 17 is not just a page offset, so as we page an object out and back in,
+ * individual pages in it will have different bit 17 addresses, resulting in
+ * each 64 bytes being swapped with its neighbor!
+ *
+ * Otherwise, if interleaved, we have to tell the 3d driver what the address
+ * swizzling it needs to do is, since it's writing with the CPU to the pages
+ * (bit 6 and potentially bit 11 XORed in), and the GPU is reading from the
+ * pages (bit 6, 9, and 10 XORed in), resulting in a cumulative bit swizzling
+ * required by the CPU of XORing in bit 6, 9, 10, and potentially 11, in order
+ * to match what the GPU expects.
+ */
+
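As a worked example of the cumulative XOR described above, this is roughly
what userspace (e.g. libdrm) does for I915_BIT_6_SWIZZLE_9 and
I915_BIT_6_SWIZZLE_9_10; the helpers are illustrative, not kernel code:

	/* Fold bit 9 (and, for 9_10, bit 10) of the offset into bit 6. */
	static u32 swizzle_bit6_9(u32 offset)
	{
		return offset ^ ((offset >> 3) & 64);
	}

	static u32 swizzle_bit6_9_10(u32 offset)
	{
		return offset ^ (((offset >> 3) ^ (offset >> 4)) & 64);
	}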
+/**
+ * i915_gem_detect_bit_6_swizzle - detect bit 6 swizzling pattern
+ * @dev: DRM device
+ *
+ * Detects bit 6 swizzling of address lookup between IGD access and CPU
+ * access through main memory.
+ */
+void
+i915_gem_detect_bit_6_swizzle(struct drm_device *dev)
+{
+ struct drm_i915_private *dev_priv = to_i915(dev);
+ uint32_t swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN;
+ uint32_t swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN;
+
+ if (INTEL_GEN(dev_priv) >= 8 || IS_VALLEYVIEW(dev_priv)) {
+ /*
+ * On BDW+, swizzling is not used. We leave the CPU memory
+ * controller in charge of optimizing memory accesses without
+ * the extra address manipulation GPU side.
+ *
+ * VLV and CHV don't have GPU swizzling.
+ */
+ swizzle_x = I915_BIT_6_SWIZZLE_NONE;
+ swizzle_y = I915_BIT_6_SWIZZLE_NONE;
+ } else if (INTEL_INFO(dev)->gen >= 6) {
+ if (dev_priv->preserve_bios_swizzle) {
+ if (I915_READ(DISP_ARB_CTL) &
+ DISP_TILE_SURFACE_SWIZZLING) {
+ swizzle_x = I915_BIT_6_SWIZZLE_9_10;
+ swizzle_y = I915_BIT_6_SWIZZLE_9;
+ } else {
+ swizzle_x = I915_BIT_6_SWIZZLE_NONE;
+ swizzle_y = I915_BIT_6_SWIZZLE_NONE;
+ }
+ } else {
+ uint32_t dimm_c0, dimm_c1;
+ dimm_c0 = I915_READ(MAD_DIMM_C0);
+ dimm_c1 = I915_READ(MAD_DIMM_C1);
+ dimm_c0 &= MAD_DIMM_A_SIZE_MASK | MAD_DIMM_B_SIZE_MASK;
+ dimm_c1 &= MAD_DIMM_A_SIZE_MASK | MAD_DIMM_B_SIZE_MASK;
+ /* Enable swizzling when the channels are populated
+ * with identically sized dimms. We don't need to check
+ * the 3rd channel because no cpu with gpu attached
+ * ships in that configuration. Also, swizzling only
+ * makes sense for 2 channels anyway. */
+ if (dimm_c0 == dimm_c1) {
+ swizzle_x = I915_BIT_6_SWIZZLE_9_10;
+ swizzle_y = I915_BIT_6_SWIZZLE_9;
+ } else {
+ swizzle_x = I915_BIT_6_SWIZZLE_NONE;
+ swizzle_y = I915_BIT_6_SWIZZLE_NONE;
+ }
+ }
+ } else if (IS_GEN5(dev_priv)) {
+		/* On Ironlake, whatever the DRAM config, the GPU always does
+		 * the same swizzling setup.
+ */
+ swizzle_x = I915_BIT_6_SWIZZLE_9_10;
+ swizzle_y = I915_BIT_6_SWIZZLE_9;
+ } else if (IS_GEN2(dev_priv)) {
+ /* As far as we know, the 865 doesn't have these bit 6
+ * swizzling issues.
+ */
+ swizzle_x = I915_BIT_6_SWIZZLE_NONE;
+ swizzle_y = I915_BIT_6_SWIZZLE_NONE;
+ } else if (IS_MOBILE(dev_priv) || (IS_GEN3(dev_priv) &&
+ !IS_G33(dev_priv))) {
+ uint32_t dcc;
+
+ /* On 9xx chipsets, channel interleave by the CPU is
+ * determined by DCC. For single-channel, neither the CPU
+ * nor the GPU do swizzling. For dual channel interleaved,
+ * the GPU's interleave is bit 9 and 10 for X tiled, and bit
+ * 9 for Y tiled. The CPU's interleave is independent, and
+ * can be based on either bit 11 (haven't seen this yet) or
+ * bit 17 (common).
+ */
+ dcc = I915_READ(DCC);
+ switch (dcc & DCC_ADDRESSING_MODE_MASK) {
+ case DCC_ADDRESSING_MODE_SINGLE_CHANNEL:
+ case DCC_ADDRESSING_MODE_DUAL_CHANNEL_ASYMMETRIC:
+ swizzle_x = I915_BIT_6_SWIZZLE_NONE;
+ swizzle_y = I915_BIT_6_SWIZZLE_NONE;
+ break;
+ case DCC_ADDRESSING_MODE_DUAL_CHANNEL_INTERLEAVED:
+ if (dcc & DCC_CHANNEL_XOR_DISABLE) {
+ /* This is the base swizzling by the GPU for
+ * tiled buffers.
+ */
+ swizzle_x = I915_BIT_6_SWIZZLE_9_10;
+ swizzle_y = I915_BIT_6_SWIZZLE_9;
+ } else if ((dcc & DCC_CHANNEL_XOR_BIT_17) == 0) {
+ /* Bit 11 swizzling by the CPU in addition. */
+ swizzle_x = I915_BIT_6_SWIZZLE_9_10_11;
+ swizzle_y = I915_BIT_6_SWIZZLE_9_11;
+ } else {
+ /* Bit 17 swizzling by the CPU in addition. */
+ swizzle_x = I915_BIT_6_SWIZZLE_9_10_17;
+ swizzle_y = I915_BIT_6_SWIZZLE_9_17;
+ }
+ break;
+ }
+
+ /* check for L-shaped memory aka modified enhanced addressing */
+ if (IS_GEN4(dev_priv) &&
+ !(I915_READ(DCC2) & DCC2_MODIFIED_ENHANCED_DISABLE)) {
+ swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN;
+ swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN;
+ }
+
+ if (dcc == 0xffffffff) {
+ DRM_ERROR("Couldn't read from MCHBAR. "
+ "Disabling tiling.\n");
+ swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN;
+ swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN;
+ }
+ } else {
+ /* The 965, G33, and newer, have a very flexible memory
+ * configuration. It will enable dual-channel mode
+ * (interleaving) on as much memory as it can, and the GPU
+ * will additionally sometimes enable different bit 6
+ * swizzling for tiled objects from the CPU.
+ *
+ * Here's what I found on the G965:
+ * slot fill memory size swizzling
+ * 0A 0B 1A 1B 1-ch 2-ch
+ * 512 0 0 0 512 0 O
+ * 512 0 512 0 16 1008 X
+ * 512 0 0 512 16 1008 X
+ * 0 512 0 512 16 1008 X
+ * 1024 1024 1024 0 2048 1024 O
+ *
+		 * We could probably detect this either from the DRB
+ * matching, which was the case for the swizzling required in
+ * the table above, or from the 1-ch value being less than
+ * the minimum size of a rank.
+ *
+ * Reports indicate that the swizzling actually
+ * varies depending upon page placement inside the
+ * channels, i.e. we see swizzled pages where the
+ * banks of memory are paired and unswizzled on the
+ * uneven portion, so leave that as unknown.
+ */
+ if (I915_READ16(C0DRB3) == I915_READ16(C1DRB3)) {
+ swizzle_x = I915_BIT_6_SWIZZLE_9_10;
+ swizzle_y = I915_BIT_6_SWIZZLE_9;
+ }
+ }
+
+ if (swizzle_x == I915_BIT_6_SWIZZLE_UNKNOWN ||
+ swizzle_y == I915_BIT_6_SWIZZLE_UNKNOWN) {
+ /* Userspace likes to explode if it sees unknown swizzling,
+ * so lie. We will finish the lie when reporting through
+ * the get-tiling-ioctl by reporting the physical swizzle
+ * mode as unknown instead.
+ *
+ * As we don't strictly know what the swizzling is, it may be
+ * bit17 dependent, and so we need to also prevent the pages
+ * from being moved.
+ */
+ dev_priv->quirks |= QUIRK_PIN_SWIZZLED_PAGES;
+ swizzle_x = I915_BIT_6_SWIZZLE_NONE;
+ swizzle_y = I915_BIT_6_SWIZZLE_NONE;
+ }
+
+ dev_priv->mm.bit_6_swizzle_x = swizzle_x;
+ dev_priv->mm.bit_6_swizzle_y = swizzle_y;
+}
+
+/*
+ * Swap every 64 bytes of this page around, to account for it having a new
+ * bit 17 of its physical address and therefore being interpreted differently
+ * by the GPU.
+ */
+static void
+i915_gem_swizzle_page(struct page *page)
+{
+ char temp[64];
+ char *vaddr;
+ int i;
+
+ vaddr = kmap(page);
+
+ for (i = 0; i < PAGE_SIZE; i += 128) {
+ memcpy(temp, &vaddr[i], 64);
+ memcpy(&vaddr[i], &vaddr[i + 64], 64);
+ memcpy(&vaddr[i + 64], temp, 64);
+ }
+
+ kunmap(page);
+}
+
+/**
+ * i915_gem_object_do_bit_17_swizzle - fixup bit 17 swizzling
+ * @obj: i915 GEM buffer object
+ * @pages: the scattergather list of physical pages
+ *
+ * This function fixes up the swizzling in case any page frame number for this
+ * object has changed in bit 17 since that state has been saved with
+ * i915_gem_object_save_bit_17_swizzle().
+ *
+ * This is called when pinning backing storage again, since the kernel is free
+ * to move unpinned backing storage around (either by directly moving pages or
+ * by swapping them out and back in again).
+ */
+void
+i915_gem_object_do_bit_17_swizzle(struct drm_i915_gem_object *obj,
+ struct sg_table *pages)
+{
+ struct sgt_iter sgt_iter;
+ struct page *page;
+ int i;
+
+ if (obj->bit_17 == NULL)
+ return;
+
+ i = 0;
+ for_each_sgt_page(page, sgt_iter, pages) {
+ char new_bit_17 = page_to_phys(page) >> 17;
+ if ((new_bit_17 & 0x1) != (test_bit(i, obj->bit_17) != 0)) {
+ i915_gem_swizzle_page(page);
+ set_page_dirty(page);
+ }
+ i++;
+ }
+}
+
+/**
+ * i915_gem_object_save_bit_17_swizzle - save bit 17 swizzling
+ * @obj: i915 GEM buffer object
+ * @pages: the scattergather list of physical pages
+ *
+ * This function saves the bit 17 of each page frame number so that swizzling
+ * can be fixed up later on with i915_gem_object_do_bit_17_swizzle(). This must
+ * be called before the backing storage can be unpinned.
+ */
+void
+i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj,
+ struct sg_table *pages)
+{
+ const unsigned int page_count = obj->base.size >> PAGE_SHIFT;
+ struct sgt_iter sgt_iter;
+ struct page *page;
+ int i;
+
+ if (obj->bit_17 == NULL) {
+ obj->bit_17 = kcalloc(BITS_TO_LONGS(page_count),
+ sizeof(long), GFP_KERNEL);
+ if (obj->bit_17 == NULL) {
+ DRM_ERROR("Failed to allocate memory for bit 17 "
+ "record\n");
+ return;
+ }
+ }
+
+ i = 0;
+
+ for_each_sgt_page(page, sgt_iter, pages) {
+ if (page_to_phys(page) & (1 << 17))
+ __set_bit(i, obj->bit_17);
+ else
+ __clear_bit(i, obj->bit_17);
+ i++;
+ }
+}
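Taken together, the intended pairing around a pin/unpin cycle looks roughly
like the sketch below; the real call sites are the page-pinning paths in
i915_gem.c, gated on i915_gem_object_needs_bit17_swizzle():

	/* before the pages are unpinned and may be swapped out */
	if (i915_gem_object_needs_bit17_swizzle(obj))
		i915_gem_object_save_bit_17_swizzle(obj, pages);

	/* ... pages released, possibly swapped out and back in ... */

	/* once the pages are pinned again */
	if (i915_gem_object_needs_bit17_swizzle(obj))
		i915_gem_object_do_bit_17_swizzle(obj, pages);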
diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.h b/drivers/gpu/drm/i915/i915_gem_fence_reg.h
new file mode 100644
index 0000000..22c4a2d
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright © 2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __I915_FENCE_REG_H__
+#define __I915_FENCE_REG_H__
+
+#include <linux/list.h>
+
+struct drm_i915_private;
+struct i915_vma;
+
+struct drm_i915_fence_reg {
+ struct list_head link;
+ struct drm_i915_private *i915;
+ struct i915_vma *vma;
+ int pin_count;
+ int id;
+ /**
+ * Whether the tiling parameters for the currently
+ * associated fence register have changed. Note that
+ * for the purposes of tracking tiling changes we also
+ * treat the unfenced register, the register slot that
+ * the object occupies whilst it executes a fenced
+ * command (such as BLT on gen2/3), as a "fence".
+ */
+ bool dirty;
+};
+
+#endif
+
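Of these fields, link, i915 and id are fixed at initialisation; vma,
pin_count and dirty track runtime state. For reference, a sketch of the
load-time setup that threads the per-device array onto the LRU (modelled on
the existing init in i915_gem.c, illustrative only):

	for (i = 0; i < dev_priv->num_fence_regs; i++) {
		struct drm_i915_fence_reg *fence = &dev_priv->fence_regs[i];

		fence->i915 = dev_priv;
		fence->id = i;
		list_add_tail(&fence->link, &dev_priv->mm.fence_list);
	}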
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 67606bf..2631f4f 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -96,13 +96,6 @@
*
*/
-static inline struct i915_ggtt *
-i915_vm_to_ggtt(struct i915_address_space *vm)
-{
- GEM_BUG_ON(!i915_is_ggtt(vm));
- return container_of(vm, struct i915_ggtt, base);
-}
-
static int
i915_get_ggtt_vma_pages(struct i915_vma *vma);
@@ -3348,175 +3341,6 @@ void i915_gem_restore_gtt_mappings(struct drm_device *dev)
i915_ggtt_flush(dev_priv);
}
-static void
-i915_vma_retire(struct i915_gem_active *active,
- struct drm_i915_gem_request *rq)
-{
- const unsigned int idx = rq->engine->id;
- struct i915_vma *vma =
- container_of(active, struct i915_vma, last_read[idx]);
- struct drm_i915_gem_object *obj = vma->obj;
-
- GEM_BUG_ON(!i915_vma_has_active_engine(vma, idx));
-
- i915_vma_clear_active(vma, idx);
- if (i915_vma_is_active(vma))
- return;
-
- list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
- if (unlikely(i915_vma_is_closed(vma) && !i915_vma_is_pinned(vma)))
- WARN_ON(i915_vma_unbind(vma));
-
- GEM_BUG_ON(!i915_gem_object_is_active(obj));
- if (--obj->active_count)
- return;
-
- /* Bump our place on the bound list to keep it roughly in LRU order
- * so that we don't steal from recently used but inactive objects
- * (unless we are forced to ofc!)
- */
- if (obj->bind_count)
- list_move_tail(&obj->global_list, &rq->i915->mm.bound_list);
-
- obj->mm.dirty = true; /* be paranoid */
-
- if (i915_gem_object_has_active_reference(obj)) {
- i915_gem_object_clear_active_reference(obj);
- i915_gem_object_put(obj);
- }
-}
-
-static void
-i915_ggtt_retire__write(struct i915_gem_active *active,
- struct drm_i915_gem_request *request)
-{
- struct i915_vma *vma =
- container_of(active, struct i915_vma, last_write);
-
- intel_fb_obj_flush(vma->obj, true, ORIGIN_CS);
-}
-
-void i915_vma_destroy(struct i915_vma *vma)
-{
- GEM_BUG_ON(vma->node.allocated);
- GEM_BUG_ON(i915_vma_is_active(vma));
- GEM_BUG_ON(!i915_vma_is_closed(vma));
- GEM_BUG_ON(vma->fence);
-
- rb_erase(&vma->obj_node, &vma->obj->vma_tree);
- list_del(&vma->vm_link);
- if (!i915_vma_is_ggtt(vma))
- i915_ppgtt_put(i915_vm_to_ppgtt(vma->vm));
-
- kmem_cache_free(to_i915(vma->obj->base.dev)->vmas, vma);
-}
-
-void i915_vma_close(struct i915_vma *vma)
-{
- GEM_BUG_ON(i915_vma_is_closed(vma));
- vma->flags |= I915_VMA_CLOSED;
-
- list_del_init(&vma->obj_link);
- if (!i915_vma_is_active(vma) && !i915_vma_is_pinned(vma))
- WARN_ON(i915_vma_unbind(vma));
-}
-
-static inline long vma_compare(struct i915_vma *vma,
- struct i915_address_space *vm,
- const struct i915_ggtt_view *view)
-{
- GEM_BUG_ON(view && !i915_vma_is_ggtt(vma));
-
- if (vma->vm != vm)
- return vma->vm - vm;
-
- if (!view)
- return vma->ggtt_view.type;
-
- if (vma->ggtt_view.type != view->type)
- return vma->ggtt_view.type - view->type;
-
- return memcmp(&vma->ggtt_view.params,
- &view->params,
- sizeof(view->params));
-}
-
-static struct i915_vma *
-__i915_vma_create(struct drm_i915_gem_object *obj,
- struct i915_address_space *vm,
- const struct i915_ggtt_view *view)
-{
- struct i915_vma *vma;
- struct rb_node *rb, **p;
- int i;
-
- GEM_BUG_ON(vm->closed);
-
- vma = kmem_cache_zalloc(to_i915(obj->base.dev)->vmas, GFP_KERNEL);
- if (vma == NULL)
- return ERR_PTR(-ENOMEM);
-
- INIT_LIST_HEAD(&vma->exec_list);
- for (i = 0; i < ARRAY_SIZE(vma->last_read); i++)
- init_request_active(&vma->last_read[i], i915_vma_retire);
- init_request_active(&vma->last_write,
- i915_is_ggtt(vm) ? i915_ggtt_retire__write : NULL);
- init_request_active(&vma->last_fence, NULL);
- list_add(&vma->vm_link, &vm->unbound_list);
- vma->vm = vm;
- vma->obj = obj;
- vma->size = obj->base.size;
-
- if (view) {
- vma->ggtt_view = *view;
- if (view->type == I915_GGTT_VIEW_PARTIAL) {
- vma->size = view->params.partial.size;
- vma->size <<= PAGE_SHIFT;
- } else if (view->type == I915_GGTT_VIEW_ROTATED) {
- vma->size =
- intel_rotation_info_size(&view->params.rotated);
- vma->size <<= PAGE_SHIFT;
- }
- }
-
- if (i915_is_ggtt(vm)) {
- vma->flags |= I915_VMA_GGTT;
- list_add(&vma->obj_link, &obj->vma_list);
- } else {
- i915_ppgtt_get(i915_vm_to_ppgtt(vm));
- list_add_tail(&vma->obj_link, &obj->vma_list);
- }
-
- rb = NULL;
- p = &obj->vma_tree.rb_node;
- while (*p) {
- struct i915_vma *pos;
-
- rb = *p;
- pos = rb_entry(rb, struct i915_vma, obj_node);
- if (vma_compare(pos, vm, view) < 0)
- p = &rb->rb_right;
- else
- p = &rb->rb_left;
- }
- rb_link_node(&vma->obj_node, rb, p);
- rb_insert_color(&vma->obj_node, &obj->vma_tree);
-
- return vma;
-}
-
-struct i915_vma *
-i915_vma_create(struct drm_i915_gem_object *obj,
- struct i915_address_space *vm,
- const struct i915_ggtt_view *view)
-{
- lockdep_assert_held(&obj->base.dev->struct_mutex);
- GEM_BUG_ON(view && !i915_is_ggtt(vm));
- GEM_BUG_ON(i915_gem_obj_to_vma(obj, vm, view));
-
- return __i915_vma_create(obj, vm, view);
-}
-
struct i915_vma *
i915_gem_obj_to_vma(struct drm_i915_gem_object *obj,
struct i915_address_space *vm,
@@ -3529,7 +3353,7 @@ i915_gem_obj_to_vma(struct drm_i915_gem_object *obj,
struct i915_vma *vma = rb_entry(rb, struct i915_vma, obj_node);
long cmp;
- cmp = vma_compare(vma, vm, view);
+ cmp = i915_vma_compare(vma, vm, view);
if (cmp == 0)
return vma;
@@ -3554,7 +3378,7 @@ i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj,
vma = i915_gem_obj_to_vma(obj, vm, view);
if (!vma) {
- vma = __i915_vma_create(obj, vm, view);
+ vma = i915_vma_create(obj, vm, view);
GEM_BUG_ON(vma != i915_gem_obj_to_vma(obj, vm, view));
}
@@ -3739,99 +3563,3 @@ i915_get_ggtt_vma_pages(struct i915_vma *vma)
return ret;
}
-/**
- * i915_vma_bind - Sets up PTEs for an VMA in it's corresponding address space.
- * @vma: VMA to map
- * @cache_level: mapping cache level
- * @flags: flags like global or local mapping
- *
- * DMA addresses are taken from the scatter-gather table of this object (or of
- * this VMA in case of non-default GGTT views) and PTE entries set up.
- * Note that DMA addresses are also the only part of the SG table we care about.
- */
-int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level,
- u32 flags)
-{
- u32 bind_flags;
- u32 vma_flags;
- int ret;
-
- if (WARN_ON(flags == 0))
- return -EINVAL;
-
- bind_flags = 0;
- if (flags & PIN_GLOBAL)
- bind_flags |= I915_VMA_GLOBAL_BIND;
- if (flags & PIN_USER)
- bind_flags |= I915_VMA_LOCAL_BIND;
-
- vma_flags = vma->flags & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND);
- if (flags & PIN_UPDATE)
- bind_flags |= vma_flags;
- else
- bind_flags &= ~vma_flags;
- if (bind_flags == 0)
- return 0;
-
- if (vma_flags == 0 && vma->vm->allocate_va_range) {
- trace_i915_va_alloc(vma);
- ret = vma->vm->allocate_va_range(vma->vm,
- vma->node.start,
- vma->node.size);
- if (ret)
- return ret;
- }
-
- ret = vma->vm->bind_vma(vma, cache_level, bind_flags);
- if (ret)
- return ret;
-
- vma->flags |= bind_flags;
- return 0;
-}
-
-void __iomem *i915_vma_pin_iomap(struct i915_vma *vma)
-{
- void __iomem *ptr;
-
- /* Access through the GTT requires the device to be awake. */
- assert_rpm_wakelock_held(to_i915(vma->vm->dev));
-
- lockdep_assert_held(&vma->vm->dev->struct_mutex);
- if (WARN_ON(!i915_vma_is_map_and_fenceable(vma)))
- return IO_ERR_PTR(-ENODEV);
-
- GEM_BUG_ON(!i915_vma_is_ggtt(vma));
- GEM_BUG_ON((vma->flags & I915_VMA_GLOBAL_BIND) == 0);
-
- ptr = vma->iomap;
- if (ptr == NULL) {
- ptr = io_mapping_map_wc(&i915_vm_to_ggtt(vma->vm)->mappable,
- vma->node.start,
- vma->node.size);
- if (ptr == NULL)
- return IO_ERR_PTR(-ENOMEM);
-
- vma->iomap = ptr;
- }
-
- __i915_vma_pin(vma);
- return ptr;
-}
-
-void i915_vma_unpin_and_release(struct i915_vma **p_vma)
-{
- struct i915_vma *vma;
- struct drm_i915_gem_object *obj;
-
- vma = fetch_and_zero(p_vma);
- if (!vma)
- return;
-
- obj = vma->obj;
-
- i915_vma_unpin(vma);
- i915_vma_close(vma);
-
- __i915_gem_object_release_unless_active(obj);
-}
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index c23ef9d..57b5849 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -35,7 +35,9 @@
#define __I915_GEM_GTT_H__
#include <linux/io-mapping.h>
+#include <linux/mm.h>
+#include "i915_gem_timeline.h"
#include "i915_gem_request.h"
#define I915_FENCE_REG_NONE -1
@@ -138,6 +140,8 @@ typedef uint64_t gen8_ppgtt_pml4e_t;
#define GEN8_PPAT_ELLC_OVERRIDE (0<<2)
#define GEN8_PPAT(i, x) ((uint64_t) (x) << ((i) * 8))
+struct sg_table;
+
enum i915_ggtt_view_type {
I915_GGTT_VIEW_NORMAL = 0,
I915_GGTT_VIEW_ROTATED,
@@ -168,135 +172,7 @@ extern const struct i915_ggtt_view i915_ggtt_view_rotated;
enum i915_cache_level;
-/**
- * A VMA represents a GEM BO that is bound into an address space. Therefore, a
- * VMA's presence cannot be guaranteed before binding, or after unbinding the
- * object into/from the address space.
- *
- * To make things as simple as possible (ie. no refcounting), a VMA's lifetime
- * will always be <= an objects lifetime. So object refcounting should cover us.
- */
-struct i915_vma {
- struct drm_mm_node node;
- struct drm_i915_gem_object *obj;
- struct i915_address_space *vm;
- struct drm_i915_fence_reg *fence;
- struct sg_table *pages;
- void __iomem *iomap;
- u64 size;
- u64 display_alignment;
-
- unsigned int flags;
- /**
- * How many users have pinned this object in GTT space. The following
- * users can each hold at most one reference: pwrite/pread, execbuffer
- * (objects are not allowed multiple times for the same batchbuffer),
- * and the framebuffer code. When switching/pageflipping, the
- * framebuffer code has at most two buffers pinned per crtc.
- *
- * In the worst case this is 1 + 1 + 1 + 2*2 = 7. That would fit into 3
- * bits with absolutely no headroom. So use 4 bits.
- */
-#define I915_VMA_PIN_MASK 0xf
-#define I915_VMA_PIN_OVERFLOW BIT(5)
-
- /** Flags and address space this VMA is bound to */
-#define I915_VMA_GLOBAL_BIND BIT(6)
-#define I915_VMA_LOCAL_BIND BIT(7)
-#define I915_VMA_BIND_MASK (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND | I915_VMA_PIN_OVERFLOW)
-
-#define I915_VMA_GGTT BIT(8)
-#define I915_VMA_CAN_FENCE BIT(9)
-#define I915_VMA_CLOSED BIT(10)
-
- unsigned int active;
- struct i915_gem_active last_read[I915_NUM_ENGINES];
- struct i915_gem_active last_write;
- struct i915_gem_active last_fence;
-
- /**
- * Support different GGTT views into the same object.
- * This means there can be multiple VMA mappings per object and per VM.
- * i915_ggtt_view_type is used to distinguish between those entries.
- * The default one of zero (I915_GGTT_VIEW_NORMAL) is default and also
- * assumed in GEM functions which take no ggtt view parameter.
- */
- struct i915_ggtt_view ggtt_view;
-
- /** This object's place on the active/inactive lists */
- struct list_head vm_link;
-
- struct list_head obj_link; /* Link in the object's VMA list */
- struct rb_node obj_node;
-
- /** This vma's place in the batchbuffer or on the eviction list */
- struct list_head exec_list;
-
- /**
- * Used for performing relocations during execbuffer insertion.
- */
- struct hlist_node exec_node;
- unsigned long exec_handle;
- struct drm_i915_gem_exec_object2 *exec_entry;
-};
-
-struct i915_vma *
-i915_vma_create(struct drm_i915_gem_object *obj,
- struct i915_address_space *vm,
- const struct i915_ggtt_view *view);
-void i915_vma_unpin_and_release(struct i915_vma **p_vma);
-
-static inline bool i915_vma_is_ggtt(const struct i915_vma *vma)
-{
- return vma->flags & I915_VMA_GGTT;
-}
-
-static inline bool i915_vma_is_map_and_fenceable(const struct i915_vma *vma)
-{
- return vma->flags & I915_VMA_CAN_FENCE;
-}
-
-static inline bool i915_vma_is_closed(const struct i915_vma *vma)
-{
- return vma->flags & I915_VMA_CLOSED;
-}
-
-static inline unsigned int i915_vma_get_active(const struct i915_vma *vma)
-{
- return vma->active;
-}
-
-static inline bool i915_vma_is_active(const struct i915_vma *vma)
-{
- return i915_vma_get_active(vma);
-}
-
-static inline void i915_vma_set_active(struct i915_vma *vma,
- unsigned int engine)
-{
- vma->active |= BIT(engine);
-}
-
-static inline void i915_vma_clear_active(struct i915_vma *vma,
- unsigned int engine)
-{
- vma->active &= ~BIT(engine);
-}
-
-static inline bool i915_vma_has_active_engine(const struct i915_vma *vma,
- unsigned int engine)
-{
- return vma->active & BIT(engine);
-}
-
-static inline u32 i915_ggtt_offset(const struct i915_vma *vma)
-{
- GEM_BUG_ON(!i915_vma_is_ggtt(vma));
- GEM_BUG_ON(!vma->node.allocated);
- GEM_BUG_ON(upper_32_bits(vma->node.start));
- GEM_BUG_ON(upper_32_bits(vma->node.start + vma->node.size - 1));
- return lower_32_bits(vma->node.start);
-}
+struct i915_vma;
struct i915_page_dma {
struct page *page;
@@ -606,6 +482,13 @@ i915_page_dir_dma_addr(const struct i915_hw_ppgtt *ppgtt, const unsigned n)
px_dma(ppgtt->base.scratch_pd);
}
+static inline struct i915_ggtt *
+i915_vm_to_ggtt(struct i915_address_space *vm)
+{
+ GEM_BUG_ON(!i915_is_ggtt(vm));
+ return container_of(vm, struct i915_ggtt, base);
+}
+
int i915_ggtt_probe_hw(struct drm_i915_private *dev_priv);
int i915_ggtt_init_hw(struct drm_i915_private *dev_priv);
int i915_ggtt_enable_hw(struct drm_i915_private *dev_priv);
@@ -653,88 +536,4 @@ void i915_gem_gtt_finish_pages(struct drm_i915_gem_object *obj,
#define PIN_OFFSET_FIXED BIT(11)
#define PIN_OFFSET_MASK (~4095)
-int __i915_vma_do_pin(struct i915_vma *vma,
- u64 size, u64 alignment, u64 flags);
-static inline int __must_check
-i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
-{
- BUILD_BUG_ON(PIN_MBZ != I915_VMA_PIN_OVERFLOW);
- BUILD_BUG_ON(PIN_GLOBAL != I915_VMA_GLOBAL_BIND);
- BUILD_BUG_ON(PIN_USER != I915_VMA_LOCAL_BIND);
-
- /* Pin early to prevent the shrinker/eviction logic from destroying
- * our vma as we insert and bind.
- */
- if (likely(((++vma->flags ^ flags) & I915_VMA_BIND_MASK) == 0))
- return 0;
-
- return __i915_vma_do_pin(vma, size, alignment, flags);
-}
-
-static inline int i915_vma_pin_count(const struct i915_vma *vma)
-{
- return vma->flags & I915_VMA_PIN_MASK;
-}
-
-static inline bool i915_vma_is_pinned(const struct i915_vma *vma)
-{
- return i915_vma_pin_count(vma);
-}
-
-static inline void __i915_vma_pin(struct i915_vma *vma)
-{
- vma->flags++;
- GEM_BUG_ON(vma->flags & I915_VMA_PIN_OVERFLOW);
-}
-
-static inline void __i915_vma_unpin(struct i915_vma *vma)
-{
- GEM_BUG_ON(!i915_vma_is_pinned(vma));
- vma->flags--;
-}
-
-static inline void i915_vma_unpin(struct i915_vma *vma)
-{
- GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
- __i915_vma_unpin(vma);
-}
-
-/**
- * i915_vma_pin_iomap - calls ioremap_wc to map the GGTT VMA via the aperture
- * @vma: VMA to iomap
- *
- * The passed in VMA has to be pinned in the global GTT mappable region.
- * An extra pinning of the VMA is acquired for the return iomapping,
- * the caller must call i915_vma_unpin_iomap to relinquish the pinning
- * after the iomapping is no longer required.
- *
- * Callers must hold the struct_mutex.
- *
- * Returns a valid iomapped pointer or ERR_PTR.
- */
-void __iomem *i915_vma_pin_iomap(struct i915_vma *vma);
-#define IO_ERR_PTR(x) ((void __iomem *)ERR_PTR(x))
-
-/**
- * i915_vma_unpin_iomap - unpins the mapping returned from i915_vma_iomap
- * @vma: VMA to unpin
- *
- * Unpins the previously iomapped VMA from i915_vma_pin_iomap().
- *
- * Callers must hold the struct_mutex. This function is only valid to be
- * called on a VMA previously iomapped by the caller with i915_vma_pin_iomap().
- */
-static inline void i915_vma_unpin_iomap(struct i915_vma *vma)
-{
- lockdep_assert_held(&vma->vm->dev->struct_mutex);
- GEM_BUG_ON(vma->iomap == NULL);
- i915_vma_unpin(vma);
-}
-
-static inline struct page *i915_vma_first_page(struct i915_vma *vma)
-{
- GEM_BUG_ON(!vma->pages);
- return sg_page(vma->pages->sgl);
-}
-
#endif
diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h
new file mode 100644
index 0000000..0826c25
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_gem_object.h
@@ -0,0 +1,337 @@
+/*
+ * Copyright © 2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __I915_GEM_OBJECT_H__
+#define __I915_GEM_OBJECT_H__
+
+#include <linux/reservation.h>
+
+#include <drm/drm_vma_manager.h>
+#include <drm/drm_gem.h>
+#include <drm/drmP.h>
+
+#include <drm/i915_drm.h>
+
+struct drm_i915_gem_object_ops {
+ unsigned int flags;
+#define I915_GEM_OBJECT_HAS_STRUCT_PAGE 0x1
+#define I915_GEM_OBJECT_IS_SHRINKABLE 0x2
+
+ /* Interface between the GEM object and its backing storage.
+ * get_pages() is called once prior to the use of the associated set
+	 * of pages, before binding them into the GTT, and put_pages() is
+	 * called after we no longer need them. As we expect there to be an
+	 * associated cost in migrating pages from the backing storage and
+	 * making them available to the GPU (e.g. clflush), we may hold
+ * onto the pages after they are no longer referenced by the GPU
+ * in case they may be used again shortly (for example migrating the
+ * pages to a different memory domain within the GTT). put_pages()
+ * will therefore most likely be called when the object itself is
+ * being released or under memory pressure (where we attempt to
+ * reap pages for the shrinker).
+ */
+ struct sg_table *(*get_pages)(struct drm_i915_gem_object *);
+ void (*put_pages)(struct drm_i915_gem_object *, struct sg_table *);
+
+ int (*dmabuf_export)(struct drm_i915_gem_object *);
+ void (*release)(struct drm_i915_gem_object *);
+};
+
+struct drm_i915_gem_object {
+ struct drm_gem_object base;
+
+ const struct drm_i915_gem_object_ops *ops;
+
+ /** List of VMAs backed by this object */
+ struct list_head vma_list;
+ struct rb_root vma_tree;
+
+ /** Stolen memory for this object, instead of being backed by shmem. */
+ struct drm_mm_node *stolen;
+ struct list_head global_list;
+ union {
+ struct rcu_head rcu;
+ struct llist_node freed;
+ };
+
+ /**
+ * Whether the object is currently in the GGTT mmap.
+ */
+ struct list_head userfault_link;
+
+ /** Used in execbuf to temporarily hold a ref */
+ struct list_head obj_exec_link;
+
+ struct list_head batch_pool_link;
+
+ unsigned long flags;
+
+ /**
+ * Have we taken a reference for the object for incomplete GPU
+ * activity?
+ */
+#define I915_BO_ACTIVE_REF 0
+
+ /*
+	 * Is the object to be mapped as read-only to the GPU?
+	 * Only honoured if the hardware has the relevant pte bit.
+ */
+ unsigned long gt_ro:1;
+ unsigned int cache_level:3;
+ unsigned int cache_dirty:1;
+
+ atomic_t frontbuffer_bits;
+ unsigned int frontbuffer_ggtt_origin; /* write once */
+
+ /** Current tiling stride for the object, if it's tiled. */
+ unsigned int tiling_and_stride;
+#define FENCE_MINIMUM_STRIDE 128 /* See i915_tiling_ok() */
+#define TILING_MASK (FENCE_MINIMUM_STRIDE-1)
+#define STRIDE_MASK (~TILING_MASK)
+
+ /** Count of VMA actually bound by this object */
+ unsigned int bind_count;
+ unsigned int active_count;
+ unsigned int pin_display;
+
+ struct {
+ struct mutex lock; /* protects the pages and their use */
+ atomic_t pages_pin_count;
+
+ struct sg_table *pages;
+ void *mapping;
+
+ struct i915_gem_object_page_iter {
+ struct scatterlist *sg_pos;
+ unsigned int sg_idx; /* in pages, but 32bit eek! */
+
+ struct radix_tree_root radix;
+ struct mutex lock; /* protects this cache */
+ } get_page;
+
+ /**
+ * Advice: are the backing pages purgeable?
+ */
+ unsigned int madv:2;
+
+ /**
+ * This is set if the object has been written to since the
+ * pages were last acquired.
+ */
+ bool dirty:1;
+
+ /**
+ * This is set if the object has been pinned due to unknown
+ * swizzling.
+ */
+ bool quirked:1;
+ } mm;
+
+ /** Breadcrumb of last rendering to the buffer.
+ * There can only be one writer, but we allow for multiple readers.
+ * If there is a writer that necessarily implies that all other
+ * read requests are complete - but we may only be lazily clearing
+ * the read requests. A read request is naturally the most recent
+ * request on a ring, so we may have two different write and read
+ * requests on one ring where the write request is older than the
+ * read request. This allows for the CPU to read from an active
+ * buffer by only waiting for the write to complete.
+ */
+ struct reservation_object *resv;
+
+ /** References from framebuffers, locks out tiling changes. */
+ unsigned long framebuffer_references;
+
+ /** Record of address bit 17 of each page at last unbind. */
+ unsigned long *bit_17;
+
+ struct i915_gem_userptr {
+ uintptr_t ptr;
+ unsigned read_only :1;
+
+ struct i915_mm_struct *mm;
+ struct i915_mmu_object *mmu_object;
+ struct work_struct *work;
+ } userptr;
+
+ /** for phys allocated objects */
+ struct drm_dma_handle *phys_handle;
+
+ struct reservation_object __builtin_resv;
+};
+
+static inline struct drm_i915_gem_object *
+to_intel_bo(struct drm_gem_object *gem)
+{
+ /* Assert that to_intel_bo(NULL) == NULL */
+ BUILD_BUG_ON(offsetof(struct drm_i915_gem_object, base));
+
+ return container_of(gem, struct drm_i915_gem_object, base);
+}
+
+/**
+ * i915_gem_object_lookup_rcu - look up a temporary GEM object from its handle
+ * @filp: DRM file private date
+ * @handle: userspace handle
+ *
+ * Returns:
+ *
+ * A pointer to the object named by the handle if such exists on @filp, NULL
+ * otherwise. This object is only valid whilst under the RCU read lock, and
+ * note carefully the object may be in the process of being destroyed.
+ */
+static inline struct drm_i915_gem_object *
+i915_gem_object_lookup_rcu(struct drm_file *file, u32 handle)
+{
+#ifdef CONFIG_LOCKDEP
+ WARN_ON(debug_locks && !lock_is_held(&rcu_lock_map));
+#endif
+ return idr_find(&file->object_idr, handle);
+}
+
+static inline struct drm_i915_gem_object *
+i915_gem_object_lookup(struct drm_file *file, u32 handle)
+{
+ struct drm_i915_gem_object *obj;
+
+ rcu_read_lock();
+ obj = i915_gem_object_lookup_rcu(file, handle);
+ if (obj && !kref_get_unless_zero(&obj->base.refcount))
+ obj = NULL;
+ rcu_read_unlock();
+
+ return obj;
+}
+
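A typical ioctl-side use of the pair above, as a sketch (args->handle stands
in for whatever handle the caller received from userspace):

	struct drm_i915_gem_object *obj;

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/* a full reference is now held; the object cannot be freed */

	i915_gem_object_put(obj);	/* drop the lookup reference */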
+__deprecated
+extern struct drm_gem_object *
+drm_gem_object_lookup(struct drm_file *file, u32 handle);
+
+__attribute__((nonnull))
+static inline struct drm_i915_gem_object *
+i915_gem_object_get(struct drm_i915_gem_object *obj)
+{
+ drm_gem_object_reference(&obj->base);
+ return obj;
+}
+
+__deprecated
+extern void drm_gem_object_reference(struct drm_gem_object *);
+
+__attribute__((nonnull))
+static inline void
+i915_gem_object_put(struct drm_i915_gem_object *obj)
+{
+ __drm_gem_object_unreference(&obj->base);
+}
+
+__deprecated
+extern void drm_gem_object_unreference(struct drm_gem_object *);
+
+__deprecated
+extern void drm_gem_object_unreference_unlocked(struct drm_gem_object *);
+
+static inline bool
+i915_gem_object_is_dead(const struct drm_i915_gem_object *obj)
+{
+ return atomic_read(&obj->base.refcount.refcount) == 0;
+}
+
+static inline bool
+i915_gem_object_has_struct_page(const struct drm_i915_gem_object *obj)
+{
+ return obj->ops->flags & I915_GEM_OBJECT_HAS_STRUCT_PAGE;
+}
+
+static inline bool
+i915_gem_object_is_shrinkable(const struct drm_i915_gem_object *obj)
+{
+ return obj->ops->flags & I915_GEM_OBJECT_IS_SHRINKABLE;
+}
+
+static inline bool
+i915_gem_object_is_active(const struct drm_i915_gem_object *obj)
+{
+ return obj->active_count;
+}
+
+static inline bool
+i915_gem_object_has_active_reference(const struct drm_i915_gem_object *obj)
+{
+ return test_bit(I915_BO_ACTIVE_REF, &obj->flags);
+}
+
+static inline void
+i915_gem_object_set_active_reference(struct drm_i915_gem_object *obj)
+{
+ lockdep_assert_held(&obj->base.dev->struct_mutex);
+ __set_bit(I915_BO_ACTIVE_REF, &obj->flags);
+}
+
+static inline void
+i915_gem_object_clear_active_reference(struct drm_i915_gem_object *obj)
+{
+ lockdep_assert_held(&obj->base.dev->struct_mutex);
+ __clear_bit(I915_BO_ACTIVE_REF, &obj->flags);
+}
+
+void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj);
+
+static inline unsigned int
+i915_gem_object_get_tiling(struct drm_i915_gem_object *obj)
+{
+ return obj->tiling_and_stride & TILING_MASK;
+}
+
+static inline bool
+i915_gem_object_is_tiled(struct drm_i915_gem_object *obj)
+{
+ return i915_gem_object_get_tiling(obj) != I915_TILING_NONE;
+}
+
+static inline unsigned int
+i915_gem_object_get_stride(struct drm_i915_gem_object *obj)
+{
+ return obj->tiling_and_stride & STRIDE_MASK;
+}
+
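These accessors decode a single packed field: a tiled stride is always a
multiple of FENCE_MINIMUM_STRIDE (128), so the tiling mode fits in the low
seven bits. The writer side (the tiling ioctl, sketched here for
illustration) packs it as:

	/* stride is 0 or a multiple of 128; tiling fits in the low 7 bits */
	obj->tiling_and_stride = stride | tiling;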
+static inline struct intel_engine_cs *
+i915_gem_object_last_write_engine(struct drm_i915_gem_object *obj)
+{
+ struct intel_engine_cs *engine = NULL;
+ struct dma_fence *fence;
+
+ rcu_read_lock();
+ fence = reservation_object_get_excl_rcu(obj->resv);
+ rcu_read_unlock();
+
+ if (fence && dma_fence_is_i915(fence) && !dma_fence_is_signaled(fence))
+ engine = to_request(fence)->engine;
+ dma_fence_put(fence);
+
+ return engine;
+}
+
+#endif
+
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index 75f8360..b0b59b3 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -30,6 +30,9 @@
#include "i915_gem.h"
#include "i915_sw_fence.h"
+struct drm_file;
+struct drm_i915_gem_object;
+
struct intel_wait {
struct rb_node node;
struct task_struct *tsk;
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
new file mode 100644
index 0000000..a5b84dd
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -0,0 +1,648 @@
+/*
+ * Copyright © 2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include "i915_vma.h"
+
+#include "i915_drv.h"
+#include "intel_ringbuffer.h"
+#include "intel_frontbuffer.h"
+
+#include <drm/drm_gem.h>
+
+static void
+i915_vma_retire(struct i915_gem_active *active,
+ struct drm_i915_gem_request *rq)
+{
+ const unsigned int idx = rq->engine->id;
+ struct i915_vma *vma =
+ container_of(active, struct i915_vma, last_read[idx]);
+ struct drm_i915_gem_object *obj = vma->obj;
+
+ GEM_BUG_ON(!i915_vma_has_active_engine(vma, idx));
+
+ i915_vma_clear_active(vma, idx);
+ if (i915_vma_is_active(vma))
+ return;
+
+ list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
+ if (unlikely(i915_vma_is_closed(vma) && !i915_vma_is_pinned(vma)))
+ WARN_ON(i915_vma_unbind(vma));
+
+ GEM_BUG_ON(!i915_gem_object_is_active(obj));
+ if (--obj->active_count)
+ return;
+
+ /* Bump our place on the bound list to keep it roughly in LRU order
+ * so that we don't steal from recently used but inactive objects
+ * (unless we are forced to ofc!)
+ */
+ if (obj->bind_count)
+ list_move_tail(&obj->global_list, &rq->i915->mm.bound_list);
+
+ obj->mm.dirty = true; /* be paranoid */
+
+ if (i915_gem_object_has_active_reference(obj)) {
+ i915_gem_object_clear_active_reference(obj);
+ i915_gem_object_put(obj);
+ }
+}
+
+static void
+i915_ggtt_retire__write(struct i915_gem_active *active,
+ struct drm_i915_gem_request *request)
+{
+ struct i915_vma *vma =
+ container_of(active, struct i915_vma, last_write);
+
+ intel_fb_obj_flush(vma->obj, true, ORIGIN_CS);
+}
+
+static struct i915_vma *
+__i915_vma_create(struct drm_i915_gem_object *obj,
+ struct i915_address_space *vm,
+ const struct i915_ggtt_view *view)
+{
+ struct i915_vma *vma;
+ struct rb_node *rb, **p;
+ int i;
+
+ GEM_BUG_ON(vm->closed);
+
+ vma = kmem_cache_zalloc(to_i915(obj->base.dev)->vmas, GFP_KERNEL);
+ if (vma == NULL)
+ return ERR_PTR(-ENOMEM);
+
+ INIT_LIST_HEAD(&vma->exec_list);
+ for (i = 0; i < ARRAY_SIZE(vma->last_read); i++)
+ init_request_active(&vma->last_read[i], i915_vma_retire);
+ init_request_active(&vma->last_write,
+ i915_is_ggtt(vm) ? i915_ggtt_retire__write : NULL);
+ init_request_active(&vma->last_fence, NULL);
+ list_add(&vma->vm_link, &vm->unbound_list);
+ vma->vm = vm;
+ vma->obj = obj;
+ vma->size = obj->base.size;
+
+ if (view) {
+ vma->ggtt_view = *view;
+ if (view->type == I915_GGTT_VIEW_PARTIAL) {
+ vma->size = view->params.partial.size;
+ vma->size <<= PAGE_SHIFT;
+ } else if (view->type == I915_GGTT_VIEW_ROTATED) {
+ vma->size =
+ intel_rotation_info_size(&view->params.rotated);
+ vma->size <<= PAGE_SHIFT;
+ }
+ }
+
+ if (i915_is_ggtt(vm)) {
+ vma->flags |= I915_VMA_GGTT;
+ list_add(&vma->obj_link, &obj->vma_list);
+ } else {
+ i915_ppgtt_get(i915_vm_to_ppgtt(vm));
+ list_add_tail(&vma->obj_link, &obj->vma_list);
+ }
+
+ rb = NULL;
+ p = &obj->vma_tree.rb_node;
+ while (*p) {
+ struct i915_vma *pos;
+
+ rb = *p;
+ pos = rb_entry(rb, struct i915_vma, obj_node);
+ if (i915_vma_compare(pos, vm, view) < 0)
+ p = &rb->rb_right;
+ else
+ p = &rb->rb_left;
+ }
+ rb_link_node(&vma->obj_node, rb, p);
+ rb_insert_color(&vma->obj_node, &obj->vma_tree);
+
+ return vma;
+}
+
+struct i915_vma *
+i915_vma_create(struct drm_i915_gem_object *obj,
+ struct i915_address_space *vm,
+ const struct i915_ggtt_view *view)
+{
+ lockdep_assert_held(&obj->base.dev->struct_mutex);
+ GEM_BUG_ON(view && !i915_is_ggtt(vm));
+ GEM_BUG_ON(i915_gem_obj_to_vma(obj, vm, view));
+
+ return __i915_vma_create(obj, vm, view);
+}
+
+/**
+ * i915_vma_bind - Sets up PTEs for a VMA in its corresponding address space.
+ * @vma: VMA to map
+ * @cache_level: mapping cache level
+ * @flags: flags like global or local mapping
+ *
+ * DMA addresses are taken from the scatter-gather table of this object (or of
+ * this VMA in case of non-default GGTT views) and PTE entries set up.
+ * Note that DMA addresses are also the only part of the SG table we care about.
+ */
+int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level,
+ u32 flags)
+{
+ u32 bind_flags;
+ u32 vma_flags;
+ int ret;
+
+ if (WARN_ON(flags == 0))
+ return -EINVAL;
+
+ bind_flags = 0;
+ if (flags & PIN_GLOBAL)
+ bind_flags |= I915_VMA_GLOBAL_BIND;
+ if (flags & PIN_USER)
+ bind_flags |= I915_VMA_LOCAL_BIND;
+
+ vma_flags = vma->flags & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND);
+ if (flags & PIN_UPDATE)
+ bind_flags |= vma_flags;
+ else
+ bind_flags &= ~vma_flags;
+ if (bind_flags == 0)
+ return 0;
+
+ if (vma_flags == 0 && vma->vm->allocate_va_range) {
+ trace_i915_va_alloc(vma);
+ ret = vma->vm->allocate_va_range(vma->vm,
+ vma->node.start,
+ vma->node.size);
+ if (ret)
+ return ret;
+ }
+
+ ret = vma->vm->bind_vma(vma, cache_level, bind_flags);
+ if (ret)
+ return ret;
+
+ vma->flags |= bind_flags;
+ return 0;
+}
+
+void __iomem *i915_vma_pin_iomap(struct i915_vma *vma)
+{
+ void __iomem *ptr;
+
+ /* Access through the GTT requires the device to be awake. */
+ assert_rpm_wakelock_held(to_i915(vma->vm->dev));
+
+ lockdep_assert_held(&vma->vm->dev->struct_mutex);
+ if (WARN_ON(!i915_vma_is_map_and_fenceable(vma)))
+ return IO_ERR_PTR(-ENODEV);
+
+ GEM_BUG_ON(!i915_vma_is_ggtt(vma));
+ GEM_BUG_ON((vma->flags & I915_VMA_GLOBAL_BIND) == 0);
+
+ ptr = vma->iomap;
+ if (ptr == NULL) {
+ ptr = io_mapping_map_wc(&i915_vm_to_ggtt(vma->vm)->mappable,
+ vma->node.start,
+ vma->node.size);
+ if (ptr == NULL)
+ return IO_ERR_PTR(-ENOMEM);
+
+ vma->iomap = ptr;
+ }
+
+ __i915_vma_pin(vma);
+ return ptr;
+}
+
+void i915_vma_unpin_and_release(struct i915_vma **p_vma)
+{
+ struct i915_vma *vma;
+ struct drm_i915_gem_object *obj;
+
+ vma = fetch_and_zero(p_vma);
+ if (!vma)
+ return;
+
+ obj = vma->obj;
+
+ i915_vma_unpin(vma);
+ i915_vma_close(vma);
+
+ __i915_gem_object_release_unless_active(obj);
+}
+
+bool
+i915_vma_misplaced(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
+{
+ if (!drm_mm_node_allocated(&vma->node))
+ return false;
+
+ if (vma->node.size < size)
+ return true;
+
+ if (alignment && vma->node.start & (alignment - 1))
+ return true;
+
+ if (flags & PIN_MAPPABLE && !i915_vma_is_map_and_fenceable(vma))
+ return true;
+
+ if (flags & PIN_OFFSET_BIAS &&
+ vma->node.start < (flags & PIN_OFFSET_MASK))
+ return true;
+
+ if (flags & PIN_OFFSET_FIXED &&
+ vma->node.start != (flags & PIN_OFFSET_MASK))
+ return true;
+
+ return false;
+}
+
+void __i915_vma_set_map_and_fenceable(struct i915_vma *vma)
+{
+ struct drm_i915_gem_object *obj = vma->obj;
+ struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
+ bool mappable, fenceable;
+ u32 fence_size, fence_alignment;
+
+ fence_size = i915_gem_get_ggtt_size(dev_priv,
+ vma->size,
+ i915_gem_object_get_tiling(obj));
+ fence_alignment = i915_gem_get_ggtt_alignment(dev_priv,
+ vma->size,
+ i915_gem_object_get_tiling(obj),
+ true);
+
+ fenceable = (vma->node.size == fence_size &&
+ (vma->node.start & (fence_alignment - 1)) == 0);
+
+ mappable = (vma->node.start + fence_size <=
+ dev_priv->ggtt.mappable_end);
+
+ /*
+ * Explicitly disable for rotated VMA since the display does not
+ * need the fence and the VMA is not accessible to other users.
+ */
+ if (mappable && fenceable &&
+ vma->ggtt_view.type != I915_GGTT_VIEW_ROTATED)
+ vma->flags |= I915_VMA_CAN_FENCE;
+ else
+ vma->flags &= ~I915_VMA_CAN_FENCE;
+}
+
+bool i915_gem_valid_gtt_space(struct i915_vma *vma,
+ unsigned long cache_level)
+{
+ struct drm_mm_node *gtt_space = &vma->node;
+ struct drm_mm_node *other;
+
+ /*
+ * On some machines we have to be careful when putting differing types
+ * of snoopable memory together to avoid the prefetcher crossing memory
+ * domains and dying. During vm initialisation, we decide whether or not
+ * these constraints apply and set the drm_mm.color_adjust
+ * appropriately.
+ */
+ if (vma->vm->mm.color_adjust == NULL)
+ return true;
+
+ if (!drm_mm_node_allocated(gtt_space))
+ return true;
+
+	if (list_empty(&gtt_space->node_list))
+ return true;
+
+ other = list_entry(gtt_space->node_list.prev, struct drm_mm_node, node_list);
+ if (other->allocated && !other->hole_follows && other->color != cache_level)
+ return false;
+
+ other = list_entry(gtt_space->node_list.next, struct drm_mm_node, node_list);
+ if (other->allocated && !gtt_space->hole_follows && other->color != cache_level)
+ return false;
+
+ return true;
+}
+
+/**
+ * i915_vma_insert - finds a slot for the vma in its address space
+ * @vma: the vma
+ * @size: requested size in bytes (can be larger than the VMA)
+ * @alignment: required alignment
+ * @flags: mask of PIN_* flags to use
+ *
+ * First we try to allocate some free space that meets the requirements for
+ * the VMA. Failing that, if the flags permit, it will evict an old VMA,
+ * preferably the oldest idle entry, to make room for the new VMA.
+ *
+ * Returns:
+ * 0 on success, negative error code otherwise.
+ */
+static int
+i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
+{
+ struct drm_i915_private *dev_priv = to_i915(vma->vm->dev);
+ struct drm_i915_gem_object *obj = vma->obj;
+ u64 start, end;
+ int ret;
+
+ GEM_BUG_ON(vma->flags & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND));
+ GEM_BUG_ON(drm_mm_node_allocated(&vma->node));
+
+ size = max(size, vma->size);
+ if (flags & PIN_MAPPABLE)
+ size = i915_gem_get_ggtt_size(dev_priv, size,
+ i915_gem_object_get_tiling(obj));
+
+ alignment = max(max(alignment, vma->display_alignment),
+ i915_gem_get_ggtt_alignment(dev_priv, size,
+ i915_gem_object_get_tiling(obj),
+ flags & PIN_MAPPABLE));
+
+ start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0;
+
+ end = vma->vm->total;
+ if (flags & PIN_MAPPABLE)
+ end = min_t(u64, end, dev_priv->ggtt.mappable_end);
+ if (flags & PIN_ZONE_4G)
+ end = min_t(u64, end, (1ULL << 32) - PAGE_SIZE);
+
+ /* If binding the object/GGTT view requires more space than the entire
+ * aperture has, reject it early before evicting everything in a vain
+ * attempt to find space.
+ */
+ if (size > end) {
+ DRM_DEBUG("Attempting to bind an object larger than the aperture: request=%llu [object=%zd] > %s aperture=%llu\n",
+ size, obj->base.size,
+ flags & PIN_MAPPABLE ? "mappable" : "total",
+ end);
+ return -E2BIG;
+ }
+
+ ret = i915_gem_object_pin_pages(obj);
+ if (ret)
+ return ret;
+
+ if (flags & PIN_OFFSET_FIXED) {
+ u64 offset = flags & PIN_OFFSET_MASK;
+ if (offset & (alignment - 1) || offset > end - size) {
+ ret = -EINVAL;
+ goto err_unpin;
+ }
+
+ vma->node.start = offset;
+ vma->node.size = size;
+ vma->node.color = obj->cache_level;
+ ret = drm_mm_reserve_node(&vma->vm->mm, &vma->node);
+ if (ret) {
+ ret = i915_gem_evict_for_vma(vma);
+ if (ret == 0)
+ ret = drm_mm_reserve_node(&vma->vm->mm, &vma->node);
+ if (ret)
+ goto err_unpin;
+ }
+ } else {
+ u32 search_flag, alloc_flag;
+
+ if (flags & PIN_HIGH) {
+ search_flag = DRM_MM_SEARCH_BELOW;
+ alloc_flag = DRM_MM_CREATE_TOP;
+ } else {
+ search_flag = DRM_MM_SEARCH_DEFAULT;
+ alloc_flag = DRM_MM_CREATE_DEFAULT;
+ }
+
+ /* We only allocate in PAGE_SIZE/GTT_PAGE_SIZE (4096) chunks,
+ * so we know that we always have a minimum alignment of 4096.
+ * The drm_mm range manager is optimised to return results
+ * with zero alignment, so where possible use the optimal
+ * path.
+ */
+ if (alignment <= 4096)
+ alignment = 0;
+
+search_free:
+ ret = drm_mm_insert_node_in_range_generic(&vma->vm->mm,
+ &vma->node,
+ size, alignment,
+ obj->cache_level,
+ start, end,
+ search_flag,
+ alloc_flag);
+ if (ret) {
+ ret = i915_gem_evict_something(vma->vm, size, alignment,
+ obj->cache_level,
+ start, end,
+ flags);
+ if (ret == 0)
+ goto search_free;
+
+ goto err_unpin;
+ }
+
+ GEM_BUG_ON(vma->node.start < start);
+ GEM_BUG_ON(vma->node.start + vma->node.size > end);
+ }
+ GEM_BUG_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level));
+
+ list_move_tail(&obj->global_list, &dev_priv->mm.bound_list);
+ list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
+ obj->bind_count++;
+
+ return 0;
+
+err_unpin:
+ i915_gem_object_unpin_pages(obj);
+ return ret;
+}
+
+int __i915_vma_do_pin(struct i915_vma *vma,
+ u64 size, u64 alignment, u64 flags)
+{
+ unsigned int bound = vma->flags;
+ int ret;
+
+ lockdep_assert_held(&vma->vm->dev->struct_mutex);
+ GEM_BUG_ON((flags & (PIN_GLOBAL | PIN_USER)) == 0);
+ GEM_BUG_ON((flags & PIN_GLOBAL) && !i915_vma_is_ggtt(vma));
+
+ if (WARN_ON(bound & I915_VMA_PIN_OVERFLOW)) {
+ ret = -EBUSY;
+ goto err;
+ }
+
+ if ((bound & I915_VMA_BIND_MASK) == 0) {
+ ret = i915_vma_insert(vma, size, alignment, flags);
+ if (ret)
+ goto err;
+ }
+
+ ret = i915_vma_bind(vma, vma->obj->cache_level, flags);
+ if (ret)
+ goto err;
+
+ if ((bound ^ vma->flags) & I915_VMA_GLOBAL_BIND)
+ __i915_vma_set_map_and_fenceable(vma);
+
+ GEM_BUG_ON(i915_vma_misplaced(vma, size, alignment, flags));
+ return 0;
+
+err:
+ __i915_vma_unpin(vma);
+ return ret;
+}
+
+void i915_vma_destroy(struct i915_vma *vma)
+{
+ GEM_BUG_ON(vma->node.allocated);
+ GEM_BUG_ON(i915_vma_is_active(vma));
+ GEM_BUG_ON(!i915_vma_is_closed(vma));
+ GEM_BUG_ON(vma->fence);
+
+ rb_erase(&vma->obj_node, &vma->obj->vma_tree);
+ list_del(&vma->vm_link);
+ if (!i915_vma_is_ggtt(vma))
+ i915_ppgtt_put(i915_vm_to_ppgtt(vma->vm));
+
+ kmem_cache_free(to_i915(vma->obj->base.dev)->vmas, vma);
+}
+
+void i915_vma_close(struct i915_vma *vma)
+{
+ GEM_BUG_ON(i915_vma_is_closed(vma));
+ vma->flags |= I915_VMA_CLOSED;
+
+ list_del_init(&vma->obj_link);
+ if (!i915_vma_is_active(vma) && !i915_vma_is_pinned(vma))
+ WARN_ON(i915_vma_unbind(vma));
+}
+
+static void __i915_vma_iounmap(struct i915_vma *vma)
+{
+ GEM_BUG_ON(i915_vma_is_pinned(vma));
+
+ if (vma->iomap == NULL)
+ return;
+
+ io_mapping_unmap(vma->iomap);
+ vma->iomap = NULL;
+}
+
+int i915_vma_unbind(struct i915_vma *vma)
+{
+ struct drm_i915_gem_object *obj = vma->obj;
+ unsigned long active;
+ int ret;
+
+ lockdep_assert_held(&obj->base.dev->struct_mutex);
+
+ /* First wait upon any activity as retiring the request may
+ * have side-effects such as unpinning or even unbinding this vma.
+ */
+ active = i915_vma_get_active(vma);
+ if (active) {
+ int idx;
+
+ /* When a closed VMA is retired, it is unbound - eek.
+ * In order to prevent it from being recursively closed,
+ * take a pin on the vma so that the second unbind is
+ * aborted.
+ *
+ * Even more scary is that the retire callback may free
+ * the object (last active vma). To prevent the explosion
+ * we defer the actual object free to a worker that can
+ * only proceed once it acquires the struct_mutex (which
+ * we currently hold, therefore it cannot free this object
+ * before we are finished).
+ */
+ __i915_vma_pin(vma);
+
+ for_each_active(active, idx) {
+ ret = i915_gem_active_retire(&vma->last_read[idx],
+ &vma->vm->dev->struct_mutex);
+ if (ret)
+ break;
+ }
+
+ __i915_vma_unpin(vma);
+ if (ret)
+ return ret;
+
+ GEM_BUG_ON(i915_vma_is_active(vma));
+ }
+
+ if (i915_vma_is_pinned(vma))
+ return -EBUSY;
+
+ if (!drm_mm_node_allocated(&vma->node))
+ goto destroy;
+
+ GEM_BUG_ON(obj->bind_count == 0);
+ GEM_BUG_ON(!obj->mm.pages);
+
+ if (i915_vma_is_map_and_fenceable(vma)) {
+ /* release the fence reg _after_ flushing */
+ ret = i915_vma_put_fence(vma);
+ if (ret)
+ return ret;
+
+ /* Force a pagefault for domain tracking on next user access */
+ i915_gem_release_mmap(obj);
+
+ __i915_vma_iounmap(vma);
+ vma->flags &= ~I915_VMA_CAN_FENCE;
+ }
+
+ if (likely(!vma->vm->closed)) {
+ trace_i915_vma_unbind(vma);
+ vma->vm->unbind_vma(vma);
+ }
+ vma->flags &= ~(I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND);
+
+ drm_mm_remove_node(&vma->node);
+ list_move_tail(&vma->vm_link, &vma->vm->unbound_list);
+
+ if (vma->pages != obj->mm.pages) {
+ GEM_BUG_ON(!vma->pages);
+ sg_free_table(vma->pages);
+ kfree(vma->pages);
+ }
+ vma->pages = NULL;
+
+ /* Since the unbound list is global, only move to that list if
+ * no more VMAs exist.
+ */
+ if (--obj->bind_count == 0)
+ list_move_tail(&obj->global_list,
+ &to_i915(obj->base.dev)->mm.unbound_list);
+
+ /* And finally now the object is completely decoupled from this vma,
+ * we can drop its hold on the backing storage and allow it to be
+ * reaped by the shrinker.
+ */
+ i915_gem_object_unpin_pages(obj);
+
+destroy:
+ if (unlikely(i915_vma_is_closed(vma)))
+ i915_vma_destroy(vma);
+
+ return 0;
+}
+
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
new file mode 100644
index 0000000..d358b30
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -0,0 +1,342 @@
+/*
+ * Copyright © 2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __I915_VMA_H__
+#define __I915_VMA_H__
+
+#include <linux/io-mapping.h>
+
+#include <drm/drm_mm.h>
+
+#include "i915_gem_gtt.h"
+#include "i915_gem_fence_reg.h"
+#include "i915_gem_object.h"
+#include "i915_gem_request.h"
+
+
+enum i915_cache_level;
+
+/**
+ * A VMA represents a GEM BO that is bound into an address space. Therefore, a
+ * VMA's presence cannot be guaranteed before binding the object into, or
+ * after unbinding it from, the address space.
+ *
+ * To make things as simple as possible (i.e. no refcounting), a VMA's lifetime
+ * will always be <= an object's lifetime. So object refcounting should cover
+ * us.
+ */
+struct i915_vma {
+ struct drm_mm_node node;
+ struct drm_i915_gem_object *obj;
+ struct i915_address_space *vm;
+ struct drm_i915_fence_reg *fence;
+ struct sg_table *pages;
+ void __iomem *iomap;
+ u64 size;
+ u64 display_alignment;
+
+ unsigned int flags;
+ /**
+ * How many users have pinned this object in GTT space. The following
+ * users can each hold at most one reference: pwrite/pread, execbuffer
+ * (objects are not allowed multiple times for the same batchbuffer),
+ * and the framebuffer code. When switching/pageflipping, the
+ * framebuffer code has at most two buffers pinned per crtc.
+ *
+ * In the worst case this is 1 + 1 + 1 + 2*2 = 7. That would fit into 3
+ * bits with absolutely no headroom. So use 4 bits.
+ */
+#define I915_VMA_PIN_MASK 0xf
+#define I915_VMA_PIN_OVERFLOW BIT(5)
+
+ /** Flags and address space this VMA is bound to */
+#define I915_VMA_GLOBAL_BIND BIT(6)
+#define I915_VMA_LOCAL_BIND BIT(7)
+#define I915_VMA_BIND_MASK (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND | I915_VMA_PIN_OVERFLOW)
+
+#define I915_VMA_GGTT BIT(8)
+#define I915_VMA_CAN_FENCE BIT(9)
+#define I915_VMA_CLOSED BIT(10)
+
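+ /** Bitmask of engines with an outstanding read, see i915_vma_set_active() */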
+ unsigned int active;
+ struct i915_gem_active last_read[I915_NUM_ENGINES];
+ struct i915_gem_active last_write;
+ struct i915_gem_active last_fence;
+
+ /**
+ * Support different GGTT views into the same object.
+ * This means there can be multiple VMA mappings per object and per VM.
+ * i915_ggtt_view_type is used to distinguish between those entries.
+ * The default of zero (I915_GGTT_VIEW_NORMAL) is also assumed in GEM
+ * functions which take no ggtt view parameter.
+ */
+ struct i915_ggtt_view ggtt_view;
+
+ /** This object's place on the active/inactive lists */
+ struct list_head vm_link;
+
+ struct list_head obj_link; /* Link in the object's VMA list */
+ struct rb_node obj_node;
+
+ /** This vma's place in the batchbuffer or on the eviction list */
+ struct list_head exec_list;
+
+ /**
+ * Used for performing relocations during execbuffer insertion.
+ */
+ struct hlist_node exec_node;
+ unsigned long exec_handle;
+ struct drm_i915_gem_exec_object2 *exec_entry;
+};
+
+struct i915_vma *
+i915_vma_create(struct drm_i915_gem_object *obj,
+ struct i915_address_space *vm,
+ const struct i915_ggtt_view *view);
+
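+/* Returns a tri-state result (<0, ==0, >0) in the style of memcmp(), used to
+ * order VMAs within the per-object rb-tree (obj->vma_tree).
+ */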
+static inline long
+i915_vma_compare(struct i915_vma *vma,
+ struct i915_address_space *vm,
+ const struct i915_ggtt_view *view)
+{
+ GEM_BUG_ON(view && !i915_vma_is_ggtt(vma));
+
+ if (vma->vm != vm)
+ return vma->vm - vm;
+
+ if (!view)
+ return vma->ggtt_view.type;
+
+ if (vma->ggtt_view.type != view->type)
+ return vma->ggtt_view.type - view->type;
+
+ return memcmp(&vma->ggtt_view.params,
+ &view->params,
+ sizeof(view->params));
+}
+
+void i915_vma_unpin_and_release(struct i915_vma **p_vma);
+
+static inline bool i915_vma_is_ggtt(const struct i915_vma *vma)
+{
+ return vma->flags & I915_VMA_GGTT;
+}
+
+static inline bool i915_vma_is_map_and_fenceable(const struct i915_vma *vma)
+{
+ return vma->flags & I915_VMA_CAN_FENCE;
+}
+
+static inline bool i915_vma_is_closed(const struct i915_vma *vma)
+{
+ return vma->flags & I915_VMA_CLOSED;
+}
+
+static inline unsigned int i915_vma_get_active(const struct i915_vma *vma)
+{
+ return vma->active;
+}
+
+static inline bool i915_vma_is_active(const struct i915_vma *vma)
+{
+ return i915_vma_get_active(vma);
+}
+
+static inline void i915_vma_set_active(struct i915_vma *vma,
+ unsigned int engine)
+{
+ vma->active |= BIT(engine);
+}
+
+static inline void i915_vma_clear_active(struct i915_vma *vma,
+ unsigned int engine)
+{
+ vma->active &= ~BIT(engine);
+}
+
+static inline bool i915_vma_has_active_engine(const struct i915_vma *vma,
+ unsigned int engine)
+{
+ return vma->active & BIT(engine);
+}
+
+static inline u32 i915_ggtt_offset(const struct i915_vma *vma)
+{
+ GEM_BUG_ON(!i915_vma_is_ggtt(vma));
+ GEM_BUG_ON(!vma->node.allocated);
+ GEM_BUG_ON(upper_32_bits(vma->node.start));
+ GEM_BUG_ON(upper_32_bits(vma->node.start + vma->node.size - 1));
+ return lower_32_bits(vma->node.start);
+}
+
+static inline struct i915_vma *i915_vma_get(struct i915_vma *vma)
+{
+ i915_gem_object_get(vma->obj);
+ return vma;
+}
+
+static inline void i915_vma_put(struct i915_vma *vma)
+{
+ i915_gem_object_put(vma->obj);
+}
+
+int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level,
+ u32 flags);
+bool i915_gem_valid_gtt_space(struct i915_vma *vma, unsigned long cache_level);
+bool
+i915_vma_misplaced(struct i915_vma *vma, u64 size, u64 alignment, u64 flags);
+void __i915_vma_set_map_and_fenceable(struct i915_vma *vma);
+int __must_check i915_vma_unbind(struct i915_vma *vma);
+void i915_vma_close(struct i915_vma *vma);
+void i915_vma_destroy(struct i915_vma *vma);
+
+int __i915_vma_do_pin(struct i915_vma *vma,
+ u64 size, u64 alignment, u64 flags);
+static inline int __must_check
+i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
+{
+ BUILD_BUG_ON(PIN_MBZ != I915_VMA_PIN_OVERFLOW);
+ BUILD_BUG_ON(PIN_GLOBAL != I915_VMA_GLOBAL_BIND);
+ BUILD_BUG_ON(PIN_USER != I915_VMA_LOCAL_BIND);
+
+ /* Pin early to prevent the shrinker/eviction logic from destroying
+ * our vma as we insert and bind.
+ */
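+ /* The pin count lives in the low bits of vma->flags, so the increment
+ * below both takes a pin and samples the flags in one go: if the XOR
+ * against the requested flags leaves I915_VMA_BIND_MASK clear, the vma
+ * is already bound as required (with no pin-count overflow) and the
+ * slow path can be skipped.
+ */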
+ if (likely(((++vma->flags ^ flags) & I915_VMA_BIND_MASK) == 0))
+ return 0;
+
+ return __i915_vma_do_pin(vma, size, alignment, flags);
+}
+
+static inline int i915_vma_pin_count(const struct i915_vma *vma)
+{
+ return vma->flags & I915_VMA_PIN_MASK;
+}
+
+static inline bool i915_vma_is_pinned(const struct i915_vma *vma)
+{
+ return i915_vma_pin_count(vma);
+}
+
+static inline void __i915_vma_pin(struct i915_vma *vma)
+{
+ vma->flags++;
+ GEM_BUG_ON(vma->flags & I915_VMA_PIN_OVERFLOW);
+}
+
+static inline void __i915_vma_unpin(struct i915_vma *vma)
+{
+ GEM_BUG_ON(!i915_vma_is_pinned(vma));
+ vma->flags--;
+}
+
+static inline void i915_vma_unpin(struct i915_vma *vma)
+{
+ GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
+ __i915_vma_unpin(vma);
+}
+
+/**
+ * i915_vma_pin_iomap - calls ioremap_wc to map the GGTT VMA via the aperture
+ * @vma: VMA to iomap
+ *
+ * The passed in VMA has to be pinned in the global GTT mappable region.
+ * An extra pinning of the VMA is acquired for the returned iomapping;
+ * the caller must call i915_vma_unpin_iomap() to relinquish the pinning
+ * once the iomapping is no longer required.
+ *
+ * Callers must hold the struct_mutex.
+ *
+ * Returns a valid iomapped pointer or ERR_PTR.
+ */
+void __iomem *i915_vma_pin_iomap(struct i915_vma *vma);
+#define IO_ERR_PTR(x) ((void __iomem *)ERR_PTR(x))
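+/* A minimal usage sketch (assumes the caller holds struct_mutex):
+ *
+ * void __iomem *ptr;
+ *
+ * ptr = i915_vma_pin_iomap(vma);
+ * if (IS_ERR(ptr))
+ * return PTR_ERR(ptr);
+ *
+ * writel(value, ptr + offset);
+ * i915_vma_unpin_iomap(vma);
+ */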
+
+/**
+ * i915_vma_unpin_iomap - unpins the mapping returned from i915_vma_pin_iomap
+ * @vma: VMA to unpin
+ *
+ * Unpins the mapping previously acquired with i915_vma_pin_iomap().
+ *
+ * Callers must hold the struct_mutex. This function is only valid to be
+ * called on a VMA previously iomapped by the caller with i915_vma_pin_iomap().
+ */
+static inline void i915_vma_unpin_iomap(struct i915_vma *vma)
+{
+ lockdep_assert_held(&vma->vm->dev->struct_mutex);
+ GEM_BUG_ON(vma->iomap == NULL);
+ i915_vma_unpin(vma);
+}
+
+static inline struct page *i915_vma_first_page(struct i915_vma *vma)
+{
+ GEM_BUG_ON(!vma->pages);
+ return sg_page(vma->pages->sgl);
+}
+
+/**
+ * i915_vma_pin_fence - pin fencing state
+ * @vma: vma to pin fencing for
+ *
+ * This pins the fencing state (whether tiled or untiled) to make sure the
+ * vma (and its object) is ready to be used as a scanout target. Fencing
+ * status must be synchronized first by calling i915_vma_get_fence().
+ *
+ * The resulting fence pin reference must be released again with
+ * i915_vma_unpin_fence().
+ *
+ * Returns:
+ *
+ * True if the vma has a fence, false otherwise.
+ */
+static inline bool
+i915_vma_pin_fence(struct i915_vma *vma)
+{
+ lockdep_assert_held(&vma->vm->dev->struct_mutex);
+ if (vma->fence) {
+ vma->fence->pin_count++;
+ return true;
+ }
+
+ return false;
+}
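+
+/* A minimal sketch of the expected pairing, assuming the vma is already
+ * bound and fenced via i915_vma_get_fence():
+ *
+ * if (i915_vma_pin_fence(vma)) {
+ * ... use the fence for scanout ...
+ * i915_vma_unpin_fence(vma);
+ * }
+ */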
+
+/**
+ * i915_vma_unpin_fence - unpin fencing state
+ * @vma: vma to unpin fencing for
+ *
+ * This releases the fence pin reference acquired through
+ * i915_vma_pin_fence(). It handles objects both with and without an
+ * attached fence correctly; callers do not need to distinguish the two.
+ */
+static inline void
+i915_vma_unpin_fence(struct i915_vma *vma)
+{
+ lockdep_assert_held(&vma->vm->dev->struct_mutex);
+ if (vma->fence) {
+ GEM_BUG_ON(vma->fence->pin_count <= 0);
+ vma->fence->pin_count--;
+ }
+}
+
+#endif
+
--
2.7.4