[Intel-gfx] [PATCH 29/45] drm/i915: Move mmap and friends to its own file
Chris Wilson
chris at chris-wilson.co.uk
Thu Apr 25 09:19:48 UTC 2019
Continuing the decluttering of i915_gem.c, now the turn of do_mmap and
the faulthandlers
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
Reviewed-by: Matthew Auld <matthew.auld at intel.com>
---
drivers/gpu/drm/i915/Makefile | 1 +
drivers/gpu/drm/i915/gem/i915_gem_mman.c | 505 ++++++++++++++++
drivers/gpu/drm/i915/gem/i915_gem_object.c | 56 ++
drivers/gpu/drm/i915/gem/i915_gem_object.h | 7 +
.../drm/i915/gem/selftests/i915_gem_mman.c | 503 ++++++++++++++++
drivers/gpu/drm/i915/i915_drv.h | 1 -
drivers/gpu/drm/i915/i915_gem.c | 561 +-----------------
drivers/gpu/drm/i915/i915_gem_tiling.c | 2 +-
.../gpu/drm/i915/selftests/i915_gem_object.c | 487 ---------------
.../drm/i915/selftests/i915_live_selftests.h | 1 +
10 files changed, 1088 insertions(+), 1036 deletions(-)
create mode 100644 drivers/gpu/drm/i915/gem/i915_gem_mman.c
create mode 100644 drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 84277b29389c..b86af182b1ac 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -88,6 +88,7 @@ i915-y += $(gt-y)
obj-y += gem/
gem-y += \
gem/i915_gem_object.o \
+ gem/i915_gem_mman.o \
gem/i915_gem_pages.o \
gem/i915_gem_phys.o \
gem/i915_gem_shmem.o
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
new file mode 100644
index 000000000000..1bcc6e1091e9
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
@@ -0,0 +1,505 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2014-2016 Intel Corporation
+ */
+
+#include <linux/mman.h>
+#include <linux/sizes.h>
+
+#include "i915_gem_ioctls.h"
+#include "i915_gem_object.h"
+
+#include "../i915_gem_gtt.h"
+#include "../i915_vma.h"
+#include "../i915_drv.h"
+#include "../intel_drv.h"
+
+static inline bool
+__vma_matches(struct vm_area_struct *vma, struct file *filp,
+ unsigned long addr, unsigned long size)
+{
+ if (vma->vm_file != filp)
+ return false;
+
+ return vma->vm_start == addr &&
+ (vma->vm_end - vma->vm_start) == PAGE_ALIGN(size);
+}
+
+/**
+ * i915_gem_mmap_ioctl - Maps the contents of an object, returning the address
+ * it is mapped to.
+ * @dev: drm device
+ * @data: ioctl data blob
+ * @file: drm file
+ *
+ * While the mapping holds a reference on the contents of the object, it doesn't
+ * imply a ref on the object itself.
+ *
+ * IMPORTANT:
+ *
+ * DRM driver writers who look a this function as an example for how to do GEM
+ * mmap support, please don't implement mmap support like here. The modern way
+ * to implement DRM mmap support is with an mmap offset ioctl (like
+ * i915_gem_mmap_gtt) and then using the mmap syscall on the DRM fd directly.
+ * That way debug tooling like valgrind will understand what's going on, hiding
+ * the mmap call in a driver private ioctl will break that. The i915 driver only
+ * does cpu mmaps this way because we didn't know better.
+ */
+int
+i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file)
+{
+ struct drm_i915_gem_mmap *args = data;
+ struct drm_i915_gem_object *obj;
+ unsigned long addr;
+
+ if (args->flags & ~(I915_MMAP_WC))
+ return -EINVAL;
+
+ if (args->flags & I915_MMAP_WC && !boot_cpu_has(X86_FEATURE_PAT))
+ return -ENODEV;
+
+ obj = i915_gem_object_lookup(file, args->handle);
+ if (!obj)
+ return -ENOENT;
+
+ /* prime objects have no backing filp to GEM mmap
+ * pages from.
+ */
+ if (!obj->base.filp) {
+ addr = -ENXIO;
+ goto err;
+ }
+
+ if (range_overflows(args->offset, args->size, (u64)obj->base.size)) {
+ addr = -EINVAL;
+ goto err;
+ }
+
+ addr = vm_mmap(obj->base.filp, 0, args->size,
+ PROT_READ | PROT_WRITE, MAP_SHARED,
+ args->offset);
+ if (IS_ERR_VALUE(addr))
+ goto err;
+
+ if (args->flags & I915_MMAP_WC) {
+ struct mm_struct *mm = current->mm;
+ struct vm_area_struct *vma;
+
+ if (down_write_killable(&mm->mmap_sem)) {
+ addr = -EINTR;
+ goto err;
+ }
+ vma = find_vma(mm, addr);
+ if (vma && __vma_matches(vma, obj->base.filp, addr, args->size))
+ vma->vm_page_prot =
+ pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
+ else
+ addr = -ENOMEM;
+ up_write(&mm->mmap_sem);
+ if (IS_ERR_VALUE(addr))
+ goto err;
+
+ /* This may race, but that's ok, it only gets set */
+ WRITE_ONCE(obj->frontbuffer_ggtt_origin, ORIGIN_CPU);
+ }
+ i915_gem_object_put(obj);
+
+ args->addr_ptr = (u64)addr;
+ return 0;
+
+err:
+ i915_gem_object_put(obj);
+ return addr;
+}
+
+static unsigned int tile_row_pages(const struct drm_i915_gem_object *obj)
+{
+ return i915_gem_object_get_tile_row_size(obj) >> PAGE_SHIFT;
+}
+
+/**
+ * i915_gem_mmap_gtt_version - report the current feature set for GTT mmaps
+ *
+ * A history of the GTT mmap interface:
+ *
+ * 0 - Everything had to fit into the GTT. Both parties of a memcpy had to
+ * aligned and suitable for fencing, and still fit into the available
+ * mappable space left by the pinned display objects. A classic problem
+ * we called the page-fault-of-doom where we would ping-pong between
+ * two objects that could not fit inside the GTT and so the memcpy
+ * would page one object in at the expense of the other between every
+ * single byte.
+ *
+ * 1 - Objects can be any size, and have any compatible fencing (X Y, or none
+ * as set via i915_gem_set_tiling() [DRM_I915_GEM_SET_TILING]). If the
+ * object is too large for the available space (or simply too large
+ * for the mappable aperture!), a view is created instead and faulted
+ * into userspace. (This view is aligned and sized appropriately for
+ * fenced access.)
+ *
+ * 2 - Recognise WC as a separate cache domain so that we can flush the
+ * delayed writes via GTT before performing direct access via WC.
+ *
+ * 3 - Remove implicit set-domain(GTT) and synchronisation on initial
+ * pagefault; swapin remains transparent.
+ *
+ * Restrictions:
+ *
+ * * snoopable objects cannot be accessed via the GTT. It can cause machine
+ * hangs on some architectures, corruption on others. An attempt to service
+ * a GTT page fault from a snoopable object will generate a SIGBUS.
+ *
+ * * the object must be able to fit into RAM (physical memory, though no
+ * limited to the mappable aperture).
+ *
+ *
+ * Caveats:
+ *
+ * * a new GTT page fault will synchronize rendering from the GPU and flush
+ * all data to system memory. Subsequent access will not be synchronized.
+ *
+ * * all mappings are revoked on runtime device suspend.
+ *
+ * * there are only 8, 16 or 32 fence registers to share between all users
+ * (older machines require fence register for display and blitter access
+ * as well). Contention of the fence registers will cause the previous users
+ * to be unmapped and any new access will generate new page faults.
+ *
+ * * running out of memory while servicing a fault may generate a SIGBUS,
+ * rather than the expected SIGSEGV.
+ */
+int i915_gem_mmap_gtt_version(void)
+{
+ return 3;
+}
+
+static inline struct i915_ggtt_view
+compute_partial_view(const struct drm_i915_gem_object *obj,
+ pgoff_t page_offset,
+ unsigned int chunk)
+{
+ struct i915_ggtt_view view;
+
+ if (i915_gem_object_is_tiled(obj))
+ chunk = roundup(chunk, tile_row_pages(obj));
+
+ view.type = I915_GGTT_VIEW_PARTIAL;
+ view.partial.offset = rounddown(page_offset, chunk);
+ view.partial.size =
+ min_t(unsigned int, chunk,
+ (obj->base.size >> PAGE_SHIFT) - view.partial.offset);
+
+ /* If the partial covers the entire object, just create a normal VMA. */
+ if (chunk >= obj->base.size >> PAGE_SHIFT)
+ view.type = I915_GGTT_VIEW_NORMAL;
+
+ return view;
+}
+
+/**
+ * i915_gem_fault - fault a page into the GTT
+ * @vmf: fault info
+ *
+ * The fault handler is set up by drm_gem_mmap() when a object is GTT mapped
+ * from userspace. The fault handler takes care of binding the object to
+ * the GTT (if needed), allocating and programming a fence register (again,
+ * only if needed based on whether the old reg is still valid or the object
+ * is tiled) and inserting a new PTE into the faulting process.
+ *
+ * Note that the faulting process may involve evicting existing objects
+ * from the GTT and/or fence registers to make room. So performance may
+ * suffer if the GTT working set is large or there are few fence registers
+ * left.
+ *
+ * The current feature set supported by i915_gem_fault() and thus GTT mmaps
+ * is exposed via I915_PARAM_MMAP_GTT_VERSION (see i915_gem_mmap_gtt_version).
+ */
+vm_fault_t i915_gem_fault(struct vm_fault *vmf)
+{
+#define MIN_CHUNK_PAGES (SZ_1M >> PAGE_SHIFT)
+ struct vm_area_struct *area = vmf->vma;
+ struct drm_i915_gem_object *obj = to_intel_bo(area->vm_private_data);
+ struct drm_device *dev = obj->base.dev;
+ struct drm_i915_private *i915 = to_i915(dev);
+ struct i915_ggtt *ggtt = &i915->ggtt;
+ bool write = area->vm_flags & VM_WRITE;
+ intel_wakeref_t wakeref;
+ struct i915_vma *vma;
+ pgoff_t page_offset;
+ int srcu;
+ int ret;
+
+ /* Sanity check that we allow writing into this object */
+ if (i915_gem_object_is_readonly(obj) && write)
+ return VM_FAULT_SIGBUS;
+
+ /* We don't use vmf->pgoff since that has the fake offset */
+ page_offset = (vmf->address - area->vm_start) >> PAGE_SHIFT;
+
+ trace_i915_gem_object_fault(obj, page_offset, true, write);
+
+ ret = i915_gem_object_pin_pages(obj);
+ if (ret)
+ goto err;
+
+ wakeref = intel_runtime_pm_get(i915);
+
+ srcu = i915_reset_trylock(i915);
+ if (srcu < 0) {
+ ret = srcu;
+ goto err_rpm;
+ }
+
+ ret = i915_mutex_lock_interruptible(dev);
+ if (ret)
+ goto err_reset;
+
+ /* Access to snoopable pages through the GTT is incoherent. */
+ if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(i915)) {
+ ret = -EFAULT;
+ goto err_unlock;
+ }
+
+ /* Now pin it into the GTT as needed */
+ vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
+ PIN_MAPPABLE |
+ PIN_NONBLOCK |
+ PIN_NONFAULT);
+ if (IS_ERR(vma)) {
+ /* Use a partial view if it is bigger than available space */
+ struct i915_ggtt_view view =
+ compute_partial_view(obj, page_offset, MIN_CHUNK_PAGES);
+ unsigned int flags;
+
+ flags = PIN_MAPPABLE;
+ if (view.type == I915_GGTT_VIEW_NORMAL)
+ flags |= PIN_NONBLOCK; /* avoid warnings for pinned */
+
+ /*
+ * Userspace is now writing through an untracked VMA, abandon
+ * all hope that the hardware is able to track future writes.
+ */
+ obj->frontbuffer_ggtt_origin = ORIGIN_CPU;
+
+ vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags);
+ if (IS_ERR(vma) && !view.type) {
+ flags = PIN_MAPPABLE;
+ view.type = I915_GGTT_VIEW_PARTIAL;
+ vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags);
+ }
+ }
+ if (IS_ERR(vma)) {
+ ret = PTR_ERR(vma);
+ goto err_unlock;
+ }
+
+ ret = i915_vma_pin_fence(vma);
+ if (ret)
+ goto err_unpin;
+
+ /* Finally, remap it using the new GTT offset */
+ ret = remap_io_mapping(area,
+ area->vm_start + (vma->ggtt_view.partial.offset << PAGE_SHIFT),
+ (ggtt->gmadr.start + vma->node.start) >> PAGE_SHIFT,
+ min_t(u64, vma->size, area->vm_end - area->vm_start),
+ &ggtt->iomap);
+ if (ret)
+ goto err_fence;
+
+ /* Mark as being mmapped into userspace for later revocation */
+ assert_rpm_wakelock_held(i915);
+ if (!i915_vma_set_userfault(vma) && !obj->userfault_count++)
+ list_add(&obj->userfault_link, &i915->mm.userfault_list);
+ GEM_BUG_ON(!obj->userfault_count);
+
+ i915_vma_set_ggtt_write(vma);
+
+err_fence:
+ i915_vma_unpin_fence(vma);
+err_unpin:
+ __i915_vma_unpin(vma);
+err_unlock:
+ mutex_unlock(&dev->struct_mutex);
+err_reset:
+ i915_reset_unlock(i915, srcu);
+err_rpm:
+ intel_runtime_pm_put(i915, wakeref);
+ i915_gem_object_unpin_pages(obj);
+err:
+ switch (ret) {
+ case -EIO:
+ /*
+ * We eat errors when the gpu is terminally wedged to avoid
+ * userspace unduly crashing (gl has no provisions for mmaps to
+ * fail). But any other -EIO isn't ours (e.g. swap in failure)
+ * and so needs to be reported.
+ */
+ if (!i915_terminally_wedged(i915))
+ return VM_FAULT_SIGBUS;
+ /* else: fall through */
+ case -EAGAIN:
+ /*
+ * EAGAIN means the gpu is hung and we'll wait for the error
+ * handler to reset everything when re-faulting in
+ * i915_mutex_lock_interruptible.
+ */
+ case 0:
+ case -ERESTARTSYS:
+ case -EINTR:
+ case -EBUSY:
+ /*
+ * EBUSY is ok: this just means that another thread
+ * already did the job.
+ */
+ return VM_FAULT_NOPAGE;
+ case -ENOMEM:
+ return VM_FAULT_OOM;
+ case -ENOSPC:
+ case -EFAULT:
+ return VM_FAULT_SIGBUS;
+ default:
+ WARN_ONCE(ret, "unhandled error in %s: %i\n", __func__, ret);
+ return VM_FAULT_SIGBUS;
+ }
+}
+
+void __i915_gem_object_release_mmap(struct drm_i915_gem_object *obj)
+{
+ struct i915_vma *vma;
+
+ GEM_BUG_ON(!obj->userfault_count);
+
+ obj->userfault_count = 0;
+ list_del(&obj->userfault_link);
+ drm_vma_node_unmap(&obj->base.vma_node,
+ obj->base.dev->anon_inode->i_mapping);
+
+ for_each_ggtt_vma(vma, obj)
+ i915_vma_unset_userfault(vma);
+}
+
+/**
+ * i915_gem_object_release_mmap - remove physical page mappings
+ * @obj: obj in question
+ *
+ * Preserve the reservation of the mmapping with the DRM core code, but
+ * relinquish ownership of the pages back to the system.
+ *
+ * It is vital that we remove the page mapping if we have mapped a tiled
+ * object through the GTT and then lose the fence register due to
+ * resource pressure. Similarly if the object has been moved out of the
+ * aperture, than pages mapped into userspace must be revoked. Removing the
+ * mapping will then trigger a page fault on the next user access, allowing
+ * fixup by i915_gem_fault().
+ */
+void i915_gem_object_release_mmap(struct drm_i915_gem_object *obj)
+{
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
+ intel_wakeref_t wakeref;
+
+ /* Serialisation between user GTT access and our code depends upon
+ * revoking the CPU's PTE whilst the mutex is held. The next user
+ * pagefault then has to wait until we release the mutex.
+ *
+ * Note that RPM complicates somewhat by adding an additional
+ * requirement that operations to the GGTT be made holding the RPM
+ * wakeref.
+ */
+ lockdep_assert_held(&i915->drm.struct_mutex);
+ wakeref = intel_runtime_pm_get(i915);
+
+ if (!obj->userfault_count)
+ goto out;
+
+ __i915_gem_object_release_mmap(obj);
+
+ /* Ensure that the CPU's PTE are revoked and there are not outstanding
+ * memory transactions from userspace before we return. The TLB
+ * flushing implied above by changing the PTE above *should* be
+ * sufficient, an extra barrier here just provides us with a bit
+ * of paranoid documentation about our requirement to serialise
+ * memory writes before touching registers / GSM.
+ */
+ wmb();
+
+out:
+ intel_runtime_pm_put(i915, wakeref);
+}
+
+static int create_mmap_offset(struct drm_i915_gem_object *obj)
+{
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
+ int err;
+
+ err = drm_gem_create_mmap_offset(&obj->base);
+ if (likely(!err))
+ return 0;
+
+ /* Attempt to reap some mmap space from dead objects */
+ do {
+ err = i915_gem_wait_for_idle(i915,
+ I915_WAIT_INTERRUPTIBLE,
+ MAX_SCHEDULE_TIMEOUT);
+ if (err)
+ break;
+
+ i915_gem_drain_freed_objects(i915);
+ err = drm_gem_create_mmap_offset(&obj->base);
+ if (!err)
+ break;
+
+ } while (flush_delayed_work(&i915->gem.retire_work));
+
+ return err;
+}
+
+int
+i915_gem_mmap_gtt(struct drm_file *file,
+ struct drm_device *dev,
+ u32 handle,
+ u64 *offset)
+{
+ struct drm_i915_gem_object *obj;
+ int ret;
+
+ obj = i915_gem_object_lookup(file, handle);
+ if (!obj)
+ return -ENOENT;
+
+ ret = create_mmap_offset(obj);
+ if (ret == 0)
+ *offset = drm_vma_node_offset_addr(&obj->base.vma_node);
+
+ i915_gem_object_put(obj);
+ return ret;
+}
+
+/**
+ * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
+ * @dev: DRM device
+ * @data: GTT mapping ioctl data
+ * @file: GEM object info
+ *
+ * Simply returns the fake offset to userspace so it can mmap it.
+ * The mmap call will end up in drm_gem_mmap(), which will set things
+ * up so we can get faults in the handler above.
+ *
+ * The fault handler will take care of binding the object into the GTT
+ * (since it may have been evicted to make room for something), allocating
+ * a fence register, and mapping the appropriate aperture address into
+ * userspace.
+ */
+int
+i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file)
+{
+ struct drm_i915_gem_mmap_gtt *args = data;
+
+ return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset);
+}
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftests/i915_gem_mman.c"
+#endif
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index 86e7e88817af..005e17b3acce 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -26,6 +26,7 @@
#include "../i915_drv.h"
#include "../i915_globals.h"
+#include "../i915_gem_clflush.h"
#include "../intel_frontbuffer.h"
static struct i915_global_object {
@@ -357,6 +358,61 @@ void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj)
i915_gem_object_put(obj);
}
+static inline enum fb_op_origin
+fb_write_origin(struct drm_i915_gem_object *obj, unsigned int domain)
+{
+ return (domain == I915_GEM_DOMAIN_GTT ?
+ obj->frontbuffer_ggtt_origin : ORIGIN_CPU);
+}
+
+static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj)
+{
+ return !(obj->cache_level == I915_CACHE_NONE ||
+ obj->cache_level == I915_CACHE_WT);
+}
+
+void
+i915_gem_object_flush_write_domain(struct drm_i915_gem_object *obj,
+ unsigned int flush_domains)
+{
+ struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
+ struct i915_vma *vma;
+
+ if (!(obj->write_domain & flush_domains))
+ return;
+
+ switch (obj->write_domain) {
+ case I915_GEM_DOMAIN_GTT:
+ i915_gem_flush_ggtt_writes(dev_priv);
+
+ intel_fb_obj_flush(obj,
+ fb_write_origin(obj, I915_GEM_DOMAIN_GTT));
+
+ for_each_ggtt_vma(vma, obj) {
+ if (vma->iomap)
+ continue;
+
+ i915_vma_unset_ggtt_write(vma);
+ }
+ break;
+
+ case I915_GEM_DOMAIN_WC:
+ wmb();
+ break;
+
+ case I915_GEM_DOMAIN_CPU:
+ i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
+ break;
+
+ case I915_GEM_DOMAIN_RENDER:
+ if (gpu_write_needs_clflush(obj))
+ obj->cache_dirty = true;
+ break;
+ }
+
+ obj->write_domain = 0;
+}
+
void i915_gem_init__objects(struct drm_i915_private *i915)
{
INIT_WORK(&i915->mm.free_work, __i915_gem_free_work);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index 2e963a593245..9bf8155d27de 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -346,6 +346,13 @@ static inline void i915_gem_object_unpin_map(struct drm_i915_gem_object *obj)
i915_gem_object_unpin_pages(obj);
}
+void __i915_gem_object_release_mmap(struct drm_i915_gem_object *obj);
+void i915_gem_object_release_mmap(struct drm_i915_gem_object *obj);
+
+void
+i915_gem_object_flush_write_domain(struct drm_i915_gem_object *obj,
+ unsigned int flush_domains);
+
static inline struct intel_engine_cs *
i915_gem_object_last_write_engine(struct drm_i915_gem_object *obj)
{
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
new file mode 100644
index 000000000000..031e7ad875e3
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
@@ -0,0 +1,503 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2016 Intel Corporation
+ */
+
+#include <linux/prime_numbers.h>
+
+#include "gt/intel_gt_pm.h"
+#include "i915_selftest.h"
+#include "selftests/huge_gem_object.h"
+#include "selftests/igt_flush_test.h"
+
+struct tile {
+ unsigned int width;
+ unsigned int height;
+ unsigned int stride;
+ unsigned int size;
+ unsigned int tiling;
+ unsigned int swizzle;
+};
+
+static u64 swizzle_bit(unsigned int bit, u64 offset)
+{
+ return (offset & BIT_ULL(bit)) >> (bit - 6);
+}
+
+static u64 tiled_offset(const struct tile *tile, u64 v)
+{
+ u64 x, y;
+
+ if (tile->tiling == I915_TILING_NONE)
+ return v;
+
+ y = div64_u64_rem(v, tile->stride, &x);
+ v = div64_u64_rem(y, tile->height, &y) * tile->stride * tile->height;
+
+ if (tile->tiling == I915_TILING_X) {
+ v += y * tile->width;
+ v += div64_u64_rem(x, tile->width, &x) << tile->size;
+ v += x;
+ } else if (tile->width == 128) {
+ const unsigned int ytile_span = 16;
+ const unsigned int ytile_height = 512;
+
+ v += y * ytile_span;
+ v += div64_u64_rem(x, ytile_span, &x) * ytile_height;
+ v += x;
+ } else {
+ const unsigned int ytile_span = 32;
+ const unsigned int ytile_height = 256;
+
+ v += y * ytile_span;
+ v += div64_u64_rem(x, ytile_span, &x) * ytile_height;
+ v += x;
+ }
+
+ switch (tile->swizzle) {
+ case I915_BIT_6_SWIZZLE_9:
+ v ^= swizzle_bit(9, v);
+ break;
+ case I915_BIT_6_SWIZZLE_9_10:
+ v ^= swizzle_bit(9, v) ^ swizzle_bit(10, v);
+ break;
+ case I915_BIT_6_SWIZZLE_9_11:
+ v ^= swizzle_bit(9, v) ^ swizzle_bit(11, v);
+ break;
+ case I915_BIT_6_SWIZZLE_9_10_11:
+ v ^= swizzle_bit(9, v) ^ swizzle_bit(10, v) ^ swizzle_bit(11, v);
+ break;
+ }
+
+ return v;
+}
+
+static int check_partial_mapping(struct drm_i915_gem_object *obj,
+ const struct tile *tile,
+ unsigned long end_time)
+{
+ const unsigned int nreal = obj->scratch / PAGE_SIZE;
+ const unsigned long npages = obj->base.size / PAGE_SIZE;
+ struct i915_vma *vma;
+ unsigned long page;
+ int err;
+
+ if (igt_timeout(end_time,
+ "%s: timed out before tiling=%d stride=%d\n",
+ __func__, tile->tiling, tile->stride))
+ return -EINTR;
+
+ err = i915_gem_object_set_tiling(obj, tile->tiling, tile->stride);
+ if (err) {
+ pr_err("Failed to set tiling mode=%u, stride=%u, err=%d\n",
+ tile->tiling, tile->stride, err);
+ return err;
+ }
+
+ GEM_BUG_ON(i915_gem_object_get_tiling(obj) != tile->tiling);
+ GEM_BUG_ON(i915_gem_object_get_stride(obj) != tile->stride);
+
+ for_each_prime_number_from(page, 1, npages) {
+ struct i915_ggtt_view view =
+ compute_partial_view(obj, page, MIN_CHUNK_PAGES);
+ u32 __iomem *io;
+ struct page *p;
+ unsigned int n;
+ u64 offset;
+ u32 *cpu;
+
+ GEM_BUG_ON(view.partial.size > nreal);
+ cond_resched();
+
+ err = i915_gem_object_set_to_gtt_domain(obj, true);
+ if (err) {
+ pr_err("Failed to flush to GTT write domain; err=%d\n",
+ err);
+ return err;
+ }
+
+ vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, PIN_MAPPABLE);
+ if (IS_ERR(vma)) {
+ pr_err("Failed to pin partial view: offset=%lu; err=%d\n",
+ page, (int)PTR_ERR(vma));
+ return PTR_ERR(vma);
+ }
+
+ n = page - view.partial.offset;
+ GEM_BUG_ON(n >= view.partial.size);
+
+ io = i915_vma_pin_iomap(vma);
+ i915_vma_unpin(vma);
+ if (IS_ERR(io)) {
+ pr_err("Failed to iomap partial view: offset=%lu; err=%d\n",
+ page, (int)PTR_ERR(io));
+ return PTR_ERR(io);
+ }
+
+ iowrite32(page, io + n * PAGE_SIZE / sizeof(*io));
+ i915_vma_unpin_iomap(vma);
+
+ offset = tiled_offset(tile, page << PAGE_SHIFT);
+ if (offset >= obj->base.size)
+ continue;
+
+ i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
+
+ p = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT);
+ cpu = kmap(p) + offset_in_page(offset);
+ drm_clflush_virt_range(cpu, sizeof(*cpu));
+ if (*cpu != (u32)page) {
+ pr_err("Partial view for %lu [%u] (offset=%llu, size=%u [%llu, row size %u], fence=%d, tiling=%d, stride=%d) misalignment, expected write to page (%llu + %u [0x%llx]) of 0x%x, found 0x%x\n",
+ page, n,
+ view.partial.offset,
+ view.partial.size,
+ vma->size >> PAGE_SHIFT,
+ tile->tiling ? tile_row_pages(obj) : 0,
+ vma->fence ? vma->fence->id : -1, tile->tiling, tile->stride,
+ offset >> PAGE_SHIFT,
+ (unsigned int)offset_in_page(offset),
+ offset,
+ (u32)page, *cpu);
+ err = -EINVAL;
+ }
+ *cpu = 0;
+ drm_clflush_virt_range(cpu, sizeof(*cpu));
+ kunmap(p);
+ if (err)
+ return err;
+
+ i915_vma_destroy(vma);
+ }
+
+ return 0;
+}
+
+static int igt_partial_tiling(void *arg)
+{
+ const unsigned int nreal = 1 << 12; /* largest tile row x2 */
+ struct drm_i915_private *i915 = arg;
+ struct drm_i915_gem_object *obj;
+ intel_wakeref_t wakeref;
+ int tiling;
+ int err;
+
+ /* We want to check the page mapping and fencing of a large object
+ * mmapped through the GTT. The object we create is larger than can
+ * possibly be mmaped as a whole, and so we must use partial GGTT vma.
+ * We then check that a write through each partial GGTT vma ends up
+ * in the right set of pages within the object, and with the expected
+ * tiling, which we verify by manual swizzling.
+ */
+
+ obj = huge_gem_object(i915,
+ nreal << PAGE_SHIFT,
+ (1 + next_prime_number(i915->ggtt.vm.total >> PAGE_SHIFT)) << PAGE_SHIFT);
+ if (IS_ERR(obj))
+ return PTR_ERR(obj);
+
+ err = i915_gem_object_pin_pages(obj);
+ if (err) {
+ pr_err("Failed to allocate %u pages (%lu total), err=%d\n",
+ nreal, obj->base.size / PAGE_SIZE, err);
+ goto out;
+ }
+
+ mutex_lock(&i915->drm.struct_mutex);
+ wakeref = intel_runtime_pm_get(i915);
+
+ if (1) {
+ IGT_TIMEOUT(end);
+ struct tile tile;
+
+ tile.height = 1;
+ tile.width = 1;
+ tile.size = 0;
+ tile.stride = 0;
+ tile.swizzle = I915_BIT_6_SWIZZLE_NONE;
+ tile.tiling = I915_TILING_NONE;
+
+ err = check_partial_mapping(obj, &tile, end);
+ if (err && err != -EINTR)
+ goto out_unlock;
+ }
+
+ for (tiling = I915_TILING_X; tiling <= I915_TILING_Y; tiling++) {
+ IGT_TIMEOUT(end);
+ unsigned int max_pitch;
+ unsigned int pitch;
+ struct tile tile;
+
+ if (i915->quirks & QUIRK_PIN_SWIZZLED_PAGES)
+ /*
+ * The swizzling pattern is actually unknown as it
+ * varies based on physical address of each page.
+ * See i915_gem_detect_bit_6_swizzle().
+ */
+ break;
+
+ tile.tiling = tiling;
+ switch (tiling) {
+ case I915_TILING_X:
+ tile.swizzle = i915->mm.bit_6_swizzle_x;
+ break;
+ case I915_TILING_Y:
+ tile.swizzle = i915->mm.bit_6_swizzle_y;
+ break;
+ }
+
+ GEM_BUG_ON(tile.swizzle == I915_BIT_6_SWIZZLE_UNKNOWN);
+ if (tile.swizzle == I915_BIT_6_SWIZZLE_9_17 ||
+ tile.swizzle == I915_BIT_6_SWIZZLE_9_10_17)
+ continue;
+
+ if (INTEL_GEN(i915) <= 2) {
+ tile.height = 16;
+ tile.width = 128;
+ tile.size = 11;
+ } else if (tile.tiling == I915_TILING_Y &&
+ HAS_128_BYTE_Y_TILING(i915)) {
+ tile.height = 32;
+ tile.width = 128;
+ tile.size = 12;
+ } else {
+ tile.height = 8;
+ tile.width = 512;
+ tile.size = 12;
+ }
+
+ if (INTEL_GEN(i915) < 4)
+ max_pitch = 8192 / tile.width;
+ else if (INTEL_GEN(i915) < 7)
+ max_pitch = 128 * I965_FENCE_MAX_PITCH_VAL / tile.width;
+ else
+ max_pitch = 128 * GEN7_FENCE_MAX_PITCH_VAL / tile.width;
+
+ for (pitch = max_pitch; pitch; pitch >>= 1) {
+ tile.stride = tile.width * pitch;
+ err = check_partial_mapping(obj, &tile, end);
+ if (err == -EINTR)
+ goto next_tiling;
+ if (err)
+ goto out_unlock;
+
+ if (pitch > 2 && INTEL_GEN(i915) >= 4) {
+ tile.stride = tile.width * (pitch - 1);
+ err = check_partial_mapping(obj, &tile, end);
+ if (err == -EINTR)
+ goto next_tiling;
+ if (err)
+ goto out_unlock;
+ }
+
+ if (pitch < max_pitch && INTEL_GEN(i915) >= 4) {
+ tile.stride = tile.width * (pitch + 1);
+ err = check_partial_mapping(obj, &tile, end);
+ if (err == -EINTR)
+ goto next_tiling;
+ if (err)
+ goto out_unlock;
+ }
+ }
+
+ if (INTEL_GEN(i915) >= 4) {
+ for_each_prime_number(pitch, max_pitch) {
+ tile.stride = tile.width * pitch;
+ err = check_partial_mapping(obj, &tile, end);
+ if (err == -EINTR)
+ goto next_tiling;
+ if (err)
+ goto out_unlock;
+ }
+ }
+
+next_tiling: ;
+ }
+
+out_unlock:
+ intel_runtime_pm_put(i915, wakeref);
+ mutex_unlock(&i915->drm.struct_mutex);
+ i915_gem_object_unpin_pages(obj);
+out:
+ i915_gem_object_put(obj);
+ return err;
+}
+
+static int make_obj_busy(struct drm_i915_gem_object *obj)
+{
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
+ struct i915_request *rq;
+ struct i915_vma *vma;
+ int err;
+
+ vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL);
+ if (IS_ERR(vma))
+ return PTR_ERR(vma);
+
+ err = i915_vma_pin(vma, 0, 0, PIN_USER);
+ if (err)
+ return err;
+
+ rq = i915_request_create(i915->engine[RCS0]->kernel_context);
+ if (IS_ERR(rq)) {
+ i915_vma_unpin(vma);
+ return PTR_ERR(rq);
+ }
+
+ err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
+
+ i915_request_add(rq);
+
+ __i915_gem_object_release_unless_active(obj);
+ i915_vma_unpin(vma);
+
+ return err;
+}
+
+static bool assert_mmap_offset(struct drm_i915_private *i915,
+ unsigned long size,
+ int expected)
+{
+ struct drm_i915_gem_object *obj;
+ int err;
+
+ obj = i915_gem_object_create_internal(i915, size);
+ if (IS_ERR(obj))
+ return PTR_ERR(obj);
+
+ err = create_mmap_offset(obj);
+ i915_gem_object_put(obj);
+
+ return err == expected;
+}
+
+static void disable_retire_worker(struct drm_i915_private *i915)
+{
+ i915_gem_shrinker_unregister(i915);
+
+ intel_gt_pm_get(i915);
+
+ cancel_delayed_work_sync(&i915->gem.retire_work);
+ cancel_delayed_work_sync(&i915->gem.idle_work);
+}
+
+static void restore_retire_worker(struct drm_i915_private *i915)
+{
+ intel_gt_pm_put(i915);
+
+ mutex_lock(&i915->drm.struct_mutex);
+ igt_flush_test(i915, I915_WAIT_LOCKED);
+ mutex_unlock(&i915->drm.struct_mutex);
+
+ i915_gem_shrinker_register(i915);
+}
+
+static int igt_mmap_offset_exhaustion(void *arg)
+{
+ struct drm_i915_private *i915 = arg;
+ struct drm_mm *mm = &i915->drm.vma_offset_manager->vm_addr_space_mm;
+ struct drm_i915_gem_object *obj;
+ struct drm_mm_node resv, *hole;
+ u64 hole_start, hole_end;
+ int loop, err;
+
+ /* Disable background reaper */
+ disable_retire_worker(i915);
+ GEM_BUG_ON(!i915->gt.awake);
+
+ /* Trim the device mmap space to only a page */
+ memset(&resv, 0, sizeof(resv));
+ drm_mm_for_each_hole(hole, mm, hole_start, hole_end) {
+ resv.start = hole_start;
+ resv.size = hole_end - hole_start - 1; /* PAGE_SIZE units */
+ err = drm_mm_reserve_node(mm, &resv);
+ if (err) {
+ pr_err("Failed to trim VMA manager, err=%d\n", err);
+ goto out_park;
+ }
+ break;
+ }
+
+ /* Just fits! */
+ if (!assert_mmap_offset(i915, PAGE_SIZE, 0)) {
+ pr_err("Unable to insert object into single page hole\n");
+ err = -EINVAL;
+ goto out;
+ }
+
+ /* Too large */
+ if (!assert_mmap_offset(i915, 2 * PAGE_SIZE, -ENOSPC)) {
+ pr_err("Unexpectedly succeeded in inserting too large object into single page hole\n");
+ err = -EINVAL;
+ goto out;
+ }
+
+ /* Fill the hole, further allocation attempts should then fail */
+ obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
+ if (IS_ERR(obj)) {
+ err = PTR_ERR(obj);
+ goto out;
+ }
+
+ err = create_mmap_offset(obj);
+ if (err) {
+ pr_err("Unable to insert object into reclaimed hole\n");
+ goto err_obj;
+ }
+
+ if (!assert_mmap_offset(i915, PAGE_SIZE, -ENOSPC)) {
+ pr_err("Unexpectedly succeeded in inserting object into no holes!\n");
+ err = -EINVAL;
+ goto err_obj;
+ }
+
+ i915_gem_object_put(obj);
+
+ /* Now fill with busy dead objects that we expect to reap */
+ for (loop = 0; loop < 3; loop++) {
+ if (i915_terminally_wedged(i915))
+ break;
+
+ obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
+ if (IS_ERR(obj)) {
+ err = PTR_ERR(obj);
+ goto out;
+ }
+
+ mutex_lock(&i915->drm.struct_mutex);
+ err = make_obj_busy(obj);
+ mutex_unlock(&i915->drm.struct_mutex);
+ if (err) {
+ pr_err("[loop %d] Failed to busy the object\n", loop);
+ goto err_obj;
+ }
+
+ /* NB we rely on the _active_ reference to access obj now */
+ GEM_BUG_ON(!i915_gem_object_is_active(obj));
+ err = create_mmap_offset(obj);
+ if (err) {
+ pr_err("[loop %d] create_mmap_offset failed with err=%d\n",
+ loop, err);
+ goto out;
+ }
+ }
+
+out:
+ drm_mm_remove_node(&resv);
+out_park:
+ restore_retire_worker(i915);
+ return err;
+err_obj:
+ i915_gem_object_put(obj);
+ goto out;
+}
+
+int i915_gem_mman_live_selftests(struct drm_i915_private *i915)
+{
+ static const struct i915_subtest tests[] = {
+ SUBTEST(igt_partial_tiling),
+ SUBTEST(igt_mmap_offset_exhaustion),
+ };
+
+ return i915_subtests(tests, i915);
+}
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index ac9723466488..c4893dd9489f 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2878,7 +2878,6 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
u64 flags);
int i915_gem_object_unbind(struct drm_i915_gem_object *obj);
-void i915_gem_release_mmap(struct drm_i915_gem_object *obj);
void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 0549b73f01ff..78d99841cf76 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -380,12 +380,6 @@ i915_gem_dumb_create(struct drm_file *file,
&args->size, &args->handle);
}
-static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj)
-{
- return !(obj->cache_level == I915_CACHE_NONE ||
- obj->cache_level == I915_CACHE_WT);
-}
-
/**
* Creates a new mm object and returns a handle to it.
* @dev: drm device pointer
@@ -405,13 +399,6 @@ i915_gem_create_ioctl(struct drm_device *dev, void *data,
&args->size, &args->handle);
}
-static inline enum fb_op_origin
-fb_write_origin(struct drm_i915_gem_object *obj, unsigned int domain)
-{
- return (domain == I915_GEM_DOMAIN_GTT ?
- obj->frontbuffer_ggtt_origin : ORIGIN_CPU);
-}
-
void i915_gem_flush_ggtt_writes(struct drm_i915_private *dev_priv)
{
intel_wakeref_t wakeref;
@@ -451,47 +438,6 @@ void i915_gem_flush_ggtt_writes(struct drm_i915_private *dev_priv)
}
}
-static void
-flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains)
-{
- struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
- struct i915_vma *vma;
-
- if (!(obj->write_domain & flush_domains))
- return;
-
- switch (obj->write_domain) {
- case I915_GEM_DOMAIN_GTT:
- i915_gem_flush_ggtt_writes(dev_priv);
-
- intel_fb_obj_flush(obj,
- fb_write_origin(obj, I915_GEM_DOMAIN_GTT));
-
- for_each_ggtt_vma(vma, obj) {
- if (vma->iomap)
- continue;
-
- i915_vma_unset_ggtt_write(vma);
- }
- break;
-
- case I915_GEM_DOMAIN_WC:
- wmb();
- break;
-
- case I915_GEM_DOMAIN_CPU:
- i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
- break;
-
- case I915_GEM_DOMAIN_RENDER:
- if (gpu_write_needs_clflush(obj))
- obj->cache_dirty = true;
- break;
- }
-
- obj->write_domain = 0;
-}
-
/*
* Pins the specified object's pages and synchronizes the object with
* GPU accesses. Sets needs_clflush to non-zero if the caller should
@@ -528,7 +474,7 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
goto out;
}
- flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
+ i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
/* If we're not in the cpu read domain, set ourself into the gtt
* read domain and manually flush cachelines (if required). This
@@ -580,7 +526,7 @@ int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
goto out;
}
- flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
+ i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
/* If we're not in the cpu write domain, set ourself into the
* gtt write domain and manually flush cachelines (as required).
@@ -1183,6 +1129,13 @@ static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
spin_unlock(&i915->mm.obj_lock);
}
+static inline enum fb_op_origin
+fb_write_origin(struct drm_i915_gem_object *obj, unsigned int domain)
+{
+ return (domain == I915_GEM_DOMAIN_GTT ?
+ obj->frontbuffer_ggtt_origin : ORIGIN_CPU);
+}
+
/**
* Called when user space prepares to use an object with the CPU, either
* through the mmap ioctl's mapping or a GTT mapping.
@@ -1326,420 +1279,6 @@ i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
return 0;
}
-static inline bool
-__vma_matches(struct vm_area_struct *vma, struct file *filp,
- unsigned long addr, unsigned long size)
-{
- if (vma->vm_file != filp)
- return false;
-
- return vma->vm_start == addr &&
- (vma->vm_end - vma->vm_start) == PAGE_ALIGN(size);
-}
-
-/**
- * i915_gem_mmap_ioctl - Maps the contents of an object, returning the address
- * it is mapped to.
- * @dev: drm device
- * @data: ioctl data blob
- * @file: drm file
- *
- * While the mapping holds a reference on the contents of the object, it doesn't
- * imply a ref on the object itself.
- *
- * IMPORTANT:
- *
- * DRM driver writers who look a this function as an example for how to do GEM
- * mmap support, please don't implement mmap support like here. The modern way
- * to implement DRM mmap support is with an mmap offset ioctl (like
- * i915_gem_mmap_gtt) and then using the mmap syscall on the DRM fd directly.
- * That way debug tooling like valgrind will understand what's going on, hiding
- * the mmap call in a driver private ioctl will break that. The i915 driver only
- * does cpu mmaps this way because we didn't know better.
- */
-int
-i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
- struct drm_file *file)
-{
- struct drm_i915_gem_mmap *args = data;
- struct drm_i915_gem_object *obj;
- unsigned long addr;
-
- if (args->flags & ~(I915_MMAP_WC))
- return -EINVAL;
-
- if (args->flags & I915_MMAP_WC && !boot_cpu_has(X86_FEATURE_PAT))
- return -ENODEV;
-
- obj = i915_gem_object_lookup(file, args->handle);
- if (!obj)
- return -ENOENT;
-
- /* prime objects have no backing filp to GEM mmap
- * pages from.
- */
- if (!obj->base.filp) {
- addr = -ENXIO;
- goto err;
- }
-
- if (range_overflows(args->offset, args->size, (u64)obj->base.size)) {
- addr = -EINVAL;
- goto err;
- }
-
- addr = vm_mmap(obj->base.filp, 0, args->size,
- PROT_READ | PROT_WRITE, MAP_SHARED,
- args->offset);
- if (IS_ERR_VALUE(addr))
- goto err;
-
- if (args->flags & I915_MMAP_WC) {
- struct mm_struct *mm = current->mm;
- struct vm_area_struct *vma;
-
- if (down_write_killable(&mm->mmap_sem)) {
- addr = -EINTR;
- goto err;
- }
- vma = find_vma(mm, addr);
- if (vma && __vma_matches(vma, obj->base.filp, addr, args->size))
- vma->vm_page_prot =
- pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
- else
- addr = -ENOMEM;
- up_write(&mm->mmap_sem);
- if (IS_ERR_VALUE(addr))
- goto err;
-
- /* This may race, but that's ok, it only gets set */
- WRITE_ONCE(obj->frontbuffer_ggtt_origin, ORIGIN_CPU);
- }
- i915_gem_object_put(obj);
-
- args->addr_ptr = (u64)addr;
- return 0;
-
-err:
- i915_gem_object_put(obj);
- return addr;
-}
-
-static unsigned int tile_row_pages(const struct drm_i915_gem_object *obj)
-{
- return i915_gem_object_get_tile_row_size(obj) >> PAGE_SHIFT;
-}
-
-/**
- * i915_gem_mmap_gtt_version - report the current feature set for GTT mmaps
- *
- * A history of the GTT mmap interface:
- *
- * 0 - Everything had to fit into the GTT. Both parties of a memcpy had to
- * aligned and suitable for fencing, and still fit into the available
- * mappable space left by the pinned display objects. A classic problem
- * we called the page-fault-of-doom where we would ping-pong between
- * two objects that could not fit inside the GTT and so the memcpy
- * would page one object in at the expense of the other between every
- * single byte.
- *
- * 1 - Objects can be any size, and have any compatible fencing (X Y, or none
- * as set via i915_gem_set_tiling() [DRM_I915_GEM_SET_TILING]). If the
- * object is too large for the available space (or simply too large
- * for the mappable aperture!), a view is created instead and faulted
- * into userspace. (This view is aligned and sized appropriately for
- * fenced access.)
- *
- * 2 - Recognise WC as a separate cache domain so that we can flush the
- * delayed writes via GTT before performing direct access via WC.
- *
- * 3 - Remove implicit set-domain(GTT) and synchronisation on initial
- * pagefault; swapin remains transparent.
- *
- * Restrictions:
- *
- * * snoopable objects cannot be accessed via the GTT. It can cause machine
- * hangs on some architectures, corruption on others. An attempt to service
- * a GTT page fault from a snoopable object will generate a SIGBUS.
- *
- * * the object must be able to fit into RAM (physical memory, though no
- * limited to the mappable aperture).
- *
- *
- * Caveats:
- *
- * * a new GTT page fault will synchronize rendering from the GPU and flush
- * all data to system memory. Subsequent access will not be synchronized.
- *
- * * all mappings are revoked on runtime device suspend.
- *
- * * there are only 8, 16 or 32 fence registers to share between all users
- * (older machines require fence register for display and blitter access
- * as well). Contention of the fence registers will cause the previous users
- * to be unmapped and any new access will generate new page faults.
- *
- * * running out of memory while servicing a fault may generate a SIGBUS,
- * rather than the expected SIGSEGV.
- */
-int i915_gem_mmap_gtt_version(void)
-{
- return 3;
-}
-
-static inline struct i915_ggtt_view
-compute_partial_view(const struct drm_i915_gem_object *obj,
- pgoff_t page_offset,
- unsigned int chunk)
-{
- struct i915_ggtt_view view;
-
- if (i915_gem_object_is_tiled(obj))
- chunk = roundup(chunk, tile_row_pages(obj));
-
- view.type = I915_GGTT_VIEW_PARTIAL;
- view.partial.offset = rounddown(page_offset, chunk);
- view.partial.size =
- min_t(unsigned int, chunk,
- (obj->base.size >> PAGE_SHIFT) - view.partial.offset);
-
- /* If the partial covers the entire object, just create a normal VMA. */
- if (chunk >= obj->base.size >> PAGE_SHIFT)
- view.type = I915_GGTT_VIEW_NORMAL;
-
- return view;
-}
-
-/**
- * i915_gem_fault - fault a page into the GTT
- * @vmf: fault info
- *
- * The fault handler is set up by drm_gem_mmap() when a object is GTT mapped
- * from userspace. The fault handler takes care of binding the object to
- * the GTT (if needed), allocating and programming a fence register (again,
- * only if needed based on whether the old reg is still valid or the object
- * is tiled) and inserting a new PTE into the faulting process.
- *
- * Note that the faulting process may involve evicting existing objects
- * from the GTT and/or fence registers to make room. So performance may
- * suffer if the GTT working set is large or there are few fence registers
- * left.
- *
- * The current feature set supported by i915_gem_fault() and thus GTT mmaps
- * is exposed via I915_PARAM_MMAP_GTT_VERSION (see i915_gem_mmap_gtt_version).
- */
-vm_fault_t i915_gem_fault(struct vm_fault *vmf)
-{
-#define MIN_CHUNK_PAGES (SZ_1M >> PAGE_SHIFT)
- struct vm_area_struct *area = vmf->vma;
- struct drm_i915_gem_object *obj = to_intel_bo(area->vm_private_data);
- struct drm_device *dev = obj->base.dev;
- struct drm_i915_private *dev_priv = to_i915(dev);
- struct i915_ggtt *ggtt = &dev_priv->ggtt;
- bool write = area->vm_flags & VM_WRITE;
- intel_wakeref_t wakeref;
- struct i915_vma *vma;
- pgoff_t page_offset;
- int srcu;
- int ret;
-
- /* Sanity check that we allow writing into this object */
- if (i915_gem_object_is_readonly(obj) && write)
- return VM_FAULT_SIGBUS;
-
- /* We don't use vmf->pgoff since that has the fake offset */
- page_offset = (vmf->address - area->vm_start) >> PAGE_SHIFT;
-
- trace_i915_gem_object_fault(obj, page_offset, true, write);
-
- ret = i915_gem_object_pin_pages(obj);
- if (ret)
- goto err;
-
- wakeref = intel_runtime_pm_get(dev_priv);
-
- srcu = i915_reset_trylock(dev_priv);
- if (srcu < 0) {
- ret = srcu;
- goto err_rpm;
- }
-
- ret = i915_mutex_lock_interruptible(dev);
- if (ret)
- goto err_reset;
-
- /* Access to snoopable pages through the GTT is incoherent. */
- if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev_priv)) {
- ret = -EFAULT;
- goto err_unlock;
- }
-
- /* Now pin it into the GTT as needed */
- vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
- PIN_MAPPABLE |
- PIN_NONBLOCK |
- PIN_NONFAULT);
- if (IS_ERR(vma)) {
- /* Use a partial view if it is bigger than available space */
- struct i915_ggtt_view view =
- compute_partial_view(obj, page_offset, MIN_CHUNK_PAGES);
- unsigned int flags;
-
- flags = PIN_MAPPABLE;
- if (view.type == I915_GGTT_VIEW_NORMAL)
- flags |= PIN_NONBLOCK; /* avoid warnings for pinned */
-
- /*
- * Userspace is now writing through an untracked VMA, abandon
- * all hope that the hardware is able to track future writes.
- */
- obj->frontbuffer_ggtt_origin = ORIGIN_CPU;
-
- vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags);
- if (IS_ERR(vma) && !view.type) {
- flags = PIN_MAPPABLE;
- view.type = I915_GGTT_VIEW_PARTIAL;
- vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags);
- }
- }
- if (IS_ERR(vma)) {
- ret = PTR_ERR(vma);
- goto err_unlock;
- }
-
- ret = i915_vma_pin_fence(vma);
- if (ret)
- goto err_unpin;
-
- /* Finally, remap it using the new GTT offset */
- ret = remap_io_mapping(area,
- area->vm_start + (vma->ggtt_view.partial.offset << PAGE_SHIFT),
- (ggtt->gmadr.start + vma->node.start) >> PAGE_SHIFT,
- min_t(u64, vma->size, area->vm_end - area->vm_start),
- &ggtt->iomap);
- if (ret)
- goto err_fence;
-
- /* Mark as being mmapped into userspace for later revocation */
- assert_rpm_wakelock_held(dev_priv);
- if (!i915_vma_set_userfault(vma) && !obj->userfault_count++)
- list_add(&obj->userfault_link, &dev_priv->mm.userfault_list);
- GEM_BUG_ON(!obj->userfault_count);
-
- i915_vma_set_ggtt_write(vma);
-
-err_fence:
- i915_vma_unpin_fence(vma);
-err_unpin:
- __i915_vma_unpin(vma);
-err_unlock:
- mutex_unlock(&dev->struct_mutex);
-err_reset:
- i915_reset_unlock(dev_priv, srcu);
-err_rpm:
- intel_runtime_pm_put(dev_priv, wakeref);
- i915_gem_object_unpin_pages(obj);
-err:
- switch (ret) {
- case -EIO:
- /*
- * We eat errors when the gpu is terminally wedged to avoid
- * userspace unduly crashing (gl has no provisions for mmaps to
- * fail). But any other -EIO isn't ours (e.g. swap in failure)
- * and so needs to be reported.
- */
- if (!i915_terminally_wedged(dev_priv))
- return VM_FAULT_SIGBUS;
- /* else: fall through */
- case -EAGAIN:
- /*
- * EAGAIN means the gpu is hung and we'll wait for the error
- * handler to reset everything when re-faulting in
- * i915_mutex_lock_interruptible.
- */
- case 0:
- case -ERESTARTSYS:
- case -EINTR:
- case -EBUSY:
- /*
- * EBUSY is ok: this just means that another thread
- * already did the job.
- */
- return VM_FAULT_NOPAGE;
- case -ENOMEM:
- return VM_FAULT_OOM;
- case -ENOSPC:
- case -EFAULT:
- return VM_FAULT_SIGBUS;
- default:
- WARN_ONCE(ret, "unhandled error in i915_gem_fault: %i\n", ret);
- return VM_FAULT_SIGBUS;
- }
-}
-
-static void __i915_gem_object_release_mmap(struct drm_i915_gem_object *obj)
-{
- struct i915_vma *vma;
-
- GEM_BUG_ON(!obj->userfault_count);
-
- obj->userfault_count = 0;
- list_del(&obj->userfault_link);
- drm_vma_node_unmap(&obj->base.vma_node,
- obj->base.dev->anon_inode->i_mapping);
-
- for_each_ggtt_vma(vma, obj)
- i915_vma_unset_userfault(vma);
-}
-
-/**
- * i915_gem_release_mmap - remove physical page mappings
- * @obj: obj in question
- *
- * Preserve the reservation of the mmapping with the DRM core code, but
- * relinquish ownership of the pages back to the system.
- *
- * It is vital that we remove the page mapping if we have mapped a tiled
- * object through the GTT and then lose the fence register due to
- * resource pressure. Similarly if the object has been moved out of the
- * aperture, than pages mapped into userspace must be revoked. Removing the
- * mapping will then trigger a page fault on the next user access, allowing
- * fixup by i915_gem_fault().
- */
-void
-i915_gem_release_mmap(struct drm_i915_gem_object *obj)
-{
- struct drm_i915_private *i915 = to_i915(obj->base.dev);
- intel_wakeref_t wakeref;
-
- /* Serialisation between user GTT access and our code depends upon
- * revoking the CPU's PTE whilst the mutex is held. The next user
- * pagefault then has to wait until we release the mutex.
- *
- * Note that RPM complicates somewhat by adding an additional
- * requirement that operations to the GGTT be made holding the RPM
- * wakeref.
- */
- lockdep_assert_held(&i915->drm.struct_mutex);
- wakeref = intel_runtime_pm_get(i915);
-
- if (!obj->userfault_count)
- goto out;
-
- __i915_gem_object_release_mmap(obj);
-
- /* Ensure that the CPU's PTE are revoked and there are not outstanding
- * memory transactions from userspace before we return. The TLB
- * flushing implied above by changing the PTE above *should* be
- * sufficient, an extra barrier here just provides us with a bit
- * of paranoid documentation about our requirement to serialise
- * memory writes before touching registers / GSM.
- */
- wmb();
-
-out:
- intel_runtime_pm_put(i915, wakeref);
-}
-
void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv)
{
struct drm_i915_gem_object *obj, *on;
@@ -1782,78 +1321,6 @@ void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv)
}
}
-static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj)
-{
- struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
- int err;
-
- err = drm_gem_create_mmap_offset(&obj->base);
- if (likely(!err))
- return 0;
-
- /* Attempt to reap some mmap space from dead objects */
- do {
- err = i915_gem_wait_for_idle(dev_priv,
- I915_WAIT_INTERRUPTIBLE,
- MAX_SCHEDULE_TIMEOUT);
- if (err)
- break;
-
- i915_gem_drain_freed_objects(dev_priv);
- err = drm_gem_create_mmap_offset(&obj->base);
- if (!err)
- break;
-
- } while (flush_delayed_work(&dev_priv->gem.retire_work));
-
- return err;
-}
-
-int
-i915_gem_mmap_gtt(struct drm_file *file,
- struct drm_device *dev,
- u32 handle,
- u64 *offset)
-{
- struct drm_i915_gem_object *obj;
- int ret;
-
- obj = i915_gem_object_lookup(file, handle);
- if (!obj)
- return -ENOENT;
-
- ret = i915_gem_object_create_mmap_offset(obj);
- if (ret == 0)
- *offset = drm_vma_node_offset_addr(&obj->base.vma_node);
-
- i915_gem_object_put(obj);
- return ret;
-}
-
-/**
- * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
- * @dev: DRM device
- * @data: GTT mapping ioctl data
- * @file: GEM object info
- *
- * Simply returns the fake offset to userspace so it can mmap it.
- * The mmap call will end up in drm_gem_mmap(), which will set things
- * up so we can get faults in the handler above.
- *
- * The fault handler will take care of binding the object into the GTT
- * (since it may have been evicted to make room for something), allocating
- * a fence register, and mapping the appropriate aperture address into
- * userspace.
- */
-int
-i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
- struct drm_file *file)
-{
- struct drm_i915_gem_mmap_gtt *args = data;
-
- return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset);
-}
-
bool i915_sg_trim(struct sg_table *orig_st)
{
struct sg_table new_st;
@@ -2057,7 +1524,7 @@ static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj)
* We manually flush the CPU domain so that we can override and
* force the flush for the display, and perform it asyncrhonously.
*/
- flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
+ i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
if (obj->cache_dirty)
i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE);
obj->write_domain = 0;
@@ -2111,7 +1578,7 @@ i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
if (ret)
return ret;
- flush_write_domain(obj, ~I915_GEM_DOMAIN_WC);
+ i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_WC);
/* Serialise direct access to this object with the barriers for
* coherent writes from the GPU, by effectively invalidating the
@@ -2173,7 +1640,7 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
if (ret)
return ret;
- flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT);
+ i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT);
/* Serialise direct access to this object with the barriers for
* coherent writes from the GPU, by effectively invalidating the
@@ -2282,7 +1749,7 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
* then double check if the GTT mapping is still
* valid for that pointer access.
*/
- i915_gem_release_mmap(obj);
+ i915_gem_object_release_mmap(obj);
/* As we no longer need a fence for GTT access,
* we can relinquish it now (and so prevent having
@@ -2537,7 +2004,7 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
if (ret)
return ret;
- flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
+ i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
/* Flush the CPU cache if it's still invalid. */
if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) {
diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
index 7d5cc9ccf6dd..86d6d92ccbc9 100644
--- a/drivers/gpu/drm/i915/i915_gem_tiling.c
+++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
@@ -299,7 +299,7 @@ i915_gem_object_set_tiling(struct drm_i915_gem_object *obj,
i915_gem_object_unlock(obj);
/* Force the fence to be reacquired for GTT access */
- i915_gem_release_mmap(obj);
+ i915_gem_object_release_mmap(obj);
/* Try to preallocate memory required to save swizzling on put-pages */
if (i915_gem_object_needs_bit17_swizzle(obj)) {
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/selftests/i915_gem_object.c
index 37f32c2eca19..a3dd2f1be95b 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_object.c
@@ -89,491 +89,6 @@ static int igt_gem_huge(void *arg)
return err;
}
-struct tile {
- unsigned int width;
- unsigned int height;
- unsigned int stride;
- unsigned int size;
- unsigned int tiling;
- unsigned int swizzle;
-};
-
-static u64 swizzle_bit(unsigned int bit, u64 offset)
-{
- return (offset & BIT_ULL(bit)) >> (bit - 6);
-}
-
-static u64 tiled_offset(const struct tile *tile, u64 v)
-{
- u64 x, y;
-
- if (tile->tiling == I915_TILING_NONE)
- return v;
-
- y = div64_u64_rem(v, tile->stride, &x);
- v = div64_u64_rem(y, tile->height, &y) * tile->stride * tile->height;
-
- if (tile->tiling == I915_TILING_X) {
- v += y * tile->width;
- v += div64_u64_rem(x, tile->width, &x) << tile->size;
- v += x;
- } else if (tile->width == 128) {
- const unsigned int ytile_span = 16;
- const unsigned int ytile_height = 512;
-
- v += y * ytile_span;
- v += div64_u64_rem(x, ytile_span, &x) * ytile_height;
- v += x;
- } else {
- const unsigned int ytile_span = 32;
- const unsigned int ytile_height = 256;
-
- v += y * ytile_span;
- v += div64_u64_rem(x, ytile_span, &x) * ytile_height;
- v += x;
- }
-
- switch (tile->swizzle) {
- case I915_BIT_6_SWIZZLE_9:
- v ^= swizzle_bit(9, v);
- break;
- case I915_BIT_6_SWIZZLE_9_10:
- v ^= swizzle_bit(9, v) ^ swizzle_bit(10, v);
- break;
- case I915_BIT_6_SWIZZLE_9_11:
- v ^= swizzle_bit(9, v) ^ swizzle_bit(11, v);
- break;
- case I915_BIT_6_SWIZZLE_9_10_11:
- v ^= swizzle_bit(9, v) ^ swizzle_bit(10, v) ^ swizzle_bit(11, v);
- break;
- }
-
- return v;
-}
-
-static int check_partial_mapping(struct drm_i915_gem_object *obj,
- const struct tile *tile,
- unsigned long end_time)
-{
- const unsigned int nreal = obj->scratch / PAGE_SIZE;
- const unsigned long npages = obj->base.size / PAGE_SIZE;
- struct i915_vma *vma;
- unsigned long page;
- int err;
-
- if (igt_timeout(end_time,
- "%s: timed out before tiling=%d stride=%d\n",
- __func__, tile->tiling, tile->stride))
- return -EINTR;
-
- err = i915_gem_object_set_tiling(obj, tile->tiling, tile->stride);
- if (err) {
- pr_err("Failed to set tiling mode=%u, stride=%u, err=%d\n",
- tile->tiling, tile->stride, err);
- return err;
- }
-
- GEM_BUG_ON(i915_gem_object_get_tiling(obj) != tile->tiling);
- GEM_BUG_ON(i915_gem_object_get_stride(obj) != tile->stride);
-
- for_each_prime_number_from(page, 1, npages) {
- struct i915_ggtt_view view =
- compute_partial_view(obj, page, MIN_CHUNK_PAGES);
- u32 __iomem *io;
- struct page *p;
- unsigned int n;
- u64 offset;
- u32 *cpu;
-
- GEM_BUG_ON(view.partial.size > nreal);
- cond_resched();
-
- err = i915_gem_object_set_to_gtt_domain(obj, true);
- if (err) {
- pr_err("Failed to flush to GTT write domain; err=%d\n",
- err);
- return err;
- }
-
- vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, PIN_MAPPABLE);
- if (IS_ERR(vma)) {
- pr_err("Failed to pin partial view: offset=%lu; err=%d\n",
- page, (int)PTR_ERR(vma));
- return PTR_ERR(vma);
- }
-
- n = page - view.partial.offset;
- GEM_BUG_ON(n >= view.partial.size);
-
- io = i915_vma_pin_iomap(vma);
- i915_vma_unpin(vma);
- if (IS_ERR(io)) {
- pr_err("Failed to iomap partial view: offset=%lu; err=%d\n",
- page, (int)PTR_ERR(io));
- return PTR_ERR(io);
- }
-
- iowrite32(page, io + n * PAGE_SIZE/sizeof(*io));
- i915_vma_unpin_iomap(vma);
-
- offset = tiled_offset(tile, page << PAGE_SHIFT);
- if (offset >= obj->base.size)
- continue;
-
- flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
-
- p = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT);
- cpu = kmap(p) + offset_in_page(offset);
- drm_clflush_virt_range(cpu, sizeof(*cpu));
- if (*cpu != (u32)page) {
- pr_err("Partial view for %lu [%u] (offset=%llu, size=%u [%llu, row size %u], fence=%d, tiling=%d, stride=%d) misalignment, expected write to page (%llu + %u [0x%llx]) of 0x%x, found 0x%x\n",
- page, n,
- view.partial.offset,
- view.partial.size,
- vma->size >> PAGE_SHIFT,
- tile->tiling ? tile_row_pages(obj) : 0,
- vma->fence ? vma->fence->id : -1, tile->tiling, tile->stride,
- offset >> PAGE_SHIFT,
- (unsigned int)offset_in_page(offset),
- offset,
- (u32)page, *cpu);
- err = -EINVAL;
- }
- *cpu = 0;
- drm_clflush_virt_range(cpu, sizeof(*cpu));
- kunmap(p);
- if (err)
- return err;
-
- i915_vma_destroy(vma);
- }
-
- return 0;
-}
-
-static int igt_partial_tiling(void *arg)
-{
- const unsigned int nreal = 1 << 12; /* largest tile row x2 */
- struct drm_i915_private *i915 = arg;
- struct drm_i915_gem_object *obj;
- intel_wakeref_t wakeref;
- int tiling;
- int err;
-
- /* We want to check the page mapping and fencing of a large object
- * mmapped through the GTT. The object we create is larger than can
- * possibly be mmaped as a whole, and so we must use partial GGTT vma.
- * We then check that a write through each partial GGTT vma ends up
- * in the right set of pages within the object, and with the expected
- * tiling, which we verify by manual swizzling.
- */
-
- obj = huge_gem_object(i915,
- nreal << PAGE_SHIFT,
- (1 + next_prime_number(i915->ggtt.vm.total >> PAGE_SHIFT)) << PAGE_SHIFT);
- if (IS_ERR(obj))
- return PTR_ERR(obj);
-
- err = i915_gem_object_pin_pages(obj);
- if (err) {
- pr_err("Failed to allocate %u pages (%lu total), err=%d\n",
- nreal, obj->base.size / PAGE_SIZE, err);
- goto out;
- }
-
- mutex_lock(&i915->drm.struct_mutex);
- wakeref = intel_runtime_pm_get(i915);
-
- if (1) {
- IGT_TIMEOUT(end);
- struct tile tile;
-
- tile.height = 1;
- tile.width = 1;
- tile.size = 0;
- tile.stride = 0;
- tile.swizzle = I915_BIT_6_SWIZZLE_NONE;
- tile.tiling = I915_TILING_NONE;
-
- err = check_partial_mapping(obj, &tile, end);
- if (err && err != -EINTR)
- goto out_unlock;
- }
-
- for (tiling = I915_TILING_X; tiling <= I915_TILING_Y; tiling++) {
- IGT_TIMEOUT(end);
- unsigned int max_pitch;
- unsigned int pitch;
- struct tile tile;
-
- if (i915->quirks & QUIRK_PIN_SWIZZLED_PAGES)
- /*
- * The swizzling pattern is actually unknown as it
- * varies based on physical address of each page.
- * See i915_gem_detect_bit_6_swizzle().
- */
- break;
-
- tile.tiling = tiling;
- switch (tiling) {
- case I915_TILING_X:
- tile.swizzle = i915->mm.bit_6_swizzle_x;
- break;
- case I915_TILING_Y:
- tile.swizzle = i915->mm.bit_6_swizzle_y;
- break;
- }
-
- GEM_BUG_ON(tile.swizzle == I915_BIT_6_SWIZZLE_UNKNOWN);
- if (tile.swizzle == I915_BIT_6_SWIZZLE_9_17 ||
- tile.swizzle == I915_BIT_6_SWIZZLE_9_10_17)
- continue;
-
- if (INTEL_GEN(i915) <= 2) {
- tile.height = 16;
- tile.width = 128;
- tile.size = 11;
- } else if (tile.tiling == I915_TILING_Y &&
- HAS_128_BYTE_Y_TILING(i915)) {
- tile.height = 32;
- tile.width = 128;
- tile.size = 12;
- } else {
- tile.height = 8;
- tile.width = 512;
- tile.size = 12;
- }
-
- if (INTEL_GEN(i915) < 4)
- max_pitch = 8192 / tile.width;
- else if (INTEL_GEN(i915) < 7)
- max_pitch = 128 * I965_FENCE_MAX_PITCH_VAL / tile.width;
- else
- max_pitch = 128 * GEN7_FENCE_MAX_PITCH_VAL / tile.width;
-
- for (pitch = max_pitch; pitch; pitch >>= 1) {
- tile.stride = tile.width * pitch;
- err = check_partial_mapping(obj, &tile, end);
- if (err == -EINTR)
- goto next_tiling;
- if (err)
- goto out_unlock;
-
- if (pitch > 2 && INTEL_GEN(i915) >= 4) {
- tile.stride = tile.width * (pitch - 1);
- err = check_partial_mapping(obj, &tile, end);
- if (err == -EINTR)
- goto next_tiling;
- if (err)
- goto out_unlock;
- }
-
- if (pitch < max_pitch && INTEL_GEN(i915) >= 4) {
- tile.stride = tile.width * (pitch + 1);
- err = check_partial_mapping(obj, &tile, end);
- if (err == -EINTR)
- goto next_tiling;
- if (err)
- goto out_unlock;
- }
- }
-
- if (INTEL_GEN(i915) >= 4) {
- for_each_prime_number(pitch, max_pitch) {
- tile.stride = tile.width * pitch;
- err = check_partial_mapping(obj, &tile, end);
- if (err == -EINTR)
- goto next_tiling;
- if (err)
- goto out_unlock;
- }
- }
-
-next_tiling: ;
- }
-
-out_unlock:
- intel_runtime_pm_put(i915, wakeref);
- mutex_unlock(&i915->drm.struct_mutex);
- i915_gem_object_unpin_pages(obj);
-out:
- i915_gem_object_put(obj);
- return err;
-}
-
-static int make_obj_busy(struct drm_i915_gem_object *obj)
-{
- struct drm_i915_private *i915 = to_i915(obj->base.dev);
- struct i915_request *rq;
- struct i915_vma *vma;
- int err;
-
- vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL);
- if (IS_ERR(vma))
- return PTR_ERR(vma);
-
- err = i915_vma_pin(vma, 0, 0, PIN_USER);
- if (err)
- return err;
-
- rq = i915_request_create(i915->engine[RCS0]->kernel_context);
- if (IS_ERR(rq)) {
- i915_vma_unpin(vma);
- return PTR_ERR(rq);
- }
-
- err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
-
- i915_request_add(rq);
-
- __i915_gem_object_release_unless_active(obj);
- i915_vma_unpin(vma);
-
- return err;
-}
-
-static bool assert_mmap_offset(struct drm_i915_private *i915,
- unsigned long size,
- int expected)
-{
- struct drm_i915_gem_object *obj;
- int err;
-
- obj = i915_gem_object_create_internal(i915, size);
- if (IS_ERR(obj))
- return PTR_ERR(obj);
-
- err = i915_gem_object_create_mmap_offset(obj);
- i915_gem_object_put(obj);
-
- return err == expected;
-}
-
-static void disable_retire_worker(struct drm_i915_private *i915)
-{
- i915_gem_shrinker_unregister(i915);
-
- intel_gt_pm_get(i915);
-
- cancel_delayed_work_sync(&i915->gem.retire_work);
- cancel_delayed_work_sync(&i915->gem.idle_work);
-}
-
-static void restore_retire_worker(struct drm_i915_private *i915)
-{
- intel_gt_pm_put(i915);
-
- mutex_lock(&i915->drm.struct_mutex);
- igt_flush_test(i915, I915_WAIT_LOCKED);
- mutex_unlock(&i915->drm.struct_mutex);
-
- i915_gem_shrinker_register(i915);
-}
-
-static int igt_mmap_offset_exhaustion(void *arg)
-{
- struct drm_i915_private *i915 = arg;
- struct drm_mm *mm = &i915->drm.vma_offset_manager->vm_addr_space_mm;
- struct drm_i915_gem_object *obj;
- struct drm_mm_node resv, *hole;
- u64 hole_start, hole_end;
- int loop, err;
-
- /* Disable background reaper */
- disable_retire_worker(i915);
- GEM_BUG_ON(!i915->gt.awake);
-
- /* Trim the device mmap space to only a page */
- memset(&resv, 0, sizeof(resv));
- drm_mm_for_each_hole(hole, mm, hole_start, hole_end) {
- resv.start = hole_start;
- resv.size = hole_end - hole_start - 1; /* PAGE_SIZE units */
- err = drm_mm_reserve_node(mm, &resv);
- if (err) {
- pr_err("Failed to trim VMA manager, err=%d\n", err);
- goto out_park;
- }
- break;
- }
-
- /* Just fits! */
- if (!assert_mmap_offset(i915, PAGE_SIZE, 0)) {
- pr_err("Unable to insert object into single page hole\n");
- err = -EINVAL;
- goto out;
- }
-
- /* Too large */
- if (!assert_mmap_offset(i915, 2*PAGE_SIZE, -ENOSPC)) {
- pr_err("Unexpectedly succeeded in inserting too large object into single page hole\n");
- err = -EINVAL;
- goto out;
- }
-
- /* Fill the hole, further allocation attempts should then fail */
- obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
- if (IS_ERR(obj)) {
- err = PTR_ERR(obj);
- goto out;
- }
-
- err = i915_gem_object_create_mmap_offset(obj);
- if (err) {
- pr_err("Unable to insert object into reclaimed hole\n");
- goto err_obj;
- }
-
- if (!assert_mmap_offset(i915, PAGE_SIZE, -ENOSPC)) {
- pr_err("Unexpectedly succeeded in inserting object into no holes!\n");
- err = -EINVAL;
- goto err_obj;
- }
-
- i915_gem_object_put(obj);
-
- /* Now fill with busy dead objects that we expect to reap */
- for (loop = 0; loop < 3; loop++) {
- intel_wakeref_t wakeref;
-
- if (i915_terminally_wedged(i915))
- break;
-
- obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
- if (IS_ERR(obj)) {
- err = PTR_ERR(obj);
- goto out;
- }
-
- err = 0;
- mutex_lock(&i915->drm.struct_mutex);
- with_intel_runtime_pm(i915, wakeref)
- err = make_obj_busy(obj);
- mutex_unlock(&i915->drm.struct_mutex);
- if (err) {
- pr_err("[loop %d] Failed to busy the object\n", loop);
- goto err_obj;
- }
-
- /* NB we rely on the _active_ reference to access obj now */
- GEM_BUG_ON(!i915_gem_object_is_active(obj));
- err = i915_gem_object_create_mmap_offset(obj);
- if (err) {
- pr_err("[loop %d] i915_gem_object_create_mmap_offset failed with err=%d\n",
- loop, err);
- goto out;
- }
- }
-
-out:
- drm_mm_remove_node(&resv);
-out_park:
- restore_retire_worker(i915);
- return err;
-err_obj:
- i915_gem_object_put(obj);
- goto out;
-}
-
int i915_gem_object_mock_selftests(void)
{
static const struct i915_subtest tests[] = {
@@ -596,8 +111,6 @@ int i915_gem_object_live_selftests(struct drm_i915_private *i915)
{
static const struct i915_subtest tests[] = {
SUBTEST(igt_gem_huge),
- SUBTEST(igt_partial_tiling),
- SUBTEST(igt_mmap_offset_exhaustion),
};
return i915_subtests(tests, i915);
diff --git a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h
index 6d766925ad04..bbf387de8db3 100644
--- a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h
+++ b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h
@@ -16,6 +16,7 @@ selftest(timelines, i915_timeline_live_selftests)
selftest(requests, i915_request_live_selftests)
selftest(active, i915_active_live_selftests)
selftest(objects, i915_gem_object_live_selftests)
+selftest(mman, i915_gem_mman_live_selftests)
selftest(dmabuf, i915_gem_dmabuf_live_selftests)
selftest(coherency, i915_gem_coherency_live_selftests)
selftest(gtt, i915_gem_gtt_live_selftests)
--
2.20.1
More information about the Intel-gfx
mailing list