[Intel-gfx] [PATCH 36/50] drm/i915: Move GEM domain management to its own file
Chris Wilson
chris at chris-wilson.co.uk
Fri Apr 12 08:53:56 UTC 2019
Continuing the decluttering of i915_gem.c, that of the read/write
domains, perhaps the biggest of GEM's follies?
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
Reviewed-by: Matthew Auld <matthew.auld at intel.com>
---
drivers/gpu/drm/i915/Makefile | 1 +
drivers/gpu/drm/i915/gem/i915_gem_domain.c | 784 ++++++++++++++++++
drivers/gpu/drm/i915/gem/i915_gem_object.h | 29 +
drivers/gpu/drm/i915/gvt/cmd_parser.c | 4 +-
drivers/gpu/drm/i915/gvt/scheduler.c | 6 +-
drivers/gpu/drm/i915/i915_cmd_parser.c | 8 +-
drivers/gpu/drm/i915/i915_drv.h | 26 -
drivers/gpu/drm/i915/i915_gem.c | 777 +----------------
drivers/gpu/drm/i915/i915_gem_execbuffer.c | 4 +-
drivers/gpu/drm/i915/i915_gem_render_state.c | 4 +-
drivers/gpu/drm/i915/selftests/huge_pages.c | 4 +-
.../drm/i915/selftests/i915_gem_coherency.c | 8 +-
.../gpu/drm/i915/selftests/i915_gem_context.c | 8 +-
13 files changed, 841 insertions(+), 822 deletions(-)
create mode 100644 drivers/gpu/drm/i915/gem/i915_gem_domain.c
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index b86af182b1ac..35561c0d80f9 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -87,6 +87,7 @@ i915-y += $(gt-y)
# GEM (Graphics Execution Management) code
obj-y += gem/
gem-y += \
+ gem/i915_gem_domain.o \
gem/i915_gem_object.o \
gem/i915_gem_mman.o \
gem/i915_gem_pages.o \
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
new file mode 100644
index 000000000000..eee421e3021c
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
@@ -0,0 +1,784 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2014-2016 Intel Corporation
+ */
+
+#include "i915_gem_ioctls.h"
+#include "i915_gem_object.h"
+
+#include "../i915_drv.h"
+#include "../i915_gem_clflush.h"
+#include "../i915_gem_gtt.h"
+#include "../i915_vma.h"
+
+#include "../intel_frontbuffer.h"
+
+static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj)
+{
+ /*
+ * We manually flush the CPU domain so that we can override and
+ * force the flush for the display, and perform it asynchronously.
+ */
+ i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
+ if (obj->cache_dirty)
+ i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE);
+ obj->write_domain = 0;
+}
+
+void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj)
+{
+ if (!READ_ONCE(obj->pin_global))
+ return;
+
+ mutex_lock(&obj->base.dev->struct_mutex);
+ __i915_gem_object_flush_for_display(obj);
+ mutex_unlock(&obj->base.dev->struct_mutex);
+}
+
+/**
+ * Moves a single object to the WC read, and possibly write domain.
+ * @obj: object to act on
+ * @write: ask for write access or read only
+ *
+ * This function returns when the move is complete, including waiting on
+ * flushes to occur.
+ */
+int
+i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
+{
+ int ret;
+
+ lockdep_assert_held(&obj->base.dev->struct_mutex);
+
+ ret = i915_gem_object_wait(obj,
+ I915_WAIT_INTERRUPTIBLE |
+ I915_WAIT_LOCKED |
+ (write ? I915_WAIT_ALL : 0),
+ MAX_SCHEDULE_TIMEOUT);
+ if (ret)
+ return ret;
+
+ if (obj->write_domain == I915_GEM_DOMAIN_WC)
+ return 0;
+
+ /* Flush and acquire obj->pages so that we are coherent through
+ * direct access in memory with previous cached writes through
+ * shmemfs and that our cache domain tracking remains valid.
+ * For example, if the obj->filp was moved to swap without us
+ * being notified and releasing the pages, we would mistakenly
+ * continue to assume that the obj remained out of the CPU cached
+ * domain.
+ */
+ ret = i915_gem_object_pin_pages(obj);
+ if (ret)
+ return ret;
+
+ i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_WC);
+
+ /* Serialise direct access to this object with the barriers for
+ * coherent writes from the GPU, by effectively invalidating the
+ * WC domain upon first access.
+ */
+ if ((obj->read_domains & I915_GEM_DOMAIN_WC) == 0)
+ mb();
+
+ /* It should now be out of any other write domains, and we can update
+ * the domain values for our changes.
+ */
+ GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_WC) != 0);
+ obj->read_domains |= I915_GEM_DOMAIN_WC;
+ if (write) {
+ obj->read_domains = I915_GEM_DOMAIN_WC;
+ obj->write_domain = I915_GEM_DOMAIN_WC;
+ obj->mm.dirty = true;
+ }
+
+ i915_gem_object_unpin_pages(obj);
+ return 0;
+}
+
+/**
+ * Moves a single object to the GTT read, and possibly write domain.
+ * @obj: object to act on
+ * @write: ask for write access or read only
+ *
+ * This function returns when the move is complete, including waiting on
+ * flushes to occur.
+ */
+int
+i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
+{
+ int ret;
+
+ lockdep_assert_held(&obj->base.dev->struct_mutex);
+
+ ret = i915_gem_object_wait(obj,
+ I915_WAIT_INTERRUPTIBLE |
+ I915_WAIT_LOCKED |
+ (write ? I915_WAIT_ALL : 0),
+ MAX_SCHEDULE_TIMEOUT);
+ if (ret)
+ return ret;
+
+ if (obj->write_domain == I915_GEM_DOMAIN_GTT)
+ return 0;
+
+ /* Flush and acquire obj->pages so that we are coherent through
+ * direct access in memory with previous cached writes through
+ * shmemfs and that our cache domain tracking remains valid.
+ * For example, if the obj->filp was moved to swap without us
+ * being notified and releasing the pages, we would mistakenly
+ * continue to assume that the obj remained out of the CPU cached
+ * domain.
+ */
+ ret = i915_gem_object_pin_pages(obj);
+ if (ret)
+ return ret;
+
+ i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT);
+
+ /* Serialise direct access to this object with the barriers for
+ * coherent writes from the GPU, by effectively invalidating the
+ * GTT domain upon first access.
+ */
+ if ((obj->read_domains & I915_GEM_DOMAIN_GTT) == 0)
+ mb();
+
+ /* It should now be out of any other write domains, and we can update
+ * the domain values for our changes.
+ */
+ GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
+ obj->read_domains |= I915_GEM_DOMAIN_GTT;
+ if (write) {
+ obj->read_domains = I915_GEM_DOMAIN_GTT;
+ obj->write_domain = I915_GEM_DOMAIN_GTT;
+ obj->mm.dirty = true;
+ }
+
+ i915_gem_object_unpin_pages(obj);
+ return 0;
+}
+
+/**
+ * Changes the cache-level of an object across all VMA.
+ * @obj: object to act on
+ * @cache_level: new cache level to set for the object
+ *
+ * After this function returns, the object will be in the new cache-level
+ * across all GTT and the contents of the backing storage will be coherent,
+ * with respect to the new cache-level. In order to keep the backing storage
+ * coherent for all users, we only allow a single cache level to be set
+ * globally on the object and prevent it from being changed whilst the
+ * hardware is reading from the object. That is if the object is currently
+ * on the scanout it will be set to uncached (or equivalent display
+ * cache coherency) and all non-MOCS GPU access will also be uncached so
+ * that all direct access to the scanout remains coherent.
+ */
+int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
+ enum i915_cache_level cache_level)
+{
+ struct i915_vma *vma;
+ int ret;
+
+ lockdep_assert_held(&obj->base.dev->struct_mutex);
+
+ if (obj->cache_level == cache_level)
+ return 0;
+
+ /* Inspect the list of currently bound VMA and unbind any that would
+ * be invalid given the new cache-level. This is principally to
+ * catch the issue of the CS prefetch crossing page boundaries and
+ * reading an invalid PTE on older architectures.
+ */
+restart:
+ list_for_each_entry(vma, &obj->vma.list, obj_link) {
+ if (!drm_mm_node_allocated(&vma->node))
+ continue;
+
+ if (i915_vma_is_pinned(vma)) {
+ DRM_DEBUG("can not change the cache level of pinned objects\n");
+ return -EBUSY;
+ }
+
+ if (!i915_vma_is_closed(vma) &&
+ i915_gem_valid_gtt_space(vma, cache_level))
+ continue;
+
+ ret = i915_vma_unbind(vma);
+ if (ret)
+ return ret;
+
+ /* As unbinding may affect other elements in the
+ * obj->vma.list (due to side-effects from retiring
+ * an active vma), play safe and restart the iterator.
+ */
+ goto restart;
+ }
+
+ /* We can reuse the existing drm_mm nodes but need to change the
+ * cache-level on the PTE. We could simply unbind them all and
+ * rebind with the correct cache-level on next use. However since
+ * we already have a valid slot, dma mapping, pages etc, we may as well
+ * rewrite the PTE in the belief that doing so tramples upon less
+ * state and so involves less work.
+ */
+ if (obj->bind_count) {
+ /* Before we change the PTE, the GPU must not be accessing it.
+ * If we wait upon the object, we know that all the bound
+ * VMA are no longer active.
+ */
+ ret = i915_gem_object_wait(obj,
+ I915_WAIT_INTERRUPTIBLE |
+ I915_WAIT_LOCKED |
+ I915_WAIT_ALL,
+ MAX_SCHEDULE_TIMEOUT);
+ if (ret)
+ return ret;
+
+ if (!HAS_LLC(to_i915(obj->base.dev)) &&
+ cache_level != I915_CACHE_NONE) {
+ /* Access to snoopable pages through the GTT is
+ * incoherent and on some machines causes a hard
+ * lockup. Relinquish the CPU mmapping to force
+ * userspace to refault in the pages and we can
+ * then double check if the GTT mapping is still
+ * valid for that pointer access.
+ */
+ i915_gem_object_release_mmap(obj);
+
+ /* As we no longer need a fence for GTT access,
+ * we can relinquish it now (and so prevent having
+ * to steal a fence from someone else on the next
+ * fence request). Note GPU activity would have
+ * dropped the fence as all snoopable access is
+ * supposed to be linear.
+ */
+ for_each_ggtt_vma(vma, obj) {
+ ret = i915_vma_put_fence(vma);
+ if (ret)
+ return ret;
+ }
+ } else {
+ /* We either have incoherent backing store and
+ * so no GTT access or the architecture is fully
+ * coherent. In such cases, existing GTT mmaps
+ * ignore the cache bit in the PTE and we can
+ * rewrite it without confusing the GPU or having
+ * to force userspace to fault back in its mmaps.
+ */
+ }
+
+ list_for_each_entry(vma, &obj->vma.list, obj_link) {
+ if (!drm_mm_node_allocated(&vma->node))
+ continue;
+
+ ret = i915_vma_bind(vma, cache_level, PIN_UPDATE);
+ if (ret)
+ return ret;
+ }
+ }
+
+ list_for_each_entry(vma, &obj->vma.list, obj_link)
+ vma->node.color = cache_level;
+ i915_gem_object_set_cache_coherency(obj, cache_level);
+ obj->cache_dirty = true; /* Always invalidate stale cachelines */
+
+ return 0;
+}
+
+int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file)
+{
+ struct drm_i915_gem_caching *args = data;
+ struct drm_i915_gem_object *obj;
+ int err = 0;
+
+ rcu_read_lock();
+ obj = i915_gem_object_lookup_rcu(file, args->handle);
+ if (!obj) {
+ err = -ENOENT;
+ goto out;
+ }
+
+ switch (obj->cache_level) {
+ case I915_CACHE_LLC:
+ case I915_CACHE_L3_LLC:
+ args->caching = I915_CACHING_CACHED;
+ break;
+
+ case I915_CACHE_WT:
+ args->caching = I915_CACHING_DISPLAY;
+ break;
+
+ default:
+ args->caching = I915_CACHING_NONE;
+ break;
+ }
+out:
+ rcu_read_unlock();
+ return err;
+}
+
+int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file)
+{
+ struct drm_i915_private *i915 = to_i915(dev);
+ struct drm_i915_gem_caching *args = data;
+ struct drm_i915_gem_object *obj;
+ enum i915_cache_level level;
+ int ret = 0;
+
+ switch (args->caching) {
+ case I915_CACHING_NONE:
+ level = I915_CACHE_NONE;
+ break;
+ case I915_CACHING_CACHED:
+ /*
+ * Due to a HW issue on BXT A stepping, GPU stores via a
+ * snooped mapping may leave stale data in a corresponding CPU
+ * cacheline, whereas normally such cachelines would get
+ * invalidated.
+ */
+ if (!HAS_LLC(i915) && !HAS_SNOOP(i915))
+ return -ENODEV;
+
+ level = I915_CACHE_LLC;
+ break;
+ case I915_CACHING_DISPLAY:
+ level = HAS_WT(i915) ? I915_CACHE_WT : I915_CACHE_NONE;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ obj = i915_gem_object_lookup(file, args->handle);
+ if (!obj)
+ return -ENOENT;
+
+ /*
+ * The caching mode of proxy object is handled by its generator, and
+ * not allowed to be changed by userspace.
+ */
+ if (i915_gem_object_is_proxy(obj)) {
+ ret = -ENXIO;
+ goto out;
+ }
+
+ if (obj->cache_level == level)
+ goto out;
+
+ ret = i915_gem_object_wait(obj,
+ I915_WAIT_INTERRUPTIBLE,
+ MAX_SCHEDULE_TIMEOUT);
+ if (ret)
+ goto out;
+
+ ret = i915_mutex_lock_interruptible(dev);
+ if (ret)
+ goto out;
+
+ ret = i915_gem_object_set_cache_level(obj, level);
+ mutex_unlock(&dev->struct_mutex);
+
+out:
+ i915_gem_object_put(obj);
+ return ret;
+}
+
+/*
+ * Prepare buffer for display plane (scanout, cursors, etc). Can be called from
+ * an uninterruptible phase (modesetting) and allows any flushes to be pipelined
+ * (for pageflips). We only flush the caches while preparing the buffer for
+ * display, the callers are responsible for frontbuffer flush.
+ */
+struct i915_vma *
+i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
+ u32 alignment,
+ const struct i915_ggtt_view *view,
+ unsigned int flags)
+{
+ struct i915_vma *vma;
+ int ret;
+
+ lockdep_assert_held(&obj->base.dev->struct_mutex);
+
+ /* Mark the global pin early so that we account for the
+ * display coherency whilst setting up the cache domains.
+ */
+ obj->pin_global++;
+
+ /* The display engine is not coherent with the LLC cache on gen6. As
+ * a result, we make sure that the pinning that is about to occur is
+ * done with uncached PTEs. This is lowest common denominator for all
+ * chipsets.
+ *
+ * However for gen6+, we could do better by using the GFDT bit instead
+ * of uncaching, which would allow us to flush all the LLC-cached data
+ * with that bit in the PTE to main memory with just one PIPE_CONTROL.
+ */
+ ret = i915_gem_object_set_cache_level(obj,
+ HAS_WT(to_i915(obj->base.dev)) ?
+ I915_CACHE_WT : I915_CACHE_NONE);
+ if (ret) {
+ vma = ERR_PTR(ret);
+ goto err_unpin_global;
+ }
+
+ /* As the user may map the buffer once pinned in the display plane
+ * (e.g. libkms for the bootup splash), we have to ensure that we
+ * always use map_and_fenceable for all scanout buffers. However,
+ * it may simply be too big to fit into mappable, in which case
+ * put it anyway and hope that userspace can cope (but always first
+ * try to preserve the existing ABI).
+ */
+ vma = ERR_PTR(-ENOSPC);
+ if ((flags & PIN_MAPPABLE) == 0 &&
+ (!view || view->type == I915_GGTT_VIEW_NORMAL))
+ vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment,
+ flags |
+ PIN_MAPPABLE |
+ PIN_NONBLOCK);
+ if (IS_ERR(vma))
+ vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, flags);
+ if (IS_ERR(vma))
+ goto err_unpin_global;
+
+ vma->display_alignment = max_t(u64, vma->display_alignment, alignment);
+
+ __i915_gem_object_flush_for_display(obj);
+
+ /* It should now be out of any other write domains, and we can update
+ * the domain values for our changes.
+ */
+ obj->read_domains |= I915_GEM_DOMAIN_GTT;
+
+ return vma;
+
+err_unpin_global:
+ obj->pin_global--;
+ return vma;
+}
+
+static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
+{
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
+ struct list_head *list;
+ struct i915_vma *vma;
+
+ GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
+
+ mutex_lock(&i915->ggtt.vm.mutex);
+ for_each_ggtt_vma(vma, obj) {
+ if (!drm_mm_node_allocated(&vma->node))
+ continue;
+
+ list_move_tail(&vma->vm_link, &vma->vm->bound_list);
+ }
+ mutex_unlock(&i915->ggtt.vm.mutex);
+
+ spin_lock(&i915->mm.obj_lock);
+ list = obj->bind_count ? &i915->mm.bound_list : &i915->mm.unbound_list;
+ list_move_tail(&obj->mm.link, list);
+ spin_unlock(&i915->mm.obj_lock);
+}
+
+void
+i915_gem_object_unpin_from_display_plane(struct i915_vma *vma)
+{
+ lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
+
+ if (WARN_ON(vma->obj->pin_global == 0))
+ return;
+
+ if (--vma->obj->pin_global == 0)
+ vma->display_alignment = I915_GTT_MIN_ALIGNMENT;
+
+ /* Bump the LRU to try and avoid premature eviction whilst flipping */
+ i915_gem_object_bump_inactive_ggtt(vma->obj);
+
+ i915_vma_unpin(vma);
+}
+
+/**
+ * Moves a single object to the CPU read, and possibly write domain.
+ * @obj: object to act on
+ * @write: requesting write or read-only access
+ *
+ * This function returns when the move is complete, including waiting on
+ * flushes to occur.
+ */
+int
+i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
+{
+ int ret;
+
+ lockdep_assert_held(&obj->base.dev->struct_mutex);
+
+ ret = i915_gem_object_wait(obj,
+ I915_WAIT_INTERRUPTIBLE |
+ I915_WAIT_LOCKED |
+ (write ? I915_WAIT_ALL : 0),
+ MAX_SCHEDULE_TIMEOUT);
+ if (ret)
+ return ret;
+
+ i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
+
+ /* Flush the CPU cache if it's still invalid. */
+ if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) {
+ i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
+ obj->read_domains |= I915_GEM_DOMAIN_CPU;
+ }
+
+ /* It should now be out of any other write domains, and we can update
+ * the domain values for our changes.
+ */
+ GEM_BUG_ON(obj->write_domain & ~I915_GEM_DOMAIN_CPU);
+
+ /* If we're writing through the CPU, then the GPU read domains will
+ * need to be invalidated at next use.
+ */
+ if (write)
+ __start_cpu_write(obj);
+
+ return 0;
+}
+
+static inline enum fb_op_origin
+fb_write_origin(struct drm_i915_gem_object *obj, unsigned int domain)
+{
+ return (domain == I915_GEM_DOMAIN_GTT ?
+ obj->frontbuffer_ggtt_origin : ORIGIN_CPU);
+}
+
+/**
+ * Called when user space prepares to use an object with the CPU, either
+ * through the mmap ioctl's mapping or a GTT mapping.
+ * @dev: drm device
+ * @data: ioctl data blob
+ * @file: drm file
+ */
+int
+i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file)
+{
+ struct drm_i915_gem_set_domain *args = data;
+ struct drm_i915_gem_object *obj;
+ u32 read_domains = args->read_domains;
+ u32 write_domain = args->write_domain;
+ int err;
+
+ /* Only handle setting domains to types used by the CPU. */
+ if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS)
+ return -EINVAL;
+
+ /*
+ * Having something in the write domain implies it's in the read
+ * domain, and only that read domain. Enforce that in the request.
+ */
+ if (write_domain && read_domains != write_domain)
+ return -EINVAL;
+
+ if (!read_domains)
+ return 0;
+
+ obj = i915_gem_object_lookup(file, args->handle);
+ if (!obj)
+ return -ENOENT;
+
+ /*
+ * Already in the desired write domain? Nothing for us to do!
+ *
+ * We apply a little bit of cunning here to catch a broader set of
+ * no-ops. If obj->write_domain is set, we must be in the same
+ * obj->read_domains, and only that domain. Therefore, if that
+ * obj->write_domain matches the request read_domains, we are
+ * already in the same read/write domain and can skip the operation,
+ * without having to further check the requested write_domain.
+ */
+ if (READ_ONCE(obj->write_domain) == read_domains) {
+ err = 0;
+ goto out;
+ }
+
+ /*
+ * Try to flush the object off the GPU without holding the lock.
+ * We will repeat the flush holding the lock in the normal manner
+ * to catch cases where we are gazumped.
+ */
+ err = i915_gem_object_wait(obj,
+ I915_WAIT_INTERRUPTIBLE |
+ I915_WAIT_PRIORITY |
+ (write_domain ? I915_WAIT_ALL : 0),
+ MAX_SCHEDULE_TIMEOUT);
+ if (err)
+ goto out;
+
+ /*
+ * Proxy objects do not control access to the backing storage, ergo
+ * they cannot be used as a means to manipulate the cache domain
+ * tracking for that backing storage. The proxy object is always
+ * considered to be outside of any cache domain.
+ */
+ if (i915_gem_object_is_proxy(obj)) {
+ err = -ENXIO;
+ goto out;
+ }
+
+ /*
+ * Flush and acquire obj->pages so that we are coherent through
+ * direct access in memory with previous cached writes through
+ * shmemfs and that our cache domain tracking remains valid.
+ * For example, if the obj->filp was moved to swap without us
+ * being notified and releasing the pages, we would mistakenly
+ * continue to assume that the obj remained out of the CPU cached
+ * domain.
+ */
+ err = i915_gem_object_pin_pages(obj);
+ if (err)
+ goto out;
+
+ err = i915_mutex_lock_interruptible(dev);
+ if (err)
+ goto out_unpin;
+
+ if (read_domains & I915_GEM_DOMAIN_WC)
+ err = i915_gem_object_set_to_wc_domain(obj, write_domain);
+ else if (read_domains & I915_GEM_DOMAIN_GTT)
+ err = i915_gem_object_set_to_gtt_domain(obj, write_domain);
+ else
+ err = i915_gem_object_set_to_cpu_domain(obj, write_domain);
+
+ /* And bump the LRU for this access */
+ i915_gem_object_bump_inactive_ggtt(obj);
+
+ mutex_unlock(&dev->struct_mutex);
+
+ if (write_domain != 0)
+ intel_fb_obj_invalidate(obj,
+ fb_write_origin(obj, write_domain));
+
+out_unpin:
+ i915_gem_object_unpin_pages(obj);
+out:
+ i915_gem_object_put(obj);
+ return err;
+}
+
+/*
+ * Pins the specified object's pages and synchronizes the object with
+ * GPU accesses. Sets needs_clflush to non-zero if the caller should
+ * flush the object from the CPU cache.
+ */
+int i915_gem_object_prepare_read(struct drm_i915_gem_object *obj,
+ unsigned int *needs_clflush)
+{
+ int ret;
+
+ lockdep_assert_held(&obj->base.dev->struct_mutex);
+
+ *needs_clflush = 0;
+ if (!i915_gem_object_has_struct_page(obj))
+ return -ENODEV;
+
+ ret = i915_gem_object_wait(obj,
+ I915_WAIT_INTERRUPTIBLE |
+ I915_WAIT_LOCKED,
+ MAX_SCHEDULE_TIMEOUT);
+ if (ret)
+ return ret;
+
+ ret = i915_gem_object_pin_pages(obj);
+ if (ret)
+ return ret;
+
+ if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ ||
+ !static_cpu_has(X86_FEATURE_CLFLUSH)) {
+ ret = i915_gem_object_set_to_cpu_domain(obj, false);
+ if (ret)
+ goto err_unpin;
+ else
+ goto out;
+ }
+
+ i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
+
+ /* If we're not in the cpu read domain, set ourself into the gtt
+ * read domain and manually flush cachelines (if required). This
+ * optimizes for the case when the gpu will dirty the data
+ * anyway again before the next pread happens.
+ */
+ if (!obj->cache_dirty &&
+ !(obj->read_domains & I915_GEM_DOMAIN_CPU))
+ *needs_clflush = CLFLUSH_BEFORE;
+
+out:
+ /* return with the pages pinned */
+ return 0;
+
+err_unpin:
+ i915_gem_object_unpin_pages(obj);
+ return ret;
+}
+
+int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj,
+ unsigned int *needs_clflush)
+{
+ int ret;
+
+ lockdep_assert_held(&obj->base.dev->struct_mutex);
+
+ *needs_clflush = 0;
+ if (!i915_gem_object_has_struct_page(obj))
+ return -ENODEV;
+
+ ret = i915_gem_object_wait(obj,
+ I915_WAIT_INTERRUPTIBLE |
+ I915_WAIT_LOCKED |
+ I915_WAIT_ALL,
+ MAX_SCHEDULE_TIMEOUT);
+ if (ret)
+ return ret;
+
+ ret = i915_gem_object_pin_pages(obj);
+ if (ret)
+ return ret;
+
+ if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE ||
+ !static_cpu_has(X86_FEATURE_CLFLUSH)) {
+ ret = i915_gem_object_set_to_cpu_domain(obj, true);
+ if (ret)
+ goto err_unpin;
+ else
+ goto out;
+ }
+
+ i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
+
+ /* If we're not in the cpu write domain, set ourself into the
+ * gtt write domain and manually flush cachelines (as required).
+ * This optimizes for the case when the gpu will use the data
+ * right away and we therefore have to clflush anyway.
+ */
+ if (!obj->cache_dirty) {
+ *needs_clflush |= CLFLUSH_AFTER;
+
+ /*
+ * Same trick applies to invalidate partially written
+ * cachelines read before writing.
+ */
+ if (!(obj->read_domains & I915_GEM_DOMAIN_CPU))
+ *needs_clflush |= CLFLUSH_BEFORE;
+ }
+
+out:
+ intel_fb_obj_invalidate(obj, ORIGIN_CPU);
+ obj->mm.dirty = true;
+ /* return with the pages pinned */
+ return 0;
+
+err_unpin:
+ i915_gem_object_unpin_pages(obj);
+ return ret;
+}
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index 29671355827d..fe87ed6a016e 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -15,6 +15,8 @@
#include "i915_gem_object_types.h"
+#include "../i915_gem_gtt.h"
+
void i915_gem_init__objects(struct drm_i915_private *i915);
struct drm_i915_gem_object *i915_gem_object_alloc(void);
@@ -352,6 +354,20 @@ void
i915_gem_object_flush_write_domain(struct drm_i915_gem_object *obj,
unsigned int flush_domains);
+int i915_gem_object_prepare_read(struct drm_i915_gem_object *obj,
+ unsigned int *needs_clflush);
+int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj,
+ unsigned int *needs_clflush);
+#define CLFLUSH_BEFORE BIT(0)
+#define CLFLUSH_AFTER BIT(1)
+#define CLFLUSH_FLAGS (CLFLUSH_BEFORE | CLFLUSH_AFTER)
+
+static inline void
+i915_gem_object_finish_access(struct drm_i915_gem_object *obj)
+{
+ i915_gem_object_unpin_pages(obj);
+}
+
static inline struct intel_engine_cs *
i915_gem_object_last_write_engine(struct drm_i915_gem_object *obj)
{
@@ -373,6 +389,19 @@ void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj,
unsigned int cache_level);
void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj);
+int __must_check
+i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write);
+int __must_check
+i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write);
+int __must_check
+i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write);
+struct i915_vma * __must_check
+i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
+ u32 alignment,
+ const struct i915_ggtt_view *view,
+ unsigned int flags);
+void i915_gem_object_unpin_from_display_plane(struct i915_vma *vma);
+
static inline bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
{
if (obj->cache_dirty)
diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c
index e81cad96f14b..8e0692429616 100644
--- a/drivers/gpu/drm/i915/gvt/cmd_parser.c
+++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c
@@ -1734,7 +1734,7 @@ static int perform_bb_shadow(struct parser_exec_state *s)
goto err_free_bb;
}
- ret = i915_gem_obj_prepare_shmem_write(bb->obj, &bb->clflush);
+ ret = i915_gem_object_prepare_write(bb->obj, &bb->clflush);
if (ret)
goto err_free_obj;
@@ -1783,7 +1783,7 @@ static int perform_bb_shadow(struct parser_exec_state *s)
err_unmap:
i915_gem_object_unpin_map(bb->obj);
err_finish_shmem_access:
- i915_gem_obj_finish_shmem_access(bb->obj);
+ i915_gem_object_finish_access(bb->obj);
err_free_obj:
i915_gem_object_put(bb->obj);
err_free_bb:
diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
index 8b6574e1b495..2deffb3e6ab7 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.c
+++ b/drivers/gpu/drm/i915/gvt/scheduler.c
@@ -465,7 +465,7 @@ static int prepare_shadow_batch_buffer(struct intel_vgpu_workload *workload)
bb->obj->base.size);
bb->clflush &= ~CLFLUSH_AFTER;
}
- i915_gem_obj_finish_shmem_access(bb->obj);
+ i915_gem_object_finish_access(bb->obj);
bb->accessing = false;
} else {
@@ -493,7 +493,7 @@ static int prepare_shadow_batch_buffer(struct intel_vgpu_workload *workload)
if (ret)
goto err;
- i915_gem_obj_finish_shmem_access(bb->obj);
+ i915_gem_object_finish_access(bb->obj);
bb->accessing = false;
ret = i915_vma_move_to_active(bb->vma,
@@ -571,7 +571,7 @@ static void release_shadow_batch_buffer(struct intel_vgpu_workload *workload)
list_for_each_entry_safe(bb, pos, &workload->shadow_bb, list) {
if (bb->obj) {
if (bb->accessing)
- i915_gem_obj_finish_shmem_access(bb->obj);
+ i915_gem_object_finish_access(bb->obj);
if (bb->va && !IS_ERR(bb->va))
i915_gem_object_unpin_map(bb->obj);
diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
index e9fadcb4d592..c893bd4eb2c8 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -1058,11 +1058,11 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj,
void *dst, *src;
int ret;
- ret = i915_gem_obj_prepare_shmem_read(src_obj, &src_needs_clflush);
+ ret = i915_gem_object_prepare_read(src_obj, &src_needs_clflush);
if (ret)
return ERR_PTR(ret);
- ret = i915_gem_obj_prepare_shmem_write(dst_obj, &dst_needs_clflush);
+ ret = i915_gem_object_prepare_write(dst_obj, &dst_needs_clflush);
if (ret) {
dst = ERR_PTR(ret);
goto unpin_src;
@@ -1120,9 +1120,9 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj,
*needs_clflush_after = dst_needs_clflush & CLFLUSH_AFTER;
unpin_dst:
- i915_gem_obj_finish_shmem_access(dst_obj);
+ i915_gem_object_finish_access(dst_obj);
unpin_src:
- i915_gem_obj_finish_shmem_access(src_obj);
+ i915_gem_object_finish_access(src_obj);
return dst;
}
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index e48780a94de8..4787d1ebc57d 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2889,20 +2889,6 @@ static inline int __sg_page_count(const struct scatterlist *sg)
return sg->length >> PAGE_SHIFT;
}
-int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
- unsigned int *needs_clflush);
-int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
- unsigned int *needs_clflush);
-#define CLFLUSH_BEFORE BIT(0)
-#define CLFLUSH_AFTER BIT(1)
-#define CLFLUSH_FLAGS (CLFLUSH_BEFORE | CLFLUSH_AFTER)
-
-static inline void
-i915_gem_obj_finish_shmem_access(struct drm_i915_gem_object *obj)
-{
- i915_gem_object_unpin_pages(obj);
-}
-
static inline int __must_check
i915_mutex_lock_interruptible(struct drm_device *dev)
{
@@ -2966,18 +2952,6 @@ int i915_gem_object_wait_priority(struct drm_i915_gem_object *obj,
const struct i915_sched_attr *attr);
#define I915_PRIORITY_DISPLAY I915_USER_PRIORITY(I915_PRIORITY_MAX)
-int __must_check
-i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write);
-int __must_check
-i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write);
-int __must_check
-i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write);
-struct i915_vma * __must_check
-i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
- u32 alignment,
- const struct i915_ggtt_view *view,
- unsigned int flags);
-void i915_gem_object_unpin_from_display_plane(struct i915_vma *vma);
int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file);
void i915_gem_release(struct drm_device *dev, struct drm_file *file);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 7eb55d596f88..dff82ff27e2e 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -438,123 +438,6 @@ void i915_gem_flush_ggtt_writes(struct drm_i915_private *dev_priv)
}
}
-/*
- * Pins the specified object's pages and synchronizes the object with
- * GPU accesses. Sets needs_clflush to non-zero if the caller should
- * flush the object from the CPU cache.
- */
-int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
- unsigned int *needs_clflush)
-{
- int ret;
-
- lockdep_assert_held(&obj->base.dev->struct_mutex);
-
- *needs_clflush = 0;
- if (!i915_gem_object_has_struct_page(obj))
- return -ENODEV;
-
- ret = i915_gem_object_wait(obj,
- I915_WAIT_INTERRUPTIBLE |
- I915_WAIT_LOCKED,
- MAX_SCHEDULE_TIMEOUT);
- if (ret)
- return ret;
-
- ret = i915_gem_object_pin_pages(obj);
- if (ret)
- return ret;
-
- if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ ||
- !static_cpu_has(X86_FEATURE_CLFLUSH)) {
- ret = i915_gem_object_set_to_cpu_domain(obj, false);
- if (ret)
- goto err_unpin;
- else
- goto out;
- }
-
- i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
-
- /* If we're not in the cpu read domain, set ourself into the gtt
- * read domain and manually flush cachelines (if required). This
- * optimizes for the case when the gpu will dirty the data
- * anyway again before the next pread happens.
- */
- if (!obj->cache_dirty &&
- !(obj->read_domains & I915_GEM_DOMAIN_CPU))
- *needs_clflush = CLFLUSH_BEFORE;
-
-out:
- /* return with the pages pinned */
- return 0;
-
-err_unpin:
- i915_gem_object_unpin_pages(obj);
- return ret;
-}
-
-int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
- unsigned int *needs_clflush)
-{
- int ret;
-
- lockdep_assert_held(&obj->base.dev->struct_mutex);
-
- *needs_clflush = 0;
- if (!i915_gem_object_has_struct_page(obj))
- return -ENODEV;
-
- ret = i915_gem_object_wait(obj,
- I915_WAIT_INTERRUPTIBLE |
- I915_WAIT_LOCKED |
- I915_WAIT_ALL,
- MAX_SCHEDULE_TIMEOUT);
- if (ret)
- return ret;
-
- ret = i915_gem_object_pin_pages(obj);
- if (ret)
- return ret;
-
- if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE ||
- !static_cpu_has(X86_FEATURE_CLFLUSH)) {
- ret = i915_gem_object_set_to_cpu_domain(obj, true);
- if (ret)
- goto err_unpin;
- else
- goto out;
- }
-
- i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
-
- /* If we're not in the cpu write domain, set ourself into the
- * gtt write domain and manually flush cachelines (as required).
- * This optimizes for the case when the gpu will use the data
- * right away and we therefore have to clflush anyway.
- */
- if (!obj->cache_dirty) {
- *needs_clflush |= CLFLUSH_AFTER;
-
- /*
- * Same trick applies to invalidate partially written
- * cachelines read before writing.
- */
- if (!(obj->read_domains & I915_GEM_DOMAIN_CPU))
- *needs_clflush |= CLFLUSH_BEFORE;
- }
-
-out:
- intel_fb_obj_invalidate(obj, ORIGIN_CPU);
- obj->mm.dirty = true;
- /* return with the pages pinned */
- return 0;
-
-err_unpin:
- i915_gem_object_unpin_pages(obj);
- return ret;
-}
-
static int
shmem_pread(struct page *page, int offset, int len, char __user *user_data,
bool needs_clflush)
@@ -588,7 +471,7 @@ i915_gem_shmem_pread(struct drm_i915_gem_object *obj,
if (ret)
return ret;
- ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush);
+ ret = i915_gem_object_prepare_read(obj, &needs_clflush);
mutex_unlock(&obj->base.dev->struct_mutex);
if (ret)
return ret;
@@ -610,7 +493,7 @@ i915_gem_shmem_pread(struct drm_i915_gem_object *obj,
offset = 0;
}
- i915_gem_obj_finish_shmem_access(obj);
+ i915_gem_object_finish_access(obj);
return ret;
}
@@ -985,7 +868,7 @@ i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj,
if (ret)
return ret;
- ret = i915_gem_obj_prepare_shmem_write(obj, &needs_clflush);
+ ret = i915_gem_object_prepare_write(obj, &needs_clflush);
mutex_unlock(&i915->drm.struct_mutex);
if (ret)
return ret;
@@ -1017,7 +900,7 @@ i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj,
}
intel_fb_obj_flush(obj, ORIGIN_CPU);
- i915_gem_obj_finish_shmem_access(obj);
+ i915_gem_object_finish_access(obj);
return ret;
}
@@ -1106,150 +989,6 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
return ret;
}
-static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
-{
- struct drm_i915_private *i915 = to_i915(obj->base.dev);
- struct list_head *list;
- struct i915_vma *vma;
-
- GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
-
- mutex_lock(&i915->ggtt.vm.mutex);
- for_each_ggtt_vma(vma, obj) {
- if (!drm_mm_node_allocated(&vma->node))
- continue;
-
- list_move_tail(&vma->vm_link, &vma->vm->bound_list);
- }
- mutex_unlock(&i915->ggtt.vm.mutex);
-
- spin_lock(&i915->mm.obj_lock);
- list = obj->bind_count ? &i915->mm.bound_list : &i915->mm.unbound_list;
- list_move_tail(&obj->mm.link, list);
- spin_unlock(&i915->mm.obj_lock);
-}
-
-static inline enum fb_op_origin
-fb_write_origin(struct drm_i915_gem_object *obj, unsigned int domain)
-{
- return (domain == I915_GEM_DOMAIN_GTT ?
- obj->frontbuffer_ggtt_origin : ORIGIN_CPU);
-}
-
-/**
- * Called when user space prepares to use an object with the CPU, either
- * through the mmap ioctl's mapping or a GTT mapping.
- * @dev: drm device
- * @data: ioctl data blob
- * @file: drm file
- */
-int
-i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
- struct drm_file *file)
-{
- struct drm_i915_gem_set_domain *args = data;
- struct drm_i915_gem_object *obj;
- u32 read_domains = args->read_domains;
- u32 write_domain = args->write_domain;
- int err;
-
- /* Only handle setting domains to types used by the CPU. */
- if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS)
- return -EINVAL;
-
- /*
- * Having something in the write domain implies it's in the read
- * domain, and only that read domain. Enforce that in the request.
- */
- if (write_domain && read_domains != write_domain)
- return -EINVAL;
-
- if (!read_domains)
- return 0;
-
- obj = i915_gem_object_lookup(file, args->handle);
- if (!obj)
- return -ENOENT;
-
- /*
- * Already in the desired write domain? Nothing for us to do!
- *
- * We apply a little bit of cunning here to catch a broader set of
- * no-ops. If obj->write_domain is set, we must be in the same
- * obj->read_domains, and only that domain. Therefore, if that
- * obj->write_domain matches the request read_domains, we are
- * already in the same read/write domain and can skip the operation,
- * without having to further check the requested write_domain.
- */
- if (READ_ONCE(obj->write_domain) == read_domains) {
- err = 0;
- goto out;
- }
-
- /*
- * Try to flush the object off the GPU without holding the lock.
- * We will repeat the flush holding the lock in the normal manner
- * to catch cases where we are gazumped.
- */
- err = i915_gem_object_wait(obj,
- I915_WAIT_INTERRUPTIBLE |
- I915_WAIT_PRIORITY |
- (write_domain ? I915_WAIT_ALL : 0),
- MAX_SCHEDULE_TIMEOUT);
- if (err)
- goto out;
-
- /*
- * Proxy objects do not control access to the backing storage, ergo
- * they cannot be used as a means to manipulate the cache domain
- * tracking for that backing storage. The proxy object is always
- * considered to be outside of any cache domain.
- */
- if (i915_gem_object_is_proxy(obj)) {
- err = -ENXIO;
- goto out;
- }
-
- /*
- * Flush and acquire obj->pages so that we are coherent through
- * direct access in memory with previous cached writes through
- * shmemfs and that our cache domain tracking remains valid.
- * For example, if the obj->filp was moved to swap without us
- * being notified and releasing the pages, we would mistakenly
- * continue to assume that the obj remained out of the CPU cached
- * domain.
- */
- err = i915_gem_object_pin_pages(obj);
- if (err)
- goto out;
-
- err = i915_mutex_lock_interruptible(dev);
- if (err)
- goto out_unpin;
-
- if (read_domains & I915_GEM_DOMAIN_WC)
- err = i915_gem_object_set_to_wc_domain(obj, write_domain);
- else if (read_domains & I915_GEM_DOMAIN_GTT)
- err = i915_gem_object_set_to_gtt_domain(obj, write_domain);
- else
- err = i915_gem_object_set_to_cpu_domain(obj, write_domain);
-
- /* And bump the LRU for this access */
- i915_gem_object_bump_inactive_ggtt(obj);
-
- mutex_unlock(&dev->struct_mutex);
-
- if (write_domain != 0)
- intel_fb_obj_invalidate(obj,
- fb_write_origin(obj, write_domain));
-
-out_unpin:
- i915_gem_object_unpin_pages(obj);
-out:
- i915_gem_object_put(obj);
- return err;
-}
-
/**
* Called when user space has done writes to this buffer
* @dev: drm device
@@ -1518,514 +1257,6 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915,
return 0;
}
-static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj)
-{
- /*
- * We manually flush the CPU domain so that we can override and
- * force the flush for the display, and perform it asyncrhonously.
- */
- i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
- if (obj->cache_dirty)
- i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE);
- obj->write_domain = 0;
-}
-
-void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj)
-{
- if (!READ_ONCE(obj->pin_global))
- return;
-
- mutex_lock(&obj->base.dev->struct_mutex);
- __i915_gem_object_flush_for_display(obj);
- mutex_unlock(&obj->base.dev->struct_mutex);
-}
-
-/**
- * Moves a single object to the WC read, and possibly write domain.
- * @obj: object to act on
- * @write: ask for write access or read only
- *
- * This function returns when the move is complete, including waiting on
- * flushes to occur.
- */
-int
-i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
-{
- int ret;
-
- lockdep_assert_held(&obj->base.dev->struct_mutex);
-
- ret = i915_gem_object_wait(obj,
- I915_WAIT_INTERRUPTIBLE |
- I915_WAIT_LOCKED |
- (write ? I915_WAIT_ALL : 0),
- MAX_SCHEDULE_TIMEOUT);
- if (ret)
- return ret;
-
- if (obj->write_domain == I915_GEM_DOMAIN_WC)
- return 0;
-
- /* Flush and acquire obj->pages so that we are coherent through
- * direct access in memory with previous cached writes through
- * shmemfs and that our cache domain tracking remains valid.
- * For example, if the obj->filp was moved to swap without us
- * being notified and releasing the pages, we would mistakenly
- * continue to assume that the obj remained out of the CPU cached
- * domain.
- */
- ret = i915_gem_object_pin_pages(obj);
- if (ret)
- return ret;
-
- i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_WC);
-
- /* Serialise direct access to this object with the barriers for
- * coherent writes from the GPU, by effectively invalidating the
- * WC domain upon first access.
- */
- if ((obj->read_domains & I915_GEM_DOMAIN_WC) == 0)
- mb();
-
- /* It should now be out of any other write domains, and we can update
- * the domain values for our changes.
- */
- GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_WC) != 0);
- obj->read_domains |= I915_GEM_DOMAIN_WC;
- if (write) {
- obj->read_domains = I915_GEM_DOMAIN_WC;
- obj->write_domain = I915_GEM_DOMAIN_WC;
- obj->mm.dirty = true;
- }
-
- i915_gem_object_unpin_pages(obj);
- return 0;
-}
-
-/**
- * Moves a single object to the GTT read, and possibly write domain.
- * @obj: object to act on
- * @write: ask for write access or read only
- *
- * This function returns when the move is complete, including waiting on
- * flushes to occur.
- */
-int
-i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
-{
- int ret;
-
- lockdep_assert_held(&obj->base.dev->struct_mutex);
-
- ret = i915_gem_object_wait(obj,
- I915_WAIT_INTERRUPTIBLE |
- I915_WAIT_LOCKED |
- (write ? I915_WAIT_ALL : 0),
- MAX_SCHEDULE_TIMEOUT);
- if (ret)
- return ret;
-
- if (obj->write_domain == I915_GEM_DOMAIN_GTT)
- return 0;
-
- /* Flush and acquire obj->pages so that we are coherent through
- * direct access in memory with previous cached writes through
- * shmemfs and that our cache domain tracking remains valid.
- * For example, if the obj->filp was moved to swap without us
- * being notified and releasing the pages, we would mistakenly
- * continue to assume that the obj remained out of the CPU cached
- * domain.
- */
- ret = i915_gem_object_pin_pages(obj);
- if (ret)
- return ret;
-
- i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT);
-
- /* Serialise direct access to this object with the barriers for
- * coherent writes from the GPU, by effectively invalidating the
- * GTT domain upon first access.
- */
- if ((obj->read_domains & I915_GEM_DOMAIN_GTT) == 0)
- mb();
-
- /* It should now be out of any other write domains, and we can update
- * the domain values for our changes.
- */
- GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
- obj->read_domains |= I915_GEM_DOMAIN_GTT;
- if (write) {
- obj->read_domains = I915_GEM_DOMAIN_GTT;
- obj->write_domain = I915_GEM_DOMAIN_GTT;
- obj->mm.dirty = true;
- }
-
- i915_gem_object_unpin_pages(obj);
- return 0;
-}
-
-/**
- * Changes the cache-level of an object across all VMA.
- * @obj: object to act on
- * @cache_level: new cache level to set for the object
- *
- * After this function returns, the object will be in the new cache-level
- * across all GTT and the contents of the backing storage will be coherent,
- * with respect to the new cache-level. In order to keep the backing storage
- * coherent for all users, we only allow a single cache level to be set
- * globally on the object and prevent it from being changed whilst the
- * hardware is reading from the object. That is if the object is currently
- * on the scanout it will be set to uncached (or equivalent display
- * cache coherency) and all non-MOCS GPU access will also be uncached so
- * that all direct access to the scanout remains coherent.
- */
-int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
- enum i915_cache_level cache_level)
-{
- struct i915_vma *vma;
- int ret;
-
- lockdep_assert_held(&obj->base.dev->struct_mutex);
-
- if (obj->cache_level == cache_level)
- return 0;
-
- /* Inspect the list of currently bound VMA and unbind any that would
- * be invalid given the new cache-level. This is principally to
- * catch the issue of the CS prefetch crossing page boundaries and
- * reading an invalid PTE on older architectures.
- */
-restart:
- list_for_each_entry(vma, &obj->vma.list, obj_link) {
- if (!drm_mm_node_allocated(&vma->node))
- continue;
-
- if (i915_vma_is_pinned(vma)) {
- DRM_DEBUG("can not change the cache level of pinned objects\n");
- return -EBUSY;
- }
-
- if (!i915_vma_is_closed(vma) &&
- i915_gem_valid_gtt_space(vma, cache_level))
- continue;
-
- ret = i915_vma_unbind(vma);
- if (ret)
- return ret;
-
- /* As unbinding may affect other elements in the
- * obj->vma_list (due to side-effects from retiring
- * an active vma), play safe and restart the iterator.
- */
- goto restart;
- }
-
- /* We can reuse the existing drm_mm nodes but need to change the
- * cache-level on the PTE. We could simply unbind them all and
- * rebind with the correct cache-level on next use. However since
- * we already have a valid slot, dma mapping, pages etc, we may as
- * rewrite the PTE in the belief that doing so tramples upon less
- * state and so involves less work.
- */
- if (obj->bind_count) {
- /* Before we change the PTE, the GPU must not be accessing it.
- * If we wait upon the object, we know that all the bound
- * VMA are no longer active.
- */
- ret = i915_gem_object_wait(obj,
- I915_WAIT_INTERRUPTIBLE |
- I915_WAIT_LOCKED |
- I915_WAIT_ALL,
- MAX_SCHEDULE_TIMEOUT);
- if (ret)
- return ret;
-
- if (!HAS_LLC(to_i915(obj->base.dev)) &&
- cache_level != I915_CACHE_NONE) {
- /* Access to snoopable pages through the GTT is
- * incoherent and on some machines causes a hard
- * lockup. Relinquish the CPU mmaping to force
- * userspace to refault in the pages and we can
- * then double check if the GTT mapping is still
- * valid for that pointer access.
- */
- i915_gem_object_release_mmap(obj);
-
- /* As we no longer need a fence for GTT access,
- * we can relinquish it now (and so prevent having
- * to steal a fence from someone else on the next
- * fence request). Note GPU activity would have
- * dropped the fence as all snoopable access is
- * supposed to be linear.
- */
- for_each_ggtt_vma(vma, obj) {
- ret = i915_vma_put_fence(vma);
- if (ret)
- return ret;
- }
- } else {
- /* We either have incoherent backing store and
- * so no GTT access or the architecture is fully
- * coherent. In such cases, existing GTT mmaps
- * ignore the cache bit in the PTE and we can
- * rewrite it without confusing the GPU or having
- * to force userspace to fault back in its mmaps.
- */
- }
-
- list_for_each_entry(vma, &obj->vma.list, obj_link) {
- if (!drm_mm_node_allocated(&vma->node))
- continue;
-
- ret = i915_vma_bind(vma, cache_level, PIN_UPDATE);
- if (ret)
- return ret;
- }
- }
-
- list_for_each_entry(vma, &obj->vma.list, obj_link)
- vma->node.color = cache_level;
- i915_gem_object_set_cache_coherency(obj, cache_level);
- obj->cache_dirty = true; /* Always invalidate stale cachelines */
-
- return 0;
-}
-
-int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
- struct drm_file *file)
-{
- struct drm_i915_gem_caching *args = data;
- struct drm_i915_gem_object *obj;
- int err = 0;
-
- rcu_read_lock();
- obj = i915_gem_object_lookup_rcu(file, args->handle);
- if (!obj) {
- err = -ENOENT;
- goto out;
- }
-
- switch (obj->cache_level) {
- case I915_CACHE_LLC:
- case I915_CACHE_L3_LLC:
- args->caching = I915_CACHING_CACHED;
- break;
-
- case I915_CACHE_WT:
- args->caching = I915_CACHING_DISPLAY;
- break;
-
- default:
- args->caching = I915_CACHING_NONE;
- break;
- }
-out:
- rcu_read_unlock();
- return err;
-}
-
-int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
- struct drm_file *file)
-{
- struct drm_i915_private *i915 = to_i915(dev);
- struct drm_i915_gem_caching *args = data;
- struct drm_i915_gem_object *obj;
- enum i915_cache_level level;
- int ret = 0;
-
- switch (args->caching) {
- case I915_CACHING_NONE:
- level = I915_CACHE_NONE;
- break;
- case I915_CACHING_CACHED:
- /*
- * Due to a HW issue on BXT A stepping, GPU stores via a
- * snooped mapping may leave stale data in a corresponding CPU
- * cacheline, whereas normally such cachelines would get
- * invalidated.
- */
- if (!HAS_LLC(i915) && !HAS_SNOOP(i915))
- return -ENODEV;
-
- level = I915_CACHE_LLC;
- break;
- case I915_CACHING_DISPLAY:
- level = HAS_WT(i915) ? I915_CACHE_WT : I915_CACHE_NONE;
- break;
- default:
- return -EINVAL;
- }
-
- obj = i915_gem_object_lookup(file, args->handle);
- if (!obj)
- return -ENOENT;
-
- /*
- * The caching mode of proxy object is handled by its generator, and
- * not allowed to be changed by userspace.
- */
- if (i915_gem_object_is_proxy(obj)) {
- ret = -ENXIO;
- goto out;
- }
-
- if (obj->cache_level == level)
- goto out;
-
- ret = i915_gem_object_wait(obj,
- I915_WAIT_INTERRUPTIBLE,
- MAX_SCHEDULE_TIMEOUT);
- if (ret)
- goto out;
-
- ret = i915_mutex_lock_interruptible(dev);
- if (ret)
- goto out;
-
- ret = i915_gem_object_set_cache_level(obj, level);
- mutex_unlock(&dev->struct_mutex);
-
-out:
- i915_gem_object_put(obj);
- return ret;
-}
-
-/*
- * Prepare buffer for display plane (scanout, cursors, etc). Can be called from
- * an uninterruptible phase (modesetting) and allows any flushes to be pipelined
- * (for pageflips). We only flush the caches while preparing the buffer for
- * display, the callers are responsible for frontbuffer flush.
- */
-struct i915_vma *
-i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
- u32 alignment,
- const struct i915_ggtt_view *view,
- unsigned int flags)
-{
- struct i915_vma *vma;
- int ret;
-
- lockdep_assert_held(&obj->base.dev->struct_mutex);
-
- /* Mark the global pin early so that we account for the
- * display coherency whilst setting up the cache domains.
- */
- obj->pin_global++;
-
- /* The display engine is not coherent with the LLC cache on gen6. As
- * a result, we make sure that the pinning that is about to occur is
- * done with uncached PTEs. This is lowest common denominator for all
- * chipsets.
- *
- * However for gen6+, we could do better by using the GFDT bit instead
- * of uncaching, which would allow us to flush all the LLC-cached data
- * with that bit in the PTE to main memory with just one PIPE_CONTROL.
- */
- ret = i915_gem_object_set_cache_level(obj,
- HAS_WT(to_i915(obj->base.dev)) ?
- I915_CACHE_WT : I915_CACHE_NONE);
- if (ret) {
- vma = ERR_PTR(ret);
- goto err_unpin_global;
- }
-
- /* As the user may map the buffer once pinned in the display plane
- * (e.g. libkms for the bootup splash), we have to ensure that we
- * always use map_and_fenceable for all scanout buffers. However,
- * it may simply be too big to fit into mappable, in which case
- * put it anyway and hope that userspace can cope (but always first
- * try to preserve the existing ABI).
- */
- vma = ERR_PTR(-ENOSPC);
- if ((flags & PIN_MAPPABLE) == 0 &&
- (!view || view->type == I915_GGTT_VIEW_NORMAL))
- vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment,
- flags |
- PIN_MAPPABLE |
- PIN_NONBLOCK);
- if (IS_ERR(vma))
- vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, flags);
- if (IS_ERR(vma))
- goto err_unpin_global;
-
- vma->display_alignment = max_t(u64, vma->display_alignment, alignment);
-
- __i915_gem_object_flush_for_display(obj);
-
- /* It should now be out of any other write domains, and we can update
- * the domain values for our changes.
- */
- obj->read_domains |= I915_GEM_DOMAIN_GTT;
-
- return vma;
-
-err_unpin_global:
- obj->pin_global--;
- return vma;
-}
-
-void
-i915_gem_object_unpin_from_display_plane(struct i915_vma *vma)
-{
- lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
-
- if (WARN_ON(vma->obj->pin_global == 0))
- return;
-
- if (--vma->obj->pin_global == 0)
- vma->display_alignment = I915_GTT_MIN_ALIGNMENT;
-
- /* Bump the LRU to try and avoid premature eviction whilst flipping */
- i915_gem_object_bump_inactive_ggtt(vma->obj);
-
- i915_vma_unpin(vma);
-}
-
-/**
- * Moves a single object to the CPU read, and possibly write domain.
- * @obj: object to act on
- * @write: requesting write or read-only access
- *
- * This function returns when the move is complete, including waiting on
- * flushes to occur.
- */
-int
-i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
-{
- int ret;
-
- lockdep_assert_held(&obj->base.dev->struct_mutex);
-
- ret = i915_gem_object_wait(obj,
- I915_WAIT_INTERRUPTIBLE |
- I915_WAIT_LOCKED |
- (write ? I915_WAIT_ALL : 0),
- MAX_SCHEDULE_TIMEOUT);
- if (ret)
- return ret;
-
- i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
-
- /* Flush the CPU cache if it's still invalid. */
- if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) {
- i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
- obj->read_domains |= I915_GEM_DOMAIN_CPU;
- }
-
- /* It should now be out of any other write domains, and we can update
- * the domain values for our changes.
- */
- GEM_BUG_ON(obj->write_domain & ~I915_GEM_DOMAIN_CPU);
-
- /* If we're writing through the CPU, then the GPU read domains will
- * need to be invalidated at next use.
- */
- if (write)
- __start_cpu_write(obj);
-
- return 0;
-}
-
/* Throttle our rendering by waiting until the ring has completed our requests
* emitted over 20 msec ago.
*
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 27391300507e..fc68c9096e94 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1026,7 +1026,7 @@ static void reloc_cache_reset(struct reloc_cache *cache)
mb();
kunmap_atomic(vaddr);
- i915_gem_obj_finish_shmem_access((struct drm_i915_gem_object *)cache->node.mm);
+ i915_gem_object_finish_access((struct drm_i915_gem_object *)cache->node.mm);
} else {
wmb();
io_mapping_unmap_atomic((void __iomem *)vaddr);
@@ -1058,7 +1058,7 @@ static void *reloc_kmap(struct drm_i915_gem_object *obj,
unsigned int flushes;
int err;
- err = i915_gem_obj_prepare_shmem_write(obj, &flushes);
+ err = i915_gem_object_prepare_write(obj, &flushes);
if (err)
return ERR_PTR(err);
diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c
index 9440024c763f..f3b42b026fff 100644
--- a/drivers/gpu/drm/i915/i915_gem_render_state.c
+++ b/drivers/gpu/drm/i915/i915_gem_render_state.c
@@ -84,7 +84,7 @@ static int render_state_setup(struct intel_render_state *so,
u32 *d;
int ret;
- ret = i915_gem_obj_prepare_shmem_write(so->obj, &needs_clflush);
+ ret = i915_gem_object_prepare_write(so->obj, &needs_clflush);
if (ret)
return ret;
@@ -166,7 +166,7 @@ static int render_state_setup(struct intel_render_state *so,
ret = 0;
out:
- i915_gem_obj_finish_shmem_access(so->obj);
+ i915_gem_object_finish_access(so->obj);
return ret;
err:
diff --git a/drivers/gpu/drm/i915/selftests/huge_pages.c b/drivers/gpu/drm/i915/selftests/huge_pages.c
index ce4ec87698f6..b22b8249dfbd 100644
--- a/drivers/gpu/drm/i915/selftests/huge_pages.c
+++ b/drivers/gpu/drm/i915/selftests/huge_pages.c
@@ -1017,7 +1017,7 @@ static int cpu_check(struct drm_i915_gem_object *obj, u32 dword, u32 val)
unsigned long n;
int err;
- err = i915_gem_obj_prepare_shmem_read(obj, &needs_flush);
+ err = i915_gem_object_prepare_read(obj, &needs_flush);
if (err)
return err;
@@ -1038,7 +1038,7 @@ static int cpu_check(struct drm_i915_gem_object *obj, u32 dword, u32 val)
kunmap_atomic(ptr);
}
- i915_gem_obj_finish_shmem_access(obj);
+ i915_gem_object_finish_access(obj);
return err;
}
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c
index 046a38743152..cb25b5fc8027 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c
@@ -37,7 +37,7 @@ static int cpu_set(struct drm_i915_gem_object *obj,
u32 *cpu;
int err;
- err = i915_gem_obj_prepare_shmem_write(obj, &needs_clflush);
+ err = i915_gem_object_prepare_write(obj, &needs_clflush);
if (err)
return err;
@@ -54,7 +54,7 @@ static int cpu_set(struct drm_i915_gem_object *obj,
drm_clflush_virt_range(cpu, sizeof(*cpu));
kunmap_atomic(map);
- i915_gem_obj_finish_shmem_access(obj);
+ i915_gem_object_finish_access(obj);
return 0;
}
@@ -69,7 +69,7 @@ static int cpu_get(struct drm_i915_gem_object *obj,
u32 *cpu;
int err;
- err = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush);
+ err = i915_gem_object_prepare_read(obj, &needs_clflush);
if (err)
return err;
@@ -83,7 +83,7 @@ static int cpu_get(struct drm_i915_gem_object *obj,
*v = *cpu;
kunmap_atomic(map);
- i915_gem_obj_finish_shmem_access(obj);
+ i915_gem_object_finish_access(obj);
return 0;
}
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
index b62f005e4d50..38bdf9cdb3d7 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
@@ -354,7 +354,7 @@ static int cpu_fill(struct drm_i915_gem_object *obj, u32 value)
unsigned int n, m, need_flush;
int err;
- err = i915_gem_obj_prepare_shmem_write(obj, &need_flush);
+ err = i915_gem_object_prepare_write(obj, &need_flush);
if (err)
return err;
@@ -369,7 +369,7 @@ static int cpu_fill(struct drm_i915_gem_object *obj, u32 value)
kunmap_atomic(map);
}
- i915_gem_obj_finish_shmem_access(obj);
+ i915_gem_object_finish_access(obj);
obj->read_domains = I915_GEM_DOMAIN_GTT | I915_GEM_DOMAIN_CPU;
obj->write_domain = 0;
return 0;
@@ -381,7 +381,7 @@ static noinline int cpu_check(struct drm_i915_gem_object *obj,
unsigned int n, m, needs_flush;
int err;
- err = i915_gem_obj_prepare_shmem_read(obj, &needs_flush);
+ err = i915_gem_object_prepare_read(obj, &needs_flush);
if (err)
return err;
@@ -419,7 +419,7 @@ static noinline int cpu_check(struct drm_i915_gem_object *obj,
break;
}
- i915_gem_obj_finish_shmem_access(obj);
+ i915_gem_object_finish_access(obj);
return err;
}
--
2.20.1
More information about the Intel-gfx
mailing list