[PATCH 61/64] drm/i915/gem: Improve coordination of page domains
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Tue, 23 Jun 2020 12:31:29 +0000
Explicitly control which read/write domains we serialise when acquiring
the pages for internal temporary access, and use this information to
coordinate with outstanding and future fences without waiting underneath
the reservation_ww_class lock.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
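For reference, a minimal sketch of the caller pattern the new helpers enable, modelled on the shmem pread path converted below (illustrative only, not part of the diff; error handling and the actual copy are elided):

/*
 * Illustrative only: acquire the pages with a CPU read-domain barrier,
 * honour the clflush hints, then release the stub fence. This mirrors
 * the pattern used by i915_gem_shmem_pread() in this patch.
 */
static int cpu_read_example(struct drm_i915_gem_object *obj)
{
	unsigned int needs_clflush;
	struct dma_fence *fence;
	unsigned long n;

	fence = i915_gem_object_acquire_pages_fence(obj,
						    I915_GEM_DOMAIN_CPU, 0,
						    &needs_clflush);
	if (IS_ERR(fence))
		return PTR_ERR(fence);

	for (n = 0; n < obj->base.size >> PAGE_SHIFT; n++) {
		void *ptr = kmap_atomic(i915_gem_object_get_page(obj, n));

		if (needs_clflush & CLFLUSH_BEFORE)
			drm_clflush_virt_range(ptr, PAGE_SIZE);

		/* ... read the page contents here ... */

		kunmap_atomic(ptr);
	}

	i915_gem_object_release_pages_fence(obj, fence);
	return 0;
}

The acquire step does its waiting via the stub fence, outside the reservation_ww_class lock; i915_gem_object_acquire_pages_barrier() is the same sequence for callers that only need the domain transition and not the pinned pages.
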
drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c | 26 +--
drivers/gpu/drm/i915/gem/i915_gem_domain.c | 204 ++----------------
drivers/gpu/drm/i915/gem/i915_gem_fence.c | 166 ++++++++++++--
drivers/gpu/drm/i915/gem/i915_gem_object.h | 30 +--
drivers/gpu/drm/i915/gem/i915_gem_pm.c | 3 +-
.../gpu/drm/i915/gem/selftests/huge_pages.c | 16 +-
.../i915/gem/selftests/i915_gem_coherency.c | 65 +++---
.../drm/i915/gem/selftests/i915_gem_context.c | 44 ++--
.../drm/i915/gem/selftests/i915_gem_mman.c | 26 +--
.../drm/i915/gem/selftests/i915_gem_phys.c | 11 +-
drivers/gpu/drm/i915/i915_gem.c | 100 +++------
drivers/gpu/drm/i915/selftests/i915_vma.c | 10 +-
12 files changed, 308 insertions(+), 393 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
index 049a15e6b496..4c2dc8677e1f 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
@@ -121,30 +121,24 @@ static int i915_gem_dmabuf_mmap(struct dma_buf *dma_buf, struct vm_area_struct *
static int i915_gem_begin_cpu_access(struct dma_buf *dma_buf, enum dma_data_direction direction)
{
struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf);
- bool write = (direction == DMA_BIDIRECTIONAL || direction == DMA_TO_DEVICE);
- int err;
+ unsigned int write;
- err = i915_gem_object_lock_interruptible(obj);
- if (err)
- return err;
+ write = 0;
+ if (direction == DMA_BIDIRECTIONAL || direction == DMA_TO_DEVICE)
+ write = I915_GEM_DOMAIN_CPU;
- err = i915_gem_object_set_to_cpu_domain(obj, write);
- i915_gem_object_unlock(obj);
- return err;
+ return i915_gem_object_acquire_pages_barrier(obj,
+ I915_GEM_DOMAIN_CPU,
+ write);
}
static int i915_gem_end_cpu_access(struct dma_buf *dma_buf, enum dma_data_direction direction)
{
struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf);
- int err;
- err = i915_gem_object_lock_interruptible(obj);
- if (err)
- return err;
-
- err = i915_gem_object_set_to_gtt_domain(obj, false);
- i915_gem_object_unlock(obj);
- return err;
+ return i915_gem_object_acquire_pages_barrier(obj,
+ I915_GEM_DOMAIN_GTT,
+ 0);
}
static const struct dma_buf_ops i915_dmabuf_ops = {
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
index 2ee42a58bfee..d11ffc102dcc 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
@@ -97,24 +97,16 @@ void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj)
* This function returns when the move is complete, including waiting on
* flushes to occur.
*/
-int
+void
i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
{
- int ret;
-
assert_object_held(obj);
- ret = i915_gem_object_wait(obj,
- I915_WAIT_INTERRUPTIBLE |
- (write ? I915_WAIT_ALL : 0),
- MAX_SCHEDULE_TIMEOUT);
- if (ret)
- return ret;
-
if (obj->write_domain == I915_GEM_DOMAIN_WC)
- return 0;
+ return;
- /* Flush and acquire obj->pages so that we are coherent through
+ /*
+ * Flush and acquire obj->pages so that we are coherent through
* direct access in memory with previous cached writes through
* shmemfs and that our cache domain tracking remains valid.
* For example, if the obj->filp was moved to swap without us
@@ -122,20 +114,18 @@ i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
* continue to assume that the obj remained out of the CPU cached
* domain.
*/
- ret = __i915_gem_object_get_pages_locked(obj);
- if (ret)
- return ret;
-
flush_write_domains(obj, ~I915_GEM_DOMAIN_WC);
- /* Serialise direct access to this object with the barriers for
+ /*
+ * Serialise direct access to this object with the barriers for
* coherent writes from the GPU, by effectively invalidating the
* WC domain upon first access.
*/
if ((obj->read_domains & I915_GEM_DOMAIN_WC) == 0)
mb();
- /* It should now be out of any other write domains, and we can update
+ /*
+ * It should now be out of any other write domains, and we can update
* the domain values for our changes.
*/
GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_WC) != 0);
@@ -145,9 +135,6 @@ i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
obj->write_domain = I915_GEM_DOMAIN_WC;
obj->mm.dirty = true;
}
-
- i915_gem_object_unpin_pages(obj);
- return 0;
}
/**
@@ -158,34 +145,13 @@ i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
* This function returns when the move is complete, including waiting on
* flushes to occur.
*/
-int
+void
i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
{
- int ret;
-
assert_object_held(obj);
- ret = i915_gem_object_wait(obj,
- I915_WAIT_INTERRUPTIBLE |
- (write ? I915_WAIT_ALL : 0),
- MAX_SCHEDULE_TIMEOUT);
- if (ret)
- return ret;
-
if (obj->write_domain == I915_GEM_DOMAIN_GTT)
- return 0;
-
- /* Flush and acquire obj->pages so that we are coherent through
- * direct access in memory with previous cached writes through
- * shmemfs and that our cache domain tracking remains valid.
- * For example, if the obj->filp was moved to swap without us
- * being notified and releasing the pages, we would mistakenly
- * continue to assume that the obj remained out of the CPU cached
- * domain.
- */
- ret = __i915_gem_object_get_pages_locked(obj);
- if (ret)
- return ret;
+ return;
flush_write_domains(obj, ~I915_GEM_DOMAIN_GTT);
@@ -214,9 +180,6 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
i915_vma_set_ggtt_write(vma);
spin_unlock(&obj->vma.lock);
}
-
- i915_gem_object_unpin_pages(obj);
- return 0;
}
/**
@@ -465,20 +428,11 @@ i915_gem_object_unpin_from_display_plane(struct i915_vma *vma)
* This function returns when the move is complete, including waiting on
* flushes to occur.
*/
-int
+void
i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
{
- int ret;
-
assert_object_held(obj);
- ret = i915_gem_object_wait(obj,
- I915_WAIT_INTERRUPTIBLE |
- (write ? I915_WAIT_ALL : 0),
- MAX_SCHEDULE_TIMEOUT);
- if (ret)
- return ret;
-
flush_write_domains(obj, ~I915_GEM_DOMAIN_CPU);
/* Flush the CPU cache if it's still invalid. */
@@ -497,8 +451,6 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
*/
if (write)
__start_cpu_write(obj);
-
- return 0;
}
/**
@@ -584,141 +536,13 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
* continue to assume that the obj remained out of the CPU cached
* domain.
*/
- err = i915_gem_object_lock_interruptible(obj);
- if (err)
- goto out;
-
- if (read_domains & I915_GEM_DOMAIN_WC)
- err = i915_gem_object_set_to_wc_domain(obj, write_domain);
- else if (read_domains & I915_GEM_DOMAIN_GTT)
- err = i915_gem_object_set_to_gtt_domain(obj, write_domain);
- else
- err = i915_gem_object_set_to_cpu_domain(obj, write_domain);
-
+ err = i915_gem_object_acquire_pages_barrier(obj,
+ read_domains,
+ write_domain);
/* And bump the LRU for this access */
i915_gem_object_bump_inactive_ggtt(obj);
- i915_gem_object_unlock(obj);
-
- if (write_domain)
- i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU);
-
out:
i915_gem_object_put(obj);
return err;
}
-
-/*
- * Pins the specified object's pages and synchronizes the object with
- * GPU accesses. Sets needs_clflush to non-zero if the caller should
- * flush the object from the CPU cache.
- */
-int i915_gem_object_prepare_read(struct drm_i915_gem_object *obj,
- unsigned int *needs_clflush)
-{
- int ret;
-
- assert_object_held(obj);
-
- *needs_clflush = 0;
- if (!i915_gem_object_has_struct_page(obj))
- return -ENODEV;
-
- ret = __i915_gem_object_get_pages_locked(obj);
- if (ret)
- return ret;
-
- ret = i915_gem_object_wait(obj,
- I915_WAIT_INTERRUPTIBLE,
- MAX_SCHEDULE_TIMEOUT);
- if (ret)
- goto err_unpin;
-
- if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ ||
- !static_cpu_has(X86_FEATURE_CLFLUSH)) {
- ret = i915_gem_object_set_to_cpu_domain(obj, false);
- if (ret)
- goto err_unpin;
- else
- goto out;
- }
-
- flush_write_domains(obj, ~I915_GEM_DOMAIN_CPU);
-
- /* If we're not in the cpu read domain, set ourself into the gtt
- * read domain and manually flush cachelines (if required). This
- * optimizes for the case when the gpu will dirty the data
- * anyway again before the next pread happens.
- */
- if (!obj->cache_dirty &&
- !(obj->read_domains & I915_GEM_DOMAIN_CPU))
- *needs_clflush = CLFLUSH_BEFORE;
-
-out:
- /* return with the pages pinned */
- return 0;
-
-err_unpin:
- i915_gem_object_unpin_pages(obj);
- return ret;
-}
-
-int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj,
- unsigned int *needs_clflush)
-{
- int ret;
-
- assert_object_held(obj);
-
- *needs_clflush = 0;
- if (!i915_gem_object_has_struct_page(obj))
- return -ENODEV;
-
- ret = __i915_gem_object_get_pages_locked(obj);
- if (ret)
- return ret;
-
- ret = i915_gem_object_wait(obj,
- I915_WAIT_INTERRUPTIBLE |
- I915_WAIT_ALL,
- MAX_SCHEDULE_TIMEOUT);
- if (ret)
- goto err_unpin;
-
- if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE ||
- !static_cpu_has(X86_FEATURE_CLFLUSH)) {
- ret = i915_gem_object_set_to_cpu_domain(obj, true);
- if (ret)
- goto err_unpin;
- else
- goto out;
- }
-
- flush_write_domains(obj, ~I915_GEM_DOMAIN_CPU);
-
- /* If we're not in the cpu write domain, set ourself into the
- * gtt write domain and manually flush cachelines (as required).
- * This optimizes for the case when the gpu will use the data
- * right away and we therefore have to clflush anyway.
- */
- if (!obj->cache_dirty) {
- *needs_clflush |= CLFLUSH_AFTER;
-
- /*
- * Same trick applies to invalidate partially written
- * cachelines read before writing.
- */
- if (!(obj->read_domains & I915_GEM_DOMAIN_CPU))
- *needs_clflush |= CLFLUSH_BEFORE;
- }
-
-out:
- i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU);
- obj->mm.dirty = true;
- /* return with the pages pinned */
- return 0;
-
-err_unpin:
- i915_gem_object_unpin_pages(obj);
- return ret;
-}
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_fence.c b/drivers/gpu/drm/i915/gem/i915_gem_fence.c
index 8ab842c80f99..97cd389fd479 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_fence.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_fence.c
@@ -4,12 +4,15 @@
* Copyright © 2019 Intel Corporation
*/
+#include "gt/intel_gt.h"
+
#include "i915_drv.h"
#include "i915_gem_object.h"
struct stub_fence {
struct dma_fence dma;
struct i915_sw_fence chain;
+ unsigned int flush_domain;
};
static int __i915_sw_fence_call
@@ -19,7 +22,6 @@ stub_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
switch (state) {
case FENCE_COMPLETE:
- dma_fence_signal(&stub->dma);
break;
case FENCE_FREE:
@@ -56,40 +58,176 @@ static const struct dma_fence_ops stub_fence_ops = {
.release = stub_release,
};
+static void
+flush_write_domain(struct drm_i915_gem_object *obj, unsigned int write_domain)
+{
+ struct i915_vma *vma;
+
+ switch (write_domain) {
+ case I915_GEM_DOMAIN_GTT:
+ spin_lock(&obj->vma.lock);
+ for_each_ggtt_vma(vma, obj) {
+ if (i915_vma_unset_ggtt_write(vma))
+ intel_gt_flush_ggtt_writes(vma->vm->gt);
+ }
+ spin_unlock(&obj->vma.lock);
+ fallthrough;
+ case I915_GEM_DOMAIN_WC:
+ wmb();
+ break;
+
+ case I915_GEM_DOMAIN_CPU:
+ drm_clflush_sg(obj->mm.pages);
+ break;
+
+ case I915_GEM_DOMAIN_RENDER:
+ break;
+ }
+}
+
+static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj)
+{
+ return !(obj->cache_level == I915_CACHE_NONE ||
+ obj->cache_level == I915_CACHE_WT);
+}
+
struct dma_fence *
-i915_gem_object_lock_fence(struct drm_i915_gem_object *obj)
+i915_gem_object_acquire_pages_fence(struct drm_i915_gem_object *obj,
+ unsigned int read_domains,
+ unsigned int write_domain,
+ unsigned int *needs_clflush)
{
struct stub_fence *stub;
+ unsigned int old = 0;
+ int err;
- assert_object_held(obj);
+ if (needs_clflush) {
+ *needs_clflush = 0;
+ if (!static_cpu_has(X86_FEATURE_CLFLUSH))
+ needs_clflush = NULL;
+ }
stub = kmalloc(sizeof(*stub), GFP_KERNEL);
if (!stub)
- return NULL;
+ return ERR_PTR(-ENOMEM);
i915_sw_fence_init(&stub->chain, stub_notify);
dma_fence_init(&stub->dma, &stub_fence_ops, &stub->chain.wait.lock,
0, 0);
+ dma_fence_get(&stub->dma);
+
+ i915_gem_object_lock(obj);
+
+ err = dma_resv_reserve_shared(obj->base.resv, 1);
+ if (err)
+ goto err;
- if (i915_sw_fence_await_reservation(&stub->chain,
- obj->base.resv, NULL, true,
- i915_fence_timeout(to_i915(obj->base.dev)),
- I915_FENCE_GFP) < 0)
+ err = ____i915_gem_object_get_pages_async(obj);
+ if (err)
goto err;
- dma_resv_add_excl_fence(obj->base.resv, &stub->dma);
+ err = i915_sw_fence_await_active(&stub->chain,
+ &obj->mm.active,
+ I915_ACTIVE_AWAIT_EXCL);
+ if (err)
+ goto err;
+
+ err = i915_sw_fence_await_reservation(&stub->chain,
+ obj->base.resv, NULL,
+ write_domain,
+ i915_fence_timeout(to_i915(obj->base.dev)),
+ I915_FENCE_GFP);
+ if (err < 0)
+ goto err;
+
+ err = i915_active_ref(&obj->mm.active, 0, &stub->dma);
+ if (err)
+ goto err;
+
+ if (!needs_clflush) {
+ if (obj->write_domain != (read_domains | write_domain)) {
+ if (obj->write_domain & ~(I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_WC | I915_GEM_DOMAIN_GTT) &&
+ gpu_write_needs_clflush(obj))
+ obj->cache_dirty = true;
+
+ old = obj->write_domain;
+ obj->write_domain = 0;
+ }
+ if (write_domain) {
+ obj->read_domains = 0;
+ obj->write_domain = write_domain;
+ }
+ obj->read_domains |= read_domains;
+ } else {
+ if (obj->write_domain & ~I915_GEM_DOMAIN_CPU &&
+ !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ))
+ *needs_clflush |= CLFLUSH_BEFORE;
+
+ if (write_domain & I915_GEM_DOMAIN_CPU &&
+ !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
+ *needs_clflush |= CLFLUSH_AFTER;
+ }
+
+ if (write_domain)
+ dma_resv_add_excl_fence(obj->base.resv, &stub->dma);
+ else
+ dma_resv_add_shared_fence(obj->base.resv, &stub->dma);
+
+ stub->flush_domain = write_domain;
+ obj->mm.dirty |= write_domain;
+
+ i915_gem_object_unlock(obj);
+
+ i915_sw_fence_commit(&stub->chain);
+
+ if (wait_event_interruptible(stub->chain.wait,
+ i915_sw_fence_done(&stub->chain))) {
+ dma_fence_put(&stub->dma);
+ return ERR_PTR(-EINTR);
+ }
+
+ flush_write_domain(obj, old);
+ if (write_domain)
+ i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU);
return &stub->dma;
err:
- stub_release(&stub->dma);
- return NULL;
+ i915_gem_object_unlock(obj);
+ i915_sw_fence_commit(&stub->chain);
+ dma_fence_signal(&stub->dma);
+ dma_fence_put(&stub->dma);
+ return ERR_PTR(err);
}
-void i915_gem_object_unlock_fence(struct drm_i915_gem_object *obj,
- struct dma_fence *fence)
+void i915_gem_object_release_pages_fence(struct drm_i915_gem_object *obj,
+ struct dma_fence *fence)
{
struct stub_fence *stub = container_of(fence, typeof(*stub), dma);
- i915_sw_fence_commit(&stub->chain);
+ if (stub->flush_domain & I915_GEM_DOMAIN_GTT)
+ intel_gt_flush_ggtt_writes(to_i915(obj->base.dev)->ggtt.vm.gt);
+ if (stub->flush_domain)
+ i915_gem_object_flush_frontbuffer(obj, ORIGIN_CPU);
+
+ dma_fence_signal(fence);
+ dma_fence_put(fence);
+}
+
+int
+i915_gem_object_acquire_pages_barrier(struct drm_i915_gem_object *obj,
+ unsigned int read_domains,
+ unsigned int write_domain)
+{
+ struct dma_fence *fence;
+
+ fence = i915_gem_object_acquire_pages_fence(obj,
+ read_domains,
+ write_domain,
+ NULL);
+ if (IS_ERR(fence))
+ return PTR_ERR(fence);
+
+ i915_gem_object_release_pages_fence(obj, fence);
+ return 0;
}
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index fdfecbb3346f..549a413fbd2d 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -132,9 +132,19 @@ static inline void i915_gem_object_unlock(struct drm_i915_gem_object *obj)
}
struct dma_fence *
-i915_gem_object_lock_fence(struct drm_i915_gem_object *obj);
-void i915_gem_object_unlock_fence(struct drm_i915_gem_object *obj,
- struct dma_fence *fence);
+i915_gem_object_acquire_pages_fence(struct drm_i915_gem_object *obj,
+ unsigned int read_domains,
+ unsigned int write_domain,
+ unsigned int *needs_clflush);
+#define CLFLUSH_BEFORE BIT(0)
+#define CLFLUSH_AFTER BIT(1)
+void i915_gem_object_release_pages_fence(struct drm_i915_gem_object *obj,
+ struct dma_fence *fence);
+
+int
+i915_gem_object_acquire_pages_barrier(struct drm_i915_gem_object *obj,
+ unsigned int read_domains,
+ unsigned int write_domain);
static inline void
i915_gem_object_set_readonly(struct drm_i915_gem_object *obj)
@@ -392,14 +402,6 @@ void
i915_gem_object_flush_write_domain(struct drm_i915_gem_object *obj,
unsigned int flush_domains);
-int i915_gem_object_prepare_read(struct drm_i915_gem_object *obj,
- unsigned int *needs_clflush);
-int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj,
- unsigned int *needs_clflush);
-#define CLFLUSH_BEFORE BIT(0)
-#define CLFLUSH_AFTER BIT(1)
-#define CLFLUSH_FLAGS (CLFLUSH_BEFORE | CLFLUSH_AFTER)
-
static inline void
i915_gem_object_finish_access(struct drm_i915_gem_object *obj)
{
@@ -427,11 +429,11 @@ void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj,
unsigned int cache_level);
void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj);
-int __must_check
+void
i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write);
-int __must_check
+void
i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write);
-int __must_check
+void
i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write);
struct i915_vma * __must_check
i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
index 3d215164dd5a..43b0b967ef6a 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
@@ -85,8 +85,7 @@ void i915_gem_suspend_late(struct drm_i915_private *i915)
spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
i915_gem_object_lock(obj);
- drm_WARN_ON(&i915->drm,
- i915_gem_object_set_to_gtt_domain(obj, false));
+ i915_gem_object_set_to_gtt_domain(obj, false);
i915_gem_object_unlock(obj);
i915_gem_object_put(obj);
diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
index 6ff7c402556e..b5e00b873262 100644
--- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
+++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
@@ -953,7 +953,7 @@ static int gpu_write(struct intel_context *ce,
int err;
i915_gem_object_lock(vma->obj);
- err = i915_gem_object_set_to_gtt_domain(vma->obj, true);
+ i915_gem_object_set_to_gtt_domain(vma->obj, true);
i915_gem_object_unlock(vma->obj);
if (err)
return err;
@@ -966,13 +966,15 @@ static int
__cpu_check_shmem(struct drm_i915_gem_object *obj, u32 dword, u32 val)
{
unsigned int needs_flush;
+ struct dma_fence *fence;
unsigned long n;
int err;
- i915_gem_object_lock(obj);
- err = i915_gem_object_prepare_read(obj, &needs_flush);
- if (err)
- goto unlock;
+ fence = i915_gem_object_acquire_pages_fence(obj,
+ I915_GEM_DOMAIN_CPU, 0,
+ &needs_flush);
+ if (IS_ERR(fence))
+ return PTR_ERR(fence);
for (n = 0; n < obj->base.size >> PAGE_SHIFT; ++n) {
u32 *ptr = kmap_atomic(i915_gem_object_get_page(obj, n));
@@ -991,9 +993,7 @@ __cpu_check_shmem(struct drm_i915_gem_object *obj, u32 dword, u32 val)
kunmap_atomic(ptr);
}
- i915_gem_object_finish_access(obj);
-unlock:
- i915_gem_object_unlock(obj);
+ i915_gem_object_release_pages_fence(obj, fence);
return err;
}
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
index 58e9f68b815f..862ad8025dba 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
@@ -22,15 +22,17 @@ struct context {
static int cpu_set(struct context *ctx, unsigned long offset, u32 v)
{
unsigned int needs_clflush;
+ struct dma_fence *fence;
struct page *page;
void *map;
u32 *cpu;
- int err;
- i915_gem_object_lock(ctx->obj);
- err = i915_gem_object_prepare_write(ctx->obj, &needs_clflush);
- if (err)
- goto unlock;
+ fence = i915_gem_object_acquire_pages_fence(ctx->obj,
+ I915_GEM_DOMAIN_CPU,
+ I915_GEM_DOMAIN_CPU,
+ &needs_clflush);
+ if (IS_ERR(fence))
+ return PTR_ERR(fence);
page = i915_gem_object_get_page(ctx->obj, offset >> PAGE_SHIFT);
map = kmap_atomic(page);
@@ -45,25 +47,24 @@ static int cpu_set(struct context *ctx, unsigned long offset, u32 v)
drm_clflush_virt_range(cpu, sizeof(*cpu));
kunmap_atomic(map);
- i915_gem_object_finish_access(ctx->obj);
-unlock:
- i915_gem_object_unlock(ctx->obj);
- return err;
+ i915_gem_object_release_pages_fence(ctx->obj, fence);
+ return 0;
}
static int cpu_get(struct context *ctx, unsigned long offset, u32 *v)
{
unsigned int needs_clflush;
+ struct dma_fence *fence;
struct page *page;
void *map;
u32 *cpu;
- int err;
- i915_gem_object_lock(ctx->obj);
- err = i915_gem_object_prepare_read(ctx->obj, &needs_clflush);
- if (err)
- goto unlock;
+ fence = i915_gem_object_acquire_pages_fence(ctx->obj,
+ I915_GEM_DOMAIN_CPU, 0,
+ &needs_clflush);
+ if (IS_ERR(fence))
+ return PTR_ERR(fence);
page = i915_gem_object_get_page(ctx->obj, offset >> PAGE_SHIFT);
map = kmap_atomic(page);
@@ -75,11 +76,9 @@ static int cpu_get(struct context *ctx, unsigned long offset, u32 *v)
*v = *cpu;
kunmap_atomic(map);
- i915_gem_object_finish_access(ctx->obj);
-unlock:
- i915_gem_object_unlock(ctx->obj);
- return err;
+ i915_gem_object_release_pages_fence(ctx->obj, fence);
+ return 0;
}
static int gtt_set(struct context *ctx, unsigned long offset, u32 v)
@@ -88,16 +87,14 @@ static int gtt_set(struct context *ctx, unsigned long offset, u32 v)
u32 __iomem *map;
int err = 0;
- i915_gem_object_lock(ctx->obj);
- err = i915_gem_object_set_to_gtt_domain(ctx->obj, true);
- i915_gem_object_unlock(ctx->obj);
- if (err)
- return err;
-
vma = i915_gem_object_ggtt_pin(ctx->obj, NULL, 0, 0, PIN_MAPPABLE);
if (IS_ERR(vma))
return PTR_ERR(vma);
+ i915_gem_object_lock(ctx->obj);
+ i915_gem_object_set_to_gtt_domain(ctx->obj, true);
+ i915_gem_object_unlock(ctx->obj);
+
intel_gt_pm_get(vma->vm->gt);
map = i915_vma_pin_iomap(vma);
@@ -121,16 +118,14 @@ static int gtt_get(struct context *ctx, unsigned long offset, u32 *v)
u32 __iomem *map;
int err = 0;
- i915_gem_object_lock(ctx->obj);
- err = i915_gem_object_set_to_gtt_domain(ctx->obj, false);
- i915_gem_object_unlock(ctx->obj);
- if (err)
- return err;
-
vma = i915_gem_object_ggtt_pin(ctx->obj, NULL, 0, 0, PIN_MAPPABLE);
if (IS_ERR(vma))
return PTR_ERR(vma);
+ i915_gem_object_lock(ctx->obj);
+ i915_gem_object_set_to_gtt_domain(ctx->obj, false);
+ i915_gem_object_unlock(ctx->obj);
+
intel_gt_pm_get(vma->vm->gt);
map = i915_vma_pin_iomap(vma);
@@ -187,16 +182,14 @@ static int gpu_set(struct context *ctx, unsigned long offset, u32 v)
u32 *cs;
int err;
- i915_gem_object_lock(ctx->obj);
- err = i915_gem_object_set_to_gtt_domain(ctx->obj, true);
- i915_gem_object_unlock(ctx->obj);
- if (err)
- return err;
-
vma = i915_gem_object_ggtt_pin(ctx->obj, NULL, 0, 0, 0);
if (IS_ERR(vma))
return PTR_ERR(vma);
+ i915_gem_object_lock(ctx->obj);
+ i915_gem_object_set_to_gtt_domain(ctx->obj, true);
+ i915_gem_object_unlock(ctx->obj);
+
rq = intel_engine_create_kernel_request(ctx->engine);
if (IS_ERR(rq)) {
i915_vma_unpin(vma);
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
index 6abf36e076eb..a3e31110cf4e 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
@@ -458,13 +458,15 @@ static int gpu_fill(struct intel_context *ce,
static int cpu_fill(struct drm_i915_gem_object *obj, u32 value)
{
const bool has_llc = HAS_LLC(to_i915(obj->base.dev));
- unsigned int n, m, need_flush;
- int err;
+ struct dma_fence *fence;
+ unsigned int n, m;
- i915_gem_object_lock(obj);
- err = i915_gem_object_prepare_write(obj, &need_flush);
- if (err)
- goto unlock;
+ fence = i915_gem_object_acquire_pages_fence(obj,
+ I915_GEM_DOMAIN_CPU,
+ I915_GEM_DOMAIN_CPU,
+ NULL);
+ if (IS_ERR(fence))
+ return PTR_ERR(fence);
for (n = 0; n < real_page_count(obj); n++) {
u32 *map;
@@ -477,11 +479,10 @@ static int cpu_fill(struct drm_i915_gem_object *obj, u32 value)
kunmap_atomic(map);
}
- i915_gem_object_finish_access(obj);
obj->read_domains = I915_GEM_DOMAIN_GTT | I915_GEM_DOMAIN_CPU;
obj->write_domain = 0;
-unlock:
- i915_gem_object_unlock(obj);
+
+ i915_gem_object_release_pages_fence(obj, fence);
return 0;
}
@@ -489,17 +490,20 @@ static noinline int cpu_check(struct drm_i915_gem_object *obj,
unsigned int idx, unsigned int max)
{
unsigned int n, m, needs_flush;
+ struct dma_fence *fence;
int err;
- i915_gem_object_lock(obj);
- err = i915_gem_object_prepare_read(obj, &needs_flush);
- if (err)
- goto unlock;
+ fence = i915_gem_object_acquire_pages_fence(obj,
+ I915_GEM_DOMAIN_CPU, 0,
+ &needs_flush);
+ if (IS_ERR(fence))
+ return PTR_ERR(fence);
for (n = 0; n < real_page_count(obj); n++) {
u32 *map;
map = kmap_atomic(i915_gem_object_get_page(obj, n));
+
if (needs_flush & CLFLUSH_BEFORE)
drm_clflush_virt_range(map, PAGE_SIZE);
@@ -530,9 +534,7 @@ static noinline int cpu_check(struct drm_i915_gem_object *obj,
break;
}
- i915_gem_object_finish_access(obj);
-unlock:
- i915_gem_object_unlock(obj);
+ i915_gem_object_release_pages_fence(obj, fence);
return err;
}
@@ -956,16 +958,14 @@ emit_rpcs_query(struct drm_i915_gem_object *obj,
if (IS_ERR(vma))
return PTR_ERR(vma);
- i915_gem_object_lock(obj);
- err = i915_gem_object_set_to_gtt_domain(obj, false);
- i915_gem_object_unlock(obj);
- if (err)
- return err;
-
err = i915_vma_pin(vma, 0, 0, PIN_USER);
if (err)
return err;
+ i915_gem_object_lock(obj);
+ i915_gem_object_set_to_gtt_domain(obj, false);
+ i915_gem_object_unlock(obj);
+
batch = rpcs_query_batch(vma);
if (IS_ERR(batch)) {
err = PTR_ERR(batch);
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
index 11f734fea3ab..36d545d67d87 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
@@ -84,6 +84,7 @@ static int check_partial_mapping(struct drm_i915_gem_object *obj,
{
const unsigned long npages = obj->base.size / PAGE_SIZE;
struct i915_ggtt_view view;
+ struct dma_fence *fence;
struct i915_vma *vma;
unsigned long page;
u32 __iomem *io;
@@ -103,12 +104,14 @@ static int check_partial_mapping(struct drm_i915_gem_object *obj,
GEM_BUG_ON(i915_gem_object_get_tiling(obj) != tile->tiling);
GEM_BUG_ON(i915_gem_object_get_stride(obj) != tile->stride);
- i915_gem_object_lock(obj);
- err = i915_gem_object_set_to_gtt_domain(obj, true);
- i915_gem_object_unlock(obj);
- if (err) {
- pr_err("Failed to flush to GTT write domain; err=%d\n", err);
- return err;
+ fence = i915_gem_object_acquire_pages_fence(obj,
+ I915_GEM_DOMAIN_GTT,
+ I915_GEM_DOMAIN_GTT,
+ NULL);
+ if (IS_ERR(fence)) {
+ pr_err("Failed to flush to GTT write domain; err=%ld\n",
+ PTR_ERR(fence));
+ return PTR_ERR(fence);
}
page = i915_prandom_u32_max_state(npages, prng);
@@ -118,7 +121,8 @@ static int check_partial_mapping(struct drm_i915_gem_object *obj,
if (IS_ERR(vma)) {
pr_err("Failed to pin partial view: offset=%lu; err=%d\n",
page, (int)PTR_ERR(vma));
- return PTR_ERR(vma);
+ err = PTR_ERR(vma);
+ goto out_fence;
}
n = page - view.partial.offset;
@@ -165,6 +169,8 @@ static int check_partial_mapping(struct drm_i915_gem_object *obj,
out:
__i915_vma_put(vma);
+out_fence:
+ i915_gem_object_release_pages_fence(obj, fence);
return err;
}
@@ -189,12 +195,8 @@ static int check_partial_mappings(struct drm_i915_gem_object *obj,
GEM_BUG_ON(i915_gem_object_get_stride(obj) != tile->stride);
i915_gem_object_lock(obj);
- err = i915_gem_object_set_to_gtt_domain(obj, true);
+ i915_gem_object_set_to_gtt_domain(obj, true);
i915_gem_object_unlock(obj);
- if (err) {
- pr_err("Failed to flush to GTT write domain; err=%d\n", err);
- return err;
- }
for_each_prime_number_from(page, 1, npages) {
struct i915_ggtt_view view =
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c
index 34932871b3a5..1e46969f491e 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c
@@ -44,14 +44,9 @@ static int mock_phys_object(void *arg)
}
/* Make the object dirty so that put_pages must do copy back the data */
- i915_gem_object_lock(obj);
- err = i915_gem_object_set_to_gtt_domain(obj, true);
- i915_gem_object_unlock(obj);
- if (err) {
- pr_err("i915_gem_object_set_to_gtt_domain failed with err=%d\n",
- err);
- goto out_obj;
- }
+ err = i915_gem_object_acquire_pages_barrier(obj,
+ I915_GEM_DOMAIN_GTT,
+ I915_GEM_DOMAIN_GTT);
out_obj:
i915_gem_object_put(obj);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 0fbe438c4523..3ea3cbae6f40 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -307,7 +307,7 @@ i915_gem_create_ioctl(struct drm_device *dev, void *data,
static int
shmem_pread(struct page *page, int offset, int len, char __user *user_data,
- bool needs_clflush)
+ unsigned int needs_clflush)
{
char *vaddr;
int ret;
@@ -328,25 +328,20 @@ static int
i915_gem_shmem_pread(struct drm_i915_gem_object *obj,
struct drm_i915_gem_pread *args)
{
- unsigned int needs_clflush;
- unsigned int idx, offset;
+ unsigned int idx, offset, needs_clflush;
struct dma_fence *fence;
char __user *user_data;
u64 remain;
int ret;
- i915_gem_object_lock(obj);
- ret = i915_gem_object_prepare_read(obj, &needs_clflush);
- if (ret) {
- i915_gem_object_unlock(obj);
- return ret;
- }
+ if (!i915_gem_object_has_struct_page(obj))
+ return -ENODEV;
- fence = i915_gem_object_lock_fence(obj);
- i915_gem_object_finish_access(obj);
- i915_gem_object_unlock(obj);
- if (!fence)
- return -ENOMEM;
+ fence = i915_gem_object_acquire_pages_fence(obj,
+ I915_GEM_DOMAIN_CPU, 0,
+ &needs_clflush);
+ if (IS_ERR(fence))
+ return PTR_ERR(fence);
remain = args->size;
user_data = u64_to_user_ptr(args->data_ptr);
@@ -365,7 +360,7 @@ i915_gem_shmem_pread(struct drm_i915_gem_object *obj,
offset = 0;
}
- i915_gem_object_unlock_fence(obj, fence);
+ i915_gem_object_release_pages_fence(obj, fence);
return ret;
}
@@ -424,20 +419,11 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj,
GEM_BUG_ON(!drm_mm_node_allocated(&node));
}
- ret = i915_gem_object_lock_interruptible(obj);
- if (ret)
- goto out_unpin;
-
- ret = i915_gem_object_set_to_gtt_domain(obj, false);
- if (ret) {
- i915_gem_object_unlock(obj);
- goto out_unpin;
- }
-
- fence = i915_gem_object_lock_fence(obj);
- i915_gem_object_unlock(obj);
- if (!fence) {
- ret = -ENOMEM;
+ fence = i915_gem_object_acquire_pages_fence(obj,
+ I915_GEM_DOMAIN_GTT, 0,
+ NULL);
+ if (IS_ERR(fence)) {
+ ret = PTR_ERR(fence);
goto out_unpin;
}
@@ -475,7 +461,7 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj,
offset += page_length;
}
- i915_gem_object_unlock_fence(obj, fence);
+ i915_gem_object_release_pages_fence(obj, fence);
out_unpin:
if (drm_mm_node_allocated(&node)) {
ggtt->vm.clear_range(&ggtt->vm, node.start, node.size);
@@ -623,20 +609,12 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
GEM_BUG_ON(!drm_mm_node_allocated(&node));
}
- ret = i915_gem_object_lock_interruptible(obj);
- if (ret)
- goto out_unpin;
-
- ret = i915_gem_object_set_to_gtt_domain(obj, true);
- if (ret) {
- i915_gem_object_unlock(obj);
- goto out_unpin;
- }
-
- fence = i915_gem_object_lock_fence(obj);
- i915_gem_object_unlock(obj);
- if (!fence) {
- ret = -ENOMEM;
+ fence = i915_gem_object_acquire_pages_fence(obj,
+ I915_GEM_DOMAIN_GTT,
+ I915_GEM_DOMAIN_GTT,
+ NULL);
+ if (IS_ERR(fence)) {
+ ret = PTR_ERR(fence);
goto out_unpin;
}
@@ -683,10 +661,7 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
offset += page_length;
}
- intel_gt_flush_ggtt_writes(ggtt->vm.gt);
- i915_gem_object_flush_frontbuffer(obj, ORIGIN_CPU);
-
- i915_gem_object_unlock_fence(obj, fence);
+ i915_gem_object_release_pages_fence(obj, fence);
out_unpin:
if (drm_mm_node_allocated(&node)) {
ggtt->vm.clear_range(&ggtt->vm, node.start, node.size);
@@ -738,20 +713,18 @@ i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj,
u64 remain;
int ret;
- i915_gem_object_lock(obj);
- ret = i915_gem_object_prepare_write(obj, &needs_clflush);
- if (ret) {
- i915_gem_object_unlock(obj);
- return ret;
- }
+ if (!i915_gem_object_has_struct_page(obj))
+ return -ENODEV;
- fence = i915_gem_object_lock_fence(obj);
- i915_gem_object_finish_access(obj);
- i915_gem_object_unlock(obj);
- if (!fence)
- return -ENOMEM;
+ fence = i915_gem_object_acquire_pages_fence(obj,
+ I915_GEM_DOMAIN_CPU,
+ I915_GEM_DOMAIN_CPU,
+ &needs_clflush);
+ if (IS_ERR(fence))
+ return PTR_ERR(fence);
- /* If we don't overwrite a cacheline completely we need to be
+ /*
+ * If we don't overwrite a cacheline completely we need to be
* careful to have up-to-date data by first clflushing. Don't
* overcomplicate things and flush the entire patch.
*/
@@ -777,9 +750,7 @@ i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj,
offset = 0;
}
- i915_gem_object_flush_frontbuffer(obj, ORIGIN_CPU);
- i915_gem_object_unlock_fence(obj, fence);
-
+ i915_gem_object_release_pages_fence(obj, fence);
return ret;
}
@@ -1306,8 +1277,7 @@ int i915_gem_freeze_late(struct drm_i915_private *i915)
list_for_each_entry(obj, &i915->mm.shrink_list, mm.link) {
i915_gem_object_lock(obj);
- drm_WARN_ON(&i915->drm,
- i915_gem_object_set_to_cpu_domain(obj, true));
+ i915_gem_object_set_to_cpu_domain(obj, true);
i915_gem_object_unlock(obj);
}
diff --git a/drivers/gpu/drm/i915/selftests/i915_vma.c b/drivers/gpu/drm/i915/selftests/i915_vma.c
index af89c7fc8f59..c41e978eb293 100644
--- a/drivers/gpu/drm/i915/selftests/i915_vma.c
+++ b/drivers/gpu/drm/i915/selftests/i915_vma.c
@@ -892,12 +892,6 @@ static int igt_vma_remapped_gtt(void *arg)
unsigned int x, y;
int err;
- i915_gem_object_lock(obj);
- err = i915_gem_object_set_to_gtt_domain(obj, true);
- i915_gem_object_unlock(obj);
- if (err)
- goto out;
-
vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, PIN_MAPPABLE);
if (IS_ERR(vma)) {
err = PTR_ERR(vma);
@@ -913,6 +907,10 @@ static int igt_vma_remapped_gtt(void *arg)
goto out;
}
+ i915_gem_object_lock(obj);
+ i915_gem_object_set_to_gtt_domain(obj, true);
+ i915_gem_object_unlock(obj);
+
for (y = 0 ; y < p->height; y++) {
for (x = 0 ; x < p->width; x++) {
unsigned int offset;
--
2.20.1