[PATCH 14/14] drm/i915/gem: Serialise relocations
Chris Wilson
chris@chris-wilson.co.uk
Sun May 24 15:37:22 UTC 2020
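Relocations performed on the GPU are currently serialised one target at
a time, overriding the caller's EXEC_OBJECT_ASYNC request on every
relocation target. Instead, allocate the relocation request up front,
take the reservation locks for all relocation targets in a single
ww_mutex acquisition pass, and publish a proxy exclusive fence as a
placeholder for the relocation batch. Once the batch is complete, the
proxy is resolved to the actual relocation request, which the final
execbuf request awaits before executing the user payload.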
---
.../gpu/drm/i915/gem/i915_gem_execbuffer.c | 200 +++++++++++-------
1 file changed, 124 insertions(+), 76 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 00f3274b5717..1d2dac7a2c1b 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -5,6 +5,7 @@
*/
#include <linux/intel-iommu.h>
+#include <linux/dma-fence-proxy.h>
#include <linux/dma-resv.h>
#include <linux/sync_file.h>
#include <linux/uaccess.h>
@@ -259,7 +260,8 @@ struct i915_execbuffer {
bool has_fence : 1;
bool needs_unfenced : 1;
- struct i915_vma *target;
+ struct dma_fence *fence;
+
struct i915_request *rq;
struct i915_vma *rq_vma;
u32 *rq_cmd;
@@ -900,8 +902,6 @@ eb_get_vma(const struct i915_execbuffer *eb, unsigned long handle)
static void eb_destroy(const struct i915_execbuffer *eb)
{
- GEM_BUG_ON(eb->reloc_cache.rq);
-
if (eb->array)
eb_vma_array_put(eb->array);
@@ -926,8 +926,8 @@ static void reloc_cache_init(struct reloc_cache *cache,
cache->has_fence = cache->gen < 4;
cache->needs_unfenced = INTEL_INFO(i915)->unfenced_needs_alignment;
cache->node.flags = 0;
+ cache->fence = NULL;
cache->rq = NULL;
- cache->target = NULL;
}
static inline void *unmask_page(unsigned long p)
@@ -1026,13 +1026,9 @@ static unsigned int reloc_bb_flags(const struct reloc_cache *cache)
static int reloc_gpu_flush(struct reloc_cache *cache)
{
- struct i915_request *rq;
+ struct i915_request *rq = cache->rq;
int err;
- rq = fetch_and_zero(&cache->rq);
- if (!rq)
- return 0;
-
if (cache->rq_vma) {
struct drm_i915_gem_object *obj = cache->rq_vma->obj;
@@ -1056,34 +1052,14 @@ static int reloc_gpu_flush(struct reloc_cache *cache)
i915_request_set_error_once(rq, err);
intel_gt_chipset_flush(rq->engine->gt);
+ i915_request_get(rq);
i915_request_add(rq);
return err;
}
-static int reloc_move_to_gpu(struct i915_request *rq, struct i915_vma *vma)
-{
- struct drm_i915_gem_object *obj = vma->obj;
- int err;
-
- i915_vma_lock(vma);
-
- if (obj->cache_dirty & ~obj->cache_coherent)
- i915_gem_clflush_object(obj, 0);
- obj->write_domain = 0;
-
- err = i915_request_await_object(rq, vma->obj, true);
- if (err == 0)
- err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
-
- i915_vma_unlock(vma);
-
- return err;
-}
-
static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
- struct intel_engine_cs *engine,
- unsigned int len)
+ struct intel_engine_cs *engine)
{
struct reloc_cache *cache = &eb->reloc_cache;
struct intel_gt_buffer_pool_node *pool;
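reloc_gpu_flush() now takes an extra reference on the request before
submitting it: the request must outlive i915_request_add() so that
eb_submit() can await it and the err_reloc unwind can resolve the proxy
fence against it. A minimal sketch of this lifetime pattern (the
intel_context 'ce' and the await against 'other_rq' are illustrative,
not taken from the patch):

        struct i915_request *rq;
        int err;

        rq = i915_request_create(ce);
        if (IS_ERR(rq))
                return PTR_ERR(rq);

        /* ... emit relocation commands into the ring ... */

        i915_request_get(rq);   /* keep rq alive past submission */
        i915_request_add(rq);   /* ownership passes to the scheduler */

        /* later, e.g. in eb_submit() or the error unwind */
        err = i915_request_await_dma_fence(other_rq, &rq->fence);
        i915_request_put(rq);   /* drop the extra reference */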
@@ -1186,30 +1162,6 @@ static u32 *reloc_gpu(struct i915_execbuffer *eb,
u32 *cmd;
int err;
- if (unlikely(!cache->rq)) {
- struct intel_engine_cs *engine = eb->engine;
-
- if (!reloc_can_use_engine(engine)) {
- engine = engine->gt->engine_class[COPY_ENGINE_CLASS][0];
- if (!engine)
- return ERR_PTR(-ENODEV);
- }
-
- err = __reloc_gpu_alloc(eb, engine, len);
- if (unlikely(err))
- return ERR_PTR(err);
- }
-
- if (vma != cache->target) {
- err = reloc_move_to_gpu(cache->rq, vma);
- if (unlikely(err)) {
- i915_request_set_error_once(cache->rq, err);
- return ERR_PTR(err);
- }
-
- cache->target = vma;
- }
-
if (unlikely(cache->rq_size + len >
PAGE_SIZE / sizeof(u32) - RELOC_TAIL)) {
err = reloc_gpu_chain(cache);
@@ -1312,15 +1264,15 @@ static int __reloc_entry_gpu(struct i915_execbuffer *eb,
}
static u64
-relocate_entry(struct i915_vma *vma,
+relocate_entry(struct i915_execbuffer *eb,
+ struct eb_vma *ev,
const struct drm_i915_gem_relocation_entry *reloc,
- struct i915_execbuffer *eb,
const struct i915_vma *target)
{
u64 target_addr = relocation_target(reloc, target);
int err;
- err = __reloc_entry_gpu(eb, vma, reloc->offset, target_addr);
+ err = __reloc_entry_gpu(eb, ev->vma, reloc->offset, target_addr);
if (err)
return err;
@@ -1408,18 +1360,8 @@ eb_relocate_entry(struct i915_execbuffer *eb,
return -EINVAL;
}
- /*
- * If we write into the object, we need to force the synchronisation
- * barrier, either with an asynchronous clflush or if we executed the
- * patching using the GPU (though that should be serialised by the
- * timeline). To be completely sure, and since we are required to
- * do relocations we are already stalling, disable the user's opt
- * out of our synchronisation.
- */
- ev->flags &= ~EXEC_OBJECT_ASYNC;
-
/* and update the user's relocation entry */
- return relocate_entry(ev->vma, reloc, eb, target->vma);
+ return relocate_entry(eb, ev, reloc, target->vma);
}
static int eb_relocate_vma(struct i915_execbuffer *eb, struct eb_vma *ev)
@@ -1500,6 +1442,89 @@ static int eb_relocate_vma(struct i915_execbuffer *eb, struct eb_vma *ev)
return 0;
}
+static int reloc_move_to_gpu(struct reloc_cache *cache, struct eb_vma *ev)
+{
+ struct i915_request *rq = cache->rq;
+ struct i915_vma *vma = ev->vma;
+ struct drm_i915_gem_object *obj = vma->obj;
+ int err;
+
+ if (obj->cache_dirty & ~obj->cache_coherent)
+ i915_gem_clflush_object(obj, 0);
+
+ obj->write_domain = I915_GEM_DOMAIN_RENDER;
+ obj->read_domains = I915_GEM_DOMAIN_RENDER;
+
+ err = i915_request_await_object(rq, obj, true);
+ if (err == 0) {
+ dma_resv_add_excl_fence(vma->resv, cache->fence);
+ ev->flags |= EXEC_OBJECT_ASYNC;
+
+ err = __i915_vma_move_to_active(vma, rq);
+ }
+
+ return err;
+}
+
+static int
+lock_relocs(struct i915_execbuffer *eb)
+{
+ struct ww_acquire_ctx acquire;
+ struct eb_vma *ev;
+ int err = 0;
+
+ ww_acquire_init(&acquire, &reservation_ww_class);
+
+ list_for_each_entry(ev, &eb->relocs, reloc_link) {
+ struct i915_vma *vma = ev->vma;
+
+ err = ww_mutex_lock_interruptible(&vma->resv->lock, &acquire);
+ if (err == -EDEADLK) {
+ struct eb_vma *unlock = ev, *en;
+
+ list_for_each_entry_safe_continue_reverse(unlock, en,
+ &eb->relocs,
+ reloc_link) {
+ ww_mutex_unlock(&unlock->vma->resv->lock);
+ list_move_tail(&unlock->reloc_link,
+ &eb->relocs);
+ }
+
+ GEM_BUG_ON(!list_is_first(&ev->reloc_link,
+ &eb->relocs));
+ err = ww_mutex_lock_slow_interruptible(&vma->resv->lock,
+ &acquire);
+ }
+ if (err)
+ break;
+ }
+
+ ww_acquire_done(&acquire);
+
+ list_for_each_entry_continue_reverse(ev, &eb->relocs, reloc_link) {
+ if (err == 0)
+ err = reloc_move_to_gpu(&eb->reloc_cache, ev);
+ ww_mutex_unlock(&ev->vma->resv->lock);
+ }
+
+ ww_acquire_fini(&acquire);
+
+ return err;
+}
+
+static int reloc_gpu_alloc(struct i915_execbuffer *eb)
+{
+ struct intel_engine_cs *engine = eb->engine;
+
+ if (!reloc_can_use_engine(engine)) {
+ engine = engine->gt->engine_class[COPY_ENGINE_CLASS][0];
+ if (!engine)
+ return -ENODEV;
+ }
+
+ return __reloc_gpu_alloc(eb, engine);
+}
+
static int eb_relocate(struct i915_execbuffer *eb)
{
int err;
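lock_relocs() above follows the wait/wound backoff documented in
Documentation/locking/ww-mutex-design.rst: take each target's
reservation lock in turn and, on -EDEADLK, drop everything already
held (rotating the contended vma to the head of the list) before
reacquiring the contended lock with the _slow variant. The same
pattern over a plain array reads as follows (a sketch only; 'objs', a
hypothetical array of struct dma_resv pointers, and 'count' are not
from the patch):

        struct ww_acquire_ctx ctx;
        int i, j, contended = -1;
        int err;

        ww_acquire_init(&ctx, &reservation_ww_class);
retry:
        if (contended != -1) {
                /* sleep until the lock we lost to is free, then take it */
                err = ww_mutex_lock_slow_interruptible(&objs[contended]->lock,
                                                       &ctx);
                if (err) {
                        ww_acquire_fini(&ctx);
                        return err;
                }
        }

        for (i = 0; i < count; i++) {
                if (i == contended)     /* already held via the slowpath */
                        continue;

                err = ww_mutex_lock_interruptible(&objs[i]->lock, &ctx);
                if (err) {
                        for (j = 0; j < i; j++)
                                ww_mutex_unlock(&objs[j]->lock);
                        if (contended > i)
                                ww_mutex_unlock(&objs[contended]->lock);
                        if (err == -EDEADLK) {
                                contended = i;
                                goto retry;     /* back off, then retry */
                        }
                        ww_acquire_fini(&ctx);
                        return err;
                }
        }

        ww_acquire_done(&ctx);  /* acquisition phase is complete */

        /* ... work under the locks, then ww_mutex_unlock() each and
         * ww_acquire_fini(&ctx) ... */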
@@ -1519,11 +1544,23 @@ static int eb_relocate(struct i915_execbuffer *eb)
struct eb_vma *ev;
int flush;
+ eb->reloc_cache.fence = __dma_fence_create_proxy(0, 0);
+ if (!eb->reloc_cache.fence)
+ return -ENOMEM;
+
+ err = reloc_gpu_alloc(eb);
+ if (err) {
+ dma_fence_put(eb->reloc_cache.fence);
+ eb->reloc_cache.fence = NULL;
+ return err;
+ }
+
+ err = lock_relocs(eb);
list_for_each_entry(ev, &eb->relocs, reloc_link) {
- err = eb_relocate_vma(eb, ev);
- if (err)
- break;
+ if (err == 0)
+ err = eb_relocate_vma(eb, ev);
}
+ GEM_BUG_ON(dma_fence_is_signaled(eb->reloc_cache.fence));
flush = reloc_gpu_flush(&eb->reloc_cache);
if (!err)
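The ordering trick in eb_relocate() is that an exclusive fence must be
published on every relocation target while the reservation locks are
held, before the relocation batch has been built. The proxy fence is
that placeholder. Note that dma-fence-proxy is not an upstream API:
__dma_fence_create_proxy() and dma_fence_proxy_set_target() belong to
the series this patch sits on top of. Condensed, the lifecycle used
here is:

        struct dma_fence *proxy;

        /* before any relocation work: create the placeholder */
        proxy = __dma_fence_create_proxy(0, 0);
        if (!proxy)
                return -ENOMEM;

        /* under the ww locks: publish it on each relocation target */
        dma_resv_add_excl_fence(vma->resv, proxy);

        /* ... emit the relocations and submit the request, rq ... */

        /* redirect waiters from the placeholder to the real fence */
        dma_fence_proxy_set_target(proxy, &rq->fence);
        i915_request_put(rq);
        dma_fence_put(proxy);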
@@ -1904,6 +1941,13 @@ static int eb_submit(struct i915_execbuffer *eb, struct i915_vma *batch)
{
int err;
+ if (eb->reloc_cache.rq) {
+ err = i915_request_await_dma_fence(eb->request,
+ &eb->reloc_cache.rq->fence);
+ if (err)
+ return err;
+ }
+
err = eb_move_to_gpu(eb);
if (err)
return err;
@@ -2461,7 +2505,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
err = eb_parse(&eb);
if (err)
- goto err_vma;
+ goto err_reloc;
/*
* snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
@@ -2490,9 +2534,6 @@ i915_gem_do_execbuffer(struct drm_device *dev,
batch = vma;
}
- /* All GPU relocation batches must be submitted prior to the user rq */
- GEM_BUG_ON(eb.reloc_cache.rq);
-
/* Allocate a request for this batch buffer nice and early. */
eb.request = i915_request_create(eb.context);
if (IS_ERR(eb.request)) {
@@ -2565,6 +2606,13 @@ i915_gem_do_execbuffer(struct drm_device *dev,
err_parse:
if (batch->private)
intel_gt_buffer_pool_put(batch->private);
+err_reloc:
+ if (eb.reloc_cache.fence) {
+ dma_fence_proxy_set_target(eb.reloc_cache.fence,
+ &eb.reloc_cache.rq->fence);
+ i915_request_put(eb.reloc_cache.rq);
+ dma_fence_put(eb.reloc_cache.fence);
+ }
err_vma:
if (eb.trampoline)
i915_vma_unpin(eb.trampoline);
--
2.20.1