[PATCH 3/3] copy client

Matthew Auld matthew.auld at intel.com
Fri May 31 19:13:09 UTC 2019


---
 .../gpu/drm/i915/gem/i915_gem_client_blt.c    | 262 ++++++++++++++++++
 .../gpu/drm/i915/gem/i915_gem_client_blt.h    |   8 +
 .../i915/gem/selftests/i915_gem_client_blt.c  | 120 ++++++++
 3 files changed, 390 insertions(+)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c
index 4899ca1dd76c..063c925099f2 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c
@@ -301,6 +301,268 @@ int i915_gem_schedule_fill_pages_blt(struct drm_i915_gem_object *obj,
 	return err;
 }
 
+struct copy_pages_work { /* state of one scheduled blitter copy */
+	struct dma_fence dma; /* fence added to the objects' reservations; must stay first */
+	struct dma_fence_cb cb; /* callback hooked onto the copy request's fence */
+	struct i915_sw_fence wait; /* awaits the src/dst reservations before the worker runs */
+	struct work_struct work; /* runs copy_pages_worker() */
+	struct irq_work irq_work; /* runs copy_pages_signal_irq_worker() */
+	struct i915_sleeve *sleeve_src; /* source obj/vma/pages */
+	struct i915_sleeve *sleeve_dst; /* destination obj/vma/pages */
+	struct intel_context *ce; /* context the copy request is created on */
+};
+
+static const char *copy_pages_work_driver_name(struct dma_fence *fence)
+{
+	return DRIVER_NAME; /* .get_driver_name hook of copy_pages_work_ops */
+}
+
+static const char *copy_pages_work_timeline_name(struct dma_fence *fence)
+{
+	return "copy"; /* .get_timeline_name hook; these fences track copies, not clears */
+}
+
+static void copy_pages_work_release(struct dma_fence *fence)
+{
+	struct copy_pages_work *copy = container_of(fence, typeof(*copy), dma);
+
+	/* .release hook of the copy fence: drop both sleeves and the sw_fence. */
+	destroy_sleeve(copy->sleeve_src);
+	destroy_sleeve(copy->sleeve_dst);
+	i915_sw_fence_fini(&copy->wait);
+	/* The work is freed through its embedded fence, so dma must be first. */
+	BUILD_BUG_ON(offsetof(typeof(*copy), dma));
+	dma_fence_free(&copy->dma);
+}
+
+static const struct dma_fence_ops copy_pages_work_ops = { /* ops of copy_pages_work.dma */
+	.get_driver_name = copy_pages_work_driver_name,
+	.get_timeline_name = copy_pages_work_timeline_name,
+	.release = copy_pages_work_release,
+};
+
+static void copy_pages_signal_irq_worker(struct irq_work *work)
+{
+	struct dma_fence *fence = &container_of(work, struct copy_pages_work, irq_work)->dma;
+	/* Signal the copy fence, then drop a reference to it. */
+	dma_fence_signal(fence);
+	dma_fence_put(fence);
+}
+
+static void copy_pages_dma_fence_cb(struct dma_fence *fence,
+				    struct dma_fence_cb *cb)
+{
+	struct copy_pages_work *copy = container_of(cb, typeof(*copy), cb);
+	const int error = fence->error;
+	/* Forward a failure of the signalled fence onto the copy fence. */
+	if (error)
+		dma_fence_set_error(&copy->dma, error);
+	/*
+	 * The copy fence itself is signalled from the irq_work, which keeps
+	 * us clear of the locking trouble around the fence spinlock.
+	 */
+	irq_work_queue(&copy->irq_work);
+}
+
+static void copy_pages_worker(struct work_struct *work)
+{
+	struct copy_pages_work *w = container_of(work, typeof(*w), work);
+	struct drm_i915_private *i915 = w->ce->gem_context->i915;
+	struct drm_i915_gem_object *src = w->sleeve_src->obj;
+	struct drm_i915_gem_object *dst = w->sleeve_dst->obj;
+	struct i915_vma *vma_src = w->sleeve_src->vma;
+	struct i915_vma *vma_dst = w->sleeve_dst->vma;
+	struct i915_request *rq;
+	int err = w->dma.error;
+	/* Build and submit the copy request, unless the fence already has an error. */
+	if (unlikely(err))
+		goto out_signal;
+	/* Clean a dirty source; the clflush is only done for struct-page backing. */
+	if (src->cache_dirty) {
+		src->write_domain = 0;
+		if (i915_gem_object_has_struct_page(src))
+			drm_clflush_sg(w->sleeve_src->pages);
+		src->cache_dirty = false;
+	}
+	/* Likewise for the destination. */
+	if (dst->cache_dirty) {
+		dst->write_domain = 0;
+		if (i915_gem_object_has_struct_page(dst))
+			drm_clflush_sg(w->sleeve_dst->pages);
+		dst->cache_dirty = false;
+	}
+	/* Both vmas are pinned and the request is built under struct_mutex. */
+	mutex_lock(&i915->drm.struct_mutex);
+	err = i915_vma_pin(vma_src, 0, 0, PIN_USER);
+	if (unlikely(err))
+		goto out_unlock;
+
+	err = i915_vma_pin(vma_dst, 0, 0, PIN_USER);
+	if (unlikely(err))
+		goto out_unpin_src;
+
+	rq = i915_request_create(w->ce);
+	if (IS_ERR(rq)) {
+		err = PTR_ERR(rq);
+		goto out_unpin_dst;
+	}
+
+	/* There's no way the fence has signalled */
+	if (dma_fence_add_callback(&rq->fence, &w->cb,
+				   copy_pages_dma_fence_cb))
+		GEM_BUG_ON(1);
+	/* Mark both vmas active on the request; only dst is flagged as written. */
+	i915_vma_lock(vma_src);
+	err = i915_vma_move_to_active(vma_src, rq, 0);
+	i915_vma_unlock(vma_src);
+	if (err)
+		goto out_request;
+
+	i915_vma_lock(vma_dst);
+	err = i915_vma_move_to_active(vma_dst, rq, EXEC_OBJECT_WRITE);
+	i915_vma_unlock(vma_dst);
+	if (err)
+		goto out_request;
+	/* The engine's emit_init_breadcrumb hook is optional; call it if set. */
+	if (w->ce->engine->emit_init_breadcrumb) {
+		err = w->ce->engine->emit_init_breadcrumb(rq);
+		if (unlikely(err))
+			goto out_request;
+	}
+
+	err = intel_emit_vma_copy_blt(rq, vma_src, vma_dst);
+out_request:
+	if (unlikely(err)) {
+		i915_request_skip(rq, err);
+		err = 0; /* rq is still added; its fence callback completes w->dma */
+	}
+
+	i915_request_add(rq);
+out_unpin_dst:
+	i915_vma_unpin(vma_dst);
+out_unpin_src:
+	i915_vma_unpin(vma_src);
+out_unlock:
+	mutex_unlock(&i915->drm.struct_mutex);
+out_signal:
+	if (unlikely(err)) { /* no request was submitted: signal the failure here */
+		dma_fence_set_error(&w->dma, err);
+		dma_fence_signal(&w->dma);
+		dma_fence_put(&w->dma);
+	}
+}
+
+static int __i915_sw_fence_call
+copy_pages_work_notify(struct i915_sw_fence *fence,
+			enum i915_sw_fence_notify state)
+{
+	struct copy_pages_work *copy = container_of(fence, typeof(*copy), wait);
+
+	/*
+	 * FENCE_COMPLETE: hand over to copy_pages_worker() via schedule_work().
+	 * FENCE_FREE: drop a reference on the copy fence.
+	 * Any other state is ignored.
+	 */
+	if (state == FENCE_COMPLETE)
+		schedule_work(&copy->work);
+	else if (state == FENCE_FREE)
+		dma_fence_put(&copy->dma);
+
+	return NOTIFY_DONE;
+}
+
+int i915_gem_schedule_copy_pages_blt(struct drm_i915_gem_object *src,
+				     struct drm_i915_gem_object *dst,
+				     struct intel_context *ce,
+				     struct sg_table *pages_src,
+				     struct sg_table *pages_dst,
+				     struct i915_page_sizes *page_sizes_src,
+				     struct i915_page_sizes *page_sizes_dst)
+{
+	struct drm_i915_private *i915 = to_i915(src->base.dev);
+	struct i915_gem_context *ctx = ce->gem_context;
+	struct i915_sleeve *sleeve_src, *sleeve_dst;
+	struct i915_address_space *vm;
+	struct copy_pages_work *work;
+	int err;
+	/* Schedule a deferred blit of src's pages into dst's; returns 0 or -errno. */
+	vm = ctx->ppgtt ? &ctx->ppgtt->vm : &i915->ggtt.vm;
+	/* vm: the context's ppgtt if it has one, else the GGTT; both sleeves use it. */
+	sleeve_src = create_sleeve(vm, src, pages_src, page_sizes_src);
+	if (IS_ERR(sleeve_src))
+		return PTR_ERR(sleeve_src);
+
+	sleeve_dst = create_sleeve(vm, dst, pages_dst, page_sizes_dst);
+	if (IS_ERR(sleeve_dst)) {
+		err = PTR_ERR(sleeve_dst);
+		goto err_destroy_src;
+	}
+
+	work = kmalloc(sizeof(*work), GFP_KERNEL);
+	if (!work) {
+		err = -ENOMEM;
+		goto err_destroy_dst;
+	}
+
+	work->sleeve_src = sleeve_src;
+	work->sleeve_dst = sleeve_dst;
+	work->ce = ce;
+
+	INIT_WORK(&work->work, copy_pages_worker);
+
+	init_irq_work(&work->irq_work, copy_pages_signal_irq_worker);
+	/* work->dma is the fence added to both objects' reservations below. */
+	dma_fence_init(&work->dma,
+		       &copy_pages_work_ops,
+		       &fence_lock,
+		       i915->mm.unordered_timeline,
+		       0);
+	i915_sw_fence_init(&work->wait, copy_pages_work_notify);
+	/* src is only read: await it with write=false and add a shared fence. */
+	i915_gem_object_lock(src);
+	err = i915_sw_fence_await_reservation(&work->wait,
+					      src->resv, NULL,
+					      false, I915_FENCE_TIMEOUT,
+					      I915_FENCE_GFP);
+	if (err >= 0) {
+		err = reservation_object_reserve_shared(src->resv, 1);
+		if (!err)
+			reservation_object_add_shared_fence(src->resv,
+							    &work->dma);
+	}
+	i915_gem_object_unlock(src);
+
+	if (err) {
+		dma_fence_set_error(&work->dma, err);
+		goto err_commit;
+	}
+	/* dst is written: await it with write=true and set the exclusive fence. */
+	i915_gem_object_lock(dst);
+	err = i915_sw_fence_await_reservation(&work->wait,
+					      dst->resv, NULL,
+					      true, I915_FENCE_TIMEOUT,
+					      I915_FENCE_GFP);
+	if (err < 0) {
+		dma_fence_set_error(&work->dma, err);
+	} else {
+		reservation_object_add_excl_fence(dst->resv, &work->dma);
+		err = 0;
+	}
+	i915_gem_object_unlock(dst);
+	/* Errors land here too; the error set on work->dma makes the worker bail. */
+err_commit:
+	dma_fence_get(&work->dma);
+	i915_sw_fence_commit(&work->wait);
+
+	return err;
+
+err_destroy_dst:
+	destroy_sleeve(sleeve_dst);
+err_destroy_src:
+	destroy_sleeve(sleeve_src);
+	return err;
+}
+
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
 #include "selftests/i915_gem_client_blt.c"
 #endif
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.h b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.h
index 3dbd28c22ff5..4cf30cd101ed 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.h
@@ -18,4 +18,12 @@ int i915_gem_schedule_fill_pages_blt(struct drm_i915_gem_object *obj,
 				     struct i915_page_sizes *page_sizes,
 				     u32 value);
 
+int i915_gem_schedule_copy_pages_blt(struct drm_i915_gem_object *src, /* copied from */
+				     struct drm_i915_gem_object *dst, /* copied to */
+				     struct intel_context *ce, /* context the copy request is created on */
+				     struct sg_table *pages_src,
+				     struct sg_table *pages_dst,
+				     struct i915_page_sizes *page_sizes_src,
+				     struct i915_page_sizes *page_sizes_dst); /* returns 0 or -errno */
+
 #endif
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c
index f3a5eb807c1c..2c094939eab9 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c
@@ -111,10 +111,130 @@ static int igt_client_fill(void *arg)
 	return err;
 }
 
+static int igt_client_copy(void *arg)
+{
+	struct intel_context *ce = arg;
+	struct drm_i915_private *i915 = ce->gem_context->i915;
+	struct drm_i915_gem_object *src, *dst;
+	struct rnd_state prng;
+	IGT_TIMEOUT(end);
+	u32 *vaddr;
+	int err = 0;
+	/* Live selftest: blit-copy randomly sized objects and check dst reads as src. */
+	prandom_seed_state(&prng, i915_selftest.random_seed);
+
+	do {
+		u32 sz = prandom_u32_state(&prng) % SZ_32M;
+		u32 val = prandom_u32_state(&prng);
+		u32 i;
+
+		sz = round_up(sz, PAGE_SIZE);
+		/* NOTE(review): sz is 0 if the draw was 0 - confirm that is acceptable */
+		pr_debug("%s with sz=%x, val=%x\n", __func__, sz, val);
+		/* Source object, filled with val through a CPU mapping. */
+		src = i915_gem_object_create_internal(i915, sz);
+		if (IS_ERR(src)) {
+			err = PTR_ERR(src);
+			goto err_flush;
+		}
+
+		vaddr = i915_gem_object_pin_map(src, I915_MAP_WB);
+		if (IS_ERR(vaddr)) {
+			err = PTR_ERR(vaddr);
+			goto err_put_src;
+		}
+
+		memset32(vaddr, val, src->base.size / sizeof(u32));
+
+		i915_gem_object_unpin_map(src);
+
+		if (!(src->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ))
+			src->cache_dirty = true;
+		/* Destination, pre-filled with a different pattern than the source. */
+		dst = i915_gem_object_create_internal(i915, sz);
+		if (IS_ERR(dst)) {
+			err = PTR_ERR(dst);
+			goto err_put_src;
+		}
+
+		vaddr = i915_gem_object_pin_map(dst, I915_MAP_WB);
+		if (IS_ERR(vaddr)) {
+			err = PTR_ERR(vaddr);
+			goto err_put_dst;
+		}
+
+		memset32(vaddr, val ^ 0xdeadbeaf, dst->base.size / sizeof(u32));
+
+		if (!(dst->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
+			dst->cache_dirty = true;
+
+		mutex_lock(&i915->drm.struct_mutex);
+		err = i915_gem_schedule_copy_pages_blt(src, dst, ce,
+						       src->mm.pages,
+						       dst->mm.pages,
+						       &src->mm.page_sizes,
+						       &dst->mm.page_sizes);
+		mutex_unlock(&i915->drm.struct_mutex);
+		if (err)
+			goto err_unpin;
+
+		/*
+		 * XXX: For now do the wait without the object resv lock to
+		 * ensure we don't deadlock.
+		 */
+		err = i915_gem_object_wait(dst,
+					   I915_WAIT_INTERRUPTIBLE |
+					   I915_WAIT_ALL,
+					   MAX_SCHEDULE_TIMEOUT);
+		if (err)
+			goto err_unpin;
+
+		i915_gem_object_lock(dst);
+		err = i915_gem_object_set_to_cpu_domain(dst, false);
+		i915_gem_object_unlock(dst);
+		if (err)
+			goto err_unpin;
+		/* vaddr still maps dst: every dword must now read back as val. */
+		for (i = 0; i < dst->base.size / sizeof(u32); ++i) {
+			if (vaddr[i] != val) {
+				pr_err("vaddr[%u]=%x, expected=%x\n", i,
+				       vaddr[i], val);
+				err = -EINVAL;
+				goto err_unpin;
+			}
+		}
+
+		i915_gem_object_unpin_map(dst);
+
+		i915_gem_object_put(src);
+		i915_gem_object_put(dst);
+	} while (!time_after(jiffies, end));
+
+	goto err_flush;
+
+err_unpin:
+	i915_gem_object_unpin_map(dst);
+err_put_dst:
+	i915_gem_object_put(dst);
+err_put_src:
+	i915_gem_object_put(src);
+err_flush:
+	mutex_lock(&i915->drm.struct_mutex);
+	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+		err = -EIO;
+	mutex_unlock(&i915->drm.struct_mutex);
+	/* Running out of memory is not reported as a test failure. */
+	if (err == -ENOMEM)
+		err = 0;
+
+	return err;
+}
+
 int i915_gem_client_blt_live_selftests(struct drm_i915_private *i915)
 {
 	static const struct i915_subtest tests[] = {
 		SUBTEST(igt_client_fill),
+		SUBTEST(igt_client_copy),
 	};
 
 	if (i915_terminally_wedged(i915))
-- 
2.20.1



More information about the Intel-gfx-trybot mailing list