[PATCH 12/14] drm/i915: Lockless object unreference and free
Chris Wilson
chris at chris-wilson.co.uk
Sat Aug 6 13:34:06 UTC 2016
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
---
drivers/dma-buf/fence.c | 58 +++
drivers/dma-buf/reservation.c | 48 +++
drivers/gpu/drm/i915/i915_debugfs.c | 2 +-
drivers/gpu/drm/i915/i915_drv.c | 2 +-
drivers/gpu/drm/i915/i915_drv.h | 12 +-
drivers/gpu/drm/i915/i915_gem.c | 32 +-
drivers/gpu/drm/i915/i915_gem_execbuffer.c | 66 +++-
drivers/gpu/drm/i915/i915_gem_gtt.c | 54 +--
drivers/gpu/drm/i915/i915_gem_gtt.h | 1 -
drivers/gpu/drm/i915/i915_gem_request.c | 12 +-
drivers/gpu/drm/i915/i915_gem_request.h | 24 +-
drivers/gpu/drm/i915/i915_gem_tiling.c | 6 +-
drivers/gpu/drm/i915/i915_gem_userptr.c | 4 +-
drivers/gpu/drm/i915/intel_display.c | 161 ++++----
drivers/gpu/drm/i915/intel_drv.h | 7 +-
drivers/gpu/drm/i915/intel_fbdev.c | 21 +-
drivers/gpu/drm/i915/intel_guc_loader.c | 3 -
drivers/gpu/drm/i915/intel_lrc.c | 419 +++++++--------------
drivers/gpu/drm/i915/intel_lrc.h | 2 -
drivers/gpu/drm/i915/intel_overlay.c | 4 +-
drivers/gpu/drm/i915/intel_pm.c | 2 +-
drivers/gpu/drm/i915/intel_ringbuffer.h | 7 +-
include/linux/fence.h | 6 +
include/linux/kfence.h | 86 +++++
include/linux/reservation.h | 7 +
kernel/Makefile | 2 +-
kernel/kfence.c | 497 ++++++++++++++++++++++++
lib/Kconfig.debug | 23 ++
lib/Makefile | 1 +
lib/test-kfence.c | 580 +++++++++++++++++++++++++++++
tools/testing/selftests/lib/kfence.sh | 10 +
31 files changed, 1683 insertions(+), 476 deletions(-)
create mode 100644 include/linux/kfence.h
create mode 100644 kernel/kfence.c
create mode 100644 lib/test-kfence.c
create mode 100755 tools/testing/selftests/lib/kfence.sh
diff --git a/drivers/dma-buf/fence.c b/drivers/dma-buf/fence.c
index 4d51f9e83fa8..97091894ff16 100644
--- a/drivers/dma-buf/fence.c
+++ b/drivers/dma-buf/fence.c
@@ -22,6 +22,7 @@
#include <linux/export.h>
#include <linux/atomic.h>
#include <linux/fence.h>
+#include <linux/kfence.h>
#define CREATE_TRACE_POINTS
#include <trace/events/fence.h>
@@ -530,3 +531,60 @@ fence_init(struct fence *fence, const struct fence_ops *ops,
trace_fence_init(fence);
}
EXPORT_SYMBOL(fence_init);
+
+struct dma_fence_cb { /* ties one DMA-fence callback to the kfence waiting on it */
+ struct fence_cb base; /* node passed to fence_add_callback(); container_of() anchor */
+ struct kfence *fence; /* kfence completed and released by dma_kfence_wake() */
+};
+/*
+ * Callback registered on a DMA fence by kfence_await_dma_fence(); it is also
+ * called directly from there when fence_add_callback() reports failure.
+ * Completes the waiting kfence, drops the reference held on it and frees the
+ * dma_fence_cb tracker. @dma itself is not used.
+ */
+static void dma_kfence_wake(struct fence *dma, struct fence_cb *data)
+{
+ struct dma_fence_cb *cb = container_of(data, typeof(*cb), base); /* recover the tracker from its embedded node */
+
+ kfence_complete(cb->fence); /* NOTE(review): presumably pairs with kfence_await() at setup - confirm */
+ kfence_put(cb->fence); /* drops the reference taken with kfence_get() at setup */
+ kfree(cb);
+}
+
+/**
+ * kfence_await_dma_fence - set the fence to wait upon a DMA fence
+ * @fence: this kfence
+ * @dma: target DMA fence to wait upon
+ * @gfp: the allowed allocation type
+ *
+ * kfence_await_dma_fence() causes the @fence to wait upon completion of a
+ * DMA fence.
+ *
+ * If the callback tracker cannot be allocated and @gfp allows blocking, the
+ * function instead waits for @dma inline via fence_wait() and returns its
+ * result; if blocking is not allowed it returns -ENOMEM.
+ *
+ * Returns 1 if the @fence was successfully added to the waitqueue of @dma, 0
+ * if @dma was already signaled (and so not added), or a negative error code.
+ */
+int kfence_await_dma_fence(struct kfence *fence, struct fence *dma, gfp_t gfp)
+{
+ struct dma_fence_cb *cb;
+ int ret;
+
+ if (fence_is_signaled(dma)) /* nothing left to wait for */
+ return 0;
+
+ cb = kmalloc(sizeof(*cb), gfp);
+ if (!cb) {
+ if (!gfpflags_allow_blocking(gfp))
+ return -ENOMEM;
+
+ return fence_wait(dma, false); /* no tracker, but blocking is permitted: wait inline */
+ }
+
+ cb->fence = kfence_get(fence); /* reference released in dma_kfence_wake() */
+ kfence_await(fence);
+
+ ret = fence_add_callback(dma, &cb->base, dma_kfence_wake);
+ if (ret == 0) {
+ ret = 1; /* callback installed: report that a wait is pending */
+ } else {
+ dma_kfence_wake(dma, &cb->base); /* callback not installed: undo the setup by hand */
+ if (ret == -ENOENT) /* fence already signaled */
+ ret = 0;
+ }
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(kfence_await_dma_fence);
diff --git a/drivers/dma-buf/reservation.c b/drivers/dma-buf/reservation.c
index 9566a62ad8e3..138b792af0c3 100644
--- a/drivers/dma-buf/reservation.c
+++ b/drivers/dma-buf/reservation.c
@@ -543,3 +543,51 @@ unlock_retry:
goto retry;
}
EXPORT_SYMBOL_GPL(reservation_object_test_signaled_rcu);
+
+/**
+ * kfence_await_reservation - set the fence to wait upon a reservation_object
+ * @fence: this kfence
+ * @resv: target reservation_object (collection of DMA fences) to wait upon
+ * @write: true to wait for read/write access (every shared fence plus the
+ *         exclusive fence), false to wait for read access (exclusive fence
+ *         only)
+ * @gfp: the allowed allocation type
+ *
+ * kfence_await_reservation() causes the @fence to wait upon completion of the
+ * reservation object (a collection of DMA fences), either for read access
+ * or for read/write access.
+ *
+ * Returns 1 if the @fence was successfully added to at least one waitqueue of
+ * @resv, 0 if @resv was already signaled (and so not added), or a negative
+ * error code. The error code is returned exactly as reported by
+ * reservation_object_get_fences_rcu() or kfence_await_dma_fence().
+ */
+int kfence_await_reservation(struct kfence *fence,
+			     struct reservation_object *resv,
+			     bool write,
+			     gfp_t gfp)
+{
+	struct fence *excl, **shared;
+	unsigned int count, i;
+	int pending = 0;
+	int ret;
+
+	ret = reservation_object_get_fences_rcu(resv, &excl, &count, &shared);
+	if (ret)
+		return ret;
+
+	/*
+	 * Keep the per-fence result (ret) apart from the accumulated
+	 * "something is pending" flag: OR-ing a negative errno into a
+	 * previous 1 would flip its low bit and corrupt the error code
+	 * (e.g. -ENOMEM | 1 == -EAGAIN).
+	 */
+	if (write) {
+		for (i = 0; i < count; i++) {
+			ret = kfence_await_dma_fence(fence, shared[i], gfp);
+			if (ret < 0)
+				goto out;
+
+			pending |= ret;
+		}
+	}
+
+	if (excl) {
+		ret = kfence_await_dma_fence(fence, excl, gfp);
+		if (ret < 0)
+			goto out;
+
+		pending |= ret;
+	}
+
+	ret = pending;
+out:
+	/* Drop the references handed back by reservation_object_get_fences_rcu(). */
+	fence_put(excl);
+	for (i = 0; i < count; i++)
+		fence_put(shared[i]);
+	kfree(shared);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(kfence_await_reservation);
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 60bcdb228207..34f44050af25 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -2190,7 +2190,7 @@ static int i915_execlists(struct seq_file *m, void *data)
status_pointer = I915_READ(RING_CONTEXT_STATUS_PTR(engine));
seq_printf(m, "\tStatus pointer: 0x%08X\n", status_pointer);
- read_pointer = engine->next_context_status_buffer;
+ read_pointer = GEN8_CSB_READ_PTR(status_pointer);
write_pointer = GEN8_CSB_WRITE_PTR(status_pointer);
if (read_pointer > write_pointer)
write_pointer += GEN8_CSB_ENTRIES;
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index ccae2ec4c78e..4036dbe1334a 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -2568,7 +2568,7 @@ static struct drm_driver driver = {
.set_busid = drm_pci_set_busid,
.gem_close_object = i915_gem_close_object,
- .gem_free_object = i915_gem_free_object,
+ .gem_free_object_unlocked = i915_gem_free_object,
.gem_vm_ops = &i915_gem_vm_ops,
.prime_handle_to_fd = drm_gem_prime_handle_to_fd,
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index e8778c97eaa5..568ff1b35f8b 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2275,7 +2275,7 @@ struct drm_i915_gem_object {
struct i915_gem_active last_write;
/** References from framebuffers, locks out tiling changes. */
- unsigned long framebuffer_references;
+ atomic_t framebuffer_references;
/** Record of address bit 17 of each page at last unbind. */
unsigned long *bit_17;
@@ -2331,19 +2331,11 @@ __attribute__((nonnull))
static inline void
i915_gem_object_put(struct drm_i915_gem_object *obj)
{
- drm_gem_object_unreference(&obj->base);
+ __drm_gem_object_unreference(&obj->base);
}
__deprecated
extern void drm_gem_object_unreference(struct drm_gem_object *);
-
-__attribute__((nonnull))
-static inline void
-i915_gem_object_put_unlocked(struct drm_i915_gem_object *obj)
-{
- drm_gem_object_unreference_unlocked(&obj->base);
-}
-
__deprecated
extern void drm_gem_object_unreference_unlocked(struct drm_gem_object *);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 1cb71630d61a..c03fea6b0677 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -501,7 +501,7 @@ i915_gem_create(struct drm_file *file,
ret = drm_gem_handle_create(file, &obj->base, &handle);
/* drop reference from allocate - handle holds it now */
- i915_gem_object_put_unlocked(obj);
+ i915_gem_object_put(obj);
if (ret)
return ret;
@@ -981,7 +981,7 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data,
i915_gem_object_unpin_pages(obj);
out:
- i915_gem_object_put_unlocked(obj);
+ i915_gem_object_put(obj);
return ret;
}
@@ -1310,7 +1310,7 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
i915_gem_object_unpin_pages(obj);
err:
- i915_gem_object_put_unlocked(obj);
+ i915_gem_object_put(obj);
return ret;
}
@@ -1387,7 +1387,7 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
err_pages:
i915_gem_object_unpin_pages(obj);
err_unlocked:
- i915_gem_object_put_unlocked(obj);
+ i915_gem_object_put(obj);
return ret;
}
@@ -1418,7 +1418,7 @@ i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
}
}
- i915_gem_object_put_unlocked(obj);
+ i915_gem_object_put(obj);
return err;
}
@@ -1464,7 +1464,7 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
* pages from.
*/
if (!obj->base.filp) {
- i915_gem_object_put_unlocked(obj);
+ i915_gem_object_put(obj);
return -EINVAL;
}
@@ -1476,7 +1476,7 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
struct vm_area_struct *vma;
if (down_write_killable(&mm->mmap_sem)) {
- i915_gem_object_put_unlocked(obj);
+ i915_gem_object_put(obj);
return -EINTR;
}
vma = find_vma(mm, addr);
@@ -1490,7 +1490,7 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
/* This may race, but that's ok, it only gets set */
WRITE_ONCE(obj->has_wc_mmap, true);
}
- i915_gem_object_put_unlocked(obj);
+ i915_gem_object_put(obj);
if (IS_ERR((void *)addr))
return addr;
@@ -1841,7 +1841,7 @@ i915_gem_mmap_gtt(struct drm_file *file,
if (ret == 0)
*offset = drm_vma_node_offset_addr(&obj->base.vma_node);
- i915_gem_object_put_unlocked(obj);
+ i915_gem_object_put(obj);
return ret;
}
@@ -2409,7 +2409,11 @@ static void i915_gem_reset_engine_cleanup(struct intel_engine_cs *engine)
/* Ensure irq handler finishes or is cancelled. */
tasklet_kill(&engine->irq_tasklet);
- intel_execlists_cancel_requests(engine);
+ INIT_LIST_HEAD(&engine->execlist_queue);
+ i915_gem_request_assign(&engine->execlist_port[0].request,
+ NULL);
+ i915_gem_request_assign(&engine->execlist_port[1].request,
+ NULL);
}
/*
@@ -2610,7 +2614,7 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
break;
}
- i915_gem_object_put_unlocked(obj);
+ i915_gem_object_put(obj);
return ret;
}
@@ -3311,7 +3315,7 @@ int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
break;
}
- i915_gem_object_put_unlocked(obj);
+ i915_gem_object_put(obj);
return 0;
}
@@ -3828,7 +3832,7 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data,
rcu_read_unlock();
}
- i915_gem_object_put_unlocked(obj);
+ i915_gem_object_put(obj);
return 0;
}
@@ -3884,7 +3888,7 @@ i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
mutex_unlock(&obj->mm.lock);
err:
- i915_gem_object_put_unlocked(obj);
+ i915_gem_object_put(obj);
return ret;
}
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 520145cb2580..150a6cff6f2f 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1352,6 +1352,68 @@ static unsigned int eb_other_engines(struct i915_execbuffer *eb)
}
static int
+__eb_sync(struct drm_i915_gem_request *to,
+ struct drm_i915_gem_request *from)
+{
+ int idx, ret;
+ /*
+  * Order request @to after request @from when they run on different
+  * engines, either by waiting on @from via i915_wait_request() or through
+  * the engine's semaphore.sync_to() hook. Returns 0 on success or when
+  * no synchronisation is needed, otherwise the error from the callee.
+  */
+ if (to->engine == from->engine) /* same engine: nothing is emitted here */
+ return 0;
+
+ idx = intel_engine_sync_index(from->engine, to->engine);
+ if (from->fence.seqno <= from->engine->semaphore.sync_seqno[idx]) /* a sync at least this recent is already recorded */
+ return 0;
+
+ trace_i915_gem_ring_sync_to(to, from);
+ if (!i915.semaphores) { /* semaphores disabled: wait on the request instead */
+ ret = i915_wait_request(from, true, NULL, NO_WAITBOOST);
+ if (ret)
+ return ret;
+ } else {
+ ret = to->engine->semaphore.sync_to(to, from);
+ if (ret)
+ return ret;
+ }
+
+ from->engine->semaphore.sync_seqno[idx] = from->fence.seqno; /* record the seqno so later calls can take the early-out above */
+ return 0;
+}
+/*
+ * Synchronise request @to against the requests still tracked on @obj.
+ * For a write, the obj->last_read[] slots selected by
+ * i915_gem_object_get_active() are visited; for a read only obj->last_write
+ * is. Returns the first error reported by __eb_sync(), or 0 if every tracked
+ * request was handled.
+ */
+static int
+eb_sync(struct drm_i915_gem_object *obj,
+ struct drm_i915_gem_request *to,
+ bool write)
+{
+ struct i915_gem_active *active;
+ unsigned long active_mask;
+ int idx;
+
+ if (write) {
+ active_mask = i915_gem_object_get_active(obj);
+ active = obj->last_read;
+ } else {
+ active_mask = 1; /* NOTE(review): presumably selects only index 0 of for_each_active() - confirm against the macro */
+ active = &obj->last_write;
+ }
+
+ for_each_active(active_mask, idx) {
+ struct drm_i915_gem_request *request;
+ int ret;
+
+ request = i915_gem_active_peek(&active[idx],
+ &obj->base.dev->struct_mutex);
+ if (!request) /* no request tracked in this slot */
+ continue;
+
+ ret = __eb_sync(to, request);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+static int
eb_move_to_gpu(struct i915_execbuffer *eb)
{
const unsigned int other_rings = eb_other_engines(eb);
@@ -1366,8 +1428,8 @@ eb_move_to_gpu(struct i915_execbuffer *eb)
struct drm_i915_gem_object *obj = vma->obj;
if (obj->flags & other_rings) {
- ret = i915_gem_object_sync(obj, eb->request,
- entry->flags & EXEC_OBJECT_WRITE);
+ ret = eb_sync(obj, eb->request,
+ entry->flags & EXEC_OBJECT_WRITE);
if (ret)
return ret;
}
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 3aeced17d369..0b7e7a6bb65c 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -2515,14 +2515,16 @@ static int ggtt_bind_vma(struct i915_vma *vma,
{
struct drm_i915_private *i915 = to_i915(vma->vm->dev);
struct drm_i915_gem_object *obj = vma->obj;
- u32 pte_flags = 0;
- int ret;
+ u32 pte_flags;
- ret = i915_get_ggtt_vma_pages(vma);
- if (ret)
- return ret;
+ if (!vma->ggtt_view.pages) {
+ int ret = i915_get_ggtt_vma_pages(vma);
+ if (ret)
+ return ret;
+ }
/* Currently applicable only to VLV */
+ pte_flags = 0;
if (obj->gt_ro)
pte_flags |= PTE_READ_ONLY;
@@ -2548,18 +2550,18 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma,
{
struct drm_i915_private *i915 = to_i915(vma->vm->dev);
u32 pte_flags;
- int ret;
- ret = i915_get_ggtt_vma_pages(vma);
- if (ret)
- return ret;
+ if (!vma->ggtt_view.pages) {
+ int ret = i915_get_ggtt_vma_pages(vma);
+ if (ret)
+ return ret;
+ }
/* Currently applicable only to VLV */
pte_flags = 0;
if (vma->obj->gt_ro)
pte_flags |= PTE_READ_ONLY;
-
if (flags & I915_VMA_GLOBAL_BIND) {
intel_runtime_pm_get(i915);
vma->vm->insert_entries(vma->vm,
@@ -3465,7 +3467,7 @@ rotate_pages(const dma_addr_t *in, unsigned int offset,
return sg;
}
-static struct sg_table *
+noinline static struct sg_table *
intel_rotate_fb_obj_pages(struct intel_rotation_info *rot_info,
struct drm_i915_gem_object *obj)
{
@@ -3555,7 +3557,7 @@ err_st_alloc:
return ERR_PTR(ret);
}
-static struct sg_table *
+noinline static struct sg_table *
intel_partial_pages(const struct i915_ggtt_view *view,
struct drm_i915_gem_object *obj)
{
@@ -3599,34 +3601,36 @@ err_st_alloc:
static int
i915_get_ggtt_vma_pages(struct i915_vma *vma)
{
- int ret = 0;
+ int ret;
- if (vma->ggtt_view.pages)
+ switch (vma->ggtt_view.type) {
+ case I915_GGTT_VIEW_NORMAL:
+ vma->ggtt_view.pages = vma->obj->mm.pages;
return 0;
- if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL)
- vma->ggtt_view.pages = vma->obj->mm.pages;
- else if (vma->ggtt_view.type == I915_GGTT_VIEW_ROTATED)
+ case I915_GGTT_VIEW_ROTATED:
vma->ggtt_view.pages =
intel_rotate_fb_obj_pages(&vma->ggtt_view.params.rotated, vma->obj);
- else if (vma->ggtt_view.type == I915_GGTT_VIEW_PARTIAL)
+ break;
+
+ case I915_GGTT_VIEW_PARTIAL:
vma->ggtt_view.pages =
intel_partial_pages(&vma->ggtt_view, vma->obj);
- else
+ break;
+
+ default:
WARN_ONCE(1, "GGTT view %u not implemented!\n",
vma->ggtt_view.type);
+ return -EINVAL;
+ }
- if (!vma->ggtt_view.pages) {
- DRM_ERROR("Failed to get pages for GGTT view type %u!\n",
- vma->ggtt_view.type);
- ret = -EINVAL;
- } else if (IS_ERR(vma->ggtt_view.pages)) {
+ ret = 0;
+ if (IS_ERR(vma->ggtt_view.pages)) {
ret = PTR_ERR(vma->ggtt_view.pages);
vma->ggtt_view.pages = NULL;
DRM_ERROR("Failed to get pages for VMA view type %u (%d)!\n",
vma->ggtt_view.type, ret);
}
-
return ret;
}
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index 05b5eda5c4a3..da71fdfd97ce 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -465,7 +465,6 @@ struct i915_hw_ppgtt {
gen6_pte_t __iomem *pd_addr;
- int (*enable)(struct i915_hw_ppgtt *ppgtt);
int (*switch_mm)(struct i915_hw_ppgtt *ppgtt,
struct drm_i915_gem_request *req);
void (*debug_dump)(struct i915_hw_ppgtt *ppgtt, struct seq_file *m);
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 8f98a7e34212..c23008c63470 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -380,7 +380,6 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
req->previous_context = NULL;
req->file_priv = NULL;
req->batch = NULL;
- req->elsp_submitted = 0;
/*
* Reserve space in the ring buffer for all the commands required to
@@ -725,16 +724,18 @@ complete:
return ret;
}
-static void engine_retire_requests(struct intel_engine_cs *engine)
+static bool engine_retire_requests(struct intel_engine_cs *engine)
{
struct drm_i915_gem_request *request, *next;
list_for_each_entry_safe(request, next, &engine->request_list, link) {
if (!i915_gem_request_completed(request))
- break;
+ return false;
i915_gem_request_retire(request);
}
+
+ return true;
}
void i915_gem_retire_requests(struct drm_i915_private *dev_priv)
@@ -748,9 +749,8 @@ void i915_gem_retire_requests(struct drm_i915_private *dev_priv)
GEM_BUG_ON(!dev_priv->gt.awake);
- for_each_engine(engine, dev_priv) {
- engine_retire_requests(engine);
- if (!intel_engine_is_active(engine))
+ for_each_engine_masked(engine, dev_priv, dev_priv->gt.active_engines) {
+ if (engine_retire_requests(engine))
dev_priv->gt.active_engines &= ~intel_engine_flag(engine);
}
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index 1f83a38e69b4..edca7f74d471 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -101,6 +101,9 @@ struct drm_i915_gem_request {
/** Position in the ringbuffer of the end of the whole request */
u32 tail;
+ /** Position in the ringbuffer after the end of the whole request */
+ u32 wa_tail;
+
/** Preallocate space in the ringbuffer for the emitting the request */
u32 reserved_space;
@@ -134,27 +137,8 @@ struct drm_i915_gem_request {
/** file_priv list entry for this request */
struct list_head client_link;
- /**
- * The ELSP only accepts two elements at a time, so we queue
- * context/tail pairs on a given queue (ring->execlist_queue) until the
- * hardware is available. The queue serves a double purpose: we also use
- * it to keep track of the up to 2 contexts currently in the hardware
- * (usually one in execution and the other queued up by the GPU): We
- * only remove elements from the head of the queue when the hardware
- * informs us that an element has been completed.
- *
- * All accesses to the queue are mediated by a spinlock
- * (ring->execlist_lock).
- */
-
- /** Execlist link in the submission queue.*/
+ /** Link in the execlist submission queue, guarded by execlist_lock. */
struct list_head execlist_link;
-
- /** Execlists no. of times this request has been sent to the ELSP */
- int elsp_submitted;
-
- /** Execlists context hardware id. */
- unsigned int ctx_hw_id;
};
extern const struct fence_ops i915_fence_ops;
diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
index a4a5cb6f1781..b3512cf9c921 100644
--- a/drivers/gpu/drm/i915/i915_gem_tiling.c
+++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
@@ -200,12 +200,12 @@ i915_gem_set_tiling(struct drm_device *dev, void *data,
if (!i915_tiling_ok(dev,
args->stride, obj->base.size, args->tiling_mode)) {
- i915_gem_object_put_unlocked(obj);
+ i915_gem_object_put(obj);
return -EINVAL;
}
mutex_lock(&dev->struct_mutex);
- if (obj->pin_display || obj->framebuffer_references) {
+ if (obj->pin_display || atomic_read(&obj->framebuffer_references)) {
ret = -EBUSY;
goto err;
}
@@ -353,6 +353,6 @@ i915_gem_get_tiling(struct drm_device *dev, void *data,
if (args->swizzle_mode == I915_BIT_6_SWIZZLE_9_10_17)
args->swizzle_mode = I915_BIT_6_SWIZZLE_9_10;
- i915_gem_object_put_unlocked(obj);
+ i915_gem_object_put(obj);
return 0;
}
diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
index 257cc3cc8468..4a00c7673bed 100644
--- a/drivers/gpu/drm/i915/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
@@ -550,7 +550,7 @@ __i915_gem_userptr_get_pages_worker(struct work_struct *_work)
release_pages(pvec, pinned, 0);
drm_free_large(pvec);
- i915_gem_object_put_unlocked(obj);
+ i915_gem_object_put(obj);
put_task_struct(work->task);
kfree(work);
}
@@ -810,7 +810,7 @@ i915_gem_userptr_ioctl(struct drm_device *dev, void *data, struct drm_file *file
ret = drm_gem_handle_create(file, &obj->base, &handle);
/* drop reference from allocate - handle holds it now */
- i915_gem_object_put_unlocked(obj);
+ i915_gem_object_put(obj);
if (ret)
return ret;
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 71ad09ac374b..9979fce88ea7 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -97,10 +97,9 @@ static void i9xx_crtc_clock_get(struct intel_crtc *crtc,
static void ironlake_pch_clock_get(struct intel_crtc *crtc,
struct intel_crtc_state *pipe_config);
-static int intel_framebuffer_init(struct drm_device *dev,
- struct intel_framebuffer *ifb,
- struct drm_mode_fb_cmd2 *mode_cmd,
- struct drm_i915_gem_object *obj);
+static int intel_framebuffer_init(struct intel_framebuffer *ifb,
+ struct drm_i915_gem_object *obj,
+ struct drm_mode_fb_cmd2 *mode_cmd);
static void i9xx_set_pipeconf(struct intel_crtc *intel_crtc);
static void intel_set_pipe_timings(struct intel_crtc *intel_crtc);
static void intel_set_pipe_src_size(struct intel_crtc *intel_crtc);
@@ -2114,11 +2113,13 @@ static void intel_tile_dims(const struct drm_i915_private *dev_priv,
}
unsigned int
-intel_fb_align_height(struct drm_device *dev, unsigned int height,
- uint32_t pixel_format, uint64_t fb_modifier)
+intel_fb_align_height(struct drm_i915_private *dev_priv,
+ unsigned int height,
+ uint32_t pixel_format,
+ uint64_t fb_modifier)
{
unsigned int cpp = drm_format_plane_cpp(pixel_format, 0);
- unsigned int tile_height = intel_tile_height(to_i915(dev), fb_modifier, cpp);
+ unsigned int tile_height = intel_tile_height(dev_priv, fb_modifier, cpp);
return ALIGN(height, tile_height);
}
@@ -2446,15 +2447,13 @@ intel_alloc_initial_plane_obj(struct intel_crtc *crtc,
return false;
mutex_lock(&dev->struct_mutex);
-
obj = i915_gem_object_create_stolen_for_preallocated(dev,
base_aligned,
base_aligned,
size_aligned);
- if (!obj) {
- mutex_unlock(&dev->struct_mutex);
+ mutex_unlock(&dev->struct_mutex);
+ if (!obj)
return false;
- }
if (plane_config->tiling == I915_TILING_X)
obj->tiling_and_stride = fb->pitches[0] | I915_TILING_X;
@@ -2466,13 +2465,11 @@ intel_alloc_initial_plane_obj(struct intel_crtc *crtc,
mode_cmd.modifier[0] = fb->modifier[0];
mode_cmd.flags = DRM_MODE_FB_MODIFIERS;
- if (intel_framebuffer_init(dev, to_intel_framebuffer(fb),
- &mode_cmd, obj)) {
+ if (intel_framebuffer_init(to_intel_framebuffer(fb), obj, &mode_cmd)) {
DRM_DEBUG_KMS("intel fb init failed\n");
goto out_unref_obj;
}
- mutex_unlock(&dev->struct_mutex);
DRM_DEBUG_KMS("initial plane fb obj %p\n", obj);
return true;
@@ -8156,7 +8153,8 @@ i9xx_get_initial_plane_config(struct intel_crtc *crtc,
val = I915_READ(DSPSTRIDE(pipe));
fb->pitches[0] = val & 0xffffffc0;
- aligned_height = intel_fb_align_height(dev, fb->height,
+ aligned_height = intel_fb_align_height(dev_priv,
+ fb->height,
fb->pixel_format,
fb->modifier[0]);
@@ -9180,7 +9178,8 @@ skylake_get_initial_plane_config(struct intel_crtc *crtc,
fb->pixel_format);
fb->pitches[0] = (val & 0x3ff) * stride_mult;
- aligned_height = intel_fb_align_height(dev, fb->height,
+ aligned_height = intel_fb_align_height(dev_priv,
+ fb->height,
fb->pixel_format,
fb->modifier[0]);
@@ -9277,7 +9276,8 @@ ironlake_get_initial_plane_config(struct intel_crtc *crtc,
val = I915_READ(DSPSTRIDE(pipe));
fb->pitches[0] = val & 0xffffffc0;
- aligned_height = intel_fb_align_height(dev, fb->height,
+ aligned_height = intel_fb_align_height(dev_priv,
+ fb->height,
fb->pixel_format,
fb->modifier[0]);
@@ -10348,9 +10348,8 @@ static struct drm_display_mode load_detect_mode = {
};
struct drm_framebuffer *
-__intel_framebuffer_create(struct drm_device *dev,
- struct drm_mode_fb_cmd2 *mode_cmd,
- struct drm_i915_gem_object *obj)
+intel_framebuffer_create(struct drm_i915_gem_object *obj,
+ struct drm_mode_fb_cmd2 *mode_cmd)
{
struct intel_framebuffer *intel_fb;
int ret;
@@ -10359,7 +10358,7 @@ __intel_framebuffer_create(struct drm_device *dev,
if (!intel_fb)
return ERR_PTR(-ENOMEM);
- ret = intel_framebuffer_init(dev, intel_fb, mode_cmd, obj);
+ ret = intel_framebuffer_init(intel_fb, obj, mode_cmd);
if (ret)
goto err;
@@ -10370,23 +10369,6 @@ err:
return ERR_PTR(ret);
}
-static struct drm_framebuffer *
-intel_framebuffer_create(struct drm_device *dev,
- struct drm_mode_fb_cmd2 *mode_cmd,
- struct drm_i915_gem_object *obj)
-{
- struct drm_framebuffer *fb;
- int ret;
-
- ret = i915_mutex_lock_interruptible(dev);
- if (ret)
- return ERR_PTR(ret);
- fb = __intel_framebuffer_create(dev, mode_cmd, obj);
- mutex_unlock(&dev->struct_mutex);
-
- return fb;
-}
-
static u32
intel_framebuffer_pitch_for_width(int width, int bpp)
{
@@ -10421,9 +10403,9 @@ intel_framebuffer_create_for_mode(struct drm_device *dev,
bpp);
mode_cmd.pixel_format = drm_mode_legacy_fb_format(bpp, depth);
- fb = intel_framebuffer_create(dev, &mode_cmd, obj);
+ fb = intel_framebuffer_create(obj, &mode_cmd);
if (IS_ERR(fb))
- i915_gem_object_put_unlocked(obj);
+ i915_gem_object_put(obj);
return fb;
}
@@ -11737,7 +11719,7 @@ cleanup:
crtc->primary->fb = old_fb;
update_state_fb(crtc->primary);
- i915_gem_object_put_unlocked(obj);
+ i915_gem_object_put(obj);
drm_framebuffer_unreference(work->old_fb);
spin_lock_irq(&dev->event_lock);
@@ -14833,14 +14815,14 @@ static void intel_setup_outputs(struct drm_device *dev)
static void intel_user_framebuffer_destroy(struct drm_framebuffer *fb)
{
- struct drm_device *dev = fb->dev;
struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb);
drm_framebuffer_cleanup(fb);
- mutex_lock(&dev->struct_mutex);
- WARN_ON(!intel_fb->obj->framebuffer_references--);
+
+ WARN_ON(atomic_read(&intel_fb->obj->framebuffer_references) == 0);
+ atomic_dec(&intel_fb->obj->framebuffer_references);
i915_gem_object_put(intel_fb->obj);
- mutex_unlock(&dev->struct_mutex);
+
kfree(intel_fb);
}
@@ -14883,10 +14865,11 @@ static const struct drm_framebuffer_funcs intel_fb_funcs = {
};
static
-u32 intel_fb_pitch_limit(struct drm_device *dev, uint64_t fb_modifier,
+u32 intel_fb_pitch_limit(struct drm_i915_private *dev_priv,
+ uint64_t fb_modifier,
uint32_t pixel_format)
{
- u32 gen = INTEL_INFO(dev)->gen;
+ u32 gen = INTEL_GEN(dev_priv);
if (gen >= 9) {
int cpp = drm_format_plane_cpp(pixel_format, 0);
@@ -14895,7 +14878,7 @@ u32 intel_fb_pitch_limit(struct drm_device *dev, uint64_t fb_modifier,
* pixels and 32K bytes."
*/
return min(8192 * cpp, 32768);
- } else if (gen >= 5 && !IS_VALLEYVIEW(dev) && !IS_CHERRYVIEW(dev)) {
+ } else if (gen >= 5 && !IS_VALLEYVIEW(dev_priv) && !IS_CHERRYVIEW(dev_priv)) {
return 32*1024;
} else if (gen >= 4) {
if (fb_modifier == I915_FORMAT_MOD_X_TILED)
@@ -14913,17 +14896,16 @@ u32 intel_fb_pitch_limit(struct drm_device *dev, uint64_t fb_modifier,
}
}
-static int intel_framebuffer_init(struct drm_device *dev,
- struct intel_framebuffer *intel_fb,
- struct drm_mode_fb_cmd2 *mode_cmd,
- struct drm_i915_gem_object *obj)
+static int intel_framebuffer_init(struct intel_framebuffer *intel_fb,
+ struct drm_i915_gem_object *obj,
+ struct drm_mode_fb_cmd2 *mode_cmd)
{
- struct drm_i915_private *dev_priv = to_i915(dev);
- unsigned int aligned_height;
- int ret;
+ struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
u32 pitch_limit, stride_alignment;
+ unsigned int aligned_height;
+ int ret = -EINVAL;
- WARN_ON(!mutex_is_locked(&dev->struct_mutex));
+ atomic_inc(&obj->framebuffer_references);
if (mode_cmd->flags & DRM_MODE_FB_MODIFIERS) {
/* Enforce that fb modifier and tiling mode match, but only for
@@ -14931,14 +14913,14 @@ static int intel_framebuffer_init(struct drm_device *dev,
if (!!(i915_gem_object_get_tiling(obj) == I915_TILING_X) !=
!!(mode_cmd->modifier[0] == I915_FORMAT_MOD_X_TILED)) {
DRM_DEBUG("tiling_mode doesn't match fb modifier\n");
- return -EINVAL;
+ goto err;
}
} else {
if (i915_gem_object_get_tiling(obj) == I915_TILING_X)
mode_cmd->modifier[0] = I915_FORMAT_MOD_X_TILED;
else if (i915_gem_object_get_tiling(obj) == I915_TILING_Y) {
DRM_DEBUG("No Y tiling for legacy addfb\n");
- return -EINVAL;
+ goto err;
}
}
@@ -14946,10 +14928,10 @@ static int intel_framebuffer_init(struct drm_device *dev,
switch (mode_cmd->modifier[0]) {
case I915_FORMAT_MOD_Y_TILED:
case I915_FORMAT_MOD_Yf_TILED:
- if (INTEL_INFO(dev)->gen < 9) {
+ if (INTEL_GEN(dev_priv) < 9) {
DRM_DEBUG("Unsupported tiling 0x%llx!\n",
mode_cmd->modifier[0]);
- return -EINVAL;
+ goto err;
}
case DRM_FORMAT_MOD_NONE:
case I915_FORMAT_MOD_X_TILED:
@@ -14957,7 +14939,7 @@ static int intel_framebuffer_init(struct drm_device *dev,
default:
DRM_DEBUG("Unsupported fb modifier 0x%llx!\n",
mode_cmd->modifier[0]);
- return -EINVAL;
+ goto err;
}
stride_alignment = intel_fb_stride_alignment(dev_priv,
@@ -14966,17 +14948,18 @@ static int intel_framebuffer_init(struct drm_device *dev,
if (mode_cmd->pitches[0] & (stride_alignment - 1)) {
DRM_DEBUG("pitch (%d) must be at least %u byte aligned\n",
mode_cmd->pitches[0], stride_alignment);
- return -EINVAL;
+ goto err;
}
- pitch_limit = intel_fb_pitch_limit(dev, mode_cmd->modifier[0],
+ pitch_limit = intel_fb_pitch_limit(dev_priv,
+ mode_cmd->modifier[0],
mode_cmd->pixel_format);
if (mode_cmd->pitches[0] > pitch_limit) {
DRM_DEBUG("%s pitch (%u) must be at less than %d\n",
mode_cmd->modifier[0] != DRM_FORMAT_MOD_NONE ?
"tiled" : "linear",
mode_cmd->pitches[0], pitch_limit);
- return -EINVAL;
+ goto err;
}
if (mode_cmd->modifier[0] == I915_FORMAT_MOD_X_TILED &&
@@ -14984,7 +14967,7 @@ static int intel_framebuffer_init(struct drm_device *dev,
DRM_DEBUG("pitch (%d) must match tiling stride (%d)\n",
mode_cmd->pitches[0],
i915_gem_object_get_stride(obj));
- return -EINVAL;
+ goto err;
}
/* Reject formats not supported by any plane early. */
@@ -14995,77 +14978,83 @@ static int intel_framebuffer_init(struct drm_device *dev,
case DRM_FORMAT_ARGB8888:
break;
case DRM_FORMAT_XRGB1555:
- if (INTEL_INFO(dev)->gen > 3) {
+ if (INTEL_GEN(dev_priv) > 3) {
DRM_DEBUG("unsupported pixel format: %s\n",
drm_get_format_name(mode_cmd->pixel_format));
- return -EINVAL;
+ goto err;
}
break;
case DRM_FORMAT_ABGR8888:
- if (!IS_VALLEYVIEW(dev) && !IS_CHERRYVIEW(dev) &&
- INTEL_INFO(dev)->gen < 9) {
+ if (!IS_VALLEYVIEW(dev_priv) &&
+ !IS_CHERRYVIEW(dev_priv) &&
+ INTEL_GEN(dev_priv) < 9) {
DRM_DEBUG("unsupported pixel format: %s\n",
drm_get_format_name(mode_cmd->pixel_format));
- return -EINVAL;
+ goto err;
}
break;
case DRM_FORMAT_XBGR8888:
case DRM_FORMAT_XRGB2101010:
case DRM_FORMAT_XBGR2101010:
- if (INTEL_INFO(dev)->gen < 4) {
+ if (INTEL_GEN(dev_priv) < 4) {
DRM_DEBUG("unsupported pixel format: %s\n",
drm_get_format_name(mode_cmd->pixel_format));
- return -EINVAL;
+ goto err;
}
break;
case DRM_FORMAT_ABGR2101010:
- if (!IS_VALLEYVIEW(dev) && !IS_CHERRYVIEW(dev)) {
+ if (!IS_VALLEYVIEW(dev_priv) && !IS_CHERRYVIEW(dev_priv)) {
DRM_DEBUG("unsupported pixel format: %s\n",
drm_get_format_name(mode_cmd->pixel_format));
- return -EINVAL;
+ goto err;
}
break;
case DRM_FORMAT_YUYV:
case DRM_FORMAT_UYVY:
case DRM_FORMAT_YVYU:
case DRM_FORMAT_VYUY:
- if (INTEL_INFO(dev)->gen < 5) {
+ if (INTEL_GEN(dev_priv) < 5) {
DRM_DEBUG("unsupported pixel format: %s\n",
drm_get_format_name(mode_cmd->pixel_format));
- return -EINVAL;
+ goto err;
}
break;
default:
DRM_DEBUG("unsupported pixel format: %s\n",
drm_get_format_name(mode_cmd->pixel_format));
- return -EINVAL;
+ goto err;
}
/* FIXME need to adjust LINOFF/TILEOFF accordingly. */
if (mode_cmd->offsets[0] != 0)
- return -EINVAL;
+ goto err;
- aligned_height = intel_fb_align_height(dev, mode_cmd->height,
+ aligned_height = intel_fb_align_height(dev_priv,
+ mode_cmd->height,
mode_cmd->pixel_format,
mode_cmd->modifier[0]);
/* FIXME drm helper for size checks (especially planar formats)? */
if (obj->base.size < aligned_height * mode_cmd->pitches[0])
- return -EINVAL;
+ goto err;
drm_helper_mode_fill_fb_struct(&intel_fb->base, mode_cmd);
intel_fb->obj = obj;
intel_fill_fb_info(dev_priv, &intel_fb->base);
- ret = drm_framebuffer_init(dev, &intel_fb->base, &intel_fb_funcs);
+ ret = drm_framebuffer_init(obj->base.dev,
+ &intel_fb->base,
+ &intel_fb_funcs);
if (ret) {
DRM_ERROR("framebuffer init failed %d\n", ret);
- return ret;
+ goto err;
}
- intel_fb->obj->framebuffer_references++;
-
return 0;
+
+err:
+ atomic_dec(&obj->framebuffer_references);
+ return ret;
}
static struct drm_framebuffer *
@@ -15081,9 +15070,9 @@ intel_user_framebuffer_create(struct drm_device *dev,
if (!obj)
return ERR_PTR(-ENOENT);
- fb = intel_framebuffer_create(dev, &mode_cmd, obj);
+ fb = intel_framebuffer_create(obj, &mode_cmd);
if (IS_ERR(fb))
- i915_gem_object_put_unlocked(obj);
+ i915_gem_object_put(obj);
return fb;
}
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index a4870eac84c1..69b2a735deea 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -1136,7 +1136,7 @@ void intel_ddi_clock_get(struct intel_encoder *encoder,
void intel_ddi_set_vc_payload_alloc(struct drm_crtc *crtc, bool state);
uint32_t ddi_signal_levels(struct intel_dp *intel_dp);
-unsigned int intel_fb_align_height(struct drm_device *dev,
+unsigned int intel_fb_align_height(struct drm_i915_private *dev_priv,
unsigned int height,
uint32_t pixel_format,
uint64_t fb_format_modifier);
@@ -1222,9 +1222,8 @@ struct i915_vma *
intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, unsigned int rotation);
void intel_unpin_fb_obj(struct drm_framebuffer *fb, unsigned int rotation);
struct drm_framebuffer *
-__intel_framebuffer_create(struct drm_device *dev,
- struct drm_mode_fb_cmd2 *mode_cmd,
- struct drm_i915_gem_object *obj);
+intel_framebuffer_create(struct drm_i915_gem_object *obj,
+ struct drm_mode_fb_cmd2 *mode_cmd);
void intel_finish_page_flip_cs(struct drm_i915_private *dev_priv, int pipe);
void intel_finish_page_flip_mmio(struct drm_i915_private *dev_priv, int pipe);
void intel_check_page_flip(struct drm_i915_private *dev_priv, int pipe);
diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c
index 692bf75db3bd..2893a2d67403 100644
--- a/drivers/gpu/drm/i915/intel_fbdev.c
+++ b/drivers/gpu/drm/i915/intel_fbdev.c
@@ -125,7 +125,7 @@ static int intelfb_alloc(struct drm_fb_helper *helper,
struct drm_i915_private *dev_priv = to_i915(dev);
struct i915_ggtt *ggtt = &dev_priv->ggtt;
struct drm_mode_fb_cmd2 mode_cmd = {};
- struct drm_i915_gem_object *obj = NULL;
+ struct drm_i915_gem_object *obj;
int size, ret;
/* we don't do packed 24bpp */
@@ -140,14 +140,13 @@ static int intelfb_alloc(struct drm_fb_helper *helper,
mode_cmd.pixel_format = drm_mode_legacy_fb_format(sizes->surface_bpp,
sizes->surface_depth);
- mutex_lock(&dev->struct_mutex);
-
size = mode_cmd.pitches[0] * mode_cmd.height;
size = PAGE_ALIGN(size);
/* If the FB is too big, just don't use it since fbdev is not very
* important and we should probably use that space with FBC or other
* features. */
+ obj = NULL;
if (size * 2 < ggtt->stolen_usable_size)
obj = i915_gem_object_create_stolen(dev, size);
if (obj == NULL)
@@ -155,24 +154,22 @@ static int intelfb_alloc(struct drm_fb_helper *helper,
if (IS_ERR(obj)) {
DRM_ERROR("failed to allocate framebuffer\n");
ret = PTR_ERR(obj);
- goto out;
+ goto err;
}
- fb = __intel_framebuffer_create(dev, &mode_cmd, obj);
+ fb = intel_framebuffer_create(obj, &mode_cmd);
if (IS_ERR(fb)) {
- i915_gem_object_put(obj);
ret = PTR_ERR(fb);
- goto out;
+ goto err_obj;
}
- mutex_unlock(&dev->struct_mutex);
-
ifbdev->fb = to_intel_framebuffer(fb);
return 0;
-out:
- mutex_unlock(&dev->struct_mutex);
+err_obj:
+ i915_gem_object_put(obj);
+err:
return ret;
}
@@ -634,7 +631,7 @@ static bool intel_fbdev_init_bios(struct drm_device *dev,
}
cur_size = intel_crtc->config->base.adjusted_mode.crtc_vdisplay;
- cur_size = intel_fb_align_height(dev, cur_size,
+ cur_size = intel_fb_align_height(to_i915(dev), cur_size,
fb->base.pixel_format,
fb->base.modifier[0]);
cur_size *= fb->base.pitches[0];
diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c
index 7f17edfe5cb5..78288f0af669 100644
--- a/drivers/gpu/drm/i915/intel_guc_loader.c
+++ b/drivers/gpu/drm/i915/intel_guc_loader.c
@@ -661,9 +661,6 @@ fail:
DRM_ERROR("Failed to fetch GuC firmware from %s (error %d)\n",
guc_fw->guc_fw_path, err);
- i915_gem_object_put_unlocked(guc_fw->guc_fw_obj);
- guc_fw->guc_fw_obj = NULL;
-
release_firmware(fw); /* OK even if fw is NULL */
guc_fw->guc_fw_fetch_status = GUC_FIRMWARE_FAIL;
}
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 75ddaf372045..c1189962258e 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -156,6 +156,11 @@
#define GEN8_CTX_STATUS_COMPLETE (1 << 4)
#define GEN8_CTX_STATUS_LITE_RESTORE (1 << 15)
+#define GEN8_CTX_STATUS_COMPLETED_MASK \
+ (GEN8_CTX_STATUS_ACTIVE_IDLE | \
+ GEN8_CTX_STATUS_PREEMPTED | \
+ GEN8_CTX_STATUS_ELEMENT_SWITCH)
+
#define CTX_LRI_HEADER_0 0x01
#define CTX_CONTEXT_CONTROL 0x02
#define CTX_RING_HEAD 0x04
@@ -263,12 +268,10 @@ logical_ring_init_platform_invariants(struct intel_engine_cs *engine)
{
struct drm_i915_private *dev_priv = engine->i915;
- if (IS_GEN8(dev_priv) || IS_GEN9(dev_priv))
- engine->idle_lite_restore_wa = ~0;
-
- engine->disable_lite_restore_wa = (IS_SKL_REVID(dev_priv, 0, SKL_REVID_B0) ||
- IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) &&
- (engine->id == VCS || engine->id == VCS2);
+ engine->disable_lite_restore_wa =
+ (IS_SKL_REVID(dev_priv, 0, SKL_REVID_B0) ||
+ IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) &&
+ (engine->id == VCS || engine->id == VCS2);
engine->ctx_desc_template = GEN8_CTX_VALID;
if (IS_GEN8(dev_priv))
@@ -328,36 +331,6 @@ uint64_t intel_lr_context_descriptor(struct i915_gem_context *ctx,
return ctx->engine[engine->id].lrc_desc;
}
-static void execlists_elsp_write(struct drm_i915_gem_request *rq0,
- struct drm_i915_gem_request *rq1)
-{
-
- struct intel_engine_cs *engine = rq0->engine;
- struct drm_i915_private *dev_priv = rq0->i915;
- uint64_t desc[2];
-
- if (rq1) {
- desc[1] = intel_lr_context_descriptor(rq1->ctx, rq1->engine);
- rq1->elsp_submitted++;
- } else {
- desc[1] = 0;
- }
-
- desc[0] = intel_lr_context_descriptor(rq0->ctx, rq0->engine);
- rq0->elsp_submitted++;
-
- /* You must always write both descriptors in the order below. */
- I915_WRITE_FW(RING_ELSP(engine), upper_32_bits(desc[1]));
- I915_WRITE_FW(RING_ELSP(engine), lower_32_bits(desc[1]));
-
- I915_WRITE_FW(RING_ELSP(engine), upper_32_bits(desc[0]));
- /* The context is automatically loaded after the following */
- I915_WRITE_FW(RING_ELSP(engine), lower_32_bits(desc[0]));
-
- /* ELSP is a wo register, use another nearby reg for posting */
- POSTING_READ_FW(RING_EXECLIST_STATUS_LO(engine));
-}
-
static void
execlists_update_context_pdps(struct i915_hw_ppgtt *ppgtt, u32 *reg_state)
{
@@ -367,13 +340,13 @@ execlists_update_context_pdps(struct i915_hw_ppgtt *ppgtt, u32 *reg_state)
ASSIGN_CTX_PDP(ppgtt, reg_state, 0);
}
-static void execlists_update_context(struct drm_i915_gem_request *rq)
+static u64 execlists_update_context(struct drm_i915_gem_request *rq)
{
- struct intel_engine_cs *engine = rq->engine;
+ struct intel_context *ce = &rq->ctx->engine[rq->engine->id];
struct i915_hw_ppgtt *ppgtt = rq->ctx->ppgtt;
- uint32_t *reg_state = rq->ctx->engine[engine->id].lrc_reg_state;
- reg_state[CTX_RING_TAIL+1] = intel_ring_offset(rq->ring, rq->tail);
+ ce->lrc_reg_state[CTX_RING_TAIL+1] =
+ intel_ring_offset(rq->ring, rq->tail);
/* True 32b PPGTT with dynamic page allocation: update PDP
* registers and point the unallocated PDPs to scratch page.
@@ -381,32 +354,14 @@ static void execlists_update_context(struct drm_i915_gem_request *rq)
* in 48-bit mode.
*/
if (ppgtt && !USES_FULL_48BIT_PPGTT(ppgtt->base.dev))
- execlists_update_context_pdps(ppgtt, reg_state);
-}
-
-static void execlists_elsp_submit_contexts(struct drm_i915_gem_request *rq0,
- struct drm_i915_gem_request *rq1)
-{
- struct drm_i915_private *dev_priv = rq0->i915;
- unsigned int fw_domains = rq0->engine->fw_domains;
-
- execlists_update_context(rq0);
-
- if (rq1)
- execlists_update_context(rq1);
+ execlists_update_context_pdps(ppgtt, ce->lrc_reg_state);
- spin_lock_irq(&dev_priv->uncore.lock);
- intel_uncore_forcewake_get__locked(dev_priv, fw_domains);
-
- execlists_elsp_write(rq0, rq1);
-
- intel_uncore_forcewake_put__locked(dev_priv, fw_domains);
- spin_unlock_irq(&dev_priv->uncore.lock);
+ return ce->lrc_desc;
}
-static inline void execlists_context_status_change(
- struct drm_i915_gem_request *rq,
- unsigned long status)
+static inline void
+execlists_context_status_change(struct drm_i915_gem_request *rq,
+ unsigned long status)
{
/*
* Only used when GVT-g is enabled now. When GVT-g is disabled,
@@ -418,122 +373,106 @@ static inline void execlists_context_status_change(
atomic_notifier_call_chain(&rq->ctx->status_notifier, status, rq);
}
-static void execlists_unqueue(struct intel_engine_cs *engine)
+static void execlists_submit_ports(struct intel_engine_cs *engine)
{
- struct drm_i915_gem_request *req0 = NULL, *req1 = NULL;
- struct drm_i915_gem_request *cursor, *tmp;
-
- assert_spin_locked(&engine->execlist_lock);
-
- /*
- * If irqs are not active generate a warning as batches that finish
- * without the irqs may get lost and a GPU Hang may occur.
- */
- WARN_ON(!intel_irqs_enabled(engine->i915));
-
- /* Try to read in pairs */
- list_for_each_entry_safe(cursor, tmp, &engine->execlist_queue,
- execlist_link) {
- if (!req0) {
- req0 = cursor;
- } else if (req0->ctx == cursor->ctx) {
- /* Same ctx: ignore first request, as second request
- * will update tail past first request's workload */
- cursor->elsp_submitted = req0->elsp_submitted;
- list_del(&req0->execlist_link);
- i915_gem_request_put(req0);
- req0 = cursor;
- } else {
- if (IS_ENABLED(CONFIG_DRM_I915_GVT)) {
- /*
- * req0 (after merged) ctx requires single
- * submission, stop picking
- */
- if (req0->ctx->execlists_force_single_submission)
- break;
- /*
- * req0 ctx doesn't require single submission,
- * but next req ctx requires, stop picking
- */
- if (cursor->ctx->execlists_force_single_submission)
- break;
- }
- req1 = cursor;
- WARN_ON(req1->elsp_submitted);
- break;
- }
- }
-
- if (unlikely(!req0))
- return;
+ struct drm_i915_private *dev_priv = engine->i915;
+ u32 *elsp = dev_priv->regs + i915_mmio_reg_offset(RING_ELSP(engine));
+ u64 desc[2];
- execlists_context_status_change(req0, INTEL_CONTEXT_SCHEDULE_IN);
+ if (!engine->execlist_port[0].count)
+ execlists_context_status_change(engine->execlist_port[0].request,
+ INTEL_CONTEXT_SCHEDULE_IN);
+ desc[0] = execlists_update_context(engine->execlist_port[0].request);
+ engine->preempt_wa = engine->execlist_port[0].count++;
- if (req1)
- execlists_context_status_change(req1,
+ if (engine->execlist_port[1].request) {
+ GEM_BUG_ON(engine->execlist_port[1].count);
+ execlists_context_status_change(engine->execlist_port[1].request,
INTEL_CONTEXT_SCHEDULE_IN);
+ desc[1] = execlists_update_context(engine->execlist_port[1].request);
+ engine->execlist_port[1].count = 1;
+ } else
+ desc[1] = 0;
- if (req0->elsp_submitted & engine->idle_lite_restore_wa) {
- /*
- * WaIdleLiteRestore: make sure we never cause a lite restore
- * with HEAD==TAIL.
- *
- * Apply the wa NOOPS to prevent ring:HEAD == req:TAIL as we
- * resubmit the request. See gen8_emit_request() for where we
- * prepare the padding after the end of the request.
- */
- req0->tail += 8;
- req0->tail &= req0->ring->size - 1;
- }
+ /* You must always write both descriptors in the order below. */
+ writel(upper_32_bits(desc[1]), elsp);
+ writel(lower_32_bits(desc[1]), elsp);
- execlists_elsp_submit_contexts(req0, req1);
+ writel(upper_32_bits(desc[0]), elsp);
+ /* The context is automatically loaded after the following */
+ writel(lower_32_bits(desc[0]), elsp);
}
-static unsigned int
-execlists_check_remove_request(struct intel_engine_cs *engine, u32 ctx_id)
+static bool merge_ctx(struct i915_gem_context *prev,
+ struct i915_gem_context *next)
{
- struct drm_i915_gem_request *head_req;
+ if (prev != next)
+ return false;
- assert_spin_locked(&engine->execlist_lock);
+ if (IS_ENABLED(CONFIG_DRM_I915_GVT) &&
+ prev->execlists_force_single_submission)
+ return false;
- head_req = list_first_entry_or_null(&engine->execlist_queue,
- struct drm_i915_gem_request,
- execlist_link);
+ return true;
+}
- if (WARN_ON(!head_req || (head_req->ctx_hw_id != ctx_id)))
- return 0;
+static void execlists_context_unqueue(struct intel_engine_cs *engine)
+{
+ struct drm_i915_gem_request *cursor, *last;
+ struct execlist_port *port = engine->execlist_port;
+ bool submit = false;
+
+ last = port->request;
+ if (last != NULL) {
+ /* WaIdleLiteRestore:bdw,skl
+ * Apply the wa NOOPs to prevent ring:HEAD == req:TAIL
+ * as we resubmit the request. See gen8_emit_request()
+ * for where we prepare the padding after the end of the
+ * request.
+ */
+ last->tail = last->wa_tail;
+ }
- WARN(head_req->elsp_submitted == 0, "Never submitted head request\n");
+ /* Try to read in pairs and fill both submission ports */
+ spin_lock(&engine->execlist_lock);
+ list_for_each_entry(cursor, &engine->execlist_queue, execlist_link) {
+ if (last && !merge_ctx(cursor->ctx, last->ctx)) {
+ if (port != engine->execlist_port)
+ break;
- if (--head_req->elsp_submitted > 0)
- return 0;
+ if (IS_ENABLED(CONFIG_DRM_I915_GVT) &&
+ cursor->ctx->execlists_force_single_submission)
+ break;
- execlists_context_status_change(head_req, INTEL_CONTEXT_SCHEDULE_OUT);
+ i915_gem_request_assign(&port->request, last);
+ port++;
- list_del(&head_req->execlist_link);
- i915_gem_request_put(head_req);
+ }
+ last = cursor;
+ submit = true;
+ }
+ if (submit) {
+ i915_gem_request_assign(&port->request, last);
+ engine->execlist_queue.next = &cursor->execlist_link;
+ cursor->execlist_link.prev = &engine->execlist_queue;
+ }
+ spin_unlock(&engine->execlist_lock);
- return 1;
+ if (submit)
+ execlists_submit_ports(engine);
}
-static u32
-get_context_status(struct intel_engine_cs *engine, unsigned int read_pointer,
- u32 *context_id)
+static bool execlists_elsp_idle(struct intel_engine_cs *engine)
{
- struct drm_i915_private *dev_priv = engine->i915;
- u32 status;
-
- read_pointer %= GEN8_CSB_ENTRIES;
-
- status = I915_READ_FW(RING_CONTEXT_STATUS_BUF_LO(engine, read_pointer));
-
- if (status & GEN8_CTX_STATUS_IDLE_ACTIVE)
- return 0;
-
- *context_id = I915_READ_FW(RING_CONTEXT_STATUS_BUF_HI(engine,
- read_pointer));
+ return engine->execlist_port[0].request == NULL;
+}
- return status;
+static bool execlists_elsp_ready(struct intel_engine_cs *engine)
+{
+ if (engine->disable_lite_restore_wa || engine->preempt_wa)
+ return engine->execlist_port[0].request == NULL;
+ else
+ return engine->execlist_port[1].request == NULL;
}
/*
@@ -544,100 +483,62 @@ static void intel_lrc_irq_handler(unsigned long data)
{
struct intel_engine_cs *engine = (struct intel_engine_cs *)data;
struct drm_i915_private *dev_priv = engine->i915;
- u32 status_pointer;
- unsigned int read_pointer, write_pointer;
- u32 csb[GEN8_CSB_ENTRIES][2];
- unsigned int csb_read = 0, i;
- unsigned int submit_contexts = 0;
intel_uncore_forcewake_get(dev_priv, engine->fw_domains);
- status_pointer = I915_READ_FW(RING_CONTEXT_STATUS_PTR(engine));
-
- read_pointer = engine->next_context_status_buffer;
- write_pointer = GEN8_CSB_WRITE_PTR(status_pointer);
- if (read_pointer > write_pointer)
- write_pointer += GEN8_CSB_ENTRIES;
-
- while (read_pointer < write_pointer) {
- if (WARN_ON_ONCE(csb_read == GEN8_CSB_ENTRIES))
- break;
- csb[csb_read][0] = get_context_status(engine, ++read_pointer,
- &csb[csb_read][1]);
- csb_read++;
- }
-
- engine->next_context_status_buffer = write_pointer % GEN8_CSB_ENTRIES;
-
- /* Update the read pointer to the old write pointer. Manual ringbuffer
- * management ftw </sarcasm> */
- I915_WRITE_FW(RING_CONTEXT_STATUS_PTR(engine),
- _MASKED_FIELD(GEN8_CSB_READ_PTR_MASK,
- engine->next_context_status_buffer << 8));
-
- intel_uncore_forcewake_put(dev_priv, engine->fw_domains);
-
- spin_lock(&engine->execlist_lock);
-
- for (i = 0; i < csb_read; i++) {
- if (unlikely(csb[i][0] & GEN8_CTX_STATUS_PREEMPTED)) {
- if (csb[i][0] & GEN8_CTX_STATUS_LITE_RESTORE) {
- if (execlists_check_remove_request(engine, csb[i][1]))
- WARN(1, "Lite Restored request removed from queue\n");
- } else
- WARN(1, "Preemption without Lite Restore\n");
+ if (!execlists_elsp_idle(engine)) {
+ u32 *ring_mmio =
+ dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine));
+ u32 *csb_mmio =
+ dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0));
+ unsigned ring, head, tail;
+
+ ring = readl(ring_mmio);
+ head = GEN8_CSB_READ_PTR(ring);
+ tail = GEN8_CSB_WRITE_PTR(ring);
+ if (tail < head)
+ tail += GEN8_CSB_ENTRIES;
+ while (head < tail) {
+ unsigned idx = ++head % GEN8_CSB_ENTRIES;
+ unsigned status = readl(&csb_mmio[2*idx]);
+
+ if (status & GEN8_CTX_STATUS_COMPLETED_MASK) {
+ GEM_BUG_ON(engine->execlist_port[0].count == 0);
+ if (--engine->execlist_port[0].count == 0) {
+ GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED);
+ execlists_context_status_change(engine->execlist_port[0].request,
+ INTEL_CONTEXT_SCHEDULE_OUT);
+ i915_gem_request_put(engine->execlist_port[0].request);
+ engine->execlist_port[0] = engine->execlist_port[1];
+ memset(&engine->execlist_port[1], 0,
+ sizeof(engine->execlist_port[1]));
+ engine->preempt_wa = false;
+ }
+ }
+ GEM_BUG_ON(engine->execlist_port[0].count == 0 &&
+ !(status & GEN8_CTX_STATUS_ACTIVE_IDLE));
}
- if (csb[i][0] & (GEN8_CTX_STATUS_ACTIVE_IDLE |
- GEN8_CTX_STATUS_ELEMENT_SWITCH))
- submit_contexts +=
- execlists_check_remove_request(engine, csb[i][1]);
+ writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK,
+ GEN8_CSB_WRITE_PTR(ring) << 8),
+ ring_mmio);
}
- if (submit_contexts) {
- if (!engine->disable_lite_restore_wa ||
- (csb[i][0] & GEN8_CTX_STATUS_ACTIVE_IDLE))
- execlists_unqueue(engine);
- }
+ if (execlists_elsp_ready(engine))
+ execlists_context_unqueue(engine);
- spin_unlock(&engine->execlist_lock);
-
- if (unlikely(submit_contexts > 2))
- DRM_ERROR("More than two context complete events?\n");
+ intel_uncore_forcewake_put(dev_priv, engine->fw_domains);
}
static void execlists_submit_request(struct drm_i915_gem_request *request)
{
struct intel_engine_cs *engine = request->engine;
- struct drm_i915_gem_request *cursor;
- int num_elements = 0;
spin_lock_bh(&engine->execlist_lock);
- list_for_each_entry(cursor, &engine->execlist_queue, execlist_link)
- if (++num_elements > 2)
- break;
-
- if (num_elements > 2) {
- struct drm_i915_gem_request *tail_req;
-
- tail_req = list_last_entry(&engine->execlist_queue,
- struct drm_i915_gem_request,
- execlist_link);
-
- if (request->ctx == tail_req->ctx) {
- WARN(tail_req->elsp_submitted != 0,
- "More than 2 already-submitted reqs queued\n");
- list_del(&tail_req->execlist_link);
- i915_gem_request_put(tail_req);
- }
- }
-
- i915_gem_request_get(request);
list_add_tail(&request->execlist_link, &engine->execlist_queue);
- request->ctx_hw_id = request->ctx->hw_id;
- if (num_elements == 0)
- execlists_unqueue(engine);
+ if (execlists_elsp_idle(engine))
+ tasklet_hi_schedule(&engine->irq_tasklet);
spin_unlock_bh(&engine->execlist_lock);
}
@@ -731,6 +632,7 @@ intel_logical_ring_advance(struct drm_i915_gem_request *request)
intel_ring_emit(ring, MI_NOOP);
intel_ring_emit(ring, MI_NOOP);
intel_ring_advance(ring);
+ request->wa_tail = request->ring->tail;
/* We keep the previous context alive until we retire the following
* request. This ensures that any the context object is still pinned
@@ -743,23 +645,6 @@ intel_logical_ring_advance(struct drm_i915_gem_request *request)
return 0;
}
-void intel_execlists_cancel_requests(struct intel_engine_cs *engine)
-{
- struct drm_i915_gem_request *req, *tmp;
- LIST_HEAD(cancel_list);
-
- WARN_ON(!mutex_is_locked(&engine->i915->drm.struct_mutex));
-
- spin_lock_bh(&engine->execlist_lock);
- list_replace_init(&engine->execlist_queue, &cancel_list);
- spin_unlock_bh(&engine->execlist_lock);
-
- list_for_each_entry_safe(req, tmp, &cancel_list, execlist_link) {
- list_del(&req->execlist_link);
- i915_gem_request_put(req);
- }
-}
-
static int intel_lr_context_pin(struct i915_gem_context *ctx,
struct intel_engine_cs *engine)
{
@@ -1276,7 +1161,6 @@ static void lrc_init_hws(struct intel_engine_cs *engine)
static int gen8_init_common_ring(struct intel_engine_cs *engine)
{
struct drm_i915_private *dev_priv = engine->i915;
- unsigned int next_context_status_buffer_hw;
lrc_init_hws(engine);
@@ -1287,32 +1171,12 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine)
I915_WRITE(RING_MODE_GEN7(engine),
_MASKED_BIT_DISABLE(GFX_REPLAY_MODE) |
_MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE));
- POSTING_READ(RING_MODE_GEN7(engine));
- /*
- * Instead of resetting the Context Status Buffer (CSB) read pointer to
- * zero, we need to read the write pointer from hardware and use its
- * value because "this register is power context save restored".
- * Effectively, these states have been observed:
- *
- * | Suspend-to-idle (freeze) | Suspend-to-RAM (mem) |
- * BDW | CSB regs not reset | CSB regs reset |
- * CHT | CSB regs not reset | CSB regs not reset |
- * SKL | ? | ? |
- * BXT | ? | ? |
- */
- next_context_status_buffer_hw =
- GEN8_CSB_WRITE_PTR(I915_READ(RING_CONTEXT_STATUS_PTR(engine)));
-
- /*
- * When the CSB registers are reset (also after power-up / gpu reset),
- * CSB write pointer is set to all 1's, which is not valid, use '5' in
- * this special case, so the first element read is CSB[0].
- */
- if (next_context_status_buffer_hw == GEN8_CSB_PTR_MASK)
- next_context_status_buffer_hw = (GEN8_CSB_ENTRIES - 1);
+ I915_WRITE(RING_CONTEXT_STATUS_PTR(engine),
+ _MASKED_FIELD(GEN8_CSB_READ_PTR_MASK |
+ GEN8_CSB_WRITE_PTR_MASK,
+ 0));
- engine->next_context_status_buffer = next_context_status_buffer_hw;
DRM_DEBUG_DRIVER("Execlists enabled for %s\n", engine->name);
intel_engine_init_hangcheck(engine);
@@ -1697,7 +1561,6 @@ void intel_logical_ring_cleanup(struct intel_engine_cs *engine)
}
intel_lr_context_unpin(dev_priv->kernel_context, engine);
- engine->idle_lite_restore_wa = 0;
engine->disable_lite_restore_wa = false;
engine->ctx_desc_template = 0;
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index a52cf57dbd40..4d70346500c2 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -97,6 +97,4 @@ int intel_sanitize_enable_execlists(struct drm_i915_private *dev_priv,
int enable_execlists);
void intel_execlists_enable_submission(struct drm_i915_private *dev_priv);
-void intel_execlists_cancel_requests(struct intel_engine_cs *engine);
-
#endif /* _INTEL_LRC_H_ */
diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c
index ede09f000af5..aa8ee7b870bf 100644
--- a/drivers/gpu/drm/i915/intel_overlay.c
+++ b/drivers/gpu/drm/i915/intel_overlay.c
@@ -1215,7 +1215,7 @@ int intel_overlay_put_image_ioctl(struct drm_device *dev, void *data,
out_unlock:
mutex_unlock(&dev->struct_mutex);
drm_modeset_unlock_all(dev);
- i915_gem_object_put_unlocked(new_bo);
+ i915_gem_object_put(new_bo);
out_free:
kfree(params);
@@ -1459,7 +1459,7 @@ void intel_cleanup_overlay(struct drm_i915_private *dev_priv)
* hardware should be off already */
WARN_ON(dev_priv->overlay->active);
- i915_gem_object_put_unlocked(dev_priv->overlay->reg_bo);
+ i915_gem_object_put(dev_priv->overlay->reg_bo);
kfree(dev_priv->overlay);
}
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index df2efa4c713e..073d12502736 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -5720,7 +5720,7 @@ static void valleyview_cleanup_pctx(struct drm_i915_private *dev_priv)
if (WARN_ON(!dev_priv->vlv_pctx))
return;
- i915_gem_object_put_unlocked(dev_priv->vlv_pctx);
+ i915_gem_object_put(dev_priv->vlv_pctx);
dev_priv->vlv_pctx = NULL;
}
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 7da768a8d06e..5cdd3c960903 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -292,11 +292,14 @@ struct intel_engine_cs {
/* Execlists */
struct tasklet_struct irq_tasklet;
spinlock_t execlist_lock; /* used inside tasklet, use spin_lock_bh */
+ struct execlist_port {
+ struct drm_i915_gem_request *request;
+ unsigned count;
+ } execlist_port[2];
struct list_head execlist_queue;
unsigned int fw_domains;
- unsigned int next_context_status_buffer;
- unsigned int idle_lite_restore_wa;
bool disable_lite_restore_wa;
+ bool preempt_wa;
u32 ctx_desc_template;
/**
diff --git a/include/linux/fence.h b/include/linux/fence.h
index 523ea3fbbddd..798bde6fde74 100644
--- a/include/linux/fence.h
+++ b/include/linux/fence.h
@@ -34,6 +34,8 @@ struct fence;
struct fence_ops;
struct fence_cb;
+struct kfence;
+
/**
* struct fence - software synchronization primitive
* @refcount: refcount for this fence
@@ -377,4 +379,8 @@ u64 fence_context_alloc(unsigned num);
##args); \
} while (0)
+int kfence_await_dma_fence(struct kfence *fence,
+ struct fence *dma,
+ gfp_t gfp);
+
#endif /* __LINUX_FENCE_H */
diff --git a/include/linux/kfence.h b/include/linux/kfence.h
new file mode 100644
index 000000000000..9b7dbd892218
--- /dev/null
+++ b/include/linux/kfence.h
@@ -0,0 +1,86 @@
+/*
+ * kfence.h - library routines for N:M synchronisation points
+ *
+ * Copyright (C) 2016 Intel Corporation
+ *
+ * This file is released under the GPLv2.
+ *
+ */
+
+#ifndef _KFENCE_H_
+#define _KFENCE_H_
+
+#include <linux/gfp.h>
+#include <linux/kref.h>
+#include <linux/notifier.h> /* for NOTIFY_DONE */
+#include <linux/wait.h>
+
+struct completion;
+struct fence;
+struct reservation_object;
+enum hrtimer_mode;
+
+/**
+ * struct kfence - used for tracking pending events and a set of listeners.
+ * @wait: a waitqueue of listeners (includes both kfences and tasks)
+ * @flags: a bitmask of interesting bits, mixed in with a notification function
+ * @kref: a reference counter
+ * @pending: an atomic counter of pending events
+ */
+struct kfence {
+ wait_queue_head_t wait;
+ unsigned long flags;
+ struct kref kref;
+ atomic_t pending;
+};
+
+#define KFENCE_CHECKED_BIT 0 /* used internally for DAG checking */
+#define KFENCE_PRIVATE_BIT 1 /* available for use by owner */
+#define KFENCE_MASK (~3)
+
+/**
+ * typedef kfence_notify_t - callback function type
+ *
+ * An optional callback to a fence may be provided that is called first
+ * when the fence is complete, and later when the fence is released. The
+ * callback must be of &kfence_notify_t function type, and be aligned using
+ * __kfence_call function attribute.
+ */
+typedef int (*kfence_notify_t)(struct kfence *);
+#define __kfence_call __aligned(4)
+
+void kfence_init(struct kfence *fence, kfence_notify_t fn);
+
+struct kfence *kfence_get(struct kfence *fence);
+void kfence_put(struct kfence *fence);
+
+void kfence_await(struct kfence *fence);
+int kfence_await_kfence(struct kfence *fence,
+ struct kfence *after,
+ gfp_t gfp);
+int kfence_await_completion(struct kfence *fence,
+ struct completion *x,
+ gfp_t gfp);
+int kfence_await_hrtimer(struct kfence *fence,
+ clockid_t clock, enum hrtimer_mode mode,
+ ktime_t delay, u64 slack,
+ gfp_t gfp);
+void kfence_complete(struct kfence *fence);
+void kfence_wake_up_all(struct kfence *fence);
+void kfence_wait(struct kfence *fence);
+
+/**
+ * kfence_done - report when the fence has been passed
+ * @fence: the kfence to query
+ *
+ * kfence_done() reports true when the fence is no longer waiting for any
+ * events and has completed its fence-complete notification.
+ *
+ * Returns true when the fence has been passed, false otherwise.
+ */
+static inline bool kfence_done(const struct kfence *fence)
+{
+ return atomic_read(&fence->pending) < 0;
+}
+
+#endif /* _KFENCE_H_ */
diff --git a/include/linux/reservation.h b/include/linux/reservation.h
index b0f305e77b7f..1954bab95db9 100644
--- a/include/linux/reservation.h
+++ b/include/linux/reservation.h
@@ -49,6 +49,8 @@ extern struct ww_class reservation_ww_class;
extern struct lock_class_key reservation_seqcount_class;
extern const char reservation_seqcount_string[];
+struct kfence;
+
/**
* struct reservation_object_list - a list of shared fences
* @rcu: for internal use
@@ -210,4 +212,9 @@ long reservation_object_wait_timeout_rcu(struct reservation_object *obj,
bool reservation_object_test_signaled_rcu(struct reservation_object *obj,
bool test_all);
+int kfence_await_reservation(struct kfence *fence,
+ struct reservation_object *resv,
+ bool write,
+ gfp_t gfp);
+
#endif /* _LINUX_RESERVATION_H */
diff --git a/kernel/Makefile b/kernel/Makefile
index e2ec54e2b952..ff11f31b7ec9 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -9,7 +9,7 @@ obj-y = fork.o exec_domain.o panic.o \
extable.o params.o \
kthread.o sys_ni.o nsproxy.o \
notifier.o ksysfs.o cred.o reboot.o \
- async.o range.o smpboot.o
+ async.o kfence.o range.o smpboot.o
obj-$(CONFIG_MULTIUSER) += groups.o
diff --git a/kernel/kfence.c b/kernel/kfence.c
new file mode 100644
index 000000000000..4605eabc2c1b
--- /dev/null
+++ b/kernel/kfence.c
@@ -0,0 +1,497 @@
+/*
+ * (C) Copyright 2016 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+#include <linux/fence.h>
+#include <linux/kfence.h>
+#include <linux/reservation.h>
+#include <linux/slab.h>
+
+/**
+ * DOC: kfence overview
+ *
+ * kfences provide synchronisation barriers between multiple tasks. They are
+ * very similar to completions, or the OpenGL fence synchronisation object.
+ * Where kfences differ from completions is their ability to track multiple
+ * event sources rather than being a singular "completion event". Similar to
+ * completions multiple processes can wait upon a kfence. However, unlike
+ * completions, a kfence can wait upon other kfences allowing for a graph
+ * of interdependent events.
+ *
+ * Each kfence is a one-shot flag, signaling that work has progressed past
+ * a certain point (as measured by completion of all events the kfence is
+ * listening for) and the waiters upon that kfence may proceed.
+ *
+ * kfences provide both signaling and waiting routines:
+ *
+ * - kfence_await(): indicates that the kfence is asynchronously waiting for
+ * another event.
+ *
+ * - kfence_complete(): undoes the earlier await and marks the fence as done
+ * if all of its pending events have been completed.
+ *
+ * - kfence_done(): reports whether or not the kfence has been passed.
+ *
+ * - kfence_wait(): allows the caller to sleep (uninterruptibly) until the
+ * fence is passed.
+ *
+ * This interface is very similar to completions, with the exception of
+ * allowing the fence to await multiple events. kfences can wait upon other
+ * fences or other hardware events, building an ordered dependency graph:
+ *
+ * - kfence_await_kfence(): the kfence asynchronously waits upon completion
+ * of another kfence
+ *
+ * - kfence_await_completion(): the kfence asynchronously waits upon a
+ * completion
+ *
+ * - kfence_await_hrtimer(): the kfence asynchronously waits for the
+ * expiration of a timer
+ *
+ * - kfence_await_dma_fence(): the kfence asynchronously waits for a DMA
+ * (hardware signaled) fence
+ *
+ * - kfence_await_reservation(): the kfence asynchronously waits for a DMA
+ * reservation object
+ *
+ * A kfence is initialised using kfence_init(), and starts off awaiting an
+ * event. Once you have finished setting up the fence, including adding
+ * all of its asynchronous waits, call kfence_complete().
+ *
+ * Unlike completions, kfences are expected to live inside more complex graphs
+ * and form the basis for parallel execution of interdependent tasks and so are
+ * reference counted. Use kfence_get() and kfence_put() to acquire or release
+ * a reference to the kfence respectively.
+ *
+ * The kfence can be embedded inside a larger structure and be used as part
+ * of its event driven mechanism. As such kfence_init() can be passed a
+ * callback function that will be called first when the kfence is completed,
+ * and again when the kfence is to be freed. If no callback is provided, the
+ * kfence will be freed using kfree() when its reference count hits zero -
+ * if it is embedded inside another structure and no callback is provided,
+ * it must be the first member of its parent struct.
+ *
+ * The fence-completed notification is called before any listeners upon the
+ * fence are signaled, or any waiters woken. You can defer their wake up by
+ * returning NOTIFY_OK from the fence-completed notification and calling
+ * kfence_wake_up_all() later when ready.
+ */
+
+static DEFINE_SPINLOCK(kfence_lock);
+
+/*
+ * Invoke the notification callback packed into fence->flags.
+ *
+ * The callback pointer shares the flags word with flag bits; KFENCE_MASK
+ * selects the pointer portion. Callers check that a callback is present
+ * before using this helper.
+ *
+ * Returns whatever the callback returns.
+ */
+static int __kfence_notify(struct kfence *fence)
+{
+	unsigned long callback = fence->flags & KFENCE_MASK;
+
+	return ((kfence_notify_t)callback)(fence);
+}
+
+/*
+ * kref release handler, called when the last reference is dropped.
+ *
+ * Warns if the fence is still waiting upon events. A fence without a
+ * callback is handed straight to kfree(); otherwise the callback is run for
+ * the release notification and is expected to return NOTIFY_DONE.
+ */
+static void kfence_free(struct kref *kref)
+{
+	struct kfence *fence = container_of(kref, typeof(*fence), kref);
+
+	WARN_ON(atomic_read(&fence->pending) > 0);
+
+	if (!(fence->flags & KFENCE_MASK)) {
+		kfree(fence);
+		return;
+	}
+
+	WARN_ON(__kfence_notify(fence) != NOTIFY_DONE);
+}
+
+/**
+ * kfence_put - release a reference to a kfence
+ * @fence: the kfence being disposed of
+ *
+ * kfence_put() decrements the reference count on the @fence, and when
+ * it hits zero the fence will be freed.
+ *
+ * Freeing is delegated to kfence_free(): if the fence was initialised with
+ * a callback, that callback is invoked for the release notification,
+ * otherwise the fence is handed to kfree().
+ */
+void kfence_put(struct kfence *fence)
+{
+ kref_put(&fence->kref, kfence_free);
+}
+EXPORT_SYMBOL_GPL(kfence_put);
+
+/**
+ * kfence_get - acquire a reference to a kfence
+ * @fence: the kfence being used
+ *
+ * Each reference acquired here should later be released with kfence_put().
+ * The pointer is handed back so that the call can be used inline when
+ * storing the fence.
+ *
+ * Returns the pointer to the kfence, with its reference count incremented.
+ */
+struct kfence *kfence_get(struct kfence *fence)
+{
+ kref_get(&fence->kref);
+ return fence;
+}
+EXPORT_SYMBOL_GPL(kfence_get);
+
+/*
+ * Mark the fence as passed and release everything queued on its waitqueue.
+ *
+ * The pending count is decremented once more; callers are expected to
+ * arrive here with it at zero, so it goes negative and kfence_done()
+ * starts reporting true.
+ *
+ * If @continuation is NULL the waitqueue entries are woken here, looping
+ * until the list is empty. Otherwise the entries are spliced onto the tail
+ * of @continuation and left for the owner of that list to wake.
+ */
+static void __kfence_wake_up_all(struct kfence *fence,
+ struct list_head *continuation)
+{
+ wait_queue_head_t *x = &fence->wait;
+ unsigned long flags;
+
+ atomic_dec(&fence->pending);
+
+ /*
+ * To prevent unbounded recursion as we traverse the graph of kfences,
+ * we move the task_list of the next ready fence to the tail of the
+ * original fence's task_list (and so its entries are added to the
+ * list to be woken).
+ */
+ smp_mb__before_spinlock();
+ /*
+ * NOTE(review): the lock subclass differs between the top-level call
+ * (1) and the continuation case (2), presumably because the latter
+ * runs while another fence's waitqueue lock is held - confirm.
+ */
+ spin_lock_irqsave_nested(&x->lock, flags, 1 + !!continuation);
+ if (continuation) {
+ list_splice_tail_init(&x->task_list, continuation);
+ } else {
+ /* Entries may be appended while waking, so loop until empty. */
+ while (!list_empty(&x->task_list))
+ __wake_up_locked_key(x, TASK_NORMAL, &x->task_list);
+ }
+ spin_unlock_irqrestore(&x->lock, flags);
+}
+
+/**
+ * kfence_wake_up_all - wake all waiters upon a fence
+ * @fence: the kfence to signal
+ *
+ * If the fence-complete notification is deferred, when the callback is
+ * complete it should call kfence_wake_up_all() to wake up all waiters
+ * upon the fence.
+ *
+ * It is invalid to call kfence_wake_up_all() at any time other than from
+ * inside a deferred fence-complete notification. A warning is emitted if
+ * the pending event count of @fence is not exactly zero on entry.
+ */
+void kfence_wake_up_all(struct kfence *fence)
+{
+	/* A deferred fence sits at a pending count of exactly zero. */
+	WARN_ON(atomic_read(&fence->pending) != 0);
+	__kfence_wake_up_all(fence, NULL);
+}
+/*
+ * Exported like the rest of the public kfence API so that modular users
+ * that defer their fence-complete notification can link against it.
+ */
+EXPORT_SYMBOL_GPL(kfence_wake_up_all);
+
+/*
+ * Drop one pending event from @fence. When that was the last one, run the
+ * fence-complete notification (if a callback is installed) and, unless the
+ * callback defers by returning something other than NOTIFY_DONE, release
+ * the waiters. @continuation is passed through to __kfence_wake_up_all().
+ */
+static void __kfence_complete(struct kfence *fence,
+			      struct list_head *continuation)
+{
+	if (!atomic_dec_and_test(&fence->pending))
+		return;
+
+	if (fence->flags & KFENCE_MASK) {
+		if (__kfence_notify(fence) != NOTIFY_DONE)
+			return;
+	}
+
+	__kfence_wake_up_all(fence, continuation);
+}
+
+/**
+ * kfence_await - add one more event for the fence to wait upon
+ * @fence: the kfence
+ *
+ * kfence_await() records that the @fence is waiting upon the completion of
+ * an additional event; a fence may wait upon many events concurrently.
+ * Every call must be paired with a kfence_complete() once that event has
+ * occurred.
+ *
+ * A warning is emitted if the fence was not already awaiting at least one
+ * event.
+ */
+void kfence_await(struct kfence *fence)
+{
+	int count = atomic_inc_return(&fence->pending);
+
+	WARN_ON(count <= 1);
+}
+EXPORT_SYMBOL_GPL(kfence_await);
+
+/**
+ * kfence_complete - signal that one awaited event has occurred
+ * @fence: the kfence
+ *
+ * Decrements the count of events the @fence is waiting upon. Once every
+ * event source has completed, i.e. the count hits zero, all waiters upon
+ * the @fence are woken up.
+ *
+ * Calling this on a fence that has already been passed triggers a warning
+ * and is otherwise ignored.
+ */
+void kfence_complete(struct kfence *fence)
+{
+	if (!WARN_ON(kfence_done(fence)))
+		__kfence_complete(fence, NULL);
+}
+EXPORT_SYMBOL_GPL(kfence_complete);
+
+/**
+ * kfence_wait - wait upon a fence to be completed
+ * @fence: the kfence to wait upon
+ *
+ * Blocks (uninterruptibly waits) until the @fence event counter reaches zero
+ * and then also waits for the fence-completed notification to finish.
+ *
+ * The wait condition is kfence_done(), so the call returns without sleeping
+ * if the fence has already been passed. As this may sleep it must only be
+ * called from a context that is allowed to block.
+ */
+void kfence_wait(struct kfence *fence)
+{
+ wait_event(fence->wait, kfence_done(fence));
+}
+EXPORT_SYMBOL_GPL(kfence_wait);
+
+/**
+ * kfence_init - initialize a fence for embedded use within a struct
+ * @fence: this kfence
+ * @fn: a callback function for when the fence is complete, and when the
+ * fence is released (may be NULL)
+ *
+ * This function initialises the @fence for use embedded within a parent
+ * structure. The optional @fn hook is first called when the fence is completed
+ * (when its pending event count hits 0) and again when the fence is
+ * to be freed. Note that the @fn will be called from atomic context. The @fn
+ * is stored inside the fence mixed with some flags, and so the @fn must
+ * be aligned using the __kfence_call function attribute.
+ *
+ * If the @fn is not provided, the kfence must be the first member in its
+ * parent struct as it will be freed using kfree().
+ *
+ * fence-complete notification: @fn will be called when the pending event
+ * count hits 0, however the fence is not completed unless the callback
+ * returns NOTIFY_DONE. During this notification callback kfence_done()
+ * reports false. You can suspend completion of the fence by returning
+ * NOTIFY_OK instead and then later calling kfence_wake_up_all().
+ *
+ * fence-release notification: @fn will be called when the reference count
+ * hits 0, kfence_done() will report true.
+ *
+ * The fence starts with a single reference and a single pending event; the
+ * latter is released by calling kfence_complete() once setup is finished.
+ */
+void kfence_init(struct kfence *fence, kfence_notify_t fn)
+{
+ /* The callback pointer must not overlap the bits reserved for flags. */
+ BUG_ON((unsigned long)fn & ~KFENCE_MASK);
+
+ init_waitqueue_head(&fence->wait);
+ kref_init(&fence->kref);
+ /* Start off awaiting one event so the fence cannot pass during setup. */
+ atomic_set(&fence->pending, 1);
+ fence->flags = (unsigned long)fn;
+}
+EXPORT_SYMBOL_GPL(kfence_init);
+
+/*
+ * Waitqueue callback installed by __kfence_create_wq().
+ *
+ * @wq->private is the kfence that was waiting; @key, when non-NULL, is the
+ * list onto which that fence's own waitqueue entries are spliced should it
+ * now be passed (see __kfence_wake_up_all()). @mode and @flags are unused.
+ *
+ * Undoes __kfence_create_wq(): completes the await, drops the reference
+ * held on the waiting fence and frees the entry. Always returns 0.
+ */
+static int kfence_wake(wait_queue_t *wq, unsigned mode, int flags, void *key)
+{
+ list_del(&wq->task_list);
+ __kfence_complete(wq->private, key);
+ /* Only drop the reference after the fence has been signaled. */
+ kfence_put(wq->private);
+ kfree(wq);
+ return 0;
+}
+
+/*
+ * Recursively walk the fences waiting upon @fence, looking for @signaler.
+ *
+ * Only waitqueue entries whose callback is kfence_wake() are followed, as
+ * for those wq->private is the waiting kfence. KFENCE_CHECKED_BIT marks the
+ * fences already visited so that each is inspected once; the bits are
+ * cleared afterwards by __kfence_clear_checked_bit(). The non-atomic bit
+ * operations are serialised by the caller holding kfence_lock.
+ *
+ * Returns true if @signaler is @fence itself or is reachable from it.
+ */
+static bool __kfence_check_if_after(struct kfence *fence,
+ const struct kfence * const signaler)
+{
+ wait_queue_t *wq;
+
+ /* Already visited during this walk. */
+ if (__test_and_set_bit(KFENCE_CHECKED_BIT, &fence->flags))
+ return false;
+
+ if (fence == signaler)
+ return true;
+
+ list_for_each_entry(wq, &fence->wait.task_list, task_list) {
+ /* Skip waiters that are not kfences (e.g. sleeping tasks). */
+ if (wq->func != kfence_wake)
+ continue;
+
+ if (__kfence_check_if_after(wq->private, signaler))
+ return true;
+ }
+
+ return false;
+}
+
+/*
+ * Undo the marking left behind by __kfence_check_if_after(): clear
+ * KFENCE_CHECKED_BIT on @fence and, recursively, on the kfences waiting
+ * upon it. The walk stops at any fence whose bit is already clear.
+ */
+static void __kfence_clear_checked_bit(struct kfence *fence)
+{
+ wait_queue_t *wq;
+
+ if (!__test_and_clear_bit(KFENCE_CHECKED_BIT, &fence->flags))
+ return;
+
+ list_for_each_entry(wq, &fence->wait.task_list, task_list) {
+ /* Only kfence waiters carry a kfence in wq->private. */
+ if (wq->func != kfence_wake)
+ continue;
+
+ __kfence_clear_checked_bit(wq->private);
+ }
+}
+
+/*
+ * Report whether @signaler already (transitively) waits upon @fence, in
+ * which case making @fence wait upon @signaler would create a cycle.
+ *
+ * The check is only performed when CONFIG_KFENCE_CHECK_DAG is enabled;
+ * otherwise false is returned unconditionally. The graph walk and the
+ * subsequent clearing of the visited marks run under kfence_lock.
+ */
+static bool kfence_check_if_after(struct kfence *fence,
+				  const struct kfence * const signaler)
+{
+	unsigned long irqflags;
+	bool found = false;
+
+	if (config_enabled(CONFIG_KFENCE_CHECK_DAG)) {
+		spin_lock_irqsave(&kfence_lock, irqflags);
+		found = __kfence_check_if_after(fence, signaler);
+		__kfence_clear_checked_bit(fence);
+		spin_unlock_irqrestore(&kfence_lock, irqflags);
+	}
+
+	return found;
+}
+
+/*
+ * Allocate a waitqueue entry that signals @fence when it is woken.
+ *
+ * On success the entry holds a reference to @fence and @fence awaits one
+ * more event; both are released by kfence_wake(). Returns NULL, leaving
+ * @fence untouched, if the allocation fails.
+ */
+static wait_queue_t *__kfence_create_wq(struct kfence *fence, gfp_t gfp)
+{
+	wait_queue_t *entry = kmalloc(sizeof(*entry), gfp);
+
+	if (likely(entry)) {
+		INIT_LIST_HEAD(&entry->task_list);
+		entry->flags = 0;
+		entry->func = kfence_wake;
+		entry->private = kfence_get(fence);
+
+		kfence_await(fence);
+	}
+
+	return entry;
+}
+
+/**
+ * kfence_await_kfence - set one fence to wait upon another
+ * @fence: this kfence
+ * @signaler: target kfence to wait upon
+ * @gfp: the allowed allocation mask
+ *
+ * kfence_await_kfence() causes the @fence to asynchronously wait upon the
+ * completion of @signaler.
+ *
+ * If the tracking structure cannot be allocated and @gfp allows blocking,
+ * the caller instead sleeps until @signaler has completed and 0 is
+ * returned; if @gfp does not allow blocking, -ENOMEM is returned.
+ *
+ * When CONFIG_KFENCE_CHECK_DAG is enabled, -EINVAL is returned if the wait
+ * would introduce a cycle into the dependency graph.
+ *
+ * Returns 1 if the @fence was added to the waitqueue of @signaler, 0
+ * if @signaler was already complete, or a negative error code.
+ */
+int kfence_await_kfence(struct kfence *fence,
+ struct kfence *signaler,
+ gfp_t gfp)
+{
+ wait_queue_t *wq;
+ unsigned long flags;
+ int pending;
+
+ /* Nothing to wait for if the signaler has already been passed. */
+ if (kfence_done(signaler))
+ return 0;
+
+ /* The dependency graph must be acyclic. */
+ if (unlikely(kfence_check_if_after(fence, signaler)))
+ return -EINVAL;
+
+ wq = __kfence_create_wq(fence, gfp);
+ if (unlikely(!wq)) {
+ if (!gfpflags_allow_blocking(gfp))
+ return -ENOMEM;
+
+ /* Fall back to a synchronous wait when we may sleep. */
+ kfence_wait(signaler);
+ return 0;
+ }
+
+ spin_lock_irqsave(&signaler->wait.lock, flags);
+ /* Recheck under the lock: the signaler may have completed meanwhile. */
+ if (likely(!kfence_done(signaler))) {
+ __add_wait_queue_tail(&signaler->wait, wq);
+ pending = 1;
+ } else {
+ /* Too late to queue: undo the await/reference and free wq. */
+ kfence_wake(wq, 0, 0, NULL);
+ pending = 0;
+ }
+ spin_unlock_irqrestore(&signaler->wait.lock, flags);
+
+ return pending;
+}
+EXPORT_SYMBOL_GPL(kfence_await_kfence);
+
+/**
+ * kfence_await_completion - set the fence to wait upon a completion
+ * @fence: this kfence
+ * @x: target completion to wait upon
+ * @gfp: the allowed allocation mask
+ *
+ * kfence_await_completion() causes the @fence to asynchronously wait upon
+ * the completion.
+ *
+ * If the tracking structure cannot be allocated and @gfp allows blocking,
+ * the caller instead sleeps in wait_for_completion() and 0 is returned;
+ * if @gfp does not allow blocking, -ENOMEM is returned.
+ *
+ * Returns 1 if the @fence was added to the waitqueue of @x, 0
+ * if @x was already complete, or a negative error code.
+ */
+int kfence_await_completion(struct kfence *fence,
+ struct completion *x,
+ gfp_t gfp)
+{
+ wait_queue_t *wq;
+ unsigned long flags;
+ int pending;
+
+ /* Nothing to wait for if the completion has already been signaled. */
+ if (completion_done(x))
+ return 0;
+
+ wq = __kfence_create_wq(fence, gfp);
+ if (unlikely(!wq)) {
+ if (!gfpflags_allow_blocking(gfp))
+ return -ENOMEM;
+
+ /* Fall back to a synchronous wait when we may sleep. */
+ wait_for_completion(x);
+ return 0;
+ }
+
+ spin_lock_irqsave(&x->wait.lock, flags);
+ /* Recheck under the lock: the completion may have fired meanwhile. */
+ if (likely(!READ_ONCE(x->done))) {
+ __add_wait_queue_tail(&x->wait, wq);
+ pending = 1;
+ } else {
+ /* Too late to queue: undo the await/reference and free wq. */
+ kfence_wake(wq, 0, 0, NULL);
+ pending = 0;
+ }
+ spin_unlock_irqrestore(&x->wait.lock, flags);
+
+ return pending;
+}
+EXPORT_SYMBOL_GPL(kfence_await_completion);
+
+/*
+ * Tracking structure for kfence_await_hrtimer(): pairs the timer with the
+ * fence to complete when it fires. Allocated in kfence_await_hrtimer() and
+ * freed by timer_kfence_wake().
+ */
+struct timer_cb {
+ struct hrtimer timer; /* timer whose expiry signals the fence */
+ struct kfence *fence; /* referenced fence awaiting the timer */
+};
+
+/*
+ * hrtimer callback for kfence_await_hrtimer(): completes the await taken
+ * on the fence, drops the reference held by the timer_cb and frees it.
+ * The timer is one-shot, so HRTIMER_NORESTART is always returned.
+ */
+static enum hrtimer_restart
+timer_kfence_wake(struct hrtimer *timer)
+{
+ struct timer_cb *cb = container_of(timer, typeof(*cb), timer);
+
+ kfence_complete(cb->fence);
+ kfence_put(cb->fence);
+ kfree(cb);
+
+ return HRTIMER_NORESTART;
+}
+
+/**
+ * kfence_await_hrtimer - set the fence to wait for a period of time
+ * @fence: this kfence
+ * @clock: which clock to program
+ * @mode: delay given as relative or absolute
+ * @delay: how long or until what time to wait
+ * @slack: how much slack that may be applied to the delay
+ * @gfp: the allowed allocation mask
+ *
+ * kfence_await_hrtimer() causes the @fence to wait for a period of time, or
+ * until a certain point in time. It is a convenience wrapper around
+ * hrtimer_start_range_ns(). For more details on @clock, @mode, @delay and
+ * @slack please consult the hrtimer documentation.
+ *
+ * Returns 1 if the delay was successfully added to the @fence, or a negative
+ * error code on failure (-ENOMEM if the timer could not be allocated).
+ */
+int kfence_await_hrtimer(struct kfence *fence,
+ clockid_t clock, enum hrtimer_mode mode,
+ ktime_t delay, u64 slack,
+ gfp_t gfp)
+{
+ struct timer_cb *cb;
+
+ cb = kmalloc(sizeof(*cb), gfp);
+ if (!cb)
+ return -ENOMEM;
+
+ /* Both the reference and the await are released by timer_kfence_wake(). */
+ cb->fence = kfence_get(fence);
+ kfence_await(fence);
+
+ hrtimer_init(&cb->timer, clock, mode);
+ cb->timer.function = timer_kfence_wake;
+
+ hrtimer_start_range_ns(&cb->timer, delay, slack, mode);
+ return 1;
+}
+EXPORT_SYMBOL_GPL(kfence_await_hrtimer);
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index eb8917a71489..d8297d313a2c 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1733,6 +1733,29 @@ config KPROBES_SANITY_TEST
Say N if you are unsure.
+config KFENCE_SELFTEST
+ tristate "Kfence self tests"
+ depends on DEBUG_KERNEL
+ default n
+ help
+ This option provides a kernel module that can be used to test
+ the kfence handling. This option is not useful for distributions
+ or general kernels, but only for kernel developers working on the
+ kfence and async_domain facility.
+
+ Say N if you are unsure.
+
+config KFENCE_CHECK_DAG
+ bool "Check that kfence are only used with directed acyclic graphs"
+ depends on DEBUG_KERNEL
+ default n
+ help
+ This option enforces that kfences are only used with directed acyclic
+ graphs (DAG), as otherwise cycles in the graph mean that they
+ will never be signaled (or the corresponding task executed).
+
+ Say N if you are unsure.
+
config BACKTRACE_SELF_TEST
tristate "Self test for the backtrace code"
depends on DEBUG_KERNEL
diff --git a/lib/Makefile b/lib/Makefile
index cfa68eb269e4..ca9ce8e700eb 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -25,6 +25,7 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \
earlycpio.o seq_buf.o nmi_backtrace.o nodemask.o
obj-$(CONFIG_ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS) += usercopy.o
+obj-$(CONFIG_KFENCE_SELFTEST) += test-kfence.o
lib-$(CONFIG_MMU) += ioremap.o
lib-$(CONFIG_SMP) += cpumask.o
lib-$(CONFIG_HAS_DMA) += dma-noop.o
diff --git a/lib/test-kfence.c b/lib/test-kfence.c
new file mode 100644
index 000000000000..1b0853fda7c3
--- /dev/null
+++ b/lib/test-kfence.c
@@ -0,0 +1,580 @@
+/*
+ * Test cases for kfence facility.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/delay.h>
+#include <linux/kfence.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+
+/*
+ * Allocate and initialise a standalone kfence without a callback, so that
+ * it is released with kfree() on the final kfence_put().
+ *
+ * Returns the new fence, or NULL if the allocation failed.
+ */
+static struct kfence *alloc_kfence(void)
+{
+	struct kfence *fence = kmalloc(sizeof(*fence), GFP_KERNEL);
+
+	if (fence)
+		kfence_init(fence, NULL);
+
+	return fence;
+}
+
+/*
+ * Exercise the basic life cycle of a freshly initialised fence: it must
+ * not be done before kfence_complete(), must be done afterwards, and must
+ * still be done after a (non-blocking) kfence_wait().
+ *
+ * Returns 0 on success, -EINVAL on any unexpected state.
+ */
+static int __init __test_self(struct kfence *fence)
+{
+	if (kfence_done(fence))
+		return -EINVAL;
+
+	kfence_complete(fence);
+	if (!kfence_done(fence))
+		return -EINVAL;
+
+	kfence_wait(fence);
+
+	return kfence_done(fence) ? 0 : -EINVAL;
+}
+
+/*
+ * Test kfence signaling and completion testing on a heap allocated fence.
+ *
+ * Returns 0 on success, -ENOMEM if the fence could not be allocated, or
+ * the error reported by __test_self().
+ */
+static int __init test_self(void)
+{
+	struct kfence *fence;
+	int err;
+
+	pr_debug("%s\n", __func__);
+
+	fence = alloc_kfence();
+	if (fence) {
+		err = __test_self(fence);
+		kfence_put(fence);
+	} else {
+		err = -ENOMEM;
+	}
+
+	return err;
+}
+
+/* Fence embedded in a parent struct, used to check the callback is run. */
+struct test_stack {
+ struct kfence fence; /* the embedded fence under test */
+ bool seen; /* set by fence_callback() when it is invoked */
+};
+
+/*
+ * Notification callback for struct test_stack: records that it has been
+ * invoked and lets the fence complete immediately.
+ */
+static int __init __kfence_call fence_callback(struct kfence *fence)
+{
+	struct test_stack *ts = container_of(fence, struct test_stack, fence);
+
+	ts->seen = true;
+	return NOTIFY_DONE;
+}
+
+/*
+ * Test kfence signaling and completion testing with a fence that lives on
+ * the stack inside a parent struct, and check that its callback is run.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+static int __init test_stack(void)
+{
+	struct test_stack ts;
+	int err;
+
+	pr_debug("%s\n", __func__);
+
+	ts.seen = false;
+	kfence_init(&ts.fence, fence_callback);
+
+	err = __test_self(&ts.fence);
+	if (err < 0)
+		return err;
+
+	if (ts.seen)
+		return 0;
+
+	pr_err("fence callback not executed\n");
+	return -EINVAL;
+}
+
+/*
+ * Test detection of cycles within the kfence graphs.
+ *
+ * Builds the chain "A waits on B waits on C" step by step and checks that
+ * each attempt to close a loop (A on A, B on A, C on A) is rejected with
+ * -EINVAL. The test is skipped unless CONFIG_KFENCE_CHECK_DAG is enabled.
+ *
+ * Returns 0 on success (or when skipped), -ENOMEM if a fence could not be
+ * allocated, or -EINVAL if a cycle went undetected.
+ */
+static int __init test_dag(void)
+{
+	struct kfence *A, *B, *C;
+	int ret = -EINVAL;
+
+	pr_debug("%s\n", __func__);
+
+	if (!config_enabled(CONFIG_KFENCE_CHECK_DAG))
+		return 0;
+
+	/* Allocate everything up front so there is a single failure point. */
+	A = alloc_kfence();
+	B = alloc_kfence();
+	C = alloc_kfence();
+	if (!A || !B || !C) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	if (kfence_await_kfence(A, A, GFP_KERNEL) != -EINVAL) {
+		pr_err("recursive cycle not detected (AA)\n");
+		goto out;
+	}
+
+	kfence_await_kfence(A, B, GFP_KERNEL);
+	if (kfence_await_kfence(B, A, GFP_KERNEL) != -EINVAL) {
+		pr_err("single depth cycle not detected (BAB)\n");
+		goto out;
+	}
+
+	kfence_await_kfence(B, C, GFP_KERNEL);
+	if (kfence_await_kfence(C, A, GFP_KERNEL) != -EINVAL) {
+		pr_err("cycle not detected (BA, CB, AC)\n");
+		goto out;
+	}
+
+	ret = 0;
+out:
+	/*
+	 * Release the initial await and our reference on every fence that was
+	 * allocated, on failure as well as on success. If a cycle did slip
+	 * through, the fences involved can never complete and stay allocated.
+	 */
+	if (A) {
+		kfence_complete(A);
+		kfence_put(A);
+	}
+	if (B) {
+		kfence_complete(B);
+		kfence_put(B);
+	}
+	if (C) {
+		kfence_complete(C);
+		kfence_put(C);
+	}
+
+	return ret;
+}
+
+/*
+ * Test kfence (A) waiting on an event source (B).
+ *
+ * A must remain pending after its own kfence_complete() for as long as B
+ * is outstanding; completing B must then pass both fences.
+ *
+ * NOTE: the failure paths return without releasing the fences.
+ */
+static int __init test_AB(void)
+{
+	struct kfence *A, *B;
+	int err;
+
+	pr_debug("%s\n", __func__);
+
+	A = alloc_kfence();
+	B = alloc_kfence();
+	if (!(A && B))
+		return -ENOMEM;
+
+	/* The await must have been queued (1), not skipped (0). */
+	err = kfence_await_kfence(A, B, GFP_KERNEL);
+	if (err <= 0)
+		return err ? err : -EINVAL;
+
+	kfence_complete(A);
+	if (kfence_done(A))
+		return -EINVAL;
+
+	kfence_complete(B);
+	if (!kfence_done(B) || !kfence_done(A))
+		return -EINVAL;
+
+	kfence_put(B);
+	kfence_put(A);
+	return 0;
+}
+
+/*
+ * Test a chain of fences, A waits on B who waits on C.
+ *
+ * Neither A nor B may pass until C has been completed, at which point the
+ * whole chain must be done.
+ *
+ * NOTE: the failure paths return without releasing the fences.
+ */
+static int __init test_ABC(void)
+{
+	struct kfence *A, *B, *C;
+	int err;
+
+	pr_debug("%s\n", __func__);
+
+	A = alloc_kfence();
+	B = alloc_kfence();
+	C = alloc_kfence();
+	if (!(A && B && C))
+		return -ENOMEM;
+
+	/* Each await must have been queued (1), not skipped (0). */
+	err = kfence_await_kfence(A, B, GFP_KERNEL);
+	if (err <= 0)
+		return err ? err : -EINVAL;
+
+	err = kfence_await_kfence(B, C, GFP_KERNEL);
+	if (err <= 0)
+		return err ? err : -EINVAL;
+
+	kfence_complete(A);
+	if (kfence_done(A))
+		return -EINVAL;
+
+	kfence_complete(B);
+	if (kfence_done(B) || kfence_done(A))
+		return -EINVAL;
+
+	kfence_complete(C);
+	if (!kfence_done(C) || !kfence_done(B) || !kfence_done(A))
+		return -EINVAL;
+
+	kfence_put(C);
+	kfence_put(B);
+	kfence_put(A);
+	return 0;
+}
+
+/*
+ * Test multiple fences (AB) waiting on a single event (C).
+ *
+ * Both A and B must remain pending until C is completed, after which all
+ * three must be done.
+ *
+ * NOTE: the failure paths return without releasing the fences.
+ */
+static int __init test_AB_C(void)
+{
+	struct kfence *A, *B, *C;
+	int err;
+
+	pr_debug("%s\n", __func__);
+
+	A = alloc_kfence();
+	B = alloc_kfence();
+	C = alloc_kfence();
+	if (!(A && B && C))
+		return -ENOMEM;
+
+	/* Each await must have been queued (1), not skipped (0). */
+	err = kfence_await_kfence(A, C, GFP_KERNEL);
+	if (err <= 0)
+		return err ? err : -EINVAL;
+
+	err = kfence_await_kfence(B, C, GFP_KERNEL);
+	if (err <= 0)
+		return err ? err : -EINVAL;
+
+	kfence_complete(A);
+	kfence_complete(B);
+	if (kfence_done(A) || kfence_done(B))
+		return -EINVAL;
+
+	kfence_complete(C);
+	if (!kfence_done(C) || !kfence_done(B) || !kfence_done(A))
+		return -EINVAL;
+
+	kfence_put(C);
+	kfence_put(B);
+	kfence_put(A);
+	return 0;
+}
+
+/*
+ * Test multiple event sources (A,B) for a single fence (C).
+ *
+ * C must remain pending after its own kfence_complete() and only pass
+ * once both A and B have been completed.
+ *
+ * NOTE: the failure paths return without releasing the fences.
+ */
+static int __init test_C_AB(void)
+{
+	struct kfence *A, *B, *C;
+	int err;
+
+	pr_debug("%s\n", __func__);
+
+	A = alloc_kfence();
+	B = alloc_kfence();
+	C = alloc_kfence();
+	if (!(A && B && C))
+		return -ENOMEM;
+
+	/* Each await must have been queued (1), not skipped (0). */
+	err = kfence_await_kfence(C, A, GFP_KERNEL);
+	if (err <= 0)
+		return err ? err : -EINVAL;
+
+	err = kfence_await_kfence(C, B, GFP_KERNEL);
+	if (err <= 0)
+		return err ? err : -EINVAL;
+
+	kfence_complete(C);
+	if (kfence_done(C))
+		return -EINVAL;
+
+	kfence_complete(A);
+	kfence_complete(B);
+	if (!kfence_done(A) || !kfence_done(B) || !kfence_done(C))
+		return -EINVAL;
+
+	kfence_put(C);
+	kfence_put(B);
+	kfence_put(A);
+	return 0;
+}
+
+/*
+ * Test use of a completion as an event source for kfences.
+ *
+ * The fence must remain pending after its own kfence_complete() until the
+ * completion has been signaled with complete_all().
+ *
+ * NOTE: the failure paths return without releasing the fence.
+ */
+static int __init test_completion(void)
+{
+	struct completion x;
+	struct kfence *fence;
+	int err;
+
+	pr_debug("%s\n", __func__);
+
+	init_completion(&x);
+
+	fence = alloc_kfence();
+	if (!fence)
+		return -ENOMEM;
+
+	/* The await must have been queued (1), not skipped (0). */
+	err = kfence_await_completion(fence, &x, GFP_KERNEL);
+	if (err <= 0)
+		return err ? err : -EINVAL;
+
+	kfence_complete(fence);
+	if (kfence_done(fence))
+		return -EINVAL;
+
+	complete_all(&x);
+	if (!kfence_done(fence))
+		return -EINVAL;
+
+	kfence_put(fence);
+	return 0;
+}
+
+/*
+ * Test use of a hrtimer as an event source for kfences.
+ *
+ * Arms a 1ms relative timer on the fence, waits for the fence and checks
+ * that at least one whole millisecond elapsed in the meantime.
+ *
+ * NOTE: a failure to arm the timer returns without releasing the fence.
+ */
+static int __init test_delay(void)
+{
+	struct kfence *fence;
+	ktime_t start, elapsed;
+	int err;
+
+	pr_debug("%s\n", __func__);
+
+	fence = alloc_kfence();
+	if (!fence)
+		return -ENOMEM;
+
+	start = ktime_get();
+
+	err = kfence_await_hrtimer(fence, CLOCK_MONOTONIC, HRTIMER_MODE_REL,
+				   ms_to_ktime(1), 1 << 10,
+				   GFP_KERNEL);
+	if (err <= 0)
+		return err ? err : -EINVAL;
+
+	kfence_complete(fence);
+	kfence_wait(fence);
+
+	elapsed = ktime_sub(ktime_get(), start);
+	kfence_put(fence);
+
+	if (ktime_to_ms(elapsed))
+		return 0;
+
+	pr_err("kfence woke too early, delay was only %lldns\n",
+	       (long long)ktime_to_ns(elapsed));
+	return -EINVAL;
+}
+
+/* State shared between test_ipc() and its worker, task_ipc(). */
+struct task_ipc {
+ struct work_struct work; /* the worker item, runs task_ipc() */
+ struct completion started; /* signaled once the worker is running */
+ struct kfence *in, *out; /* worker waits on in, then completes out */
+ int value; /* set to 1 by the worker after in is passed */
+};
+
+/*
+ * Worker for test_ipc(): announces that it has started, sleeps until the
+ * "in" fence is passed, publishes value = 1 and only then completes the
+ * "out" fence, so a waiter on "out" must observe the new value.
+ */
+static void __init task_ipc(struct work_struct *work)
+{
+ struct task_ipc *ipc = container_of(work, typeof(*ipc), work);
+
+ complete(&ipc->started);
+
+ kfence_wait(ipc->in);
+ /* Store with a full barrier before signaling the out fence. */
+ smp_store_mb(ipc->value, 1);
+ kfence_complete(ipc->out);
+}
+
+/*
+ * Test a long chain of fences.
+ *
+ * Each fence waits upon its predecessor. Every fence but the first is then
+ * completed, none of which may pass while the head of the chain is still
+ * outstanding; completing the first fence must pass the entire chain.
+ *
+ * Returns 0 on success, -ENOMEM on allocation failure, -EINVAL if a fence
+ * is found in an unexpected state, or the error from kfence_await_kfence().
+ * The fences and the array tracking them are released on every path.
+ */
+static int __init test_chain(void)
+{
+	const int nfences = 4096;
+	struct kfence **fences;
+	int pending = 0; /* fences[0..pending) still hold their initial await */
+	int ret, i;
+
+	pr_debug("%s\n", __func__);
+
+	/* Zeroed so that the cleanup can tell which slots were populated. */
+	fences = kcalloc(nfences, sizeof(*fences), GFP_KERNEL);
+	if (!fences)
+		return -ENOMEM;
+
+	for (i = 0; i < nfences; i++) {
+		fences[i] = alloc_kfence();
+		if (!fences[i]) {
+			ret = -ENOMEM;
+			goto out;
+		}
+		pending = i + 1;
+
+		if (i > 0) {
+			ret = kfence_await_kfence(fences[i],
+						  fences[i - 1],
+						  GFP_KERNEL);
+			if (ret < 0)
+				goto out;
+		}
+	}
+
+	ret = -EINVAL;
+
+	/* Complete from the tail; nothing may pass before the head does. */
+	for (i = nfences; --i; ) {
+		kfence_complete(fences[i]);
+		pending = i;
+		if (kfence_done(fences[i]))
+			goto out;
+	}
+
+	kfence_complete(fences[0]);
+	pending = 0;
+	for (i = 0; i < nfences; i++) {
+		if (!kfence_done(fences[i]))
+			goto out;
+	}
+
+	ret = 0;
+out:
+	/*
+	 * Release any initial awaits still held (highest index first, so the
+	 * head of the chain goes last), then drop our references.
+	 */
+	while (pending > 0)
+		kfence_complete(fences[--pending]);
+	for (i = 0; i < nfences && fences[i]; i++)
+		kfence_put(fences[i]);
+	kfree(fences);
+	return ret;
+}
+
+/*
+ * Test use of kfence as an interprocess signaling mechanism.
+ *
+ * A worker (task_ipc()) waits on the "in" fence, sets ipc.value and then
+ * completes the "out" fence. The test checks that the value is not written
+ * before "in" is completed and that it is visible once "out" has passed.
+ *
+ * Returns 0 on success, -ENOMEM if a fence could not be allocated, or
+ * -EINVAL if the ordering was violated.
+ *
+ * NOTE: the -ENOMEM path returns without releasing whichever fence was
+ * successfully allocated.
+ */
+static int __init test_ipc(void)
+{
+ struct task_ipc ipc;
+ int ret = 0;
+
+ /* Test use of kfence as an interprocess signaling mechanism */
+ pr_debug("%s\n", __func__);
+
+ ipc.in = alloc_kfence();
+ ipc.out = alloc_kfence();
+ if (!ipc.in || !ipc.out)
+ return -ENOMEM;
+
+ /* use a completion to avoid chicken-and-egg testing for kfence */
+ init_completion(&ipc.started);
+
+ ipc.value = 0;
+ INIT_WORK(&ipc.work, task_ipc);
+ schedule_work(&ipc.work);
+
+ wait_for_completion(&ipc.started);
+
+ /* Give the worker time to (incorrectly) run past its kfence_wait(). */
+ usleep_range(1000, 2000);
+ if (READ_ONCE(ipc.value)) {
+ pr_err("worker updated value before kfence was signaled\n");
+ ret = -EINVAL;
+ }
+
+ kfence_complete(ipc.in);
+ kfence_wait(ipc.out);
+
+ if (!READ_ONCE(ipc.value)) {
+ pr_err("worker signaled kfence before value was posted\n");
+ ret = -EINVAL;
+ }
+
+ /* ipc lives on our stack: the worker must be finished before we return. */
+ flush_work(&ipc.work);
+ kfence_put(ipc.in);
+ kfence_put(ipc.out);
+ return ret;
+}
+
+/*
+ * Module entry point: run every kfence self-test in turn, stopping at the
+ * first failure.
+ *
+ * Returns 0 if all tests pass, otherwise the negative error code of the
+ * first failing test (which also causes the module load to fail).
+ */
+static int __init test_kfence_init(void)
+{
+	static const struct {
+		int (*run)(void);
+		const char *failure;
+	} tests[] __initconst = {
+		{ test_self, "self failed\n" },
+		{ test_stack, "stack failed\n" },
+		{ test_dag, "DAG checker failed\n" },
+		{ test_AB, "AB failed\n" },
+		{ test_ABC, "ABC failed\n" },
+		{ test_AB_C, "AB_C failed\n" },
+		{ test_C_AB, "C_AB failed\n" },
+		{ test_chain, "chain failed\n" },
+		{ test_ipc, "ipc failed\n" },
+		{ test_completion, "completion failed\n" },
+		{ test_delay, "delay failed\n" },
+	};
+	unsigned int i;
+	int ret;
+
+	pr_info("Testing kfences\n");
+
+	for (i = 0; i < ARRAY_SIZE(tests); i++) {
+		ret = tests[i].run();
+		if (ret < 0) {
+			pr_err("%s", tests[i].failure);
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Module exit hook. Intentionally empty: the tests run to completion inside
+ * test_kfence_init(), so there is nothing to do here.
+ */
+static void __exit test_kfence_cleanup(void)
+{
+}
+
+module_init(test_kfence_init);
+module_exit(test_kfence_cleanup);
+
+MODULE_AUTHOR("Intel Corporation");
+MODULE_LICENSE("GPL");
diff --git a/tools/testing/selftests/lib/kfence.sh b/tools/testing/selftests/lib/kfence.sh
new file mode 100755
index 000000000000..487320c70ed1
--- /dev/null
+++ b/tools/testing/selftests/lib/kfence.sh
@@ -0,0 +1,10 @@
+#!/bin/sh
+# Runs infrastructure tests using test-kfence kernel module
+
+if /sbin/modprobe -q test-kfence; then
+ /sbin/modprobe -q -r test-kfence
+ echo "kfence: ok"
+else
+ echo "kfence: [FAIL]"
+ exit 1
+fi
--
2.8.1
More information about the Intel-gfx-trybot
mailing list