[PATCH] mega
Chris Wilson
chris at chris-wilson.co.uk
Sat Aug 6 07:44:36 UTC 2016
---
Documentation/sync_file.txt | 15 +
drivers/dma-buf/fence-array.c | 1 +
drivers/dma-buf/fence.c | 58 +
drivers/dma-buf/reservation.c | 48 +
drivers/dma-buf/sync_file.c | 200 +-
drivers/gpu/drm/Makefile | 2 +-
drivers/gpu/drm/drm_irq.c | 64 +-
drivers/gpu/drm/drm_mm.c | 791 ++++----
drivers/gpu/drm/drm_vma_manager.c | 46 +-
drivers/gpu/drm/i915/Kconfig | 3 +
drivers/gpu/drm/i915/Makefile | 9 +-
drivers/gpu/drm/i915/i915_cmd_parser.c | 327 ++--
drivers/gpu/drm/i915/i915_debugfs.c | 259 ++-
drivers/gpu/drm/i915/i915_drv.c | 37 +-
drivers/gpu/drm/i915/i915_drv.h | 443 +++--
drivers/gpu/drm/i915/i915_gem.c | 2095 ++++++++++----------
drivers/gpu/drm/i915/i915_gem_batch_pool.c | 31 +-
drivers/gpu/drm/i915/i915_gem_context.c | 191 +-
drivers/gpu/drm/i915/i915_gem_dmabuf.c | 79 +-
drivers/gpu/drm/i915/i915_gem_evict.c | 266 ++-
drivers/gpu/drm/i915/i915_gem_execbuffer.c | 2685 +++++++++++++++-----------
drivers/gpu/drm/i915/i915_gem_fence.c | 472 ++---
drivers/gpu/drm/i915/i915_gem_gtt.c | 579 +++---
drivers/gpu/drm/i915/i915_gem_gtt.h | 67 +-
drivers/gpu/drm/i915/i915_gem_internal.c | 157 ++
drivers/gpu/drm/i915/i915_gem_render_state.c | 159 +-
drivers/gpu/drm/i915/i915_gem_render_state.h | 6 +-
drivers/gpu/drm/i915/i915_gem_request.c | 161 +-
drivers/gpu/drm/i915/i915_gem_request.h | 65 +-
drivers/gpu/drm/i915/i915_gem_shrinker.c | 91 +-
drivers/gpu/drm/i915/i915_gem_stolen.c | 50 +-
drivers/gpu/drm/i915/i915_gem_tiling.c | 85 +-
drivers/gpu/drm/i915/i915_gem_userptr.c | 135 +-
drivers/gpu/drm/i915/i915_gpu_error.c | 698 ++++---
drivers/gpu/drm/i915/i915_guc_submission.c | 142 +-
drivers/gpu/drm/i915/i915_irq.c | 144 +-
drivers/gpu/drm/i915/i915_memcpy.c | 101 +
drivers/gpu/drm/i915/i915_memory.c | 128 ++
drivers/gpu/drm/i915/i915_params.c | 6 +-
drivers/gpu/drm/i915/i915_params.h | 2 +-
drivers/gpu/drm/i915/i915_trace.h | 33 +
drivers/gpu/drm/i915/intel_breadcrumbs.c | 77 +-
drivers/gpu/drm/i915/intel_display.c | 273 ++-
drivers/gpu/drm/i915/intel_drv.h | 29 +-
drivers/gpu/drm/i915/intel_engine_cs.c | 12 +-
drivers/gpu/drm/i915/intel_fbc.c | 17 +-
drivers/gpu/drm/i915/intel_fbdev.c | 40 +-
drivers/gpu/drm/i915/intel_guc.h | 9 +-
drivers/gpu/drm/i915/intel_guc_loader.c | 37 +-
drivers/gpu/drm/i915/intel_lrc.c | 585 +++---
drivers/gpu/drm/i915/intel_lrc.h | 2 -
drivers/gpu/drm/i915/intel_overlay.c | 74 +-
drivers/gpu/drm/i915/intel_pm.c | 5 +-
drivers/gpu/drm/i915/intel_ringbuffer.c | 326 ++--
drivers/gpu/drm/i915/intel_ringbuffer.h | 73 +-
drivers/gpu/drm/i915/intel_runtime_pm.c | 3 +-
drivers/gpu/drm/i915/intel_sprite.c | 8 +-
drivers/gpu/drm/i915/intel_uncore.c | 6 +-
drivers/gpu/drm/sis/sis_mm.c | 6 +-
drivers/gpu/drm/ttm/ttm_bo_manager.c | 15 +-
drivers/gpu/drm/via/via_mm.c | 4 +-
drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf.c | 10 +-
drivers/staging/android/sync_debug.c | 12 +-
include/drm/drm_gem.h | 15 -
include/drm/drm_mm.h | 193 +-
include/drm/drm_vma_manager.h | 2 -
include/linux/fence-array.h | 10 +
include/linux/fence.h | 6 +
include/linux/io-mapping.h | 87 +-
include/linux/kfence.h | 86 +
include/linux/reservation.h | 7 +
include/linux/sync_file.h | 20 +-
include/uapi/drm/i915_drm.h | 15 +-
kernel/Makefile | 2 +-
kernel/kfence.c | 497 +++++
lib/Kconfig.debug | 23 +
lib/Makefile | 1 +
lib/test-kfence.c | 580 ++++++
tools/testing/selftests/lib/kfence.sh | 10 +
79 files changed, 8182 insertions(+), 5926 deletions(-)
create mode 100644 drivers/gpu/drm/i915/i915_gem_internal.c
create mode 100644 drivers/gpu/drm/i915/i915_memcpy.c
create mode 100644 drivers/gpu/drm/i915/i915_memory.c
create mode 100644 include/linux/kfence.h
create mode 100644 kernel/kfence.c
create mode 100644 lib/test-kfence.c
create mode 100755 tools/testing/selftests/lib/kfence.sh
diff --git a/Documentation/sync_file.txt b/Documentation/sync_file.txt
index e8e2ebafe5fa..ae2dbad142ac 100644
--- a/Documentation/sync_file.txt
+++ b/Documentation/sync_file.txt
@@ -64,6 +64,21 @@ The sync_file fd now can be sent to userspace.
If the creation process fail, or the sync_file needs to be released by any
other reason fput(sync_file->file) should be used.
+Receiving Sync Files from Userspace
+-----------------------------------
+
+When userspace needs to send an in-fence to the driver it pass file descriptor
+of the Sync File to the kernel. The kernel then can retrieve the fences
+from it.
+
+Interface:
+ struct fence *sync_file_get_fence(int fd);
+
+
+The function return a struct fence pointer referencing the fence(s) in the Sync
+File.
+
+
References:
[1] struct sync_file in include/linux/sync_file.h
[2] All interfaces mentioned above defined in include/linux/sync_file.h
diff --git a/drivers/dma-buf/fence-array.c b/drivers/dma-buf/fence-array.c
index a8731c853da6..ee500226197b 100644
--- a/drivers/dma-buf/fence-array.c
+++ b/drivers/dma-buf/fence-array.c
@@ -99,6 +99,7 @@ const struct fence_ops fence_array_ops = {
.wait = fence_default_wait,
.release = fence_array_release,
};
+EXPORT_SYMBOL(fence_array_ops);
/**
* fence_array_create - Create a custom fence array
diff --git a/drivers/dma-buf/fence.c b/drivers/dma-buf/fence.c
index 4d51f9e83fa8..97091894ff16 100644
--- a/drivers/dma-buf/fence.c
+++ b/drivers/dma-buf/fence.c
@@ -22,6 +22,7 @@
#include <linux/export.h>
#include <linux/atomic.h>
#include <linux/fence.h>
+#include <linux/kfence.h>
#define CREATE_TRACE_POINTS
#include <trace/events/fence.h>
@@ -530,3 +531,60 @@ fence_init(struct fence *fence, const struct fence_ops *ops,
trace_fence_init(fence);
}
EXPORT_SYMBOL(fence_init);
+
+struct dma_fence_cb {
+ struct fence_cb base;
+ struct kfence *fence;
+};
+
+static void dma_kfence_wake(struct fence *dma, struct fence_cb *data)
+{
+ struct dma_fence_cb *cb = container_of(data, typeof(*cb), base);
+
+ kfence_complete(cb->fence);
+ kfence_put(cb->fence);
+ kfree(cb);
+}
+
+/**
+ * kfence_await_dma_fence - set the fence to wait upon a DMA fence
+ * @fence: this kfence
+ * @dma: target DMA fence to wait upon
+ * @gfp: the allowed allocation type
+ *
+ * kfence_add_dma() causes the @fence to wait upon completion of a DMA fence.
+ *
+ * Returns 1 if the @fence was successfully to the waitqueue of @dma, 0
+ * if @dma was already signaled (and so not added), or a negative error code.
+ */
+int kfence_await_dma_fence(struct kfence *fence, struct fence *dma, gfp_t gfp)
+{
+ struct dma_fence_cb *cb;
+ int ret;
+
+ if (fence_is_signaled(dma))
+ return 0;
+
+ cb = kmalloc(sizeof(*cb), gfp);
+ if (!cb) {
+ if (!gfpflags_allow_blocking(gfp))
+ return -ENOMEM;
+
+ return fence_wait(dma, false);
+ }
+
+ cb->fence = kfence_get(fence);
+ kfence_await(fence);
+
+ ret = fence_add_callback(dma, &cb->base, dma_kfence_wake);
+ if (ret == 0) {
+ ret = 1;
+ } else {
+ dma_kfence_wake(dma, &cb->base);
+ if (ret == -ENOENT) /* fence already signaled */
+ ret = 0;
+ }
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(kfence_await_dma_fence);
diff --git a/drivers/dma-buf/reservation.c b/drivers/dma-buf/reservation.c
index 9566a62ad8e3..138b792af0c3 100644
--- a/drivers/dma-buf/reservation.c
+++ b/drivers/dma-buf/reservation.c
@@ -543,3 +543,51 @@ unlock_retry:
goto retry;
}
EXPORT_SYMBOL_GPL(reservation_object_test_signaled_rcu);
+
+/**
+ * kfence_add_reservation - set the fence to wait upon a reservation_object
+ * @fence: this kfence
+ * @resv: target reservation_object (collection of DMA fences) to wait upon
+ * @write: Wait for read or read/write access
+ * @gfp: the allowed allocation type
+ *
+ * kfence_add_reservation() causes the @fence to wait upon completion of the
+ * reservation object (a collection of DMA fences), either for read access
+ * or for read/write access.
+ *
+ * Returns 1 if the @fence was successfully to the waitqueues of @resv, 0
+ * if @resev was already signaled (and so not added), or a negative error code.
+ */
+int kfence_await_reservation(struct kfence *fence,
+ struct reservation_object *resv,
+ bool write,
+ gfp_t gfp)
+{
+ struct fence *excl, **shared;
+ unsigned int count, i;
+ int ret;
+
+ ret = reservation_object_get_fences_rcu(resv, &excl, &count, &shared);
+ if (ret)
+ return ret;
+
+ if (write) {
+ for (i = 0; i < count; i++) {
+ ret |= kfence_await_dma_fence(fence, shared[i], gfp);
+ if (ret < 0)
+ goto out;
+ }
+ }
+
+ if (excl)
+ ret |= kfence_await_dma_fence(fence, excl, gfp);
+
+out:
+ fence_put(excl);
+ for (i = 0; i < count; i++)
+ fence_put(shared[i]);
+ kfree(shared);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(kfence_await_reservation);
diff --git a/drivers/dma-buf/sync_file.c b/drivers/dma-buf/sync_file.c
index 9aaa608dfe01..240ef224d3b0 100644
--- a/drivers/dma-buf/sync_file.c
+++ b/drivers/dma-buf/sync_file.c
@@ -28,11 +28,11 @@
static const struct file_operations sync_file_fops;
-static struct sync_file *sync_file_alloc(int size)
+static struct sync_file *sync_file_alloc(void)
{
struct sync_file *sync_file;
- sync_file = kzalloc(size, GFP_KERNEL);
+ sync_file = kzalloc(sizeof(*sync_file), GFP_KERNEL);
if (!sync_file)
return NULL;
@@ -45,6 +45,8 @@ static struct sync_file *sync_file_alloc(int size)
init_waitqueue_head(&sync_file->wq);
+ INIT_LIST_HEAD(&sync_file->cb.node);
+
return sync_file;
err:
@@ -54,14 +56,11 @@ err:
static void fence_check_cb_func(struct fence *f, struct fence_cb *cb)
{
- struct sync_file_cb *check;
struct sync_file *sync_file;
- check = container_of(cb, struct sync_file_cb, cb);
- sync_file = check->sync_file;
+ sync_file = container_of(cb, struct sync_file, cb);
- if (atomic_dec_and_test(&sync_file->status))
- wake_up_all(&sync_file->wq);
+ wake_up_all(&sync_file->wq);
}
/**
@@ -76,23 +75,17 @@ struct sync_file *sync_file_create(struct fence *fence)
{
struct sync_file *sync_file;
- sync_file = sync_file_alloc(offsetof(struct sync_file, cbs[1]));
+ sync_file = sync_file_alloc();
if (!sync_file)
return NULL;
- sync_file->num_fences = 1;
- atomic_set(&sync_file->status, 1);
+ sync_file->fence = fence;
+
snprintf(sync_file->name, sizeof(sync_file->name), "%s-%s%llu-%d",
fence->ops->get_driver_name(fence),
fence->ops->get_timeline_name(fence), fence->context,
fence->seqno);
- sync_file->cbs[0].fence = fence;
- sync_file->cbs[0].sync_file = sync_file;
- if (fence_add_callback(fence, &sync_file->cbs[0].cb,
- fence_check_cb_func))
- atomic_dec(&sync_file->status);
-
return sync_file;
}
EXPORT_SYMBOL(sync_file_create);
@@ -121,14 +114,72 @@ err:
return NULL;
}
-static void sync_file_add_pt(struct sync_file *sync_file, int *i,
- struct fence *fence)
+/**
+ * sync_file_get_fence - get the fence related to the sync_file fd
+ * @fd: sync_file fd to get the fence from
+ *
+ * Ensures @fd references a valid sync_file and returns a fence that
+ * represents all fence in the sync_file. On error NULL is returned.
+ */
+struct fence *sync_file_get_fence(int fd)
+{
+ struct sync_file *sync_file;
+ struct fence *fence;
+
+ sync_file = sync_file_fdget(fd);
+ if (!sync_file)
+ return NULL;
+
+ fence = fence_get(sync_file->fence);
+ fput(sync_file->file);
+
+ return fence;
+}
+EXPORT_SYMBOL(sync_file_get_fence);
+
+static int sync_file_set_fence(struct sync_file *sync_file,
+ struct fence **fences, int num_fences)
+{
+ struct fence_array *array;
+
+ /*
+ * The reference for the fences in the new sync_file and held
+ * in add_fence() during the merge procedure, so for num_fences == 1
+ * we already own a new reference to the fence. For num_fence > 1
+ * we own the reference of the fence_array creation.
+ */
+ if (num_fences == 1) {
+ sync_file->fence = fences[0];
+ } else {
+ array = fence_array_create(num_fences, fences,
+ fence_context_alloc(1), 1, false);
+ if (!array)
+ return -ENOMEM;
+
+ sync_file->fence = &array->base;
+ }
+
+ return 0;
+}
+
+static struct fence **get_fences(struct sync_file *sync_file, int *num_fences)
+{
+ if (fence_is_array(sync_file->fence)) {
+ struct fence_array *array = to_fence_array(sync_file->fence);
+
+ *num_fences = array->num_fences;
+ return array->fences;
+ }
+
+ *num_fences = 1;
+ return &sync_file->fence;
+}
+
+static void add_fence(struct fence **fences, int *i, struct fence *fence)
{
- sync_file->cbs[*i].fence = fence;
- sync_file->cbs[*i].sync_file = sync_file;
+ fences[*i] = fence;
- if (!fence_add_callback(fence, &sync_file->cbs[*i].cb,
- fence_check_cb_func)) {
+ if (!fence_is_signaled(fence)) {
fence_get(fence);
(*i)++;
}
@@ -147,16 +198,24 @@ static void sync_file_add_pt(struct sync_file *sync_file, int *i,
static struct sync_file *sync_file_merge(const char *name, struct sync_file *a,
struct sync_file *b)
{
- int num_fences = a->num_fences + b->num_fences;
struct sync_file *sync_file;
- int i, i_a, i_b;
- unsigned long size = offsetof(struct sync_file, cbs[num_fences]);
+ struct fence **fences, **nfences, **a_fences, **b_fences;
+ int i, i_a, i_b, num_fences, a_num_fences, b_num_fences;
- sync_file = sync_file_alloc(size);
+ sync_file = sync_file_alloc();
if (!sync_file)
return NULL;
- atomic_set(&sync_file->status, num_fences);
+ a_fences = get_fences(a, &a_num_fences);
+ b_fences = get_fences(b, &b_num_fences);
+ if (a_num_fences > INT_MAX - b_num_fences)
+ return NULL;
+
+ num_fences = a_num_fences + b_num_fences;
+
+ fences = kcalloc(num_fences, sizeof(*fences), GFP_KERNEL);
+ if (!fences)
+ goto err;
/*
* Assume sync_file a and b are both ordered and have no
@@ -165,55 +224,66 @@ static struct sync_file *sync_file_merge(const char *name, struct sync_file *a,
* If a sync_file can only be created with sync_file_merge
* and sync_file_create, this is a reasonable assumption.
*/
- for (i = i_a = i_b = 0; i_a < a->num_fences && i_b < b->num_fences; ) {
- struct fence *pt_a = a->cbs[i_a].fence;
- struct fence *pt_b = b->cbs[i_b].fence;
+ for (i = i_a = i_b = 0; i_a < a_num_fences && i_b < b_num_fences; ) {
+ struct fence *pt_a = a_fences[i_a];
+ struct fence *pt_b = b_fences[i_b];
if (pt_a->context < pt_b->context) {
- sync_file_add_pt(sync_file, &i, pt_a);
+ add_fence(fences, &i, pt_a);
i_a++;
} else if (pt_a->context > pt_b->context) {
- sync_file_add_pt(sync_file, &i, pt_b);
+ add_fence(fences, &i, pt_b);
i_b++;
} else {
if (pt_a->seqno - pt_b->seqno <= INT_MAX)
- sync_file_add_pt(sync_file, &i, pt_a);
+ add_fence(fences, &i, pt_a);
else
- sync_file_add_pt(sync_file, &i, pt_b);
+ add_fence(fences, &i, pt_b);
i_a++;
i_b++;
}
}
- for (; i_a < a->num_fences; i_a++)
- sync_file_add_pt(sync_file, &i, a->cbs[i_a].fence);
+ for (; i_a < a_num_fences; i_a++)
+ add_fence(fences, &i, a_fences[i_a]);
+
+ for (; i_b < b_num_fences; i_b++)
+ add_fence(fences, &i, b_fences[i_b]);
+
+ if (num_fences > i) {
+ nfences = krealloc(fences, i * sizeof(*fences),
+ GFP_KERNEL);
+ if (!nfences)
+ goto err;
- for (; i_b < b->num_fences; i_b++)
- sync_file_add_pt(sync_file, &i, b->cbs[i_b].fence);
+ fences = nfences;
+ }
- if (num_fences > i)
- atomic_sub(num_fences - i, &sync_file->status);
- sync_file->num_fences = i;
+ if (sync_file_set_fence(sync_file, fences, i) < 0) {
+ kfree(fences);
+ goto err;
+ }
strlcpy(sync_file->name, name, sizeof(sync_file->name));
return sync_file;
+
+err:
+ fput(sync_file->file);
+ return NULL;
+
}
static void sync_file_free(struct kref *kref)
{
struct sync_file *sync_file = container_of(kref, struct sync_file,
kref);
- int i;
-
- for (i = 0; i < sync_file->num_fences; ++i) {
- fence_remove_callback(sync_file->cbs[i].fence,
- &sync_file->cbs[i].cb);
- fence_put(sync_file->cbs[i].fence);
- }
+ if (atomic_read(&sync_file->enabled))
+ fence_remove_callback(sync_file->fence, &sync_file->cb);
+ fence_put(sync_file->fence);
kfree(sync_file);
}
@@ -232,13 +302,21 @@ static unsigned int sync_file_poll(struct file *file, poll_table *wait)
poll_wait(file, &sync_file->wq, wait);
- status = atomic_read(&sync_file->status);
+ if (!atomic_xchg(&sync_file->enabled, 1)) {
+ if (fence_add_callback(sync_file->fence, &sync_file->cb,
+ fence_check_cb_func) < 0)
+ wake_up_all(&sync_file->wq);
+ }
+
+ status = fence_is_signaled(sync_file->fence);
if (!status)
- return POLLIN;
+ return 0;
+
if (status < 0)
return POLLERR;
- return 0;
+
+ return POLLIN;
}
static long sync_file_ioctl_merge(struct sync_file *sync_file,
@@ -315,8 +393,9 @@ static long sync_file_ioctl_fence_info(struct sync_file *sync_file,
{
struct sync_file_info info;
struct sync_fence_info *fence_info = NULL;
+ struct fence **fences;
__u32 size;
- int ret, i;
+ int num_fences, ret, i;
if (copy_from_user(&info, (void __user *)arg, sizeof(info)))
return -EFAULT;
@@ -324,6 +403,8 @@ static long sync_file_ioctl_fence_info(struct sync_file *sync_file,
if (info.flags || info.pad)
return -EINVAL;
+ fences = get_fences(sync_file, &num_fences);
+
/*
* Passing num_fences = 0 means that userspace doesn't want to
* retrieve any sync_fence_info. If num_fences = 0 we skip filling
@@ -333,16 +414,16 @@ static long sync_file_ioctl_fence_info(struct sync_file *sync_file,
if (!info.num_fences)
goto no_fences;
- if (info.num_fences < sync_file->num_fences)
+ if (info.num_fences < num_fences)
return -EINVAL;
- size = sync_file->num_fences * sizeof(*fence_info);
+ size = num_fences * sizeof(*fence_info);
fence_info = kzalloc(size, GFP_KERNEL);
if (!fence_info)
return -ENOMEM;
- for (i = 0; i < sync_file->num_fences; ++i)
- sync_fill_fence_info(sync_file->cbs[i].fence, &fence_info[i]);
+ for (i = 0; i < num_fences; i++)
+ sync_fill_fence_info(fences[i], &fence_info[i]);
if (copy_to_user(u64_to_user_ptr(info.sync_fence_info), fence_info,
size)) {
@@ -352,11 +433,8 @@ static long sync_file_ioctl_fence_info(struct sync_file *sync_file,
no_fences:
strlcpy(info.name, sync_file->name, sizeof(info.name));
- info.status = atomic_read(&sync_file->status);
- if (info.status >= 0)
- info.status = !info.status;
-
- info.num_fences = sync_file->num_fences;
+ info.status = fence_is_signaled(sync_file->fence);
+ info.num_fences = num_fences;
if (copy_to_user((void __user *)arg, &info, sizeof(info)))
ret = -EFAULT;
diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
index e3dba6f44a79..a79e25e4d547 100644
--- a/drivers/gpu/drm/Makefile
+++ b/drivers/gpu/drm/Makefile
@@ -46,7 +46,7 @@ obj-$(CONFIG_DRM_RADEON)+= radeon/
obj-$(CONFIG_DRM_AMDGPU)+= amd/amdgpu/
obj-$(CONFIG_DRM_MGA) += mga/
obj-$(CONFIG_DRM_I810) += i810/
-obj-$(CONFIG_DRM_I915) += i915/
+obj-y += i915/
obj-$(CONFIG_DRM_MGAG200) += mgag200/
obj-$(CONFIG_DRM_VC4) += vc4/
obj-$(CONFIG_DRM_CIRRUS_QEMU) += cirrus/
diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c
index 77f357b2c386..0206d63ba472 100644
--- a/drivers/gpu/drm/drm_irq.c
+++ b/drivers/gpu/drm/drm_irq.c
@@ -1172,9 +1172,9 @@ static void drm_vblank_put(struct drm_device *dev, unsigned int pipe)
if (atomic_dec_and_test(&vblank->refcount)) {
if (drm_vblank_offdelay == 0)
return;
- else if (dev->vblank_disable_immediate || drm_vblank_offdelay < 0)
+ else if (drm_vblank_offdelay < 0)
vblank_disable_fn((unsigned long)vblank);
- else
+ else if (!dev->vblank_disable_immediate)
mod_timer(&vblank->disable_timer,
jiffies + ((drm_vblank_offdelay * HZ)/1000));
}
@@ -1614,6 +1614,17 @@ err_put:
return ret;
}
+static bool drm_wait_vblank_is_query(union drm_wait_vblank *vblwait)
+{
+ if (vblwait->request.sequence)
+ return false;
+
+ return _DRM_VBLANK_RELATIVE ==
+ (vblwait->request.type & (_DRM_VBLANK_TYPES_MASK |
+ _DRM_VBLANK_EVENT |
+ _DRM_VBLANK_NEXTONMISS));
+}
+
/*
* Wait for VBLANK.
*
@@ -1663,9 +1674,24 @@ int drm_wait_vblank(struct drm_device *dev, void *data,
vblank = &dev->vblank[pipe];
+ /* If the counter is currently enabled and accurate, short-circuit queries
+ * to return the cached timestamp of the last vblank.
+ */
+ if (dev->vblank_disable_immediate &&
+ drm_wait_vblank_is_query(vblwait) &&
+ vblank->enabled) {
+ struct timeval now;
+
+ vblwait->reply.sequence =
+ drm_vblank_count_and_time(dev, pipe, &now);
+ vblwait->reply.tval_sec = now.tv_sec;
+ vblwait->reply.tval_usec = now.tv_usec;
+ return 0;
+ }
+
ret = drm_vblank_get(dev, pipe);
if (ret) {
- DRM_DEBUG("failed to acquire vblank counter, %d\n", ret);
+ DRM_DEBUG("crtc %d failed to acquire vblank counter, %d\n", pipe, ret);
return ret;
}
seq = drm_vblank_count(dev, pipe);
@@ -1693,13 +1719,15 @@ int drm_wait_vblank(struct drm_device *dev, void *data,
return drm_queue_vblank_event(dev, pipe, vblwait, file_priv);
}
- DRM_DEBUG("waiting on vblank count %d, crtc %u\n",
- vblwait->request.sequence, pipe);
- DRM_WAIT_ON(ret, vblank->queue, 3 * HZ,
- (((drm_vblank_count(dev, pipe) -
- vblwait->request.sequence) <= (1 << 23)) ||
- !vblank->enabled ||
- !dev->irq_enabled));
+ if (vblwait->request.sequence != seq) {
+ DRM_DEBUG("waiting on vblank count %d, crtc %u\n",
+ vblwait->request.sequence, pipe);
+ DRM_WAIT_ON(ret, vblank->queue, 3 * HZ,
+ (((drm_vblank_count(dev, pipe) -
+ vblwait->request.sequence) <= (1 << 23)) ||
+ !vblank->enabled ||
+ !dev->irq_enabled));
+ }
if (ret != -EINTR) {
struct timeval now;
@@ -1708,10 +1736,10 @@ int drm_wait_vblank(struct drm_device *dev, void *data,
vblwait->reply.tval_sec = now.tv_sec;
vblwait->reply.tval_usec = now.tv_usec;
- DRM_DEBUG("returning %d to client\n",
- vblwait->reply.sequence);
+ DRM_DEBUG("crtc %d returning %d to client\n",
+ pipe, vblwait->reply.sequence);
} else {
- DRM_DEBUG("vblank wait interrupted by signal\n");
+ DRM_DEBUG("crtc %d vblank wait interrupted by signal\n", pipe);
}
done:
@@ -1789,6 +1817,16 @@ bool drm_handle_vblank(struct drm_device *dev, unsigned int pipe)
wake_up(&vblank->queue);
drm_handle_vblank_events(dev, pipe);
+ /* With instant-off, we defer disabling the interrupt until after
+ * we finish processing the following vblank. The disable has to
+ * be last (after drm_handle_vblank_events) so that the timestamp
+ * is always accurate.
+ */
+ if (dev->vblank_disable_immediate &&
+ drm_vblank_offdelay > 0 &&
+ !atomic_read(&vblank->refcount))
+ vblank_disable_fn((unsigned long)vblank);
+
spin_unlock_irqrestore(&dev->event_lock, irqflags);
return true;
diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c
index cb39f45d6a16..ae4c58592c70 100644
--- a/drivers/gpu/drm/drm_mm.c
+++ b/drivers/gpu/drm/drm_mm.c
@@ -46,6 +46,7 @@
#include <linux/slab.h>
#include <linux/seq_file.h>
#include <linux/export.h>
+#include <linux/interval_tree_generic.h>
/**
* DOC: Overview
@@ -90,76 +91,111 @@
* some basic allocator dumpers for debugging.
*/
-static struct drm_mm_node *drm_mm_search_free_generic(const struct drm_mm *mm,
- u64 size,
- unsigned alignment,
- unsigned long color,
- enum drm_mm_search_flags flags);
-static struct drm_mm_node *drm_mm_search_free_in_range_generic(const struct drm_mm *mm,
- u64 size,
- unsigned alignment,
- unsigned long color,
- u64 start,
- u64 end,
- enum drm_mm_search_flags flags);
-
-static void drm_mm_insert_helper(struct drm_mm_node *hole_node,
- struct drm_mm_node *node,
- u64 size, unsigned alignment,
- unsigned long color,
- enum drm_mm_allocator_flags flags)
+#define START(node) ((node)->start)
+#define LAST(node) ((node)->start + (node)->size - 1)
+
+INTERVAL_TREE_DEFINE(struct drm_mm_node, rb,
+ u64, __subtree_last,
+ START, LAST, static inline, drm_mm_interval_tree)
+
+struct drm_mm_node *
+drm_mm_interval_first(struct drm_mm *mm, u64 start, u64 last)
{
- struct drm_mm *mm = hole_node->mm;
- u64 hole_start = drm_mm_hole_node_start(hole_node);
- u64 hole_end = drm_mm_hole_node_end(hole_node);
- u64 adj_start = hole_start;
- u64 adj_end = hole_end;
+ return drm_mm_interval_tree_iter_first(&mm->interval_tree,
+ start, last);
+}
+EXPORT_SYMBOL(drm_mm_interval_first);
- BUG_ON(node->allocated);
+struct drm_mm_node *
+drm_mm_interval_next(struct drm_mm_node *node, u64 start, u64 last)
+{
+ return drm_mm_interval_tree_iter_next(node, start, last);
+}
+EXPORT_SYMBOL(drm_mm_interval_next);
- if (mm->color_adjust)
- mm->color_adjust(hole_node, color, &adj_start, &adj_end);
+static void drm_mm_interval_tree_add_node(struct drm_mm_node *hole_node,
+ struct drm_mm_node *node)
+{
+ struct drm_mm *mm = hole_node->mm;
+ struct rb_node **link, *rb;
+ struct drm_mm_node *parent;
- if (flags & DRM_MM_CREATE_TOP)
- adj_start = adj_end - size;
+ node->__subtree_last = LAST(node);
- if (alignment) {
- u64 tmp = adj_start;
- unsigned rem;
+ if (hole_node->allocated) {
+ rb = &hole_node->rb;
+ while (rb) {
+ parent = rb_entry(rb, struct drm_mm_node, rb);
+ if (parent->__subtree_last >= node->__subtree_last)
+ break;
- rem = do_div(tmp, alignment);
- if (rem) {
- if (flags & DRM_MM_CREATE_TOP)
- adj_start -= rem;
- else
- adj_start += alignment - rem;
+ parent->__subtree_last = node->__subtree_last;
+ rb = rb_parent(rb);
}
- }
- BUG_ON(adj_start < hole_start);
- BUG_ON(adj_end > hole_end);
+ rb = &hole_node->rb;
+ link = &hole_node->rb.rb_right;
+ } else {
+ rb = NULL;
+ link = &mm->interval_tree.rb_node;
+ }
- if (adj_start == hole_start) {
- hole_node->hole_follows = 0;
- list_del(&hole_node->hole_stack);
+ while (*link) {
+ rb = *link;
+ parent = rb_entry(rb, struct drm_mm_node, rb);
+ if (parent->__subtree_last < node->__subtree_last)
+ parent->__subtree_last = node->__subtree_last;
+ if (node->start < parent->start)
+ link = &parent->rb.rb_left;
+ else
+ link = &parent->rb.rb_right;
}
- node->start = adj_start;
- node->size = size;
- node->mm = mm;
- node->color = color;
- node->allocated = 1;
+ rb_link_node(&node->rb, rb, link);
+ rb_insert_augmented(&node->rb,
+ &mm->interval_tree,
+ &drm_mm_interval_tree_augment);
+}
- INIT_LIST_HEAD(&node->hole_stack);
- list_add(&node->node_list, &hole_node->node_list);
+#define RB_INSERT(root, member, expr) do { \
+ struct rb_node **link = &root.rb_node, *rb = NULL; \
+ u64 x = expr(node); \
+ while (*link) { \
+ rb = *link; \
+ if (x < expr(rb_entry(rb, struct drm_mm_node, member))) \
+ link = &rb->rb_left; \
+ else \
+ link = &rb->rb_right; \
+ } \
+ rb_link_node(&node->member, rb, link); \
+ rb_insert_color(&node->member, &root); \
+} while (0)
+
+#define HOLE_SIZE(NODE) ((NODE)->hole_size)
+#define HOLE_ADDR(NODE) (__drm_mm_hole_node_start(NODE))
+
+static void add_hole(struct drm_mm_node *node)
+{
+ struct drm_mm *mm = node->mm;
- BUG_ON(node->start + node->size > adj_end);
+ node->hole_size =
+ __drm_mm_hole_node_end(node) - __drm_mm_hole_node_start(node);
- node->hole_follows = 0;
- if (__drm_mm_hole_node_start(node) < hole_end) {
- list_add(&node->hole_stack, &mm->hole_stack);
- node->hole_follows = 1;
- }
+ RB_INSERT(mm->holes_size, rb_hole_size, HOLE_SIZE);
+ RB_INSERT(mm->holes_addr, rb_hole_addr, HOLE_ADDR);
+
+ list_add(&node->hole_stack, &mm->hole_stack);
+}
+
+static void rm_hole(struct drm_mm_node *node)
+{
+ if (!node->hole_size)
+ return;
+
+ list_del(&node->hole_stack);
+ rb_erase(&node->rb_hole_size, &node->mm->holes_size);
+ rb_erase(&node->rb_hole_addr, &node->mm->holes_addr);
+ node->hole_size = 0;
}
/**
@@ -178,139 +214,151 @@ static void drm_mm_insert_helper(struct drm_mm_node *hole_node,
*/
int drm_mm_reserve_node(struct drm_mm *mm, struct drm_mm_node *node)
{
+ u64 end = node->start + node->size;
struct drm_mm_node *hole;
- u64 end;
- u64 hole_start;
- u64 hole_end;
+ u64 hole_start, hole_end;
- BUG_ON(node == NULL);
+ if (WARN_ON(node->size == 0))
+ return -EINVAL;
end = node->start + node->size;
/* Find the relevant hole to add our node to */
- drm_mm_for_each_hole(hole, mm, hole_start, hole_end) {
- if (hole_start > node->start || hole_end < end)
- continue;
-
- node->mm = mm;
- node->allocated = 1;
+ hole = drm_mm_interval_tree_iter_first(&mm->interval_tree,
+ node->start, ~(u64)0);
+ if (hole) {
+ if (hole->start < end)
+ return -ENOSPC;
+ } else {
+ hole = list_entry(&mm->head_node.node_list,
+ typeof(*hole), node_list);
+ }
- INIT_LIST_HEAD(&node->hole_stack);
- list_add(&node->node_list, &hole->node_list);
+ hole = list_last_entry(&hole->node_list, typeof(*hole), node_list);
+ if (!hole->hole_size)
+ return -ENOSPC;
- if (node->start == hole_start) {
- hole->hole_follows = 0;
- list_del_init(&hole->hole_stack);
- }
+ hole_start = __drm_mm_hole_node_start(hole);
+ hole_end = hole_start + hole->hole_size;
+ if (hole_start > node->start || hole_end < end)
+ return -ENOSPC;
- node->hole_follows = 0;
- if (end != hole_end) {
- list_add(&node->hole_stack, &mm->hole_stack);
- node->hole_follows = 1;
- }
+ if (mm->color_adjust) {
+ u64 adj_start = hole_start, adj_end = hole_end;
- return 0;
+ mm->color_adjust(hole, node->color, &adj_start, &adj_end);
+ if (adj_start > node->start ||
+ adj_end < node->start + node->size)
+ return -ENOSPC;
}
- return -ENOSPC;
-}
-EXPORT_SYMBOL(drm_mm_reserve_node);
+ node->mm = mm;
-/**
- * drm_mm_insert_node_generic - search for space and insert @node
- * @mm: drm_mm to allocate from
- * @node: preallocate node to insert
- * @size: size of the allocation
- * @alignment: alignment of the allocation
- * @color: opaque tag value to use for this node
- * @sflags: flags to fine-tune the allocation search
- * @aflags: flags to fine-tune the allocation behavior
- *
- * The preallocated node must be cleared to 0.
- *
- * Returns:
- * 0 on success, -ENOSPC if there's no suitable hole.
- */
-int drm_mm_insert_node_generic(struct drm_mm *mm, struct drm_mm_node *node,
- u64 size, unsigned alignment,
- unsigned long color,
- enum drm_mm_search_flags sflags,
- enum drm_mm_allocator_flags aflags)
-{
- struct drm_mm_node *hole_node;
+ list_add(&node->node_list, &hole->node_list);
+ drm_mm_interval_tree_add_node(hole, node);
+ node->allocated = 1;
+ node->hole_size = 0;
- hole_node = drm_mm_search_free_generic(mm, size, alignment,
- color, sflags);
- if (!hole_node)
- return -ENOSPC;
+ rm_hole(hole);
+ if (node->start > hole_start)
+ add_hole(hole);
+ if (end < hole_end)
+ add_hole(node);
- drm_mm_insert_helper(hole_node, node, size, alignment, color, aflags);
return 0;
}
-EXPORT_SYMBOL(drm_mm_insert_node_generic);
-
-static void drm_mm_insert_helper_range(struct drm_mm_node *hole_node,
- struct drm_mm_node *node,
- u64 size, unsigned alignment,
- unsigned long color,
- u64 start, u64 end,
- enum drm_mm_allocator_flags flags)
-{
- struct drm_mm *mm = hole_node->mm;
- u64 hole_start = drm_mm_hole_node_start(hole_node);
- u64 hole_end = drm_mm_hole_node_end(hole_node);
- u64 adj_start = hole_start;
- u64 adj_end = hole_end;
-
- BUG_ON(!hole_node->hole_follows || node->allocated);
-
- if (adj_start < start)
- adj_start = start;
- if (adj_end > end)
- adj_end = end;
+EXPORT_SYMBOL(drm_mm_reserve_node);
- if (mm->color_adjust)
- mm->color_adjust(hole_node, color, &adj_start, &adj_end);
+static inline struct drm_mm_node *rb_hole_size_to_node(struct rb_node *rb)
+{
+ return rb ? rb_entry(rb, struct drm_mm_node, rb_hole_size) : NULL;
+}
- if (flags & DRM_MM_CREATE_TOP)
- adj_start = adj_end - size;
+static inline struct drm_mm_node *rb_hole_addr_to_node(struct rb_node *rb)
+{
+ return rb ? rb_entry(rb, struct drm_mm_node, rb_hole_addr) : NULL;
+}
- if (alignment) {
- u64 tmp = adj_start;
- unsigned rem;
+static inline u64 rb_hole_size(struct rb_node *rb)
+{
+ return rb_entry(rb, struct drm_mm_node, rb_hole_size)->hole_size;
+}
- rem = do_div(tmp, alignment);
- if (rem) {
- if (flags & DRM_MM_CREATE_TOP)
- adj_start -= rem;
- else
- adj_start += alignment - rem;
- }
+static struct drm_mm_node *best_hole(struct drm_mm *mm, u64 size)
+{
+ struct rb_node *best = NULL;
+ struct rb_node **link = &mm->holes_size.rb_node;
+ while (*link) {
+ struct rb_node *rb = *link;
+ if (size <= rb_hole_size(rb)) {
+ best = rb;
+ link = &rb->rb_left;
+ } else
+ link = &rb->rb_right;
}
+ return rb_hole_size_to_node(best);
+}
- if (adj_start == hole_start) {
- hole_node->hole_follows = 0;
- list_del(&hole_node->hole_stack);
+static struct drm_mm_node *low_hole(struct drm_mm *mm, u64 addr)
+{
+ struct drm_mm_node *node = NULL;
+ struct rb_node **link = &mm->holes_addr.rb_node;
+ while (*link) {
+ node = rb_hole_addr_to_node(*link);
+ if (addr == __drm_mm_hole_node_start(node))
+ return node;
+
+ if (addr < __drm_mm_hole_node_start(node))
+ link = &node->rb_hole_addr.rb_left;
+ else
+ link = &node->rb_hole_addr.rb_right;
}
+ return node;
+}
- node->start = adj_start;
- node->size = size;
- node->mm = mm;
- node->color = color;
- node->allocated = 1;
+static struct drm_mm_node *
+first_hole(struct drm_mm *mm, u64 start, u64 end, u64 size, unsigned flags)
+{
+ if (RB_EMPTY_ROOT(&mm->holes_size))
+ return NULL;
+
+ switch (flags) {
+ default:
+ case DRM_MM_INSERT_BEST:
+ return best_hole(mm, size);
- INIT_LIST_HEAD(&node->hole_stack);
- list_add(&node->node_list, &hole_node->node_list);
+ case DRM_MM_INSERT_LOW:
+ return low_hole(mm, start);
- BUG_ON(node->start < start);
- BUG_ON(node->start < adj_start);
- BUG_ON(node->start + node->size > adj_end);
- BUG_ON(node->start + node->size > end);
+ case DRM_MM_INSERT_HIGH:
+ return rb_hole_addr_to_node(rb_last(&mm->holes_addr));
- node->hole_follows = 0;
- if (__drm_mm_hole_node_start(node) < hole_end) {
- list_add(&node->hole_stack, &mm->hole_stack);
- node->hole_follows = 1;
+ case DRM_MM_INSERT_EVICT:
+ return list_first_entry_or_null(&mm->hole_stack,
+ struct drm_mm_node,
+ hole_stack);
+ }
+}
+
+static struct drm_mm_node *
+next_hole(struct drm_mm *mm, struct drm_mm_node *node, unsigned flags)
+{
+ switch (flags) {
+ default:
+ case DRM_MM_INSERT_BEST:
+ return rb_hole_size_to_node(rb_next(&node->rb_hole_size));
+
+ case DRM_MM_INSERT_LOW:
+ return rb_hole_addr_to_node(rb_next(&node->rb_hole_addr));
+
+ case DRM_MM_INSERT_HIGH:
+ return rb_hole_addr_to_node(rb_prev(&node->rb_hole_addr));
+
+ case DRM_MM_INSERT_EVICT:
+ node = list_entry(node->hole_stack.next,
+ struct drm_mm_node,
+ hole_stack);
+ return &node->hole_stack == &mm->hole_stack ? NULL : node;
}
}
@@ -331,25 +379,101 @@ static void drm_mm_insert_helper_range(struct drm_mm_node *hole_node,
* Returns:
* 0 on success, -ENOSPC if there's no suitable hole.
*/
-int drm_mm_insert_node_in_range_generic(struct drm_mm *mm, struct drm_mm_node *node,
- u64 size, unsigned alignment,
+int drm_mm_insert_node_in_range_generic(struct drm_mm * const mm,
+ struct drm_mm_node * const node,
+ u64 size, u64 alignment,
unsigned long color,
u64 start, u64 end,
- enum drm_mm_search_flags sflags,
- enum drm_mm_allocator_flags aflags)
+ unsigned flags)
{
- struct drm_mm_node *hole_node;
+ struct drm_mm_node *entry;
+ u64 alignment_mask;
+ u64 min_size;
- hole_node = drm_mm_search_free_in_range_generic(mm,
- size, alignment, color,
- start, end, sflags);
- if (!hole_node)
+ if (WARN_ON(size == 0))
+ return -EINVAL;
+
+ if (end - start < size)
return -ENOSPC;
- drm_mm_insert_helper_range(hole_node, node,
- size, alignment, color,
- start, end, aflags);
- return 0;
+ min_size = alignment + max(size, alignment);
+ alignment_mask = is_power_of_2(alignment) ? alignment - 1 : 0;
+restart:
+ for (entry = first_hole(mm, start, end, min_size, flags); entry;
+ entry = next_hole(mm, entry, flags)) {
+ u64 hole_start = __drm_mm_hole_node_start(entry);
+ u64 hole_end = hole_start + entry->hole_size;
+ u64 adj_start = hole_start;
+ u64 adj_end = hole_end;
+
+ if (flags == DRM_MM_INSERT_LOW && hole_start >= end)
+ break;
+
+ if (flags == DRM_MM_INSERT_HIGH && hole_end <= start)
+ break;
+
+ if (mm->color_adjust)
+ mm->color_adjust(entry, color, &adj_start, &adj_end);
+
+ if (adj_start < start)
+ adj_start = start;
+ if (adj_end > end)
+ adj_end = end;
+ if (adj_end <= adj_start || adj_end - adj_start < size)
+ continue;
+
+ if (flags == DRM_MM_INSERT_HIGH)
+ adj_start = adj_end - size;
+
+ if (alignment) {
+ u64 rem;
+
+ if (alignment_mask)
+ rem = adj_start & alignment_mask;
+ else
+ div64_u64_rem(adj_start, alignment, &rem);
+ if (rem) {
+ adj_start -= rem;
+ if (flags != DRM_MM_INSERT_HIGH)
+ adj_start += alignment;
+ if (adj_start < hole_start ||
+ adj_end <= adj_start ||
+ adj_end - adj_start < size) {
+ /* Searching for alignment may cause
+ * many many misses and trigger the
+ * NMI watchdog if we are not careful.
+ */
+ cond_resched();
+ continue;
+ }
+ }
+ }
+
+ node->mm = mm;
+ node->size = size;
+ node->start = adj_start;
+ node->color = color;
+ node->hole_size = 0;
+
+ list_add(&node->node_list, &entry->node_list);
+ drm_mm_interval_tree_add_node(entry, node);
+ node->allocated = 1;
+
+ rm_hole(entry);
+ if (adj_start > hole_start)
+ add_hole(entry);
+ if (adj_start + size < hole_end)
+ add_hole(node);
+
+ return 0;
+ }
+
+ if (min_size != size) {
+ min_size = size;
+ goto restart;
+ }
+
+ return -ENOSPC;
}
EXPORT_SYMBOL(drm_mm_insert_node_in_range_generic);
@@ -369,139 +493,21 @@ void drm_mm_remove_node(struct drm_mm_node *node)
if (WARN_ON(!node->allocated))
return;
- BUG_ON(node->scanned_block || node->scanned_prev_free
- || node->scanned_next_free);
+ BUG_ON(node->scanned_block);
prev_node =
list_entry(node->node_list.prev, struct drm_mm_node, node_list);
- if (node->hole_follows) {
- BUG_ON(__drm_mm_hole_node_start(node) ==
- __drm_mm_hole_node_end(node));
- list_del(&node->hole_stack);
- } else
- BUG_ON(__drm_mm_hole_node_start(node) !=
- __drm_mm_hole_node_end(node));
-
-
- if (!prev_node->hole_follows) {
- prev_node->hole_follows = 1;
- list_add(&prev_node->hole_stack, &mm->hole_stack);
- } else
- list_move(&prev_node->hole_stack, &mm->hole_stack);
+ rm_hole(node);
+ drm_mm_interval_tree_remove(node, &mm->interval_tree);
list_del(&node->node_list);
node->allocated = 0;
-}
-EXPORT_SYMBOL(drm_mm_remove_node);
-
-static int check_free_hole(u64 start, u64 end, u64 size, unsigned alignment)
-{
- if (end - start < size)
- return 0;
-
- if (alignment) {
- u64 tmp = start;
- unsigned rem;
-
- rem = do_div(tmp, alignment);
- if (rem)
- start += alignment - rem;
- }
-
- return end >= start + size;
-}
-
-static struct drm_mm_node *drm_mm_search_free_generic(const struct drm_mm *mm,
- u64 size,
- unsigned alignment,
- unsigned long color,
- enum drm_mm_search_flags flags)
-{
- struct drm_mm_node *entry;
- struct drm_mm_node *best;
- u64 adj_start;
- u64 adj_end;
- u64 best_size;
-
- BUG_ON(mm->scanned_blocks);
-
- best = NULL;
- best_size = ~0UL;
-
- __drm_mm_for_each_hole(entry, mm, adj_start, adj_end,
- flags & DRM_MM_SEARCH_BELOW) {
- u64 hole_size = adj_end - adj_start;
-
- if (mm->color_adjust) {
- mm->color_adjust(entry, color, &adj_start, &adj_end);
- if (adj_end <= adj_start)
- continue;
- }
-
- if (!check_free_hole(adj_start, adj_end, size, alignment))
- continue;
-
- if (!(flags & DRM_MM_SEARCH_BEST))
- return entry;
-
- if (hole_size < best_size) {
- best = entry;
- best_size = hole_size;
- }
- }
-
- return best;
-}
-
-static struct drm_mm_node *drm_mm_search_free_in_range_generic(const struct drm_mm *mm,
- u64 size,
- unsigned alignment,
- unsigned long color,
- u64 start,
- u64 end,
- enum drm_mm_search_flags flags)
-{
- struct drm_mm_node *entry;
- struct drm_mm_node *best;
- u64 adj_start;
- u64 adj_end;
- u64 best_size;
-
- BUG_ON(mm->scanned_blocks);
-
- best = NULL;
- best_size = ~0UL;
-
- __drm_mm_for_each_hole(entry, mm, adj_start, adj_end,
- flags & DRM_MM_SEARCH_BELOW) {
- u64 hole_size = adj_end - adj_start;
-
- if (adj_start < start)
- adj_start = start;
- if (adj_end > end)
- adj_end = end;
-
- if (mm->color_adjust) {
- mm->color_adjust(entry, color, &adj_start, &adj_end);
- if (adj_end <= adj_start)
- continue;
- }
-
- if (!check_free_hole(adj_start, adj_end, size, alignment))
- continue;
-
- if (!(flags & DRM_MM_SEARCH_BEST))
- return entry;
-
- if (hole_size < best_size) {
- best = entry;
- best_size = hole_size;
- }
- }
- return best;
+ rm_hole(prev_node);
+ add_hole(prev_node);
}
+EXPORT_SYMBOL(drm_mm_remove_node);
/**
* drm_mm_replace_node - move an allocation from @old to @new
@@ -514,13 +520,20 @@ static struct drm_mm_node *drm_mm_search_free_in_range_generic(const struct drm_
*/
void drm_mm_replace_node(struct drm_mm_node *old, struct drm_mm_node *new)
{
+ memcpy(old, new, sizeof(struct drm_mm_node));
+
list_replace(&old->node_list, &new->node_list);
- list_replace(&old->hole_stack, &new->hole_stack);
- new->hole_follows = old->hole_follows;
- new->mm = old->mm;
- new->start = old->start;
- new->size = old->size;
- new->color = old->color;
+ rb_replace_node(&old->rb, &new->rb, &old->mm->interval_tree);
+
+ if (old->hole_size) {
+ list_replace(&old->hole_stack, &new->hole_stack);
+ rb_replace_node(&old->rb_hole_size,
+ &new->rb_hole_size,
+ &old->mm->holes_size);
+ rb_replace_node(&old->rb_hole_addr,
+ &new->rb_hole_addr,
+ &old->mm->holes_addr);
+ }
old->allocated = 0;
new->allocated = 1;
@@ -556,37 +569,6 @@ EXPORT_SYMBOL(drm_mm_replace_node);
*/
/**
- * drm_mm_init_scan - initialize lru scanning
- * @mm: drm_mm to scan
- * @size: size of the allocation
- * @alignment: alignment of the allocation
- * @color: opaque tag value to use for the allocation
- *
- * This simply sets up the scanning routines with the parameters for the desired
- * hole. Note that there's no need to specify allocation flags, since they only
- * change the place a node is allocated from within a suitable hole.
- *
- * Warning:
- * As long as the scan list is non-empty, no other operations than
- * adding/removing nodes to/from the scan list are allowed.
- */
-void drm_mm_init_scan(struct drm_mm *mm,
- u64 size,
- unsigned alignment,
- unsigned long color)
-{
- mm->scan_color = color;
- mm->scan_alignment = alignment;
- mm->scan_size = size;
- mm->scanned_blocks = 0;
- mm->scan_hit_start = 0;
- mm->scan_hit_end = 0;
- mm->scan_check_range = 0;
- mm->prev_scanned_node = NULL;
-}
-EXPORT_SYMBOL(drm_mm_init_scan);
-
-/**
* drm_mm_init_scan - initialize range-restricted lru scanning
* @mm: drm_mm to scan
* @size: size of the allocation
@@ -603,23 +585,25 @@ EXPORT_SYMBOL(drm_mm_init_scan);
* As long as the scan list is non-empty, no other operations than
* adding/removing nodes to/from the scan list are allowed.
*/
-void drm_mm_init_scan_with_range(struct drm_mm *mm,
+void drm_mm_init_scan_with_range(struct drm_mm_scan *scan,
u64 size,
- unsigned alignment,
+ u64 alignment,
unsigned long color,
u64 start,
- u64 end)
+ u64 end,
+ unsigned flags)
{
- mm->scan_color = color;
- mm->scan_alignment = alignment;
- mm->scan_size = size;
- mm->scanned_blocks = 0;
- mm->scan_hit_start = 0;
- mm->scan_hit_end = 0;
- mm->scan_start = start;
- mm->scan_end = end;
- mm->scan_check_range = 1;
- mm->prev_scanned_node = NULL;
+ scan->flags = flags;
+ scan->color = color;
+ scan->alignment = alignment;
+ scan->alignment_mask = is_power_of_2(alignment) ? alignment - 1 : 0;
+ scan->size = size;
+ scan->hit_start = 0;
+ scan->hit_end = 0;
+ scan->start = start;
+ scan->end = end;
+
+ scan->prev_node = NULL;
}
EXPORT_SYMBOL(drm_mm_init_scan_with_range);
@@ -633,46 +617,57 @@ EXPORT_SYMBOL(drm_mm_init_scan_with_range);
* Returns:
* True if a hole has been found, false otherwise.
*/
-bool drm_mm_scan_add_block(struct drm_mm_node *node)
+bool drm_mm_scan_add_block(struct drm_mm_scan *scan,
+ struct drm_mm_node *node)
{
struct drm_mm *mm = node->mm;
struct drm_mm_node *prev_node;
- u64 hole_start, hole_end;
u64 adj_start, adj_end;
- mm->scanned_blocks++;
-
BUG_ON(node->scanned_block);
node->scanned_block = 1;
prev_node = list_entry(node->node_list.prev, struct drm_mm_node,
node_list);
- node->scanned_preceeds_hole = prev_node->hole_follows;
- prev_node->hole_follows = 1;
- list_del(&node->node_list);
+ __list_del_entry(&node->node_list);
node->node_list.prev = &prev_node->node_list;
- node->node_list.next = &mm->prev_scanned_node->node_list;
- mm->prev_scanned_node = node;
+ node->node_list.next = &scan->prev_node->node_list;
+ scan->prev_node = node;
- adj_start = hole_start = drm_mm_hole_node_start(prev_node);
- adj_end = hole_end = drm_mm_hole_node_end(prev_node);
-
- if (mm->scan_check_range) {
- if (adj_start < mm->scan_start)
- adj_start = mm->scan_start;
- if (adj_end > mm->scan_end)
- adj_end = mm->scan_end;
- }
+ adj_start = __drm_mm_hole_node_start(prev_node);
+ adj_end = __drm_mm_hole_node_end(prev_node);
if (mm->color_adjust)
- mm->color_adjust(prev_node, mm->scan_color,
- &adj_start, &adj_end);
+ mm->color_adjust(prev_node, scan->color, &adj_start, &adj_end);
+
+ if (adj_start < scan->start)
+ adj_start = scan->start;
+ if (adj_end > scan->end)
+ adj_end = scan->end;
- if (check_free_hole(adj_start, adj_end,
- mm->scan_size, mm->scan_alignment)) {
- mm->scan_hit_start = hole_start;
- mm->scan_hit_end = hole_end;
+ if (scan->flags == DRM_MM_INSERT_HIGH)
+ adj_start = adj_end - scan->size;
+
+ if (scan->alignment) {
+ u64 rem;
+
+ if (scan->alignment_mask)
+ rem = adj_start & scan->alignment_mask;
+ else
+ div64_u64_rem(adj_start, scan->alignment, &rem);
+ if (rem) {
+ adj_start -= rem;
+ if (scan->flags != DRM_MM_INSERT_HIGH)
+ adj_start += scan->alignment;
+ if (adj_start < __drm_mm_hole_node_start(prev_node))
+ return false;
+ }
+ }
+
+ if (adj_end > adj_start && adj_end - adj_start >= scan->size) {
+ scan->hit_start = adj_start;
+ scan->hit_end = adj_end;
return true;
}
@@ -696,44 +691,25 @@ EXPORT_SYMBOL(drm_mm_scan_add_block);
* True if this block should be evicted, false otherwise. Will always
* return false when no hole has been found.
*/
-bool drm_mm_scan_remove_block(struct drm_mm_node *node)
+bool drm_mm_scan_remove_block(struct drm_mm_scan *scan,
+ struct drm_mm_node *node)
{
- struct drm_mm *mm = node->mm;
struct drm_mm_node *prev_node;
- mm->scanned_blocks--;
-
BUG_ON(!node->scanned_block);
node->scanned_block = 0;
prev_node = list_entry(node->node_list.prev, struct drm_mm_node,
node_list);
- prev_node->hole_follows = node->scanned_preceeds_hole;
list_add(&node->node_list, &prev_node->node_list);
- return (drm_mm_hole_node_end(node) > mm->scan_hit_start &&
- node->start < mm->scan_hit_end);
+ return (node->start + node->size > scan->hit_start &&
+ node->start < scan->hit_end);
}
EXPORT_SYMBOL(drm_mm_scan_remove_block);
/**
- * drm_mm_clean - checks whether an allocator is clean
- * @mm: drm_mm allocator to check
- *
- * Returns:
- * True if the allocator is completely free, false if there's still a node
- * allocated in it.
- */
-bool drm_mm_clean(struct drm_mm * mm)
-{
- struct list_head *head = &mm->head_node.node_list;
-
- return (head->next->next == head);
-}
-EXPORT_SYMBOL(drm_mm_clean);
-
-/**
* drm_mm_init - initialize a drm-mm allocator
* @mm: the drm_mm structure to initialize
* @start: start of the range managed by @mm
@@ -743,22 +719,20 @@ EXPORT_SYMBOL(drm_mm_clean);
*/
void drm_mm_init(struct drm_mm * mm, u64 start, u64 size)
{
+ mm->color_adjust = NULL;
+
INIT_LIST_HEAD(&mm->hole_stack);
- mm->scanned_blocks = 0;
+ mm->interval_tree = RB_ROOT;
+ mm->holes_size = RB_ROOT;
+ mm->holes_addr = RB_ROOT;
/* Clever trick to avoid a special case in the free hole tracking. */
INIT_LIST_HEAD(&mm->head_node.node_list);
- INIT_LIST_HEAD(&mm->head_node.hole_stack);
- mm->head_node.hole_follows = 1;
- mm->head_node.scanned_block = 0;
- mm->head_node.scanned_prev_free = 0;
- mm->head_node.scanned_next_free = 0;
+ mm->head_node.scanned_block = 1;
mm->head_node.mm = mm;
mm->head_node.start = start + size;
- mm->head_node.size = start - mm->head_node.start;
- list_add_tail(&mm->head_node.hole_stack, &mm->hole_stack);
-
- mm->color_adjust = NULL;
+ mm->head_node.size = -size;
+ add_hole(&mm->head_node);
}
EXPORT_SYMBOL(drm_mm_init);
@@ -771,8 +745,7 @@ EXPORT_SYMBOL(drm_mm_init);
*/
void drm_mm_takedown(struct drm_mm * mm)
{
- WARN(!list_empty(&mm->head_node.node_list),
- "Memory manager not clean during takedown.\n");
+ WARN(!drm_mm_clean(mm), "Memory manager not clean during takedown.\n");
}
EXPORT_SYMBOL(drm_mm_takedown);
@@ -781,7 +754,7 @@ static u64 drm_mm_debug_hole(struct drm_mm_node *entry,
{
u64 hole_start, hole_end, hole_size;
- if (entry->hole_follows) {
+ if (entry->hole_size) {
hole_start = drm_mm_hole_node_start(entry);
hole_end = drm_mm_hole_node_end(entry);
hole_size = hole_end - hole_start;
@@ -823,7 +796,7 @@ static u64 drm_mm_dump_hole(struct seq_file *m, struct drm_mm_node *entry)
{
u64 hole_start, hole_end, hole_size;
- if (entry->hole_follows) {
+ if (entry->hole_size) {
hole_start = drm_mm_hole_node_start(entry);
hole_end = drm_mm_hole_node_end(entry);
hole_size = hole_end - hole_start;
diff --git a/drivers/gpu/drm/drm_vma_manager.c b/drivers/gpu/drm/drm_vma_manager.c
index f306c8855978..31a1f8294441 100644
--- a/drivers/gpu/drm/drm_vma_manager.c
+++ b/drivers/gpu/drm/drm_vma_manager.c
@@ -86,7 +86,6 @@ void drm_vma_offset_manager_init(struct drm_vma_offset_manager *mgr,
unsigned long page_offset, unsigned long size)
{
rwlock_init(&mgr->vm_lock);
- mgr->vm_addr_space_rb = RB_ROOT;
drm_mm_init(&mgr->vm_addr_space_mm, page_offset, size);
}
EXPORT_SYMBOL(drm_vma_offset_manager_init);
@@ -145,16 +144,16 @@ struct drm_vma_offset_node *drm_vma_offset_lookup_locked(struct drm_vma_offset_m
unsigned long start,
unsigned long pages)
{
- struct drm_vma_offset_node *node, *best;
+ struct drm_mm_node *node, *best;
struct rb_node *iter;
unsigned long offset;
- iter = mgr->vm_addr_space_rb.rb_node;
+ iter = mgr->vm_addr_space_mm.interval_tree.rb_node;
best = NULL;
while (likely(iter)) {
- node = rb_entry(iter, struct drm_vma_offset_node, vm_rb);
- offset = node->vm_node.start;
+ node = rb_entry(iter, struct drm_mm_node, rb);
+ offset = node->start;
if (start >= offset) {
iter = iter->rb_right;
best = node;
@@ -167,38 +166,17 @@ struct drm_vma_offset_node *drm_vma_offset_lookup_locked(struct drm_vma_offset_m
/* verify that the node spans the requested area */
if (best) {
- offset = best->vm_node.start + best->vm_node.size;
+ offset = best->start + best->size;
if (offset < start + pages)
best = NULL;
}
- return best;
-}
-EXPORT_SYMBOL(drm_vma_offset_lookup_locked);
-
-/* internal helper to link @node into the rb-tree */
-static void _drm_vma_offset_add_rb(struct drm_vma_offset_manager *mgr,
- struct drm_vma_offset_node *node)
-{
- struct rb_node **iter = &mgr->vm_addr_space_rb.rb_node;
- struct rb_node *parent = NULL;
- struct drm_vma_offset_node *iter_node;
-
- while (likely(*iter)) {
- parent = *iter;
- iter_node = rb_entry(*iter, struct drm_vma_offset_node, vm_rb);
+ if (!best)
+ return NULL;
- if (node->vm_node.start < iter_node->vm_node.start)
- iter = &(*iter)->rb_left;
- else if (node->vm_node.start > iter_node->vm_node.start)
- iter = &(*iter)->rb_right;
- else
- BUG();
- }
-
- rb_link_node(&node->vm_rb, parent, iter);
- rb_insert_color(&node->vm_rb, &mgr->vm_addr_space_rb);
+ return container_of(best, struct drm_vma_offset_node, vm_node);
}
+EXPORT_SYMBOL(drm_vma_offset_lookup_locked);
/**
* drm_vma_offset_add() - Add offset node to manager
@@ -235,13 +213,10 @@ int drm_vma_offset_add(struct drm_vma_offset_manager *mgr,
goto out_unlock;
}
- ret = drm_mm_insert_node(&mgr->vm_addr_space_mm, &node->vm_node,
- pages, 0, DRM_MM_SEARCH_DEFAULT);
+ ret = drm_mm_insert_node(&mgr->vm_addr_space_mm, &node->vm_node, pages);
if (ret)
goto out_unlock;
- _drm_vma_offset_add_rb(mgr, node);
-
out_unlock:
write_unlock(&mgr->vm_lock);
return ret;
@@ -265,7 +240,6 @@ void drm_vma_offset_remove(struct drm_vma_offset_manager *mgr,
write_lock(&mgr->vm_lock);
if (drm_mm_node_allocated(&node->vm_node)) {
- rb_erase(&node->vm_rb, &mgr->vm_addr_space_rb);
drm_mm_remove_node(&node->vm_node);
memset(&node->vm_node, 0, sizeof(node->vm_node));
}
diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig
index 7769e469118f..2dff7b807b05 100644
--- a/drivers/gpu/drm/i915/Kconfig
+++ b/drivers/gpu/drm/i915/Kconfig
@@ -4,6 +4,8 @@ config DRM_I915
depends on X86 && PCI
select INTEL_GTT
select INTERVAL_TREE
+ select STOP_MACHINE
+ select ZLIB_DEFLATE
# we need shmfs for the swappable backing store, and in particular
# the shmem_readpage() which depends upon tmpfs
select SHMEM
@@ -18,6 +20,7 @@ config DRM_I915
select INPUT if ACPI
select ACPI_VIDEO if ACPI
select ACPI_BUTTON if ACPI
+ select SYNC_FILE
help
Choose this option if you have a system that has "Intel Graphics
Media Accelerator" or "HD Graphics" integrated graphics,
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index dda724f04445..0afe7b3cad07 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -3,12 +3,15 @@
# Direct Rendering Infrastructure (DRI) in XFree86 4.1.0 and higher.
subdir-ccflags-$(CONFIG_DRM_I915_WERROR) := -Werror
+subdir-ccflags-y += \
+ $(call as-instr,movntdqa (%eax)$(comma)%xmm0,-DCONFIG_AS_MOVNTDQA)
# Please keep these build lists sorted!
# core driver code
i915-y := i915_drv.o \
i915_irq.o \
+ i915_memcpy.o \
i915_params.o \
i915_pci.o \
i915_suspend.o \
@@ -30,6 +33,7 @@ i915-y += i915_cmd_parser.o \
i915_gem_execbuffer.o \
i915_gem_fence.o \
i915_gem_gtt.o \
+ i915_gem_internal.o \
i915_gem.o \
i915_gem_render_state.o \
i915_gem_request.o \
@@ -110,6 +114,9 @@ i915-y += intel_gvt.o
include $(src)/gvt/Makefile
endif
-obj-$(CONFIG_DRM_I915) += i915.o
+obj-$(CONFIG_DRM_I915) += i915.o
+ifdef CONFIG_DRM_I915
+obj-y += i915_memory.o
+endif
CFLAGS_i915_trace_points.o := -I$(src)
diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
index 1db829c8b912..c3a3a3fe3d27 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -86,24 +86,24 @@
* general bitmasking mechanism.
*/
-#define STD_MI_OPCODE_MASK 0xFF800000
-#define STD_3D_OPCODE_MASK 0xFFFF0000
-#define STD_2D_OPCODE_MASK 0xFFC00000
-#define STD_MFX_OPCODE_MASK 0xFFFF0000
+#define STD_MI_OPCODE_SHIFT (32 - 9)
+#define STD_3D_OPCODE_SHIFT (32 - 16)
+#define STD_2D_OPCODE_SHIFT (32 - 10)
+#define STD_MFX_OPCODE_SHIFT (32 - 16)
#define CMD(op, opm, f, lm, fl, ...) \
{ \
.flags = (fl) | ((f) ? CMD_DESC_FIXED : 0), \
- .cmd = { (op), (opm) }, \
+ .cmd = { (op), ~0u << (opm) }, \
.length = { (lm) }, \
__VA_ARGS__ \
}
/* Convenience macros to compress the tables */
-#define SMI STD_MI_OPCODE_MASK
-#define S3D STD_3D_OPCODE_MASK
-#define S2D STD_2D_OPCODE_MASK
-#define SMFX STD_MFX_OPCODE_MASK
+#define SMI STD_MI_OPCODE_SHIFT
+#define S3D STD_3D_OPCODE_SHIFT
+#define S2D STD_2D_OPCODE_SHIFT
+#define SMFX STD_MFX_OPCODE_SHIFT
#define F true
#define S CMD_DESC_SKIP
#define R CMD_DESC_REJECT
@@ -350,6 +350,9 @@ static const struct drm_i915_cmd_descriptor hsw_blt_cmds[] = {
CMD( MI_LOAD_SCAN_LINES_EXCL, SMI, !F, 0x3F, R ),
};
+static const struct drm_i915_cmd_descriptor noop_desc =
+ CMD(MI_NOOP, SMI, F, 1, S);
+
#undef CMD
#undef SMI
#undef S3D
@@ -458,6 +461,7 @@ static const struct drm_i915_reg_descriptor gen7_render_regs[] = {
REG32(GEN7_GPGPU_DISPATCHDIMX),
REG32(GEN7_GPGPU_DISPATCHDIMY),
REG32(GEN7_GPGPU_DISPATCHDIMZ),
+ REG64_IDX(RING_TIMESTAMP, BSD_RING_BASE),
REG64_IDX(GEN7_SO_NUM_PRIMS_WRITTEN, 0),
REG64_IDX(GEN7_SO_NUM_PRIMS_WRITTEN, 1),
REG64_IDX(GEN7_SO_NUM_PRIMS_WRITTEN, 2),
@@ -473,6 +477,7 @@ static const struct drm_i915_reg_descriptor gen7_render_regs[] = {
REG32(GEN7_L3SQCREG1),
REG32(GEN7_L3CNTLREG2),
REG32(GEN7_L3CNTLREG3),
+ REG64_IDX(RING_TIMESTAMP, BLT_RING_BASE),
};
static const struct drm_i915_reg_descriptor hsw_render_regs[] = {
@@ -502,7 +507,10 @@ static const struct drm_i915_reg_descriptor hsw_render_regs[] = {
};
static const struct drm_i915_reg_descriptor gen7_blt_regs[] = {
+ REG64_IDX(RING_TIMESTAMP, RENDER_RING_BASE),
+ REG64_IDX(RING_TIMESTAMP, BSD_RING_BASE),
REG32(BCS_SWCTRL),
+ REG64_IDX(RING_TIMESTAMP, BLT_RING_BASE),
};
static const struct drm_i915_reg_descriptor ivb_master_regs[] = {
@@ -691,12 +699,26 @@ struct cmd_node {
* non-opcode bits being set. But if we don't include those bits, some 3D
* commands may hash to the same bucket due to not including opcode bits that
* make the command unique. For now, we will risk hashing to the same bucket.
- *
- * If we attempt to generate a perfect hash, we should be able to look at bits
- * 31:29 of a command from a batch buffer and use the full mask for that
- * client. The existing INSTR_CLIENT_MASK/SHIFT defines can be used for this.
*/
-#define CMD_HASH_MASK STD_MI_OPCODE_MASK
+static inline u32 cmd_header_key(u32 x)
+{
+ u32 shift;
+
+ switch (x >> INSTR_CLIENT_SHIFT) {
+ default:
+ case INSTR_MI_CLIENT:
+ shift = STD_MI_OPCODE_SHIFT;
+ break;
+ case INSTR_RC_CLIENT:
+ shift = STD_3D_OPCODE_SHIFT;
+ break;
+ case INSTR_BC_CLIENT:
+ shift = STD_2D_OPCODE_SHIFT;
+ break;
+ }
+
+ return x >> shift;
+}
static int init_hash_table(struct intel_engine_cs *engine,
const struct drm_i915_cmd_table *cmd_tables,
@@ -720,7 +742,7 @@ static int init_hash_table(struct intel_engine_cs *engine,
desc_node->desc = desc;
hash_add(engine->cmd_hash, &desc_node->node,
- desc->cmd.value & CMD_HASH_MASK);
+ cmd_header_key(desc->cmd.value));
}
}
@@ -746,17 +768,15 @@ static void fini_hash_table(struct intel_engine_cs *engine)
* Optionally initializes fields related to batch buffer command parsing in the
* struct intel_engine_cs based on whether the platform requires software
* command parsing.
- *
- * Return: non-zero if initialization fails
*/
-int intel_engine_init_cmd_parser(struct intel_engine_cs *engine)
+void intel_engine_init_cmd_parser(struct intel_engine_cs *engine)
{
const struct drm_i915_cmd_table *cmd_tables;
int cmd_table_count;
int ret;
if (!IS_GEN7(engine->i915))
- return 0;
+ return;
switch (engine->id) {
case RCS:
@@ -811,24 +831,32 @@ int intel_engine_init_cmd_parser(struct intel_engine_cs *engine)
break;
default:
MISSING_CASE(engine->id);
- BUG();
+ return;
}
- BUG_ON(!validate_cmds_sorted(engine, cmd_tables, cmd_table_count));
- BUG_ON(!validate_regs_sorted(engine));
+ if (!hash_empty(engine->cmd_hash)) {
+ DRM_DEBUG_DRIVER("%s: no commands?\n", engine->name);
+ return;
+ }
- WARN_ON(!hash_empty(engine->cmd_hash));
+ if (!validate_cmds_sorted(engine, cmd_tables, cmd_table_count)) {
+ DRM_ERROR("%s: command descriptions are not sorted\n",
+ engine->name);
+ return;
+ }
+ if (!validate_regs_sorted(engine)) {
+ DRM_ERROR("%s: registers are not sorted\n", engine->name);
+ return;
+ }
ret = init_hash_table(engine, cmd_tables, cmd_table_count);
if (ret) {
- DRM_ERROR("CMD: cmd_parser_init failed!\n");
+ DRM_ERROR("%s: initialised failed!\n", engine->name);
fini_hash_table(engine);
- return ret;
+ return;
}
engine->needs_cmd_parser = true;
-
- return 0;
}
/**
@@ -853,12 +881,9 @@ find_cmd_in_table(struct intel_engine_cs *engine,
struct cmd_node *desc_node;
hash_for_each_possible(engine->cmd_hash, desc_node, node,
- cmd_header & CMD_HASH_MASK) {
+ cmd_header_key(cmd_header)) {
const struct drm_i915_cmd_descriptor *desc = desc_node->desc;
- u32 masked_cmd = desc->cmd.mask & cmd_header;
- u32 masked_value = desc->cmd.value & desc->cmd.mask;
-
- if (masked_cmd == masked_value)
+ if (((cmd_header ^ desc->cmd.value) & desc->cmd.mask) == 0)
return desc;
}
@@ -876,11 +901,14 @@ find_cmd_in_table(struct intel_engine_cs *engine,
static const struct drm_i915_cmd_descriptor*
find_cmd(struct intel_engine_cs *engine,
u32 cmd_header,
+ const struct drm_i915_cmd_descriptor *desc,
struct drm_i915_cmd_descriptor *default_desc)
{
- const struct drm_i915_cmd_descriptor *desc;
u32 mask;
+ if (((cmd_header ^ desc->cmd.value) & desc->cmd.mask) == 0)
+ return desc;
+
desc = find_cmd_in_table(engine, cmd_header);
if (desc)
return desc;
@@ -889,161 +917,122 @@ find_cmd(struct intel_engine_cs *engine,
if (!mask)
return NULL;
- BUG_ON(!default_desc);
- default_desc->flags = CMD_DESC_SKIP;
+ default_desc->cmd.value = cmd_header;
+ default_desc->cmd.mask = 0xffff0000;
default_desc->length.mask = mask;
-
+ default_desc->flags = CMD_DESC_SKIP;
return default_desc;
}
static const struct drm_i915_reg_descriptor *
-find_reg(const struct drm_i915_reg_descriptor *table,
- int count, u32 addr)
+__find_reg(const struct drm_i915_reg_descriptor *table, int count, u32 addr)
{
- int i;
-
- for (i = 0; i < count; i++) {
- if (i915_mmio_reg_offset(table[i].addr) == addr)
- return &table[i];
+ int start = 0, end = count;
+ while (start < end) {
+ int mid = start + (end - start) / 2;
+ int ret = addr - i915_mmio_reg_offset(table[mid].addr);
+ if (ret < 0)
+ end = mid;
+ else if (ret > 0)
+ start = mid + 1;
+ else
+ return &table[mid];
}
-
return NULL;
}
static const struct drm_i915_reg_descriptor *
-find_reg_in_tables(const struct drm_i915_reg_table *tables,
- int count, bool is_master, u32 addr)
+find_reg(const struct intel_engine_cs *engine, bool is_master, u32 addr)
{
- int i;
- const struct drm_i915_reg_table *table;
- const struct drm_i915_reg_descriptor *reg;
+ const struct drm_i915_reg_table *table = engine->reg_tables;
+ int count = engine->reg_table_count;
- for (i = 0; i < count; i++) {
- table = &tables[i];
+ do {
if (!table->master || is_master) {
- reg = find_reg(table->regs, table->num_regs,
- addr);
+ const struct drm_i915_reg_descriptor *reg;
+
+ reg = __find_reg(table->regs, table->num_regs, addr);
if (reg != NULL)
return reg;
}
- }
+ } while (table++, --count);
return NULL;
}
-static u32 *vmap_batch(struct drm_i915_gem_object *obj,
- unsigned start, unsigned len)
-{
- int i;
- void *addr = NULL;
- struct sg_page_iter sg_iter;
- int first_page = start >> PAGE_SHIFT;
- int last_page = (len + start + 4095) >> PAGE_SHIFT;
- int npages = last_page - first_page;
- struct page **pages;
-
- pages = drm_malloc_ab(npages, sizeof(*pages));
- if (pages == NULL) {
- DRM_DEBUG_DRIVER("Failed to get space for pages\n");
- goto finish;
- }
-
- i = 0;
- for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, first_page) {
- pages[i++] = sg_page_iter_page(&sg_iter);
- if (i == npages)
- break;
- }
-
- addr = vmap(pages, i, 0, PAGE_KERNEL);
- if (addr == NULL) {
- DRM_DEBUG_DRIVER("Failed to vmap pages\n");
- goto finish;
- }
-
-finish:
- if (pages)
- drm_free_large(pages);
- return (u32*)addr;
-}
-
-/* Returns a vmap'd pointer to dest_obj, which the caller must unmap */
-static u32 *copy_batch(struct drm_i915_gem_object *dest_obj,
+/* Returns a vmap'd pointer to dst_obj, which the caller must unmap */
+static u32 *copy_batch(struct drm_i915_gem_object *dst_obj,
struct drm_i915_gem_object *src_obj,
u32 batch_start_offset,
- u32 batch_len)
+ u32 batch_len,
+ bool *needs_clflush_after)
{
- int needs_clflush = 0;
- void *src_base, *src;
- void *dst = NULL;
+ unsigned int src_needs_clflush;
+ unsigned int dst_needs_clflush;
+ void *dst;
int ret;
- if (batch_len > dest_obj->base.size ||
- batch_len + batch_start_offset > src_obj->base.size)
- return ERR_PTR(-E2BIG);
-
- if (WARN_ON(dest_obj->pages_pin_count == 0))
- return ERR_PTR(-ENODEV);
-
- ret = i915_gem_obj_prepare_shmem_read(src_obj, &needs_clflush);
- if (ret) {
- DRM_DEBUG_DRIVER("CMD: failed to prepare shadow batch\n");
+ ret = i915_gem_obj_prepare_shmem_read(src_obj, &src_needs_clflush);
+ if (ret)
return ERR_PTR(ret);
- }
-
- src_base = vmap_batch(src_obj, batch_start_offset, batch_len);
- if (!src_base) {
- DRM_DEBUG_DRIVER("CMD: Failed to vmap batch\n");
- ret = -ENOMEM;
- goto unpin_src;
- }
- ret = i915_gem_object_set_to_cpu_domain(dest_obj, true);
+ ret = i915_gem_obj_prepare_shmem_write(dst_obj, &dst_needs_clflush);
if (ret) {
- DRM_DEBUG_DRIVER("CMD: Failed to set shadow batch to CPU\n");
- goto unmap_src;
+ dst = ERR_PTR(ret);
+ goto unpin_src;
}
- dst = vmap_batch(dest_obj, 0, batch_len);
- if (!dst) {
- DRM_DEBUG_DRIVER("CMD: Failed to vmap shadow batch\n");
- ret = -ENOMEM;
- goto unmap_src;
+ dst = i915_gem_object_pin_map(dst_obj, false);
+ if (IS_ERR(dst))
+ goto unpin_dst;
+
+ if (src_needs_clflush &&
+ i915_memcpy_from_wc((void *)(uintptr_t)batch_start_offset, 0, 0)) {
+ void *src;
+
+ src = i915_gem_object_pin_map(src_obj, true);
+ if (IS_ERR(src))
+ goto shmem_copy;
+
+ i915_memcpy_from_wc(dst,
+ src + batch_start_offset,
+ ALIGN(batch_len, 16));
+ i915_gem_object_unpin_map(src_obj);
+ } else {
+ void *ptr;
+ int offset, n;
+
+shmem_copy:
+ offset = offset_in_page(batch_start_offset);
+ if (dst_needs_clflush & CLFLUSH_BEFORE)
+ batch_len = roundup(batch_len,
+ boot_cpu_data.x86_clflush_size);
+
+ ptr = dst;
+ for (n = batch_start_offset >> PAGE_SHIFT; batch_len; n++) {
+ int len = min_t(int, batch_len, PAGE_SIZE - offset);
+ void *vaddr;
+
+ vaddr = kmap_atomic(i915_gem_object_get_page(src_obj, n));
+ if (src_needs_clflush)
+ drm_clflush_virt_range(vaddr + offset, len);
+ memcpy(ptr, vaddr + offset, len);
+ kunmap_atomic(vaddr);
+
+ ptr += len;
+ batch_len -= len;
+ offset = 0;
+ }
}
- src = src_base + offset_in_page(batch_start_offset);
- if (needs_clflush)
- drm_clflush_virt_range(src, batch_len);
-
- memcpy(dst, src, batch_len);
+ /* dst_obj is returned with vmap pinned */
+ *needs_clflush_after = dst_needs_clflush & CLFLUSH_AFTER;
-unmap_src:
- vunmap(src_base);
+unpin_dst:
+ i915_gem_object_unpin_pages(dst_obj);
unpin_src:
i915_gem_object_unpin_pages(src_obj);
-
- return ret ? ERR_PTR(ret) : dst;
-}
-
-/**
- * intel_engine_needs_cmd_parser() - should a given engine use software
- * command parsing?
- * @engine: the engine in question
- *
- * Only certain platforms require software batch buffer command parsing, and
- * only when enabled via module parameter.
- *
- * Return: true if the engine requires software command parsing
- */
-bool intel_engine_needs_cmd_parser(struct intel_engine_cs *engine)
-{
- if (!engine->needs_cmd_parser)
- return false;
-
- if (!USES_PPGTT(engine->i915))
- return false;
-
- return (i915.enable_cmd_parser == 1);
+ return dst;
}
static bool check_cmd(const struct intel_engine_cs *engine,
@@ -1052,6 +1041,9 @@ static bool check_cmd(const struct intel_engine_cs *engine,
const bool is_master,
bool *oacontrol_set)
{
+ if (desc->flags & CMD_DESC_SKIP)
+ return true;
+
if (desc->flags & CMD_DESC_REJECT) {
DRM_DEBUG_DRIVER("CMD: Rejected command: 0x%08X\n", *cmd);
return false;
@@ -1076,10 +1068,7 @@ static bool check_cmd(const struct intel_engine_cs *engine,
offset += step) {
const u32 reg_addr = cmd[offset] & desc->reg.mask;
const struct drm_i915_reg_descriptor *reg =
- find_reg_in_tables(engine->reg_tables,
- engine->reg_table_count,
- is_master,
- reg_addr);
+ find_reg(engine, is_master, reg_addr);
if (!reg) {
DRM_DEBUG_DRIVER("CMD: Rejected register 0x%08X in command: 0x%08X (exec_id=%d)\n",
@@ -1200,16 +1189,19 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine,
u32 batch_len,
bool is_master)
{
- u32 *cmd, *batch_base, *batch_end;
- struct drm_i915_cmd_descriptor default_desc = { 0 };
+ u32 *cmd, *batch_end;
+ struct drm_i915_cmd_descriptor default_desc = noop_desc;
+ const struct drm_i915_cmd_descriptor *desc = &default_desc;
bool oacontrol_set = false; /* OACONTROL tracking. See check_cmd() */
+ bool needs_clflush_after = false;
int ret = 0;
- batch_base = copy_batch(shadow_batch_obj, batch_obj,
- batch_start_offset, batch_len);
- if (IS_ERR(batch_base)) {
+ cmd = copy_batch(shadow_batch_obj, batch_obj,
+ batch_start_offset, batch_len,
+ &needs_clflush_after);
+ if (IS_ERR(cmd)) {
DRM_DEBUG_DRIVER("CMD: Failed to copy batch\n");
- return PTR_ERR(batch_base);
+ return PTR_ERR(cmd);
}
/*
@@ -1217,17 +1209,14 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine,
* large or larger and copy_batch() will write MI_NOPs to the extra
* space. Parsing should be faster in some cases this way.
*/
- batch_end = batch_base + (batch_len / sizeof(*batch_end));
-
- cmd = batch_base;
+ batch_end = cmd + (batch_len / sizeof(*batch_end));
while (cmd < batch_end) {
- const struct drm_i915_cmd_descriptor *desc;
u32 length;
if (*cmd == MI_BATCH_BUFFER_END)
break;
- desc = find_cmd(engine, *cmd, &default_desc);
+ desc = find_cmd(engine, *cmd, desc, &default_desc);
if (!desc) {
DRM_DEBUG_DRIVER("CMD: Unrecognized command: 0x%08X\n",
*cmd);
@@ -1278,7 +1267,9 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine,
ret = -EINVAL;
}
- vunmap(batch_base);
+ if (ret == 0 && needs_clflush_after)
+ drm_clflush_virt_range(shadow_batch_obj->mm.mapping, batch_len);
+ i915_gem_object_unpin_map(shadow_batch_obj);
return ret;
}
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 9bd41581b592..34f44050af25 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -111,25 +111,12 @@ static char get_tiling_flag(struct drm_i915_gem_object *obj)
static char get_global_flag(struct drm_i915_gem_object *obj)
{
- return i915_gem_obj_to_ggtt(obj) ? 'g' : ' ';
+ return i915_gem_object_to_ggtt(obj, NULL) ? 'g' : ' ';
}
static char get_pin_mapped_flag(struct drm_i915_gem_object *obj)
{
- return obj->mapping ? 'M' : ' ';
-}
-
-static u64 i915_gem_obj_total_ggtt_size(struct drm_i915_gem_object *obj)
-{
- u64 size = 0;
- struct i915_vma *vma;
-
- list_for_each_entry(vma, &obj->vma_list, obj_link) {
- if (i915_vma_is_ggtt(vma) && drm_mm_node_allocated(&vma->node))
- size += vma->node.size;
- }
-
- return size;
+ return obj->mm.mapping ? 'M' : ' ';
}
static void
@@ -158,14 +145,12 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
seq_printf(m, "%x ",
i915_gem_active_get_seqno(&obj->last_read[id],
&obj->base.dev->struct_mutex));
- seq_printf(m, "] %x %x%s%s%s",
+ seq_printf(m, "] %x %s%s%s",
i915_gem_active_get_seqno(&obj->last_write,
&obj->base.dev->struct_mutex),
- i915_gem_active_get_seqno(&obj->last_fence,
- &obj->base.dev->struct_mutex),
i915_cache_level_str(to_i915(obj->base.dev), obj->cache_level),
- obj->dirty ? " dirty" : "",
- obj->madv == I915_MADV_DONTNEED ? " purgeable" : "");
+ i915_gem_object_is_dirty(obj) ? " dirty" : "",
+ obj->mm.madv == I915_MADV_DONTNEED ? " purgeable" : "");
if (obj->base.name)
seq_printf(m, " (name: %d)", obj->base.name);
list_for_each_entry(vma, &obj->vma_list, obj_link) {
@@ -175,8 +160,6 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
seq_printf(m, " (pinned x %d)", pin_count);
if (obj->pin_display)
seq_printf(m, " (display)");
- if (obj->fence_reg != I915_FENCE_REG_NONE)
- seq_printf(m, " (fence: %d)", obj->fence_reg);
list_for_each_entry(vma, &obj->vma_list, obj_link) {
if (!drm_mm_node_allocated(&vma->node))
continue;
@@ -186,6 +169,10 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
vma->node.start, vma->node.size);
if (i915_vma_is_ggtt(vma))
seq_printf(m, ", type: %u", vma->ggtt_view.type);
+ if (vma->fence)
+ seq_printf(m, " , fence: %d%s",
+ vma->fence->id,
+ i915_gem_active_isset(&vma->last_fence) ? "*" : "");
seq_puts(m, ")");
}
if (obj->stolen)
@@ -261,9 +248,9 @@ static int obj_rank_by_stolen(void *priv,
struct list_head *A, struct list_head *B)
{
struct drm_i915_gem_object *a =
- container_of(A, struct drm_i915_gem_object, obj_exec_link);
+ container_of(A, struct drm_i915_gem_object, tmp_link);
struct drm_i915_gem_object *b =
- container_of(B, struct drm_i915_gem_object, obj_exec_link);
+ container_of(B, struct drm_i915_gem_object, tmp_link);
if (a->stolen->start < b->stolen->start)
return -1;
@@ -278,7 +265,7 @@ static int i915_gem_stolen_list_info(struct seq_file *m, void *data)
struct drm_device *dev = node->minor->dev;
struct drm_i915_private *dev_priv = to_i915(dev);
struct drm_i915_gem_object *obj;
- u64 total_obj_size, total_gtt_size;
+ u64 total_obj_size;
LIST_HEAD(stolen);
int count, ret;
@@ -286,22 +273,21 @@ static int i915_gem_stolen_list_info(struct seq_file *m, void *data)
if (ret)
return ret;
- total_obj_size = total_gtt_size = count = 0;
+ total_obj_size = count = 0;
list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
if (obj->stolen == NULL)
continue;
- list_add(&obj->obj_exec_link, &stolen);
+ list_add(&obj->tmp_link, &stolen);
total_obj_size += obj->base.size;
- total_gtt_size += i915_gem_obj_total_ggtt_size(obj);
count++;
}
list_for_each_entry(obj, &dev_priv->mm.unbound_list, global_list) {
if (obj->stolen == NULL)
continue;
- list_add(&obj->obj_exec_link, &stolen);
+ list_add(&obj->tmp_link, &stolen);
total_obj_size += obj->base.size;
count++;
@@ -309,30 +295,19 @@ static int i915_gem_stolen_list_info(struct seq_file *m, void *data)
list_sort(NULL, &stolen, obj_rank_by_stolen);
seq_puts(m, "Stolen:\n");
while (!list_empty(&stolen)) {
- obj = list_first_entry(&stolen, typeof(*obj), obj_exec_link);
+ obj = list_first_entry(&stolen, typeof(*obj), tmp_link);
seq_puts(m, " ");
describe_obj(m, obj);
seq_putc(m, '\n');
- list_del_init(&obj->obj_exec_link);
+ list_del(&obj->tmp_link);
}
mutex_unlock(&dev->struct_mutex);
- seq_printf(m, "Total %d objects, %llu bytes, %llu GTT size\n",
- count, total_obj_size, total_gtt_size);
+ seq_printf(m, "Total %d objects, %llu bytes\n",
+ count, total_obj_size);
return 0;
}
-#define count_objects(list, member) do { \
- list_for_each_entry(obj, list, member) { \
- size += i915_gem_obj_total_ggtt_size(obj); \
- ++count; \
- if (obj->map_and_fenceable) { \
- mappable_size += i915_gem_obj_ggtt_size(obj); \
- ++mappable_count; \
- } \
- } \
-} while (0)
-
struct file_stats {
struct drm_i915_file_private *file_priv;
unsigned long count;
@@ -418,9 +393,9 @@ static int per_file_ctx_stats(int id, void *ptr, void *data)
for (n = 0; n < ARRAY_SIZE(ctx->engine); n++) {
if (ctx->engine[n].state)
- per_file_stats(0, ctx->engine[n].state, data);
+ per_file_stats(0, ctx->engine[n].state->obj, data);
if (ctx->engine[n].ring)
- per_file_stats(0, ctx->engine[n].ring->obj, data);
+ per_file_stats(0, ctx->engine[n].ring->vma->obj, data);
}
return 0;
@@ -449,10 +424,10 @@ static void print_context_stats(struct seq_file *m,
#define count_vmas(list, member) do { \
list_for_each_entry(vma, list, member) { \
- size += i915_gem_obj_total_ggtt_size(vma->obj); \
+ size += vma->size; \
++count; \
- if (vma->obj->map_and_fenceable) { \
- mappable_size += i915_gem_obj_ggtt_size(vma->obj); \
+ if (i915_vma_is_map_and_fenceable(vma)) { \
+ mappable_size += vma->size; \
++mappable_count; \
} \
} \
@@ -482,7 +457,25 @@ static int i915_gem_object_info(struct seq_file *m, void* data)
dev_priv->mm.object_memory);
size = count = mappable_size = mappable_count = 0;
- count_objects(&dev_priv->mm.bound_list, global_list);
+ list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
+ bool allocated = false, mappable = false;
+
+ list_for_each_entry(vma, &obj->vma_list, obj_link) {
+ if (!i915_vma_is_ggtt(vma))
+ continue;
+
+ allocated = true;
+ size += vma->node.size;
+
+ if (i915_vma_is_map_and_fenceable(vma)) {
+ mappable = true;
+ mappable_size += vma->node.size;
+ }
+ }
+
+ count += allocated;
+ mappable_count += mappable;
+ }
seq_printf(m, "%u [%u] objects, %llu [%llu] bytes in gtt\n",
count, mappable_count, size, mappable_size);
@@ -499,12 +492,12 @@ static int i915_gem_object_info(struct seq_file *m, void* data)
size = count = purgeable_size = purgeable_count = 0;
list_for_each_entry(obj, &dev_priv->mm.unbound_list, global_list) {
size += obj->base.size, ++count;
- if (obj->madv == I915_MADV_DONTNEED)
+ if (obj->mm.madv == I915_MADV_DONTNEED)
purgeable_size += obj->base.size, ++purgeable_count;
- if (obj->mapping) {
+ if (obj->mm.mapping) {
pin_mapped_count++;
pin_mapped_size += obj->base.size;
- if (obj->pages_pin_count == 0) {
+ if (atomic_read(&obj->mm.pages_pin_count) == 0) {
pin_mapped_purgeable_count++;
pin_mapped_purgeable_size += obj->base.size;
}
@@ -515,21 +508,21 @@ static int i915_gem_object_info(struct seq_file *m, void* data)
size = count = mappable_size = mappable_count = 0;
list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
if (obj->fault_mappable) {
- size += i915_gem_obj_ggtt_size(obj);
+ size += obj->base.size;
++count;
}
if (obj->pin_display) {
- mappable_size += i915_gem_obj_ggtt_size(obj);
+ mappable_size += obj->base.size;
++mappable_count;
}
- if (obj->madv == I915_MADV_DONTNEED) {
+ if (obj->mm.madv == I915_MADV_DONTNEED) {
purgeable_size += obj->base.size;
++purgeable_count;
}
- if (obj->mapping) {
+ if (obj->mm.mapping) {
pin_mapped_count++;
pin_mapped_size += obj->base.size;
- if (obj->pages_pin_count == 0) {
+ if (atomic_read(&obj->mm.pages_pin_count) == 0) {
pin_mapped_purgeable_count++;
pin_mapped_purgeable_size += obj->base.size;
}
@@ -557,6 +550,8 @@ static int i915_gem_object_info(struct seq_file *m, void* data)
print_context_stats(m, dev_priv);
list_for_each_entry_reverse(file, &dev->filelist, lhead) {
struct file_stats stats;
+ struct drm_i915_file_private *file_priv = file->driver_priv;
+ struct drm_i915_gem_request *request;
struct task_struct *task;
memset(&stats, 0, sizeof(stats));
@@ -570,10 +565,17 @@ static int i915_gem_object_info(struct seq_file *m, void* data)
* still alive (e.g. get_pid(current) => fork() => exit()).
* Therefore, we need to protect this ->comm access using RCU.
*/
+ mutex_lock(&dev->struct_mutex);
+ request = list_first_entry_or_null(&file_priv->mm.request_list,
+ struct drm_i915_gem_request,
+ client_link);
rcu_read_lock();
- task = pid_task(file->pid, PIDTYPE_PID);
+ task = pid_task(request && request->ctx->pid ?
+ request->ctx->pid : file->pid,
+ PIDTYPE_PID);
print_file_stats(m, task ? task->comm : "<unknown>", stats);
rcu_read_unlock();
+ mutex_unlock(&dev->struct_mutex);
}
mutex_unlock(&dev->filelist_mutex);
@@ -587,30 +589,29 @@ static int i915_gem_gtt_info(struct seq_file *m, void *data)
uintptr_t list = (uintptr_t) node->info_ent->data;
struct drm_i915_private *dev_priv = to_i915(dev);
struct drm_i915_gem_object *obj;
- u64 total_obj_size, total_gtt_size;
+ u64 total_obj_size;
int count, ret;
ret = mutex_lock_interruptible(&dev->struct_mutex);
if (ret)
return ret;
- total_obj_size = total_gtt_size = count = 0;
+ total_obj_size = count = 0;
list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
- if (list == PINNED_LIST && !i915_gem_obj_is_pinned(obj))
+ if (list == PINNED_LIST && !obj->pin_display)
continue;
seq_puts(m, " ");
describe_obj(m, obj);
seq_putc(m, '\n');
total_obj_size += obj->base.size;
- total_gtt_size += i915_gem_obj_total_ggtt_size(obj);
count++;
}
mutex_unlock(&dev->struct_mutex);
- seq_printf(m, "Total %d objects, %llu bytes, %llu GTT size\n",
- count, total_obj_size, total_gtt_size);
+ seq_printf(m, "Total %d objects, %llu bytes\n",
+ count, total_obj_size);
return 0;
}
@@ -755,12 +756,11 @@ static int i915_gem_request_info(struct seq_file *m, void *data)
seq_printf(m, "%s requests: %d\n", engine->name, count);
list_for_each_entry(req, &engine->request_list, link) {
+ struct pid *pid = req->ctx->pid;
struct task_struct *task;
rcu_read_lock();
- task = NULL;
- if (req->pid)
- task = pid_task(req->pid, PIDTYPE_PID);
+ task = pid ? pid_task(pid, PIDTYPE_PID) : NULL;
seq_printf(m, " %x @ %d: %s [%d]\n",
req->fence.seqno,
(int) (jiffies - req->emitted_jiffies),
@@ -787,8 +787,6 @@ static void i915_ring_seqno_info(struct seq_file *m,
seq_printf(m, "Current sequence (%s): %x\n",
engine->name, intel_engine_get_seqno(engine));
- seq_printf(m, "Current user interrupts (%s): %lx\n",
- engine->name, READ_ONCE(engine->breadcrumbs.irq_wakeups));
spin_lock(&b->lock);
for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) {
@@ -806,18 +804,13 @@ static int i915_gem_seqno_info(struct seq_file *m, void *data)
struct drm_device *dev = node->minor->dev;
struct drm_i915_private *dev_priv = to_i915(dev);
struct intel_engine_cs *engine;
- int ret;
- ret = mutex_lock_interruptible(&dev->struct_mutex);
- if (ret)
- return ret;
intel_runtime_pm_get(dev_priv);
for_each_engine(engine, dev_priv)
i915_ring_seqno_info(m, engine);
intel_runtime_pm_put(dev_priv);
- mutex_unlock(&dev->struct_mutex);
return 0;
}
@@ -1027,14 +1020,14 @@ static int i915_gem_fence_regs_info(struct seq_file *m, void *data)
seq_printf(m, "Total fences = %d\n", dev_priv->num_fence_regs);
for (i = 0; i < dev_priv->num_fence_regs; i++) {
- struct drm_i915_gem_object *obj = dev_priv->fence_regs[i].obj;
+ struct i915_vma *vma = dev_priv->fence_regs[i].vma;
seq_printf(m, "Fence %d, pin count = %d, object = ",
i, dev_priv->fence_regs[i].pin_count);
- if (obj == NULL)
+ if (!vma)
seq_puts(m, "unused");
else
- describe_obj(m, obj);
+ describe_obj(m, vma->obj);
seq_putc(m, '\n');
}
@@ -1434,11 +1427,10 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused)
engine->hangcheck.seqno,
seqno[id],
engine->last_submitted_seqno);
- seq_printf(m, "\twaiters? %d\n",
- intel_engine_has_waiter(engine));
- seq_printf(m, "\tuser interrupts = %lx [current %lx]\n",
- engine->hangcheck.user_interrupts,
- READ_ONCE(engine->breadcrumbs.irq_wakeups));
+ seq_printf(m, "\twaiters? %s, fake irq active? %s\n",
+ yesno(intel_engine_has_waiter(engine)),
+ yesno(test_bit(engine->id,
+ &dev_priv->gpu_error.missed_irq_rings)));
seq_printf(m, "\tACTHD = 0x%08llx [current 0x%08llx]\n",
(long long)engine->hangcheck.acthd,
(long long)acthd[id]);
@@ -1471,11 +1463,7 @@ static int ironlake_drpc_info(struct seq_file *m)
struct drm_i915_private *dev_priv = to_i915(dev);
u32 rgvmodectl, rstdbyctl;
u16 crstandvid;
- int ret;
- ret = mutex_lock_interruptible(&dev->struct_mutex);
- if (ret)
- return ret;
intel_runtime_pm_get(dev_priv);
rgvmodectl = I915_READ(MEMMODECTL);
@@ -1483,7 +1471,6 @@ static int ironlake_drpc_info(struct seq_file *m)
crstandvid = I915_READ16(CRSTANDVID);
intel_runtime_pm_put(dev_priv);
- mutex_unlock(&dev->struct_mutex);
seq_printf(m, "HD boost: %s\n", yesno(rgvmodectl & MEMMODE_BOOST_EN));
seq_printf(m, "Boost freq: %d\n",
@@ -2052,18 +2039,17 @@ static int i915_context_status(struct seq_file *m, void *unused)
list_for_each_entry(ctx, &dev_priv->context_list, link) {
seq_printf(m, "HW context %u ", ctx->hw_id);
- if (IS_ERR(ctx->file_priv)) {
- seq_puts(m, "(deleted) ");
- } else if (ctx->file_priv) {
- struct pid *pid = ctx->file_priv->file->pid;
+ if (ctx->pid) {
struct task_struct *task;
- task = get_pid_task(pid, PIDTYPE_PID);
+ task = get_pid_task(ctx->pid, PIDTYPE_PID);
if (task) {
seq_printf(m, "(%s [%d]) ",
task->comm, task->pid);
put_task_struct(task);
}
+ } else if (IS_ERR(ctx->file_priv)) {
+ seq_puts(m, "(deleted) ");
} else {
seq_puts(m, "(kernel) ");
}
@@ -2077,12 +2063,17 @@ static int i915_context_status(struct seq_file *m, void *unused)
seq_printf(m, "%s: ", engine->name);
seq_putc(m, ce->initialised ? 'I' : 'i');
if (ce->state)
- describe_obj(m, ce->state);
+ describe_obj(m, ce->state->obj);
if (ce->ring)
describe_ctx_ring(m, ce->ring);
seq_putc(m, '\n');
}
+ seq_printf(m, "\tvma hashtable size=%u (actual %u), count=%u\n",
+ ctx->vma.ht_size,
+ 1 << ctx->vma.ht_bits,
+ ctx->vma.ht_count);
+
seq_putc(m, '\n');
}
@@ -2095,43 +2086,42 @@ static void i915_dump_lrc_obj(struct seq_file *m,
struct i915_gem_context *ctx,
struct intel_engine_cs *engine)
{
- struct drm_i915_gem_object *ctx_obj = ctx->engine[engine->id].state;
+ struct i915_vma *vma = ctx->engine[engine->id].state;
struct page *page;
- uint32_t *reg_state;
int j;
- unsigned long ggtt_offset = 0;
seq_printf(m, "CONTEXT: %s %u\n", engine->name, ctx->hw_id);
-
- if (ctx_obj == NULL) {
- seq_puts(m, "\tNot allocated\n");
+ if (!vma) {
+ seq_puts(m, "\tNot bound in GGTT\n");
return;
+ } else {
+ seq_printf(m, "\tBound in GGTT at %x\n",
+ lower_32_bits(vma->node.start));
}
- if (!i915_gem_obj_ggtt_bound(ctx_obj))
- seq_puts(m, "\tNot bound in GGTT\n");
- else
- ggtt_offset = i915_gem_obj_ggtt_offset(ctx_obj);
-
- if (i915_gem_object_get_pages(ctx_obj)) {
- seq_puts(m, "\tFailed to get pages for context object\n");
+ if (i915_gem_object_pin_pages(vma->obj)) {
+ seq_puts(m, "\tFailed to get pages for context object\n\n");
return;
}
- page = i915_gem_object_get_page(ctx_obj, LRC_STATE_PN);
- if (!WARN_ON(page == NULL)) {
- reg_state = kmap_atomic(page);
+ page = i915_gem_object_get_page(vma->obj, LRC_STATE_PN);
+ if (page) {
+ u32 *reg_state = kmap_atomic(page);
for (j = 0; j < 0x600 / sizeof(u32) / 4; j += 4) {
- seq_printf(m, "\t[0x%08lx] 0x%08x 0x%08x 0x%08x 0x%08x\n",
- ggtt_offset + 4096 + (j * 4),
- reg_state[j], reg_state[j + 1],
- reg_state[j + 2], reg_state[j + 3]);
+ seq_printf(m,
+ "\t[0x%08x] 0x%08x 0x%08x 0x%08x 0x%08x\n",
+ j * 4,
+ reg_state[j],
+ reg_state[j + 1],
+ reg_state[j + 2],
+ reg_state[j + 3]);
}
kunmap_atomic(reg_state);
}
seq_putc(m, '\n');
+ i915_gem_object_unpin_pages(vma->obj);
}
static int i915_dump_lrc(struct seq_file *m, void *unused)
@@ -2200,7 +2190,7 @@ static int i915_execlists(struct seq_file *m, void *data)
status_pointer = I915_READ(RING_CONTEXT_STATUS_PTR(engine));
seq_printf(m, "\tStatus pointer: 0x%08X\n", status_pointer);
- read_pointer = engine->next_context_status_buffer;
+ read_pointer = GEN8_CSB_READ_PTR(status_pointer);
write_pointer = GEN8_CSB_WRITE_PTR(status_pointer);
if (read_pointer > write_pointer)
write_pointer += GEN8_CSB_ENTRIES;
@@ -2269,11 +2259,7 @@ static int i915_swizzle_info(struct seq_file *m, void *data)
struct drm_info_node *node = m->private;
struct drm_device *dev = node->minor->dev;
struct drm_i915_private *dev_priv = to_i915(dev);
- int ret;
- ret = mutex_lock_interruptible(&dev->struct_mutex);
- if (ret)
- return ret;
intel_runtime_pm_get(dev_priv);
seq_printf(m, "bit6 swizzle for X-tiling = %s\n",
@@ -2313,7 +2299,6 @@ static int i915_swizzle_info(struct seq_file *m, void *data)
seq_puts(m, "L-shaped memory detected\n");
intel_runtime_pm_put(dev_priv);
- mutex_unlock(&dev->struct_mutex);
return 0;
}
@@ -2625,15 +2610,15 @@ static int i915_guc_log_dump(struct seq_file *m, void *data)
struct drm_info_node *node = m->private;
struct drm_device *dev = node->minor->dev;
struct drm_i915_private *dev_priv = to_i915(dev);
- struct drm_i915_gem_object *log_obj = dev_priv->guc.log_obj;
- u32 *log;
+ struct drm_i915_gem_object *obj;
int i = 0, pg;
- if (!log_obj)
+ if (!dev_priv->guc.log)
return 0;
- for (pg = 0; pg < log_obj->base.size / PAGE_SIZE; pg++) {
- log = kmap_atomic(i915_gem_object_get_page(log_obj, pg));
+ obj = dev_priv->guc.log->obj;
+ for (pg = 0; pg < obj->base.size / PAGE_SIZE; pg++) {
+ u32 *log = kmap_atomic(i915_gem_object_get_page(obj, pg));
for (i = 0; i < PAGE_SIZE / sizeof(u32); i += 4)
seq_printf(m, "0x%08x 0x%08x 0x%08x 0x%08x\n",
@@ -3246,7 +3231,7 @@ static int i915_semaphore_status(struct seq_file *m, void *unused)
struct page *page;
uint64_t *seqno;
- page = i915_gem_object_get_page(dev_priv->semaphore_obj, 0);
+ page = i915_gem_object_get_page(dev_priv->semaphore_vma->obj, 0);
seqno = (uint64_t *)kmap_atomic(page);
for_each_engine_id(engine, dev_priv, id) {
@@ -4818,13 +4803,9 @@ i915_wedged_set(void *data, u64 val)
if (i915_reset_in_progress(&dev_priv->gpu_error))
return -EAGAIN;
- intel_runtime_pm_get(dev_priv);
-
i915_handle_error(dev_priv, val,
"Manually setting wedged to %llu", val);
- intel_runtime_pm_put(dev_priv);
-
return 0;
}
@@ -4895,10 +4876,12 @@ DEFINE_SIMPLE_ATTRIBUTE(i915_ring_test_irq_fops,
#define DROP_BOUND 0x2
#define DROP_RETIRE 0x4
#define DROP_ACTIVE 0x8
-#define DROP_ALL (DROP_UNBOUND | \
- DROP_BOUND | \
- DROP_RETIRE | \
- DROP_ACTIVE)
+#define DROP_FREED 0x10
+#define DROP_ALL (DROP_UNBOUND | \
+ DROP_BOUND | \
+ DROP_RETIRE | \
+ DROP_ACTIVE | \
+ DROP_FREED)
static int
i915_drop_caches_get(void *data, u64 *val)
{
@@ -4940,6 +4923,9 @@ i915_drop_caches_set(void *data, u64 val)
unlock:
mutex_unlock(&dev->struct_mutex);
+ if (val & DROP_FREED)
+ flush_work(&dev_priv->mm.free_work);
+
return ret;
}
@@ -5065,20 +5051,15 @@ i915_cache_sharing_get(void *data, u64 *val)
struct drm_device *dev = data;
struct drm_i915_private *dev_priv = to_i915(dev);
u32 snpcr;
- int ret;
if (!(IS_GEN6(dev) || IS_GEN7(dev)))
return -ENODEV;
- ret = mutex_lock_interruptible(&dev->struct_mutex);
- if (ret)
- return ret;
intel_runtime_pm_get(dev_priv);
snpcr = I915_READ(GEN6_MBCUNIT_SNPCR);
intel_runtime_pm_put(dev_priv);
- mutex_unlock(&dev_priv->drm.struct_mutex);
*val = (snpcr & GEN6_MBC_SNPCR_MASK) >> GEN6_MBC_SNPCR_SHIFT;
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 8cfc264ec9f6..1476569b10dd 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -354,6 +354,15 @@ static int i915_getparam(struct drm_device *dev, void *data,
case I915_PARAM_MIN_EU_IN_POOL:
value = INTEL_INFO(dev)->min_eu_in_pool;
break;
+ case I915_PARAM_HAS_EXEC_BATCH_FIRST:
+ value = 1;
+ break;
+ case I915_PARAM_HAS_EXEC_ASYNC:
+ value = 1;
+ break;
+ case I915_PARAM_HAS_EXEC_FENCE:
+ value = 1;
+ break;
default:
DRM_DEBUG("Unknown parameter %d\n", param->param);
return -EINVAL;
@@ -577,8 +586,11 @@ static void i915_gem_fini(struct drm_device *dev)
i915_gem_reset(dev);
i915_gem_cleanup_engines(dev);
i915_gem_context_fini(dev);
+ i915_gem_cleanup_userptr(dev_priv);
mutex_unlock(&dev->struct_mutex);
+ flush_workqueue(dev_priv->wq);
+
WARN_ON(!list_empty(&to_i915(dev)->context_list));
}
@@ -827,6 +839,8 @@ static int i915_driver_init_early(struct drm_i915_private *dev_priv,
mutex_init(&dev_priv->wm.wm_mutex);
mutex_init(&dev_priv->pps_mutex);
+ i915_memcpy_init_early(dev_priv);
+
ret = i915_workqueues_init(dev_priv);
if (ret < 0)
return ret;
@@ -2289,24 +2303,6 @@ static int intel_runtime_suspend(struct device *device)
DRM_DEBUG_KMS("Suspending device\n");
- /*
- * We could deadlock here in case another thread holding struct_mutex
- * calls RPM suspend concurrently, since the RPM suspend will wait
- * first for this RPM suspend to finish. In this case the concurrent
- * RPM resume will be followed by its RPM suspend counterpart. Still
- * for consistency return -EAGAIN, which will reschedule this suspend.
- */
- if (!mutex_trylock(&dev->struct_mutex)) {
- DRM_DEBUG_KMS("device lock contention, deffering suspend\n");
- /*
- * Bump the expiration timestamp, otherwise the suspend won't
- * be rescheduled.
- */
- pm_runtime_mark_last_busy(device);
-
- return -EAGAIN;
- }
-
disable_rpm_wakeref_asserts(dev_priv);
/*
@@ -2314,7 +2310,6 @@ static int intel_runtime_suspend(struct device *device)
* an RPM reference.
*/
i915_gem_release_all_mmaps(dev_priv);
- mutex_unlock(&dev->struct_mutex);
intel_guc_suspend(dev);
@@ -2530,7 +2525,7 @@ static const struct drm_ioctl_desc i915_ioctls[] = {
DRM_IOCTL_DEF_DRV(I915_HWS_ADDR, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
DRM_IOCTL_DEF_DRV(I915_GEM_INIT, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
DRM_IOCTL_DEF_DRV(I915_GEM_EXECBUFFER, i915_gem_execbuffer, DRM_AUTH),
- DRM_IOCTL_DEF_DRV(I915_GEM_EXECBUFFER2, i915_gem_execbuffer2, DRM_AUTH|DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(I915_GEM_EXECBUFFER2_WR, i915_gem_execbuffer2, DRM_AUTH|DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(I915_GEM_PIN, i915_gem_reject_pin_ioctl, DRM_AUTH|DRM_ROOT_ONLY),
DRM_IOCTL_DEF_DRV(I915_GEM_UNPIN, i915_gem_reject_pin_ioctl, DRM_AUTH|DRM_ROOT_ONLY),
DRM_IOCTL_DEF_DRV(I915_GEM_BUSY, i915_gem_busy_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
@@ -2579,7 +2574,7 @@ static struct drm_driver driver = {
.set_busid = drm_pci_set_busid,
.gem_close_object = i915_gem_close_object,
- .gem_free_object = i915_gem_free_object,
+ .gem_free_object_unlocked = i915_gem_free_object,
.gem_vm_ops = &i915_gem_vm_ops,
.prime_handle_to_fd = drm_gem_prime_handle_to_fd,
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index feec00f769e1..568ff1b35f8b 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -37,10 +37,11 @@
#include <linux/i2c.h>
#include <linux/i2c-algo-bit.h>
#include <linux/backlight.h>
-#include <linux/hashtable.h>
+#include <linux/hash.h>
#include <linux/intel-iommu.h>
#include <linux/kref.h>
#include <linux/pm_qos.h>
+#include <linux/rhashtable.h>
#include <linux/shmem_fs.h>
#include <drm/drmP.h>
@@ -455,15 +456,21 @@ struct intel_opregion {
struct intel_overlay;
struct intel_overlay_error_state;
-#define I915_FENCE_REG_NONE -1
-#define I915_MAX_NUM_FENCES 32
-/* 32 fences + sign bit for FENCE_REG_NONE */
-#define I915_MAX_NUM_FENCE_BITS 6
-
struct drm_i915_fence_reg {
struct list_head lru_list;
- struct drm_i915_gem_object *obj;
+ struct drm_i915_private *i915;
+ struct i915_vma *vma;
int pin_count;
+ int id;
+ /**
+ * Whether the tiling parameters for the currently
+ * associated fence register have changed. Note that
+ * for the purposes of tracking tiling changes we also
+ * treat the unfenced register, the register slot that
+ * the object occupies whilst it executes a fenced
+ * command (such as BLT on gen2/3), as a "fence".
+ */
+ bool dirty;
};
struct sdvo_device_mapping {
@@ -481,6 +488,8 @@ struct drm_i915_error_state {
struct kref ref;
struct timeval time;
+ struct drm_i915_private *i915;
+
char error_msg[128];
bool simulated;
int iommu;
@@ -517,6 +526,7 @@ struct drm_i915_error_state {
int num_waiters;
int hangcheck_score;
enum intel_engine_hangcheck_action hangcheck_action;
+ struct i915_address_space *vm;
int num_requests;
/* our own tracking of ring head and tail */
@@ -531,6 +541,7 @@ struct drm_i915_error_state {
u32 tail;
u32 head;
u32 ctl;
+ u32 mode;
u32 hws;
u32 ipeir;
u32 ipehr;
@@ -547,8 +558,10 @@ struct drm_i915_error_state {
u32 semaphore_mboxes[I915_NUM_ENGINES - 1];
struct drm_i915_error_object {
- int page_count;
u64 gtt_offset;
+ u64 gtt_size;
+ int page_count;
+ int unused;
u32 *pages[0];
} *ringbuffer, *batchbuffer, *wa_batchbuffer, *ctx, *hws_page;
@@ -556,7 +569,9 @@ struct drm_i915_error_state {
struct drm_i915_error_request {
long jiffies;
+ pid_t pid;
u32 seqno;
+ u32 head;
u32 tail;
} *requests;
@@ -586,17 +601,15 @@ struct drm_i915_error_state {
u32 read_domains;
u32 write_domain;
s32 fence_reg:I915_MAX_NUM_FENCE_BITS;
- s32 pinned:2;
u32 tiling:2;
u32 dirty:1;
u32 purgeable:1;
u32 userptr:1;
s32 engine:4;
u32 cache_level:3;
- } **active_bo, **pinned_bo;
-
- u32 *active_bo_count, *pinned_bo_count;
- u32 vm_count;
+ } *active_bo[I915_NUM_ENGINES], *pinned_bo;
+ u32 active_bo_count[I915_NUM_ENGINES], pinned_bo_count;
+ struct i915_address_space *active_vm[I915_NUM_ENGINES];
};
struct intel_connector;
@@ -875,9 +888,13 @@ struct i915_ctx_hang_stats {
*/
struct i915_gem_context {
struct kref ref;
+
struct drm_i915_private *i915;
struct drm_i915_file_private *file_priv;
+ struct list_head link;
+
struct i915_hw_ppgtt *ppgtt;
+ struct pid *pid;
struct i915_ctx_hang_stats hang_stats;
@@ -888,12 +905,9 @@ struct i915_gem_context {
unsigned hw_id;
u32 user_handle;
- u32 ggtt_alignment;
-
struct intel_context {
- struct drm_i915_gem_object *state;
+ struct i915_vma *state;
struct intel_ring *ring;
- struct i915_vma *lrc_vma;
uint32_t *lrc_reg_state;
u64 lrc_desc;
int pin_count;
@@ -904,7 +918,13 @@ struct i915_gem_context {
struct atomic_notifier_head status_notifier;
bool execlists_force_single_submission;
- struct list_head link;
+ struct {
+ struct work_struct resize;
+ struct hlist_head *ht;
+ unsigned int ht_bits;
+ unsigned int ht_size;
+ unsigned int ht_count;
+ } vma;
u8 remap_slice;
bool closed:1;
@@ -1304,11 +1324,17 @@ struct i915_gem_mm {
struct list_head bound_list;
/**
* List of objects which are not bound to the GTT (thus
- * are idle and not used by the GPU) but still have
- * (presumably uncached) pages still attached.
+ * are idle and not used by the GPU). These objects may or may
+ * not actually have any pages attached.
*/
struct list_head unbound_list;
+ /**
+ * List of objects which are pending destruction.
+ */
+ struct llist_head free_list;
+ struct work_struct free_work;
+
/** Usable portion of the GTT for GEM */
unsigned long stolen_base; /* limited to low memory (32-bit) */
@@ -1323,6 +1349,13 @@ struct i915_gem_mm {
struct list_head fence_list;
/**
+ * Workqueue to fault in userptr pages, flushed by the execbuf
+ * when required but otherwise left to userspace to try again
+ * on EAGAIN.
+ */
+ struct workqueue_struct *userptr_wq;
+
+ /**
* Are we in a non-interruptible section of code like
* modesetting?
*/
@@ -1632,7 +1665,6 @@ struct skl_wm_level {
*/
struct i915_runtime_pm {
atomic_t wakeref_count;
- atomic_t atomic_seq;
bool suspended;
bool irqs_enabled;
};
@@ -1754,7 +1786,7 @@ struct drm_i915_private {
struct pci_dev *bridge_dev;
struct i915_gem_context *kernel_context;
struct intel_engine_cs engine[I915_NUM_ENGINES];
- struct drm_i915_gem_object *semaphore_obj;
+ struct i915_vma *semaphore_vma;
u32 next_seqno;
struct drm_dma_handle *status_page_dmah;
@@ -1780,6 +1812,7 @@ struct drm_i915_private {
u32 de_irq_mask[I915_MAX_PIPES];
};
u32 gt_irq_mask;
+ u32 sde_irq_mask;
u32 pm_irq_mask;
u32 pm_rps_events;
u32 pipestat_irq_mask[I915_MAX_PIPES];
@@ -2108,8 +2141,8 @@ struct drm_i915_gem_object_ops {
* being released or under memory pressure (where we attempt to
* reap pages for the shrinker).
*/
- int (*get_pages)(struct drm_i915_gem_object *);
- void (*put_pages)(struct drm_i915_gem_object *);
+ struct sg_table *(*get_pages)(struct drm_i915_gem_object *);
+ void (*put_pages)(struct drm_i915_gem_object *, struct sg_table *);
int (*dmabuf_export)(struct drm_i915_gem_object *);
void (*release)(struct drm_i915_gem_object *);
@@ -2143,15 +2176,16 @@ struct drm_i915_gem_object {
/** List of VMAs backed by this object */
struct list_head vma_list;
+ struct i915_vma *vma_hashed;
+ struct rhashtable *vma_ht;
/** Stolen memory for this object, instead of being backed by shmem. */
struct drm_mm_node *stolen;
struct list_head global_list;
-
- /** Used in execbuf to temporarily hold a ref */
- struct list_head obj_exec_link;
+ struct llist_node free_link;
struct list_head batch_pool_link;
+ struct list_head tmp_link;
unsigned long flags;
/**
@@ -2165,37 +2199,18 @@ struct drm_i915_gem_object {
((READ_ONCE((bo)->flags) >> I915_BO_ACTIVE_SHIFT) & I915_BO_ACTIVE_MASK)
/**
- * This is set if the object has been written to since last bound
- * to the GTT
+ * Have we taken a reference for the object for incomplete GPU
+ * activity?
*/
- unsigned int dirty:1;
+#define I915_BO_ACTIVE_REF_SHIFT (I915_BO_ACTIVE_SHIFT + I915_NUM_ENGINES)
+#define I915_BO_ACTIVE_REF_BIT BIT(I915_BO_ACTIVE_REF_SHIFT)
/**
- * Fence register bits (if any) for this object. Will be set
- * as needed when mapped into the GTT.
- * Protected by dev->struct_mutex.
- */
- signed int fence_reg:I915_MAX_NUM_FENCE_BITS;
-
- /**
- * Advice: are the backing pages purgeable?
- */
- unsigned int madv:2;
-
- /**
- * Whether the tiling parameters for the currently associated fence
- * register have changed. Note that for the purposes of tracking
- * tiling changes we also treat the unfenced register, the register
- * slot that the object occupies whilst it executes a fenced
- * command (such as BLT on gen2/3), as a "fence".
- */
- unsigned int fence_dirty:1;
-
- /**
- * Is the object at the current location in the gtt mappable and
- * fenceable? Used to avoid costly recalculations.
+ * This is set if the object has been written to since last bound
+ * to the GTT
*/
- unsigned int map_and_fenceable:1;
+#define I915_BO_DIRTY_SHIFT (I915_BO_ACTIVE_REF_SHIFT + 1)
+#define I915_BO_DIRTY_BIT BIT(I915_BO_DIRTY_SHIFT)
/**
* Whether the current gtt mapping needs to be mappable (and isn't just
@@ -2225,13 +2240,26 @@ struct drm_i915_gem_object {
unsigned int bind_count;
unsigned int pin_display;
- struct sg_table *pages;
- int pages_pin_count;
- struct get_page {
- struct scatterlist *sg;
- int last;
- } get_page;
- void *mapping;
+ struct {
+ struct mutex lock;
+ atomic_t pages_pin_count;
+
+ struct sg_table *pages;
+ void *mapping;
+
+ struct i915_gem_object_page_iter {
+ struct scatterlist *sg_pos;
+ unsigned long sg_idx;
+
+ struct radix_tree_root radix;
+ struct mutex lock;
+ } get_page;
+
+ /**
+ * Advice: are the backing pages purgeable?
+ */
+ unsigned int madv:2;
+ } mm;
/** Breadcrumb of last rendering to the buffer.
* There can only be one writer, but we allow for multiple readers.
@@ -2245,10 +2273,9 @@ struct drm_i915_gem_object {
*/
struct i915_gem_active last_read[I915_NUM_ENGINES];
struct i915_gem_active last_write;
- struct i915_gem_active last_fence;
/** References from framebuffers, locks out tiling changes. */
- unsigned long framebuffer_references;
+ atomic_t framebuffer_references;
/** Record of address bit 17 of each page at last unbind. */
unsigned long *bit_17;
@@ -2304,19 +2331,11 @@ __attribute__((nonnull))
static inline void
i915_gem_object_put(struct drm_i915_gem_object *obj)
{
- drm_gem_object_unreference(&obj->base);
+ __drm_gem_object_unreference(&obj->base);
}
__deprecated
extern void drm_gem_object_unreference(struct drm_gem_object *);
-
-__attribute__((nonnull))
-static inline void
-i915_gem_object_put_unlocked(struct drm_i915_gem_object *obj)
-{
- drm_gem_object_unreference_unlocked(&obj->base);
-}
-
__deprecated
extern void drm_gem_object_unreference_unlocked(struct drm_gem_object *);
@@ -2357,6 +2376,45 @@ i915_gem_object_has_active_engine(const struct drm_i915_gem_object *obj,
return obj->flags & BIT(engine + I915_BO_ACTIVE_SHIFT);
}
+static inline bool
+i915_gem_object_has_active_reference(const struct drm_i915_gem_object *obj)
+{
+ return obj->flags & I915_BO_ACTIVE_REF_BIT;
+}
+
+static inline void
+i915_gem_object_set_active_reference(struct drm_i915_gem_object *obj)
+{
+ obj->flags |= I915_BO_ACTIVE_REF_BIT;
+}
+
+static inline void
+i915_gem_object_clear_active_reference(struct drm_i915_gem_object *obj)
+{
+ obj->flags &= ~I915_BO_ACTIVE_REF_BIT;
+}
+
+void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj);
+
+static inline bool
+i915_gem_object_is_dirty(const struct drm_i915_gem_object *obj)
+{
+ return obj->flags & I915_BO_DIRTY_BIT;
+}
+
+static inline void
+i915_gem_object_set_dirty(struct drm_i915_gem_object *obj)
+{
+ GEM_BUG_ON(atomic_read(&obj->mm.pages_pin_count) == 0);
+ obj->flags |= I915_BO_DIRTY_BIT;
+}
+
+static inline void
+i915_gem_object_clear_dirty(struct drm_i915_gem_object *obj)
+{
+ obj->flags &= ~I915_BO_DIRTY_BIT;
+}
+
static inline unsigned int
i915_gem_object_get_tiling(struct drm_i915_gem_object *obj)
{
@@ -3043,7 +3101,8 @@ int i915_gem_set_tiling(struct drm_device *dev, void *data,
struct drm_file *file_priv);
int i915_gem_get_tiling(struct drm_device *dev, void *data,
struct drm_file *file_priv);
-void i915_gem_init_userptr(struct drm_i915_private *dev_priv);
+int i915_gem_init_userptr(struct drm_i915_private *dev_priv);
+void i915_gem_cleanup_userptr(struct drm_i915_private *dev_priv);
int i915_gem_userptr_ioctl(struct drm_device *dev, void *data,
struct drm_file *file);
int i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
@@ -3057,6 +3116,7 @@ int i915_gem_freeze_late(struct drm_i915_private *dev_priv);
void *i915_gem_object_alloc(struct drm_device *dev);
void i915_gem_object_free(struct drm_i915_gem_object *obj);
+bool i915_gem_object_flush_active(struct drm_i915_gem_object *obj);
void i915_gem_object_init(struct drm_i915_gem_object *obj,
const struct drm_i915_gem_object_ops *ops);
struct drm_i915_gem_object *i915_gem_object_create(struct drm_device *dev,
@@ -3066,7 +3126,7 @@ struct drm_i915_gem_object *i915_gem_object_create_from_data(
void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file);
void i915_gem_free_object(struct drm_gem_object *obj);
-int __must_check
+struct i915_vma * __must_check
i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
const struct i915_ggtt_view *view,
u64 size,
@@ -3077,91 +3137,80 @@ int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level,
u32 flags);
void __i915_vma_set_map_and_fenceable(struct i915_vma *vma);
int __must_check i915_vma_unbind(struct i915_vma *vma);
+void i915_vma_unlink_ctx(struct i915_vma *vma);
void i915_vma_close(struct i915_vma *vma);
void i915_vma_destroy(struct i915_vma *vma);
int i915_gem_object_unbind(struct drm_i915_gem_object *obj);
-int i915_gem_object_put_pages(struct drm_i915_gem_object *obj);
void i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv);
void i915_gem_release_mmap(struct drm_i915_gem_object *obj);
int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
- int *needs_clflush);
+ unsigned int *needs_clflush);
+int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
+ unsigned int *needs_clflush);
+#define CLFLUSH_BEFORE 0x1
+#define CLFLUSH_AFTER 0x2
-int __must_check i915_gem_object_get_pages(struct drm_i915_gem_object *obj);
-
-static inline int __sg_page_count(struct scatterlist *sg)
+static inline int __sg_page_count(const struct scatterlist *sg)
{
return sg->length >> PAGE_SHIFT;
}
-struct page *
-i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj, int n);
+struct scatterlist *
+i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
+ unsigned long n, unsigned int *offset);
-static inline dma_addr_t
-i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj, int n)
-{
- if (n < obj->get_page.last) {
- obj->get_page.sg = obj->pages->sgl;
- obj->get_page.last = 0;
- }
+struct page *
+i915_gem_object_get_page(struct drm_i915_gem_object *obj,
+ unsigned long n);
- while (obj->get_page.last + __sg_page_count(obj->get_page.sg) <= n) {
- obj->get_page.last += __sg_page_count(obj->get_page.sg++);
- if (unlikely(sg_is_chain(obj->get_page.sg)))
- obj->get_page.sg = sg_chain_ptr(obj->get_page.sg);
- }
+struct page *
+i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj,
+ unsigned long n);
- return sg_dma_address(obj->get_page.sg) + ((n - obj->get_page.last) << PAGE_SHIFT);
-}
+dma_addr_t
+i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj,
+ unsigned long n);
-static inline struct page *
-i915_gem_object_get_page(struct drm_i915_gem_object *obj, int n)
+void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
+ struct sg_table *pages);
+int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj);
+static inline int __must_check
+i915_gem_object_pin_pages(struct drm_i915_gem_object *obj)
{
- if (WARN_ON(n >= obj->base.size >> PAGE_SHIFT))
- return NULL;
+ if (atomic_inc_not_zero(&obj->mm.pages_pin_count))
+ return 0;
- if (n < obj->get_page.last) {
- obj->get_page.sg = obj->pages->sgl;
- obj->get_page.last = 0;
- }
-
- while (obj->get_page.last + __sg_page_count(obj->get_page.sg) <= n) {
- obj->get_page.last += __sg_page_count(obj->get_page.sg++);
- if (unlikely(sg_is_chain(obj->get_page.sg)))
- obj->get_page.sg = sg_chain_ptr(obj->get_page.sg);
- }
-
- return nth_page(sg_page(obj->get_page.sg), n - obj->get_page.last);
+ return __i915_gem_object_get_pages(obj);
}
-static inline void i915_gem_object_pin_pages(struct drm_i915_gem_object *obj)
-{
- BUG_ON(obj->pages == NULL);
- obj->pages_pin_count++;
-}
-
-static inline void i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
+static inline void
+i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
{
- BUG_ON(obj->pages_pin_count == 0);
- obj->pages_pin_count--;
+ GEM_BUG_ON(atomic_read(&obj->mm.pages_pin_count) <= 0);
+ atomic_dec(&obj->mm.pages_pin_count);
}
+void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj);
+void __i915_gem_object_invalidate(struct drm_i915_gem_object *obj);
/**
* i915_gem_object_pin_map - return a contiguous mapping of the entire object
* @obj - the object to map into kernel address space
+ * &use_wc - whether the mapping should be using WC or WB pgprot_t
*
* Calls i915_gem_object_pin_pages() to prevent reaping of the object's
* pages and then returns a contiguous mapping of the backing storage into
* the kernel address space.
*
- * The caller must hold the struct_mutex, and is responsible for calling
- * i915_gem_object_unpin_map() when the mapping is no longer required.
+ * The caller is responsible for calling i915_gem_object_unpin_map() when the
+ * mapping is no longer required.
*
* Returns the pointer through which to access the mapped object, or an
* ERR_PTR() on error.
*/
-void *__must_check i915_gem_object_pin_map(struct drm_i915_gem_object *obj);
+void *__must_check i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
+ bool use_wc);
/**
* i915_gem_object_unpin_map - releases an earlier mapping
@@ -3171,18 +3220,16 @@ void *__must_check i915_gem_object_pin_map(struct drm_i915_gem_object *obj);
* with your access, call i915_gem_object_unpin_map() to release the pin
* upon the mapping. Once the pin count reaches zero, that mapping may be
* removed.
- *
- * The caller must hold the struct_mutex.
*/
static inline void i915_gem_object_unpin_map(struct drm_i915_gem_object *obj)
{
- lockdep_assert_held(&obj->base.dev->struct_mutex);
i915_gem_object_unpin_pages(obj);
}
int __must_check i915_mutex_lock_interruptible(struct drm_device *dev);
int i915_gem_object_sync(struct drm_i915_gem_object *obj,
- struct drm_i915_gem_request *to);
+ struct drm_i915_gem_request *to,
+ bool write);
void i915_vma_move_to_active(struct i915_vma *vma,
struct drm_i915_gem_request *req,
unsigned int flags);
@@ -3262,12 +3309,11 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj,
bool write);
int __must_check
i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write);
-int __must_check
+struct i915_vma * __must_check
i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
u32 alignment,
const struct i915_ggtt_view *view);
-void i915_gem_object_unpin_from_display_plane(struct drm_i915_gem_object *obj,
- const struct i915_ggtt_view *view);
+void i915_gem_object_unpin_from_display_plane(struct i915_vma *vma);
int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
int align);
int i915_gem_open(struct drm_device *dev, struct drm_file *file);
@@ -3287,77 +3333,89 @@ struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev,
struct dma_buf *i915_gem_prime_export(struct drm_device *dev,
struct drm_gem_object *gem_obj, int flags);
-u64 i915_gem_obj_ggtt_offset_view(struct drm_i915_gem_object *o,
- const struct i915_ggtt_view *view);
-u64 i915_gem_obj_offset(struct drm_i915_gem_object *o,
- struct i915_address_space *vm);
-static inline u64
-i915_gem_obj_ggtt_offset(struct drm_i915_gem_object *o)
-{
- return i915_gem_obj_ggtt_offset_view(o, &i915_ggtt_view_normal);
-}
-
-bool i915_gem_obj_ggtt_bound_view(struct drm_i915_gem_object *o,
- const struct i915_ggtt_view *view);
-bool i915_gem_obj_bound(struct drm_i915_gem_object *o,
- struct i915_address_space *vm);
-
struct i915_vma *
i915_gem_obj_to_vma(struct drm_i915_gem_object *obj,
- struct i915_address_space *vm);
-struct i915_vma *
-i915_gem_obj_to_ggtt_view(struct drm_i915_gem_object *obj,
- const struct i915_ggtt_view *view);
+ struct i915_address_space *vm,
+ const struct i915_ggtt_view *view);
struct i915_vma *
i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj,
- struct i915_address_space *vm);
-struct i915_vma *
-i915_gem_obj_lookup_or_create_ggtt_vma(struct drm_i915_gem_object *obj,
- const struct i915_ggtt_view *view);
-
-static inline struct i915_vma *
-i915_gem_obj_to_ggtt(struct drm_i915_gem_object *obj)
-{
- return i915_gem_obj_to_ggtt_view(obj, &i915_ggtt_view_normal);
-}
-bool i915_gem_obj_is_pinned(struct drm_i915_gem_object *obj);
+ struct i915_address_space *vm,
+ const struct i915_ggtt_view *view);
-/* Some GGTT VM helpers */
static inline struct i915_hw_ppgtt *
i915_vm_to_ppgtt(struct i915_address_space *vm)
{
return container_of(vm, struct i915_hw_ppgtt, base);
}
-static inline bool i915_gem_obj_ggtt_bound(struct drm_i915_gem_object *obj)
+static inline struct i915_vma *
+i915_gem_object_to_ggtt(struct drm_i915_gem_object *obj,
+ const struct i915_ggtt_view *view)
{
- return i915_gem_obj_ggtt_bound_view(obj, &i915_ggtt_view_normal);
+ return i915_gem_obj_to_vma(obj, &to_i915(obj->base.dev)->ggtt.base, view);
}
-unsigned long
-i915_gem_obj_ggtt_size(struct drm_i915_gem_object *obj);
-
-void i915_gem_object_ggtt_unpin_view(struct drm_i915_gem_object *obj,
- const struct i915_ggtt_view *view);
-static inline void
-i915_gem_object_ggtt_unpin(struct drm_i915_gem_object *obj)
+static inline unsigned long
+i915_gem_object_ggtt_offset(struct drm_i915_gem_object *o,
+ const struct i915_ggtt_view *view)
{
- i915_gem_object_ggtt_unpin_view(obj, &i915_ggtt_view_normal);
+ return i915_gem_object_to_ggtt(o, view)->node.start;
}
/* i915_gem_fence.c */
-int __must_check i915_gem_object_get_fence(struct drm_i915_gem_object *obj);
-int __must_check i915_gem_object_put_fence(struct drm_i915_gem_object *obj);
+int __must_check i915_vma_get_fence(struct i915_vma *vma);
+int __must_check i915_vma_put_fence(struct i915_vma *vma);
+
+/**
+ * i915_vma_pin_fence - pin fencing state
+ * @vma: vma to pin fencing for
+ *
+ * This pins the fencing state (whether tiled or untiled) to make sure the
+ * vma (and its object) is ready to be used as a scanout target. Fencing
+ * status must be synchronize first by calling i915_vma_get_fence():
+ *
+ * The resulting fence pin reference must be released again with
+ * i915_vma_unpin_fence().
+ *
+ * Returns:
+ *
+ * True if the vma has a fence, false otherwise.
+ */
+static inline bool
+i915_vma_pin_fence(struct i915_vma *vma)
+{
+ if (vma->fence) {
+ vma->fence->pin_count++;
+ return true;
+ } else
+ return false;
+}
-bool i915_gem_object_pin_fence(struct drm_i915_gem_object *obj);
-void i915_gem_object_unpin_fence(struct drm_i915_gem_object *obj);
+/**
+ * i915_vma_unpin_fence - unpin fencing state
+ * @vma: vma to unpin fencing for
+ *
+ * This releases the fence pin reference acquired through
+ * i915_vma_pin_fence. It will handle both objects with and without an
+ * attached fence correctly, callers do not need to distinguish this.
+ */
+static inline void
+i915_vma_unpin_fence(struct i915_vma *vma)
+{
+ if (vma->fence) {
+ GEM_BUG_ON(vma->fence->pin_count <= 0);
+ vma->fence->pin_count--;
+ }
+}
void i915_gem_restore_fences(struct drm_device *dev);
void i915_gem_detect_bit_6_swizzle(struct drm_device *dev);
-void i915_gem_object_do_bit_17_swizzle(struct drm_i915_gem_object *obj);
-void i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj);
+void i915_gem_object_do_bit_17_swizzle(struct drm_i915_gem_object *obj,
+ struct sg_table *pages);
+void i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj,
+ struct sg_table *pages);
/* i915_gem_context.c */
int __must_check i915_gem_context_init(struct drm_device *dev);
@@ -3419,12 +3477,15 @@ int i915_gem_context_reset_stats_ioctl(struct drm_device *dev, void *data,
/* i915_gem_evict.c */
int __must_check i915_gem_evict_something(struct i915_address_space *vm,
+ struct drm_mm_node *node,
u64 min_size, u64 alignment,
unsigned cache_level,
u64 start, u64 end,
unsigned flags);
-int __must_check i915_gem_evict_for_vma(struct i915_vma *target);
-int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle);
+int __must_check i915_gem_evict_for_node(struct i915_address_space *vm,
+ struct drm_mm_node *node,
+ unsigned int flags);
+int i915_gem_evict_vm(struct i915_address_space *vm);
/* belongs in i915_gem_gtt.h */
static inline void i915_gem_chipset_flush(struct drm_i915_private *dev_priv)
@@ -3453,6 +3514,11 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_device *dev,
u32 gtt_offset,
u32 size);
+/* i915_gem_internal.c */
+struct drm_i915_gem_object *
+i915_gem_object_create_internal(struct drm_device *dev,
+ unsigned int size);
+
/* i915_gem_shrinker.c */
unsigned long i915_gem_shrink(struct drm_i915_private *dev_priv,
unsigned long target,
@@ -3516,9 +3582,8 @@ const char *i915_cache_level_str(struct drm_i915_private *i915, int type);
/* i915_cmd_parser.c */
int i915_cmd_parser_get_version(struct drm_i915_private *dev_priv);
-int intel_engine_init_cmd_parser(struct intel_engine_cs *engine);
+void intel_engine_init_cmd_parser(struct intel_engine_cs *engine);
void intel_engine_cleanup_cmd_parser(struct intel_engine_cs *engine);
-bool intel_engine_needs_cmd_parser(struct intel_engine_cs *engine);
int intel_engine_cmd_parser(struct intel_engine_cs *engine,
struct drm_i915_gem_object *batch_obj,
struct drm_i915_gem_object *shadow_batch_obj,
@@ -3826,6 +3891,7 @@ wait_remaining_ms_from_jiffies(unsigned long timestamp_jiffies, int to_wait_ms)
schedule_timeout_uninterruptible(remaining_jiffies);
}
}
+
static inline bool __i915_request_irq_complete(struct drm_i915_gem_request *req)
{
struct intel_engine_cs *engine = req->engine;
@@ -3847,8 +3913,7 @@ static inline bool __i915_request_irq_complete(struct drm_i915_gem_request *req)
* but it is easier and safer to do it every time the waiter
* is woken.
*/
- if (engine->irq_seqno_barrier &&
- READ_ONCE(engine->breadcrumbs.irq_seqno_bh) == current &&
+ if (rcu_access_pointer(engine->breadcrumbs.irq_seqno_bh) == current &&
cmpxchg_relaxed(&engine->breadcrumbs.irq_posted, 1, 0)) {
struct task_struct *tsk;
@@ -3864,7 +3929,8 @@ static inline bool __i915_request_irq_complete(struct drm_i915_gem_request *req)
* barrier on the next pass, and the read may not see the
* seqno update.
*/
- engine->irq_seqno_barrier(engine);
+ if (engine->irq_seqno_barrier)
+ engine->irq_seqno_barrier(engine);
/* If we consume the irq, but we are no longer the bottom-half,
* the real bottom-half may not have serialised their own
@@ -3884,7 +3950,7 @@ static inline bool __i915_request_irq_complete(struct drm_i915_gem_request *req)
wake_up_process(tsk);
rcu_read_unlock();
- if (i915_gem_request_completed(req))
+ if (i915_spin_request(req, TASK_UNINTERRUPTIBLE, 10))
return true;
}
@@ -3902,4 +3968,11 @@ static inline bool __i915_request_irq_complete(struct drm_i915_gem_request *req)
return false;
}
+void i915_memcpy_init_early(struct drm_i915_private *dev_priv);
+bool i915_memcpy_from_wc(void *dst, const void *src, unsigned long len);
+
+int remap_io_mapping(struct vm_area_struct *vma,
+ unsigned long addr, unsigned long pfn, unsigned long size,
+ struct io_mapping *iomap);
+
#endif
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index f4f8eaa90f2a..c03fea6b0677 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -63,15 +63,14 @@ static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
}
static int
-insert_mappable_node(struct drm_i915_private *i915,
+insert_mappable_node(struct i915_ggtt *ggtt,
struct drm_mm_node *node, u32 size)
{
memset(node, 0, sizeof(*node));
- return drm_mm_insert_node_in_range_generic(&i915->ggtt.base.mm, node,
- size, 0, 0, 0,
- i915->ggtt.mappable_end,
- DRM_MM_SEARCH_DEFAULT,
- DRM_MM_CREATE_DEFAULT);
+ return drm_mm_insert_node_in_range_generic(&ggtt->base.mm, node,
+ size, 0, -1,
+ 0, ggtt->mappable_end,
+ DRM_MM_INSERT_LOW);
}
static void
@@ -167,7 +166,7 @@ i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
return 0;
}
-static int
+static struct sg_table *
i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
{
struct address_space *mapping = obj->base.filp->f_mapping;
@@ -177,7 +176,7 @@ i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
int i;
if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj)))
- return -EINVAL;
+ return ERR_PTR(-EINVAL);
for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
struct page *page;
@@ -185,7 +184,7 @@ i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
page = shmem_read_mapping_page(mapping, i);
if (IS_ERR(page))
- return PTR_ERR(page);
+ return ERR_CAST(page);
src = kmap_atomic(page);
memcpy(vaddr, src, PAGE_SIZE);
@@ -200,11 +199,11 @@ i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
st = kmalloc(sizeof(*st), GFP_KERNEL);
if (st == NULL)
- return -ENOMEM;
+ return ERR_PTR(-ENOMEM);
if (sg_alloc_table(st, 1, GFP_KERNEL)) {
kfree(st);
- return -ENOMEM;
+ return ERR_PTR(-ENOMEM);
}
sg = st->sgl;
@@ -214,29 +213,31 @@ i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
sg_dma_address(sg) = obj->phys_handle->busaddr;
sg_dma_len(sg) = obj->base.size;
- obj->pages = st;
- return 0;
+ return st;
}
static void
-i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj)
+__i915_gem_object_release_shmem(struct drm_i915_gem_object *obj)
{
- int ret;
+ GEM_BUG_ON(obj->mm.madv == __I915_MADV_PURGED);
- BUG_ON(obj->madv == __I915_MADV_PURGED);
+ if (obj->mm.madv == I915_MADV_DONTNEED)
+ i915_gem_object_clear_dirty(obj);
- ret = i915_gem_object_set_to_cpu_domain(obj, true);
- if (WARN_ON(ret)) {
- /* In the event of a disaster, abandon all caches and
- * hope for the best.
- */
- obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
- }
+ if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0)
+ i915_gem_clflush_object(obj, false);
- if (obj->madv == I915_MADV_DONTNEED)
- obj->dirty = 0;
+ obj->base.read_domains = I915_GEM_DOMAIN_CPU;
+ obj->base.write_domain = I915_GEM_DOMAIN_CPU;
+}
+
+static void
+i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj,
+ struct sg_table *pages)
+{
+ __i915_gem_object_release_shmem(obj);
- if (obj->dirty) {
+ if (i915_gem_object_is_dirty(obj)) {
struct address_space *mapping = obj->base.filp->f_mapping;
char *vaddr = obj->phys_handle->vaddr;
int i;
@@ -255,16 +256,16 @@ i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj)
kunmap_atomic(dst);
set_page_dirty(page);
- if (obj->madv == I915_MADV_WILLNEED)
+ if (obj->mm.madv == I915_MADV_WILLNEED)
mark_page_accessed(page);
put_page(page);
vaddr += PAGE_SIZE;
}
- obj->dirty = 0;
+ i915_gem_object_clear_dirty(obj);
}
- sg_free_table(obj->pages);
- kfree(obj->pages);
+ sg_free_table(pages);
+ kfree(pages);
}
static void
@@ -406,21 +407,16 @@ i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
return 0;
}
- if (obj->madv != I915_MADV_WILLNEED)
+ if (obj->mm.madv != I915_MADV_WILLNEED)
return -EFAULT;
if (obj->base.filp == NULL)
return -EINVAL;
- ret = i915_gem_object_unbind(obj);
+ ret = i915_gem_object_set_cache_level(obj, I915_CACHE_NONE);
if (ret)
return ret;
- ret = i915_gem_object_put_pages(obj);
- if (ret)
- return ret;
-
- /* create a new object */
phys = drm_pci_alloc(obj->base.dev, obj->base.size, align);
if (!phys)
return -ENOMEM;
@@ -428,13 +424,12 @@ i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
obj->phys_handle = phys;
obj->ops = &i915_gem_phys_ops;
- return i915_gem_object_get_pages(obj);
+ return i915_gem_object_pin_pages(obj);
}
static int
i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
- struct drm_i915_gem_pwrite *args,
- struct drm_file *file_priv)
+ const struct drm_i915_gem_pwrite *args)
{
struct drm_device *dev = obj->base.dev;
void *vaddr = obj->phys_handle->vaddr + args->offset;
@@ -506,7 +501,7 @@ i915_gem_create(struct drm_file *file,
ret = drm_gem_handle_create(file, &obj->base, &handle);
/* drop reference from allocate - handle holds it now */
- i915_gem_object_put_unlocked(obj);
+ i915_gem_object_put(obj);
if (ret)
return ret;
@@ -600,61 +595,103 @@ __copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset,
* flush the object from the CPU cache.
*/
int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
- int *needs_clflush)
+ unsigned int *needs_clflush)
{
int ret;
*needs_clflush = 0;
- if (WARN_ON(!i915_gem_object_has_struct_page(obj)))
- return -EINVAL;
+ if (!i915_gem_object_has_struct_page(obj))
+ return -ENODEV;
ret = i915_gem_object_wait_rendering(obj, true);
if (ret)
return ret;
- if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) {
- /* If we're not in the cpu read domain, set ourself into the gtt
- * read domain and manually flush cachelines (if required). This
- * optimizes for the case when the gpu will dirty the data
- * anyway again before the next pread happens. */
+ ret = i915_gem_object_pin_pages(obj);
+ if (ret)
+ return ret;
+
+ i915_gem_object_flush_gtt_write_domain(obj);
+
+ /* If we're not in the cpu read domain, set ourself into the gtt
+ * read domain and manually flush cachelines (if required). This
+ * optimizes for the case when the gpu will dirty the data
+ * anyway again before the next pread happens.
+ */
+ if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU))
*needs_clflush = !cpu_cache_is_coherent(obj->base.dev,
obj->cache_level);
- }
- ret = i915_gem_object_get_pages(obj);
- if (ret)
- return ret;
+ if (*needs_clflush && !static_cpu_has(X86_FEATURE_CLFLUSH)) {
+ ret = i915_gem_object_set_to_cpu_domain(obj, false);
+ if (ret)
+ goto err_unpin;
+
+ *needs_clflush = 0;
+ }
- i915_gem_object_pin_pages(obj);
+ /* return with the pages pinned */
+ return 0;
+err_unpin:
+ i915_gem_object_unpin_pages(obj);
return ret;
}
-/* Per-page copy function for the shmem pread fastpath.
- * Flushes invalid cachelines before reading the target if
- * needs_clflush is set. */
-static int
-shmem_pread_fast(struct page *page, int shmem_page_offset, int page_length,
- char __user *user_data,
- bool page_do_bit17_swizzling, bool needs_clflush)
+int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
+ unsigned int *needs_clflush)
{
- char *vaddr;
int ret;
- if (unlikely(page_do_bit17_swizzling))
- return -EINVAL;
+ *needs_clflush = 0;
+ if (!i915_gem_object_has_struct_page(obj))
+ return -ENODEV;
- vaddr = kmap_atomic(page);
- if (needs_clflush)
- drm_clflush_virt_range(vaddr + shmem_page_offset,
- page_length);
- ret = __copy_to_user_inatomic(user_data,
- vaddr + shmem_page_offset,
- page_length);
- kunmap_atomic(vaddr);
+ ret = i915_gem_object_wait_rendering(obj, false);
+ if (ret)
+ return ret;
- return ret ? -EFAULT : 0;
+ ret = i915_gem_object_pin_pages(obj);
+ if (ret)
+ return ret;
+
+ i915_gem_object_flush_gtt_write_domain(obj);
+
+ /* If we're not in the cpu write domain, set ourself into the
+ * gtt write domain and manually flush cachelines (as required).
+ * This optimizes for the case when the gpu will use the data
+ * right away and we therefore have to clflush anyway.
+ */
+ if (obj->base.write_domain != I915_GEM_DOMAIN_CPU)
+ *needs_clflush |= cpu_write_needs_clflush(obj) << 1;
+
+ /* Same trick applies to invalidate partially written cachelines read
+ * before writing.
+ */
+ if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU))
+ *needs_clflush |= !cpu_cache_is_coherent(obj->base.dev,
+ obj->cache_level);
+
+ if (*needs_clflush && !static_cpu_has(X86_FEATURE_CLFLUSH)) {
+ ret = i915_gem_object_set_to_cpu_domain(obj, true);
+ if (ret)
+ goto err_unpin;
+
+ *needs_clflush = 0;
+ }
+
+ if ((*needs_clflush & CLFLUSH_AFTER) == 0)
+ obj->cache_dirty = true;
+
+ intel_fb_obj_invalidate(obj, ORIGIN_CPU);
+ i915_gem_object_set_dirty(obj);
+ /* return with the pages pinned */
+ return 0;
+
+err_unpin:
+ i915_gem_object_unpin_pages(obj);
+ return ret;
}
static void
@@ -682,7 +719,7 @@ shmem_clflush_swizzled_range(char *addr, unsigned long length,
/* Only difference to the fast-path function is that this can handle bit17
* and uses non-atomic copy and kmap functions. */
static int
-shmem_pread_slow(struct page *page, int shmem_page_offset, int page_length,
+shmem_pread_slow(struct page *page, int offset, int length,
char __user *user_data,
bool page_do_bit17_swizzling, bool needs_clflush)
{
@@ -691,75 +728,143 @@ shmem_pread_slow(struct page *page, int shmem_page_offset, int page_length,
vaddr = kmap(page);
if (needs_clflush)
- shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
- page_length,
+ shmem_clflush_swizzled_range(vaddr + offset, length,
page_do_bit17_swizzling);
if (page_do_bit17_swizzling)
- ret = __copy_to_user_swizzled(user_data,
- vaddr, shmem_page_offset,
- page_length);
+ ret = __copy_to_user_swizzled(user_data, vaddr, offset, length);
else
- ret = __copy_to_user(user_data,
- vaddr + shmem_page_offset,
- page_length);
+ ret = __copy_to_user(user_data, vaddr + offset, length);
kunmap(page);
return ret ? - EFAULT : 0;
}
-static inline unsigned long
-slow_user_access(struct io_mapping *mapping,
- uint64_t page_base, int page_offset,
- char __user *user_data,
- unsigned long length, bool pwrite)
+static int
+shmem_pread(struct page *page, int offset, int length, char __user *user_data,
+ bool page_do_bit17_swizzling, bool needs_clflush)
+{
+ int ret;
+
+ ret = -ENODEV;
+ if (!page_do_bit17_swizzling) {
+ char *vaddr = kmap_atomic(page);
+
+ if (needs_clflush)
+ drm_clflush_virt_range(vaddr + offset, length);
+ ret = __copy_to_user_inatomic(user_data, vaddr + offset, length);
+ kunmap_atomic(vaddr);
+ }
+ if (ret == 0)
+ return 0;
+
+ return shmem_pread_slow(page, offset, length, user_data,
+ page_do_bit17_swizzling, needs_clflush);
+}
+
+static int
+i915_gem_shmem_pread(struct drm_i915_gem_object *obj,
+ struct drm_i915_gem_pread *args)
+{
+ char __user *user_data;
+ u64 remain;
+ unsigned obj_do_bit17_swizzling;
+ unsigned needs_clflush;
+ unsigned idx, offset;
+ int ret;
+
+ obj_do_bit17_swizzling = 0;
+ if (i915_gem_object_needs_bit17_swizzle(obj))
+ obj_do_bit17_swizzling = 1 << 17;
+
+ ret = mutex_lock_interruptible(&obj->base.dev->struct_mutex);
+ if (ret)
+ return ret;
+
+ ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush);
+ mutex_unlock(&obj->base.dev->struct_mutex);
+ if (ret)
+ return ret;
+
+ remain = args->size;
+ user_data = u64_to_user_ptr(args->data_ptr);
+ offset = offset_in_page(args->offset);
+ for (idx = args->offset >> PAGE_SHIFT; remain; idx++) {
+ struct page *page = i915_gem_object_get_page(obj, idx);
+ int length;
+
+ length = remain;
+ if ((offset + length) > PAGE_SIZE)
+ length = PAGE_SIZE - offset;
+
+ ret = shmem_pread(page, offset, length, user_data,
+ page_to_phys(page) & obj_do_bit17_swizzling,
+ needs_clflush);
+ if (ret)
+ break;
+
+ remain -= length;
+ user_data += length;
+ offset = 0;
+ }
+
+ i915_gem_object_unpin_pages(obj);
+
+ return ret;
+}
+
+static inline int
+gtt_user_read(struct io_mapping *mapping,
+ loff_t page_base, int page_offset,
+ char __user *user_data,
+ int length)
{
- void __iomem *ioaddr;
+ void __iomem *vaddr_atomic;
void *vaddr;
- uint64_t unwritten;
+ unsigned long unwritten;
- ioaddr = io_mapping_map_wc(mapping, page_base, PAGE_SIZE);
+ vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base);
/* We can use the cpu mem copy function because this is X86. */
- vaddr = (void __force *)ioaddr + page_offset;
- if (pwrite)
- unwritten = __copy_from_user(vaddr, user_data, length);
- else
- unwritten = __copy_to_user(user_data, vaddr, length);
-
- io_mapping_unmap(ioaddr);
+ vaddr = (void __force*)vaddr_atomic + page_offset;
+ unwritten = __copy_to_user_inatomic(user_data, vaddr, length);
+ io_mapping_unmap_atomic(vaddr_atomic);
+ if (unwritten) {
+ vaddr_atomic = io_mapping_map_wc(mapping, page_base, PAGE_SIZE);
+ /* We can use the cpu mem copy function because this is X86. */
+ vaddr = (void __force*)vaddr_atomic + page_offset;
+ unwritten = copy_to_user(user_data, vaddr, length);
+ io_mapping_unmap(vaddr_atomic);
+ }
return unwritten;
}
static int
-i915_gem_gtt_pread(struct drm_device *dev,
- struct drm_i915_gem_object *obj, uint64_t size,
- uint64_t data_offset, uint64_t data_ptr)
+i915_gem_gtt_pread(struct drm_i915_gem_object *obj,
+ const struct drm_i915_gem_pread *args)
{
- struct drm_i915_private *dev_priv = to_i915(dev);
- struct i915_ggtt *ggtt = &dev_priv->ggtt;
+ struct i915_ggtt *ggtt = &to_i915(obj->base.dev)->ggtt;
struct drm_mm_node node;
+ struct i915_vma *vma;
char __user *user_data;
uint64_t remain;
uint64_t offset;
int ret;
- ret = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE);
- if (ret) {
- ret = insert_mappable_node(dev_priv, &node, PAGE_SIZE);
- if (ret)
- goto out;
-
- ret = i915_gem_object_get_pages(obj);
- if (ret) {
- remove_mappable_node(&node);
- goto out;
- }
+ ret = mutex_lock_interruptible(&obj->base.dev->struct_mutex);
+ if (ret)
+ return ret;
- i915_gem_object_pin_pages(obj);
+ vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
+ PIN_MAPPABLE | PIN_NONBLOCK);
+ if (IS_ERR(vma)) {
+ ret = insert_mappable_node(ggtt, &node, PAGE_SIZE);
+ if (ret)
+ goto out_unlock;
} else {
- node.start = i915_gem_obj_ggtt_offset(obj);
+ node.start = vma->node.start;
node.allocated = false;
- ret = i915_gem_object_put_fence(obj);
+
+ ret = i915_vma_put_fence(vma);
if (ret)
goto out_unpin;
}
@@ -768,18 +873,11 @@ i915_gem_gtt_pread(struct drm_device *dev,
if (ret)
goto out_unpin;
- user_data = u64_to_user_ptr(data_ptr);
- remain = size;
- offset = data_offset;
+ mutex_unlock(&obj->base.dev->struct_mutex);
- mutex_unlock(&dev->struct_mutex);
- if (likely(!i915.prefault_disable)) {
- ret = fault_in_multipages_writeable(user_data, remain);
- if (ret) {
- mutex_lock(&dev->struct_mutex);
- goto out_unpin;
- }
- }
+ user_data = u64_to_user_ptr(args->data_ptr);
+ remain = args->size;
+ offset = args->offset;
while (remain > 0) {
/* Operation in this page
@@ -796,19 +894,13 @@ i915_gem_gtt_pread(struct drm_device *dev,
wmb();
ggtt->base.insert_page(&ggtt->base,
i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT),
- node.start,
- I915_CACHE_NONE, 0);
- wmb();
+ node.start, I915_CACHE_NONE, 0);
} else {
page_base += offset & PAGE_MASK;
}
- /* This is a slow read/write as it tries to read from
- * and write to user memory which may result into page
- * faults, and so we cannot perform this under struct_mutex.
- */
- if (slow_user_access(ggtt->mappable, page_base,
- page_offset, user_data,
- page_length, false)) {
+
+ if (gtt_user_read(&ggtt->mappable, page_base, page_offset,
+ user_data, page_length)) {
ret = -EFAULT;
break;
}
@@ -818,116 +910,19 @@ i915_gem_gtt_pread(struct drm_device *dev,
offset += page_length;
}
- mutex_lock(&dev->struct_mutex);
- if (ret == 0 && (obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) {
- /* The user has modified the object whilst we tried
- * reading from it, and we now have no idea what domain
- * the pages should be in. As we have just been touching
- * them directly, flush everything back to the GTT
- * domain.
- */
- ret = i915_gem_object_set_to_gtt_domain(obj, false);
- }
-
+ mutex_lock(&obj->base.dev->struct_mutex);
out_unpin:
if (node.allocated) {
wmb();
ggtt->base.clear_range(&ggtt->base,
node.start, node.size,
true);
- i915_gem_object_unpin_pages(obj);
remove_mappable_node(&node);
} else {
- i915_gem_object_ggtt_unpin(obj);
- }
-out:
- return ret;
-}
-
-static int
-i915_gem_shmem_pread(struct drm_device *dev,
- struct drm_i915_gem_object *obj,
- struct drm_i915_gem_pread *args,
- struct drm_file *file)
-{
- char __user *user_data;
- ssize_t remain;
- loff_t offset;
- int shmem_page_offset, page_length, ret = 0;
- int obj_do_bit17_swizzling, page_do_bit17_swizzling;
- int prefaulted = 0;
- int needs_clflush = 0;
- struct sg_page_iter sg_iter;
-
- if (!i915_gem_object_has_struct_page(obj))
- return -ENODEV;
-
- user_data = u64_to_user_ptr(args->data_ptr);
- remain = args->size;
-
- obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
-
- ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush);
- if (ret)
- return ret;
-
- offset = args->offset;
-
- for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents,
- offset >> PAGE_SHIFT) {
- struct page *page = sg_page_iter_page(&sg_iter);
-
- if (remain <= 0)
- break;
-
- /* Operation in this page
- *
- * shmem_page_offset = offset within page in shmem file
- * page_length = bytes to copy for this page
- */
- shmem_page_offset = offset_in_page(offset);
- page_length = remain;
- if ((shmem_page_offset + page_length) > PAGE_SIZE)
- page_length = PAGE_SIZE - shmem_page_offset;
-
- page_do_bit17_swizzling = obj_do_bit17_swizzling &&
- (page_to_phys(page) & (1 << 17)) != 0;
-
- ret = shmem_pread_fast(page, shmem_page_offset, page_length,
- user_data, page_do_bit17_swizzling,
- needs_clflush);
- if (ret == 0)
- goto next_page;
-
- mutex_unlock(&dev->struct_mutex);
-
- if (likely(!i915.prefault_disable) && !prefaulted) {
- ret = fault_in_multipages_writeable(user_data, remain);
- /* Userspace is tricking us, but we've already clobbered
- * its pages with the prefault and promised to write the
- * data up to the first fault. Hence ignore any errors
- * and just continue. */
- (void)ret;
- prefaulted = 1;
- }
-
- ret = shmem_pread_slow(page, shmem_page_offset, page_length,
- user_data, page_do_bit17_swizzling,
- needs_clflush);
-
- mutex_lock(&dev->struct_mutex);
-
- if (ret)
- goto out;
-
-next_page:
- remain -= page_length;
- user_data += page_length;
- offset += page_length;
+ i915_vma_unpin(vma);
}
-
-out:
- i915_gem_object_unpin_pages(obj);
+out_unlock:
+ mutex_unlock(&obj->base.dev->struct_mutex);
return ret;
}
@@ -946,7 +941,7 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data,
{
struct drm_i915_gem_pread *args = data;
struct drm_i915_gem_object *obj;
- int ret = 0;
+ int ret;
if (args->size == 0)
return 0;
@@ -964,36 +959,29 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data,
if (args->offset > obj->base.size ||
args->size > obj->base.size - args->offset) {
ret = -EINVAL;
- goto err;
+ goto out;
}
trace_i915_gem_object_pread(obj, args->offset, args->size);
ret = __unsafe_wait_rendering(obj, to_rps_client(file), true);
if (ret)
- goto err;
+ goto out;
- ret = i915_mutex_lock_interruptible(dev);
+ ret = i915_gem_object_pin_pages(obj);
if (ret)
- goto err;
-
- ret = i915_gem_shmem_pread(dev, obj, args, file);
+ goto out;
- /* pread for non shmem backed objects */
+ ret = i915_gem_shmem_pread(obj, args);
if (ret == -EFAULT || ret == -ENODEV) {
intel_runtime_pm_get(to_i915(dev));
- ret = i915_gem_gtt_pread(dev, obj, args->size,
- args->offset, args->data_ptr);
+ ret = i915_gem_gtt_pread(obj, args);
intel_runtime_pm_put(to_i915(dev));
}
+ i915_gem_object_unpin_pages(obj);
+out:
i915_gem_object_put(obj);
- mutex_unlock(&dev->struct_mutex);
-
- return ret;
-
-err:
- i915_gem_object_put_unlocked(obj);
return ret;
}
@@ -1002,10 +990,10 @@ err:
*/
static inline int
-fast_user_write(struct io_mapping *mapping,
- loff_t page_base, int page_offset,
- char __user *user_data,
- int length)
+ggtt_write(struct io_mapping *mapping,
+ loff_t page_base, int page_offset,
+ char __user *user_data,
+ int length)
{
void __iomem *vaddr_atomic;
void *vaddr;
@@ -1017,6 +1005,15 @@ fast_user_write(struct io_mapping *mapping,
unwritten = __copy_from_user_inatomic_nocache(vaddr,
user_data, length);
io_mapping_unmap_atomic(vaddr_atomic);
+
+ if (unwritten) {
+ vaddr_atomic = io_mapping_map_wc(mapping, page_base, PAGE_SIZE);
+ /* We can use the cpu mem copy function because this is X86. */
+ vaddr = (void __force*)vaddr_atomic + page_offset;
+ unwritten = copy_from_user(vaddr, user_data, length);
+ io_mapping_unmap(vaddr_atomic);
+ }
+
return unwritten;
}
@@ -1029,40 +1026,31 @@ fast_user_write(struct io_mapping *mapping,
* @file: drm file pointer
*/
static int
-i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915,
- struct drm_i915_gem_object *obj,
- struct drm_i915_gem_pwrite *args,
- struct drm_file *file)
+i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
+ const struct drm_i915_gem_pwrite *args)
{
- struct i915_ggtt *ggtt = &i915->ggtt;
- struct drm_device *dev = obj->base.dev;
+ struct i915_ggtt *ggtt = &to_i915(obj->base.dev)->ggtt;
struct drm_mm_node node;
+ struct i915_vma *vma;
uint64_t remain, offset;
char __user *user_data;
int ret;
- bool hit_slow_path = false;
- if (i915_gem_object_is_tiled(obj))
- return -EFAULT;
+ ret = mutex_lock_interruptible(&obj->base.dev->struct_mutex);
+ if (ret)
+ return ret;
- ret = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
+ vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
PIN_MAPPABLE | PIN_NONBLOCK);
- if (ret) {
- ret = insert_mappable_node(i915, &node, PAGE_SIZE);
+ if (IS_ERR(vma)) {
+ ret = insert_mappable_node(ggtt, &node, PAGE_SIZE);
if (ret)
- goto out;
-
- ret = i915_gem_object_get_pages(obj);
- if (ret) {
- remove_mappable_node(&node);
- goto out;
- }
-
- i915_gem_object_pin_pages(obj);
+ goto out_unlock;
} else {
- node.start = i915_gem_obj_ggtt_offset(obj);
+ node.start = vma->node.start;
node.allocated = false;
- ret = i915_gem_object_put_fence(obj);
+
+ ret = i915_vma_put_fence(vma);
if (ret)
goto out_unpin;
}
@@ -1071,8 +1059,9 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915,
if (ret)
goto out_unpin;
+ mutex_unlock(&obj->base.dev->struct_mutex);
+
intel_fb_obj_invalidate(obj, ORIGIN_GTT);
- obj->dirty = true;
user_data = u64_to_user_ptr(args->data_ptr);
offset = args->offset;
@@ -1093,7 +1082,6 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915,
ggtt->base.insert_page(&ggtt->base,
i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT),
node.start, I915_CACHE_NONE, 0);
- wmb(); /* flush modifications to the GGTT (insert_page) */
} else {
page_base += offset & PAGE_MASK;
}
@@ -1103,92 +1091,35 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915,
* If the object is non-shmem backed, we retry again with the
* path that handles page fault.
*/
- if (fast_user_write(ggtt->mappable, page_base,
- page_offset, user_data, page_length)) {
- hit_slow_path = true;
- mutex_unlock(&dev->struct_mutex);
- if (slow_user_access(ggtt->mappable,
- page_base,
- page_offset, user_data,
- page_length, true)) {
- ret = -EFAULT;
- mutex_lock(&dev->struct_mutex);
- goto out_flush;
- }
-
- mutex_lock(&dev->struct_mutex);
+ if (ggtt_write(&ggtt->mappable, page_base, page_offset,
+ user_data, page_length)) {
+ ret = -EFAULT;
+ break;
}
remain -= page_length;
user_data += page_length;
offset += page_length;
}
-
-out_flush:
- if (hit_slow_path) {
- if (ret == 0 &&
- (obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) {
- /* The user has modified the object whilst we tried
- * reading from it, and we now have no idea what domain
- * the pages should be in. As we have just been touching
- * them directly, flush everything back to the GTT
- * domain.
- */
- ret = i915_gem_object_set_to_gtt_domain(obj, false);
- }
- }
-
intel_fb_obj_flush(obj, false, ORIGIN_GTT);
+
+ mutex_lock(&obj->base.dev->struct_mutex);
out_unpin:
if (node.allocated) {
wmb();
ggtt->base.clear_range(&ggtt->base,
node.start, node.size,
true);
- i915_gem_object_unpin_pages(obj);
remove_mappable_node(&node);
- } else {
- i915_gem_object_ggtt_unpin(obj);
- }
-out:
+ } else
+ i915_vma_unpin(vma);
+out_unlock:
+ mutex_unlock(&obj->base.dev->struct_mutex);
return ret;
}
-/* Per-page copy function for the shmem pwrite fastpath.
- * Flushes invalid cachelines before writing to the target if
- * needs_clflush_before is set and flushes out any written cachelines after
- * writing if needs_clflush is set. */
-static int
-shmem_pwrite_fast(struct page *page, int shmem_page_offset, int page_length,
- char __user *user_data,
- bool page_do_bit17_swizzling,
- bool needs_clflush_before,
- bool needs_clflush_after)
-{
- char *vaddr;
- int ret;
-
- if (unlikely(page_do_bit17_swizzling))
- return -EINVAL;
-
- vaddr = kmap_atomic(page);
- if (needs_clflush_before)
- drm_clflush_virt_range(vaddr + shmem_page_offset,
- page_length);
- ret = __copy_from_user_inatomic(vaddr + shmem_page_offset,
- user_data, page_length);
- if (needs_clflush_after)
- drm_clflush_virt_range(vaddr + shmem_page_offset,
- page_length);
- kunmap_atomic(vaddr);
-
- return ret ? -EFAULT : 0;
-}
-
-/* Only difference to the fast-path function is that this can handle bit17
- * and uses non-atomic copy and kmap functions. */
static int
-shmem_pwrite_slow(struct page *page, int shmem_page_offset, int page_length,
+shmem_pwrite_slow(struct page *page, int offset, int length,
char __user *user_data,
bool page_do_bit17_swizzling,
bool needs_clflush_before,
@@ -1199,151 +1130,114 @@ shmem_pwrite_slow(struct page *page, int shmem_page_offset, int page_length,
vaddr = kmap(page);
if (unlikely(needs_clflush_before || page_do_bit17_swizzling))
- shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
- page_length,
+ shmem_clflush_swizzled_range(vaddr + offset, length,
page_do_bit17_swizzling);
if (page_do_bit17_swizzling)
- ret = __copy_from_user_swizzled(vaddr, shmem_page_offset,
- user_data,
- page_length);
+ ret = __copy_from_user_swizzled(vaddr, offset, user_data,
+ length);
else
- ret = __copy_from_user(vaddr + shmem_page_offset,
- user_data,
- page_length);
+ ret = __copy_from_user(vaddr + offset, user_data, length);
if (needs_clflush_after)
- shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
- page_length,
+ shmem_clflush_swizzled_range(vaddr + offset, length,
page_do_bit17_swizzling);
kunmap(page);
return ret ? -EFAULT : 0;
}
+/* Per-page copy function for the shmem pwrite fastpath.
+ * Flushes invalid cachelines before writing to the target if
+ * needs_clflush_before is set and flushes out any written cachelines after
+ * writing if needs_clflush is set. */
static int
-i915_gem_shmem_pwrite(struct drm_device *dev,
- struct drm_i915_gem_object *obj,
- struct drm_i915_gem_pwrite *args,
- struct drm_file *file)
+shmem_pwrite(struct page *page, int offset, int len, char __user *user_data,
+ bool page_do_bit17_swizzling,
+ bool needs_clflush_before,
+ bool needs_clflush_after)
{
- ssize_t remain;
- loff_t offset;
- char __user *user_data;
- int shmem_page_offset, page_length, ret = 0;
- int obj_do_bit17_swizzling, page_do_bit17_swizzling;
- int hit_slowpath = 0;
- int needs_clflush_after = 0;
- int needs_clflush_before = 0;
- struct sg_page_iter sg_iter;
+ int ret;
- user_data = u64_to_user_ptr(args->data_ptr);
- remain = args->size;
+ ret = -ENODEV;
+ if (!page_do_bit17_swizzling) {
+ char *vaddr = kmap_atomic(page);
- obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
+ if (needs_clflush_before)
+ drm_clflush_virt_range(vaddr + offset, len);
+ ret = __copy_from_user_inatomic(vaddr + offset, user_data, len);
+ if (needs_clflush_after)
+ drm_clflush_virt_range(vaddr + offset, len);
+
+ kunmap_atomic(vaddr);
+ }
+ if (ret == 0)
+ return ret;
+
+ return shmem_pwrite_slow(page, offset, len, user_data,
+ page_do_bit17_swizzling,
+ needs_clflush_before,
+ needs_clflush_after);
+}
+
+static int
+i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj,
+ const struct drm_i915_gem_pwrite *args)
+{
+ char __user *user_data;
+ u64 remain;
+ unsigned int obj_do_bit17_swizzling;
+ unsigned int partial_cacheline_write;
+ unsigned int needs_clflush;
+ unsigned int offset, idx;
+ int ret;
- ret = i915_gem_object_wait_rendering(obj, false);
+ ret = mutex_lock_interruptible(&obj->base.dev->struct_mutex);
if (ret)
return ret;
- if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
- /* If we're not in the cpu write domain, set ourself into the gtt
- * write domain and manually flush cachelines (if required). This
- * optimizes for the case when the gpu will use the data
- * right away and we therefore have to clflush anyway. */
- needs_clflush_after = cpu_write_needs_clflush(obj);
- }
- /* Same trick applies to invalidate partially written cachelines read
- * before writing. */
- if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0)
- needs_clflush_before =
- !cpu_cache_is_coherent(dev, obj->cache_level);
-
- ret = i915_gem_object_get_pages(obj);
+ ret = i915_gem_obj_prepare_shmem_write(obj, &needs_clflush);
+ mutex_unlock(&obj->base.dev->struct_mutex);
if (ret)
return ret;
- intel_fb_obj_invalidate(obj, ORIGIN_CPU);
-
- i915_gem_object_pin_pages(obj);
-
- offset = args->offset;
- obj->dirty = 1;
-
- for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents,
- offset >> PAGE_SHIFT) {
- struct page *page = sg_page_iter_page(&sg_iter);
- int partial_cacheline_write;
-
- if (remain <= 0)
- break;
-
- /* Operation in this page
- *
- * shmem_page_offset = offset within page in shmem file
- * page_length = bytes to copy for this page
- */
- shmem_page_offset = offset_in_page(offset);
-
- page_length = remain;
- if ((shmem_page_offset + page_length) > PAGE_SIZE)
- page_length = PAGE_SIZE - shmem_page_offset;
-
- /* If we don't overwrite a cacheline completely we need to be
- * careful to have up-to-date data by first clflushing. Don't
- * overcomplicate things and flush the entire patch. */
- partial_cacheline_write = needs_clflush_before &&
- ((shmem_page_offset | page_length)
- & (boot_cpu_data.x86_clflush_size - 1));
-
- page_do_bit17_swizzling = obj_do_bit17_swizzling &&
- (page_to_phys(page) & (1 << 17)) != 0;
-
- ret = shmem_pwrite_fast(page, shmem_page_offset, page_length,
- user_data, page_do_bit17_swizzling,
- partial_cacheline_write,
- needs_clflush_after);
- if (ret == 0)
- goto next_page;
-
- hit_slowpath = 1;
- mutex_unlock(&dev->struct_mutex);
- ret = shmem_pwrite_slow(page, shmem_page_offset, page_length,
- user_data, page_do_bit17_swizzling,
- partial_cacheline_write,
- needs_clflush_after);
+ obj_do_bit17_swizzling = 0;
+ if (i915_gem_object_needs_bit17_swizzle(obj))
+ obj_do_bit17_swizzling = 1 << 17;
- mutex_lock(&dev->struct_mutex);
+ /* If we don't overwrite a cacheline completely we need to be
+ * careful to have up-to-date data by first clflushing. Don't
+ * overcomplicate things and flush the entire patch.
+ */
+ partial_cacheline_write = 0;
+ if (needs_clflush & CLFLUSH_BEFORE)
+ partial_cacheline_write = boot_cpu_data.x86_clflush_size - 1;
+ user_data = u64_to_user_ptr(args->data_ptr);
+ remain = args->size;
+ offset = offset_in_page(args->offset);
+ for (idx = args->offset >> PAGE_SHIFT; remain; idx++) {
+ struct page *page = i915_gem_object_get_page(obj, idx);
+ int length;
+
+ length = remain;
+ if ((offset + length) > PAGE_SIZE)
+ length = PAGE_SIZE - offset;
+
+ ret = shmem_pwrite(page, offset, length, user_data,
+ page_to_phys(page) & obj_do_bit17_swizzling,
+ (offset | length) & partial_cacheline_write,
+ needs_clflush & CLFLUSH_AFTER);
if (ret)
- goto out;
+ break;
-next_page:
- remain -= page_length;
- user_data += page_length;
- offset += page_length;
+ remain -= length;
+ user_data += length;
+ offset = 0;
}
-out:
- i915_gem_object_unpin_pages(obj);
-
- if (hit_slowpath) {
- /*
- * Fixup: Flush cpu caches in case we didn't flush the dirty
- * cachelines in-line while writing and the object moved
- * out of the cpu write domain while we've dropped the lock.
- */
- if (!needs_clflush_after &&
- obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
- if (i915_gem_clflush_object(obj, obj->pin_display))
- needs_clflush_after = true;
- }
- }
+ intel_fb_obj_flush(obj, false, ORIGIN_CPU);
- if (needs_clflush_after)
- i915_gem_chipset_flush(to_i915(dev));
- else
- obj->cache_dirty = true;
+ i915_gem_object_unpin_pages(obj);
- intel_fb_obj_flush(obj, false, ORIGIN_CPU);
return ret;
}
@@ -1359,7 +1253,6 @@ int
i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
struct drm_file *file)
{
- struct drm_i915_private *dev_priv = to_i915(dev);
struct drm_i915_gem_pwrite *args = data;
struct drm_i915_gem_object *obj;
int ret;
@@ -1372,13 +1265,6 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
args->size))
return -EFAULT;
- if (likely(!i915.prefault_disable)) {
- ret = fault_in_multipages_readable(u64_to_user_ptr(args->data_ptr),
- args->size);
- if (ret)
- return -EFAULT;
- }
-
obj = i915_gem_object_lookup(file, args->handle);
if (!obj)
return -ENOENT;
@@ -1396,11 +1282,9 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
if (ret)
goto err;
- intel_runtime_pm_get(dev_priv);
-
- ret = i915_mutex_lock_interruptible(dev);
+ ret = i915_gem_object_pin_pages(obj);
if (ret)
- goto err_rpm;
+ goto err;
ret = -EFAULT;
/* We can only do the GTT pwrite on untiled buffers, as otherwise
@@ -1411,7 +1295,7 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
*/
if (!i915_gem_object_has_struct_page(obj) ||
cpu_write_needs_clflush(obj)) {
- ret = i915_gem_gtt_pwrite_fast(dev_priv, obj, args, file);
+ ret = i915_gem_gtt_pwrite_fast(obj, args);
/* Note that the gtt paths might fail with non-page-backed user
* pointers (e.g. gtt mappings when moving data between
* textures). Fallback to the shmem path in that case. */
@@ -1419,23 +1303,14 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
if (ret == -EFAULT || ret == -ENOSPC) {
if (obj->phys_handle)
- ret = i915_gem_phys_pwrite(obj, args, file);
- else if (i915_gem_object_has_struct_page(obj))
- ret = i915_gem_shmem_pwrite(dev, obj, args, file);
+ ret = i915_gem_phys_pwrite(obj, args);
else
- ret = -ENODEV;
+ ret = i915_gem_shmem_pwrite(obj, args);
}
- i915_gem_object_put(obj);
- mutex_unlock(&dev->struct_mutex);
- intel_runtime_pm_put(dev_priv);
-
- return ret;
-
-err_rpm:
- intel_runtime_pm_put(dev_priv);
+ i915_gem_object_unpin_pages(obj);
err:
- i915_gem_object_put_unlocked(obj);
+ i915_gem_object_put(obj);
return ret;
}
@@ -1483,26 +1358,36 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
*/
ret = __unsafe_wait_rendering(obj, to_rps_client(file), !write_domain);
if (ret)
- goto err;
+ goto err_unlocked;
+
+ /* Flush and acquire obj->pages so that we are coherent through
+ * direct access in memory with previous cached writes through
+ * shmemfs and that our cache domain tracking remains valid.
+ * For example, if the obj->filp was moved to swap without us
+ * being notified and releasing the pages, we would mistakenly
+ * continue to assume that the obj remained out of the CPU cached
+ * domain.
+ */
+ ret = i915_gem_object_pin_pages(obj);
+ if (ret)
+ goto err_unlocked;
ret = i915_mutex_lock_interruptible(dev);
if (ret)
- goto err;
+ goto err_pages;
if (read_domains & I915_GEM_DOMAIN_GTT)
ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);
else
ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);
+ mutex_unlock(&dev->struct_mutex);
if (write_domain != 0)
intel_fb_obj_invalidate(obj, write_origin(obj, write_domain));
-
+err_pages:
+ i915_gem_object_unpin_pages(obj);
+err_unlocked:
i915_gem_object_put(obj);
- mutex_unlock(&dev->struct_mutex);
- return ret;
-
-err:
- i915_gem_object_put_unlocked(obj);
return ret;
}
@@ -1533,7 +1418,7 @@ i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
}
}
- i915_gem_object_put_unlocked(obj);
+ i915_gem_object_put(obj);
return err;
}
@@ -1579,7 +1464,7 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
* pages from.
*/
if (!obj->base.filp) {
- i915_gem_object_put_unlocked(obj);
+ i915_gem_object_put(obj);
return -EINVAL;
}
@@ -1591,7 +1476,7 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
struct vm_area_struct *vma;
if (down_write_killable(&mm->mmap_sem)) {
- i915_gem_object_put_unlocked(obj);
+ i915_gem_object_put(obj);
return -EINTR;
}
vma = find_vma(mm, addr);
@@ -1605,7 +1490,7 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
/* This may race, but that's ok, it only gets set */
WRITE_ONCE(obj->has_wc_mmap, true);
}
- i915_gem_object_put_unlocked(obj);
+ i915_gem_object_put(obj);
if (IS_ERR((void *)addr))
return addr;
@@ -1614,6 +1499,16 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
return 0;
}
+static unsigned int tile_row_pages(struct drm_i915_gem_object *obj)
+{
+ u64 size;
+
+ size = i915_gem_object_get_stride(obj);
+ size *= i915_gem_object_get_tiling(obj) == I915_TILING_Y ? 32 : 8;
+
+ return size >> PAGE_SHIFT;
+}
+
/**
* i915_gem_fault - fault a page into the GTT
* @vma: VMA in question
@@ -1635,11 +1530,10 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
struct drm_i915_gem_object *obj = to_intel_bo(vma->vm_private_data);
struct drm_device *dev = obj->base.dev;
struct drm_i915_private *dev_priv = to_i915(dev);
- struct i915_ggtt *ggtt = &dev_priv->ggtt;
- struct i915_ggtt_view view = i915_ggtt_view_normal;
bool write = !!(vmf->flags & FAULT_FLAG_WRITE);
+ struct i915_vma *ggtt;
pgoff_t page_offset;
- unsigned long pfn;
+ unsigned int flags;
int ret;
/* We don't use vmf->pgoff since that has the fake offset */
@@ -1657,6 +1551,10 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
if (ret)
goto err;
+ ret = i915_gem_object_pin_pages(obj);
+ if (ret)
+ goto err;
+
intel_runtime_pm_get(dev_priv);
ret = i915_mutex_lock_interruptible(dev);
@@ -1669,83 +1567,61 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
goto err_unlock;
}
- /* Use a partial view if the object is bigger than the aperture. */
- if (obj->base.size >= ggtt->mappable_end &&
- !i915_gem_object_is_tiled(obj)) {
- static const unsigned int chunk_size = 256; // 1 MiB
-
- memset(&view, 0, sizeof(view));
- view.type = I915_GGTT_VIEW_PARTIAL;
- view.params.partial.offset = rounddown(page_offset, chunk_size);
- view.params.partial.size =
+ flags = PIN_MAPPABLE;
+ if (obj->base.size > 2 << 20)
+ flags |= PIN_NONBLOCK | PIN_NOFAULT;
+ ggtt = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, flags);
+ if (IS_ERR(ggtt)) {
+ struct i915_ggtt_view partial;
+ unsigned int chunk_size;
+
+ chunk_size = 256; /* 1 MiB */
+ if (i915_gem_object_is_tiled(obj))
+ chunk_size = max(chunk_size, tile_row_pages(obj));
+
+ memset(&partial, 0, sizeof(partial));
+ partial.type = I915_GGTT_VIEW_PARTIAL;
+ partial.params.partial.offset =
+ rounddown(page_offset, chunk_size);
+ partial.params.partial.size =
min_t(unsigned int,
chunk_size,
(vma->vm_end - vma->vm_start)/PAGE_SIZE -
- view.params.partial.offset);
- }
+ partial.params.partial.offset);
- /* Now pin it into the GTT if needed */
- ret = i915_gem_object_ggtt_pin(obj, &view, 0, 0, PIN_MAPPABLE);
- if (ret)
+ ggtt = i915_gem_object_ggtt_pin(obj, &partial, 0, 0,
+ PIN_MAPPABLE);
+ }
+ if (IS_ERR(ggtt)) {
+ ret = PTR_ERR(ggtt);
goto err_unlock;
+ }
ret = i915_gem_object_set_to_gtt_domain(obj, write);
if (ret)
goto err_unpin;
- ret = i915_gem_object_get_fence(obj);
+ ret = i915_vma_get_fence(ggtt);
if (ret)
goto err_unpin;
/* Finally, remap it using the new GTT offset */
- pfn = ggtt->mappable_base +
- i915_gem_obj_ggtt_offset_view(obj, &view);
- pfn >>= PAGE_SHIFT;
-
- if (unlikely(view.type == I915_GGTT_VIEW_PARTIAL)) {
- /* Overriding existing pages in partial view does not cause
- * us any trouble as TLBs are still valid because the fault
- * is due to userspace losing part of the mapping or never
- * having accessed it before (at this partials' range).
- */
- unsigned long base = vma->vm_start +
- (view.params.partial.offset << PAGE_SHIFT);
- unsigned int i;
-
- for (i = 0; i < view.params.partial.size; i++) {
- ret = vm_insert_pfn(vma, base + i * PAGE_SIZE, pfn + i);
- if (ret)
- break;
- }
-
- obj->fault_mappable = true;
- } else {
- if (!obj->fault_mappable) {
- unsigned long size = min_t(unsigned long,
- vma->vm_end - vma->vm_start,
- obj->base.size);
- int i;
-
- for (i = 0; i < size >> PAGE_SHIFT; i++) {
- ret = vm_insert_pfn(vma,
- (unsigned long)vma->vm_start + i * PAGE_SIZE,
- pfn + i);
- if (ret)
- break;
- }
+ ret = remap_io_mapping(vma,
+ vma->vm_start + (ggtt->ggtt_view.params.partial.offset << PAGE_SHIFT),
+ (dev_priv->ggtt.mappable_base + ggtt->node.start) >> PAGE_SHIFT,
+ min_t(u64, ggtt->size, vma->vm_end - vma->vm_start),
+ &dev_priv->ggtt.mappable);
+ if (ret)
+ goto err_unpin;
- obj->fault_mappable = true;
- } else
- ret = vm_insert_pfn(vma,
- (unsigned long)vmf->virtual_address,
- pfn + page_offset);
- }
+ obj->fault_mappable = true;
err_unpin:
- i915_gem_object_ggtt_unpin_view(obj, &view);
+ __i915_vma_unpin(ggtt);
err_unlock:
mutex_unlock(&dev->struct_mutex);
err_rpm:
intel_runtime_pm_put(dev_priv);
+ i915_gem_object_unpin_pages(obj);
err:
switch (ret) {
case -EIO:
@@ -1810,9 +1686,13 @@ i915_gem_release_mmap(struct drm_i915_gem_object *obj)
/* Serialisation between user GTT access and our code depends upon
* revoking the CPU's PTE whilst the mutex is held. The next user
* pagefault then has to wait until we release the mutex.
+ *
+ * Note that RPM complicates somewhat by adding an additional
+ * requirement that operations to the GGTT be made holding the RPM
+ * wakeref. This in turns allow us to release the mmap from within
+ * the RPM suspend code ignoring the struct_mutex serialisation in
+ * lieu of the RPM barriers.
*/
- lockdep_assert_held(&obj->base.dev->struct_mutex);
-
if (!obj->fault_mappable)
return;
@@ -1831,11 +1711,21 @@ i915_gem_release_mmap(struct drm_i915_gem_object *obj)
obj->fault_mappable = false;
}
+static void assert_rpm_release_all_mmaps(struct drm_i915_private *dev_priv)
+{
+ assert_rpm_wakelock_held(dev_priv);
+}
+
void
i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv)
{
struct drm_i915_gem_object *obj;
+ /* This should only be called by RPM as we require the bound_list
+ * to be protected by the RPM barriers and not struct_mutex.
+ * We check that we are holding the wakeref whenever we manipulate
+ * the dev_priv->mm.bound_list (via assert_rpm_release_all_mmaps).
+ */
list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list)
i915_gem_release_mmap(obj);
}
@@ -1951,7 +1841,7 @@ i915_gem_mmap_gtt(struct drm_file *file,
if (ret == 0)
*offset = drm_vma_node_offset_addr(&obj->base.vma_node);
- i915_gem_object_put_unlocked(obj);
+ i915_gem_object_put(obj);
return ret;
}
@@ -1994,16 +1884,15 @@ i915_gem_object_truncate(struct drm_i915_gem_object *obj)
* backing pages, *now*.
*/
shmem_truncate_range(file_inode(obj->base.filp), 0, (loff_t)-1);
- obj->madv = __I915_MADV_PURGED;
+ obj->mm.madv = __I915_MADV_PURGED;
}
/* Try to discard unwanted pages */
-static void
-i915_gem_object_invalidate(struct drm_i915_gem_object *obj)
+void __i915_gem_object_invalidate(struct drm_i915_gem_object *obj)
{
struct address_space *mapping;
- switch (obj->madv) {
+ switch (obj->mm.madv) {
case I915_MADV_DONTNEED:
i915_gem_object_truncate(obj);
case __I915_MADV_PURGED:
@@ -2018,81 +1907,84 @@ i915_gem_object_invalidate(struct drm_i915_gem_object *obj)
}
static void
-i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
+i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj,
+ struct sg_table *pages)
{
struct sgt_iter sgt_iter;
struct page *page;
- int ret;
-
- BUG_ON(obj->madv == __I915_MADV_PURGED);
- ret = i915_gem_object_set_to_cpu_domain(obj, true);
- if (WARN_ON(ret)) {
- /* In the event of a disaster, abandon all caches and
- * hope for the best.
- */
- i915_gem_clflush_object(obj, true);
- obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
- }
+ __i915_gem_object_release_shmem(obj);
- i915_gem_gtt_finish_object(obj);
+ i915_gem_gtt_finish_pages(obj, pages);
if (i915_gem_object_needs_bit17_swizzle(obj))
- i915_gem_object_save_bit_17_swizzle(obj);
-
- if (obj->madv == I915_MADV_DONTNEED)
- obj->dirty = 0;
+ i915_gem_object_save_bit_17_swizzle(obj, pages);
- for_each_sgt_page(page, sgt_iter, obj->pages) {
- if (obj->dirty)
+ for_each_sgt_page(page, sgt_iter, pages) {
+ if (i915_gem_object_is_dirty(obj))
set_page_dirty(page);
- if (obj->madv == I915_MADV_WILLNEED)
+ if (obj->mm.madv == I915_MADV_WILLNEED)
mark_page_accessed(page);
put_page(page);
}
- obj->dirty = 0;
+ i915_gem_object_clear_dirty(obj);
- sg_free_table(obj->pages);
- kfree(obj->pages);
+ sg_free_table(pages);
+ kfree(pages);
}
-int
-i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
+static void __i915_gem_object_reset_page_iter(struct drm_i915_gem_object *obj)
{
- const struct drm_i915_gem_object_ops *ops = obj->ops;
+ struct radix_tree_iter iter;
+ void **slot;
- if (obj->pages == NULL)
- return 0;
+ radix_tree_for_each_slot(slot, &obj->mm.get_page.radix, &iter, 0)
+ radix_tree_delete(&obj->mm.get_page.radix, iter.index);
+}
- if (obj->pages_pin_count)
- return -EBUSY;
+void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
+{
+ const struct drm_i915_gem_object_ops *ops = obj->ops;
+ struct sg_table *pages;
+
+ if (atomic_read(&obj->mm.pages_pin_count))
+ return;
GEM_BUG_ON(obj->bind_count);
+ if (obj->mm.pages == NULL)
+ return;
+
+ /* May be called by shrinker from within get_pages() (on another bo) */
+ mutex_lock_nested(&obj->mm.lock, SINGLE_DEPTH_NESTING);
+ if (unlikely(atomic_read(&obj->mm.pages_pin_count)))
+ goto unlock;
/* ->put_pages might need to allocate memory for the bit17 swizzle
* array, hence protect them from being reaped by removing them from gtt
* lists early. */
- list_del(&obj->global_list);
-
- if (obj->mapping) {
- if (is_vmalloc_addr(obj->mapping))
- vunmap(obj->mapping);
+ pages = obj->mm.pages;
+ obj->mm.pages = NULL;
+ GEM_BUG_ON(pages == NULL);
+
+ if (obj->mm.mapping) {
+ void *ptr = (void *)((uintptr_t)obj->mm.mapping & ~1);
+ if (is_vmalloc_addr(ptr))
+ vunmap(ptr);
else
- kunmap(kmap_to_page(obj->mapping));
- obj->mapping = NULL;
+ kunmap(kmap_to_page(ptr));
+ obj->mm.mapping = NULL;
}
- ops->put_pages(obj);
- obj->pages = NULL;
-
- i915_gem_object_invalidate(obj);
+ __i915_gem_object_reset_page_iter(obj);
- return 0;
+ ops->put_pages(obj, pages);
+unlock:
+ mutex_unlock(&obj->mm.lock);
}
-static int
+static struct sg_table *
i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
{
struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
@@ -2110,17 +2002,17 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
* wasn't in the GTT, there shouldn't be any way it could have been in
* a GPU cache
*/
- BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS);
- BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS);
+ GEM_BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS);
+ GEM_BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS);
st = kmalloc(sizeof(*st), GFP_KERNEL);
if (st == NULL)
- return -ENOMEM;
+ return ERR_PTR(-ENOMEM);
page_count = obj->base.size / PAGE_SIZE;
if (sg_alloc_table(st, page_count, GFP_KERNEL)) {
kfree(st);
- return -ENOMEM;
+ return ERR_PTR(-ENOMEM);
}
/* Get the list of pages out of our struct file. They'll be pinned
@@ -2180,20 +2072,19 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
if (!swiotlb_nr_tbl())
#endif
sg_mark_end(sg);
- obj->pages = st;
- ret = i915_gem_gtt_prepare_object(obj);
+ ret = i915_gem_gtt_prepare_pages(obj, st);
if (ret)
goto err_pages;
if (i915_gem_object_needs_bit17_swizzle(obj))
- i915_gem_object_do_bit_17_swizzle(obj);
+ i915_gem_object_do_bit_17_swizzle(obj, st);
if (i915_gem_object_is_tiled(obj) &&
dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES)
- i915_gem_object_pin_pages(obj);
+ atomic_inc(&obj->mm.pages_pin_count);
- return 0;
+ return st;
err_pages:
sg_mark_end(sg);
@@ -2213,50 +2104,77 @@ err_pages:
if (ret == -ENOSPC)
ret = -ENOMEM;
- return ret;
+ return ERR_PTR(ret);
+}
+
+void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
+ struct sg_table *pages)
+{
+ lockdep_assert_held(&obj->mm.lock);
+
+ obj->mm.get_page.sg_pos = pages->sgl;
+ obj->mm.get_page.sg_idx = 0;
+
+ obj->mm.pages = pages;
+}
+
+static int ____i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
+{
+ struct sg_table *pages;
+
+ if (unlikely(obj->mm.madv != I915_MADV_WILLNEED)) {
+ DRM_DEBUG("Attempting to obtain a purgeable object\n");
+ return -EFAULT;
+ }
+
+ pages = obj->ops->get_pages(obj);
+ if (unlikely(IS_ERR(pages)))
+ return PTR_ERR(pages);
+
+ __i915_gem_object_set_pages(obj, pages);
+ return 0;
}
/* Ensure that the associated pages are gathered from the backing storage
- * and pinned into our object. i915_gem_object_get_pages() may be called
+ * and pinned into our object. i915_gem_object_pin_pages() may be called
* multiple times before they are released by a single call to
- * i915_gem_object_put_pages() - once the pages are no longer referenced
+ * i915_gem_object_unpin_pages() - once the pages are no longer referenced
* either as a result of memory pressure (reaping pages under the shrinker)
* or as the object is itself released.
*/
int
-i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
+__i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
{
- struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
- const struct drm_i915_gem_object_ops *ops = obj->ops;
int ret;
- if (obj->pages)
- return 0;
-
- if (obj->madv != I915_MADV_WILLNEED) {
- DRM_DEBUG("Attempting to obtain a purgeable object\n");
- return -EFAULT;
- }
-
- BUG_ON(obj->pages_pin_count);
-
- ret = ops->get_pages(obj);
+ ret = mutex_lock_interruptible(&obj->mm.lock);
if (ret)
return ret;
- list_add_tail(&obj->global_list, &dev_priv->mm.unbound_list);
+ if (likely(obj->mm.pages)) {
+ atomic_inc(&obj->mm.pages_pin_count);
+ goto unlock;
+ }
- obj->get_page.sg = obj->pages->sgl;
- obj->get_page.last = 0;
+ GEM_BUG_ON(atomic_read(&obj->mm.pages_pin_count));
- return 0;
+ ret = ____i915_gem_object_get_pages(obj);
+ if (ret == 0) {
+ smp_mb__before_atomic();
+ atomic_set(&obj->mm.pages_pin_count, 1);
+ }
+
+unlock:
+ mutex_unlock(&obj->mm.lock);
+ return ret;
}
/* The 'mapping' part of i915_gem_object_pin_map() below */
-static void *i915_gem_object_map(const struct drm_i915_gem_object *obj)
+static void *i915_gem_object_map(const struct drm_i915_gem_object *obj,
+ bool use_wc)
{
unsigned long n_pages = obj->base.size >> PAGE_SHIFT;
- struct sg_table *sgt = obj->pages;
+ struct sg_table *sgt = obj->mm.pages;
struct sgt_iter sgt_iter;
struct page *page;
struct page *stack_pages[32];
@@ -2265,7 +2183,7 @@ static void *i915_gem_object_map(const struct drm_i915_gem_object *obj)
void *addr;
/* A single page can always be kmapped */
- if (n_pages == 1)
+ if (n_pages == 1 && !use_wc)
return kmap(sg_page(sgt->sgl));
if (n_pages > ARRAY_SIZE(stack_pages)) {
@@ -2281,7 +2199,8 @@ static void *i915_gem_object_map(const struct drm_i915_gem_object *obj)
/* Check that we have the expected number of pages */
GEM_BUG_ON(i != n_pages);
- addr = vmap(pages, n_pages, 0, PAGE_KERNEL);
+ addr = vmap(pages, n_pages, VM_NO_GUARD,
+ use_wc ? pgprot_writecombine(PAGE_KERNEL_IO) : PAGE_KERNEL);
if (pages != stack_pages)
drm_free_large(pages);
@@ -2290,27 +2209,69 @@ static void *i915_gem_object_map(const struct drm_i915_gem_object *obj)
}
/* get, pin, and map the pages of the object into kernel space */
-void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj)
+void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj, bool use_wc)
{
+ void *ptr;
+ bool has_wc;
+ bool pinned;
int ret;
- lockdep_assert_held(&obj->base.dev->struct_mutex);
+ GEM_BUG_ON(!i915_gem_object_has_struct_page(obj));
- ret = i915_gem_object_get_pages(obj);
+ ret = mutex_lock_interruptible(&obj->mm.lock);
if (ret)
return ERR_PTR(ret);
- i915_gem_object_pin_pages(obj);
+ pinned = true;
+ if (!atomic_inc_not_zero(&obj->mm.pages_pin_count)) {
+ ret = ____i915_gem_object_get_pages(obj);
+ if (ret)
+ goto err_unlock;
+
+ smp_mb__before_atomic();
+ GEM_BUG_ON(atomic_read(&obj->mm.pages_pin_count));
+ atomic_set(&obj->mm.pages_pin_count, 1);
+ pinned = false;
+ }
+
+ GEM_BUG_ON(obj->mm.pages == NULL);
+
+ ptr = (void *)((uintptr_t)obj->mm.mapping & ~1);
+ has_wc = (uintptr_t)obj->mm.mapping & 1;
+
+ if (ptr && has_wc != use_wc) {
+ if (pinned) {
+ ret = -EBUSY;
+ goto err_unpin;
+ }
+
+ if (is_vmalloc_addr(ptr))
+ vunmap(ptr);
+ else
+ kunmap(kmap_to_page(ptr));
+
+ ptr = obj->mm.mapping = NULL;
+ }
- if (!obj->mapping) {
- obj->mapping = i915_gem_object_map(obj);
- if (!obj->mapping) {
- i915_gem_object_unpin_pages(obj);
- return ERR_PTR(-ENOMEM);
+ if (!ptr) {
+ ptr = i915_gem_object_map(obj, use_wc);
+ if (!ptr) {
+ ret = -ENOMEM;
+ goto err_unpin;
}
+
+ obj->mm.mapping = (void *)((uintptr_t)ptr | use_wc);
}
- return obj->mapping;
+out_unlock:
+ mutex_unlock(&obj->mm.lock);
+ return ptr;
+
+err_unpin:
+ atomic_dec(&obj->mm.pages_pin_count);
+err_unlock:
+ ptr = ERR_PTR(ret);
+ goto out_unlock;
}
static void
@@ -2341,11 +2302,16 @@ i915_gem_object_retire__read(struct i915_gem_active *active,
* so that we don't steal from recently used but inactive objects
* (unless we are forced to ofc!)
*/
- if (obj->bind_count)
+ if (obj->bind_count) {
+ assert_rpm_release_all_mmaps(request->i915);
list_move_tail(&obj->global_list,
&request->i915->mm.bound_list);
+ }
- i915_gem_object_put(obj);
+ if (i915_gem_object_has_active_reference(obj)) {
+ i915_gem_object_clear_active_reference(obj);
+ i915_gem_object_put(obj);
+ }
}
static bool i915_context_is_banned(const struct i915_gem_context *ctx)
@@ -2443,7 +2409,11 @@ static void i915_gem_reset_engine_cleanup(struct intel_engine_cs *engine)
/* Ensure irq handler finishes or is cancelled. */
tasklet_kill(&engine->irq_tasklet);
- intel_execlists_cancel_requests(engine);
+ INIT_LIST_HEAD(&engine->execlist_queue);
+ i915_gem_request_assign(&engine->execlist_port[0].request,
+ NULL);
+ i915_gem_request_assign(&engine->execlist_port[1].request,
+ NULL);
}
/*
@@ -2526,7 +2496,6 @@ i915_gem_idle_work_handler(struct work_struct *work)
container_of(work, typeof(*dev_priv), gt.idle_work.work);
struct drm_device *dev = &dev_priv->drm;
struct intel_engine_cs *engine;
- unsigned int stuck_engines;
bool rearm_hangcheck;
if (!READ_ONCE(dev_priv->gt.awake))
@@ -2556,15 +2525,6 @@ i915_gem_idle_work_handler(struct work_struct *work)
dev_priv->gt.awake = false;
rearm_hangcheck = false;
- /* As we have disabled hangcheck, we need to unstick any waiters still
- * hanging around. However, as we may be racing against the interrupt
- * handler or the waiters themselves, we skip enabling the fake-irq.
- */
- stuck_engines = intel_kick_waiters(dev_priv);
- if (unlikely(stuck_engines))
- DRM_DEBUG_DRIVER("kicked stuck waiters (%x)...missed irq?\n",
- stuck_engines);
-
if (INTEL_GEN(dev_priv) >= 6)
gen6_rps_idle(dev_priv);
intel_runtime_pm_put(dev_priv);
@@ -2585,9 +2545,23 @@ void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file)
struct i915_vma *vma, *vn;
mutex_lock(&obj->base.dev->struct_mutex);
- list_for_each_entry_safe(vma, vn, &obj->vma_list, obj_link)
+ list_for_each_entry_safe_reverse(vma, vn, &obj->vma_list, obj_link) {
+ if (i915_vma_is_ggtt(vma))
+ break;
+
if (vma->vm->file == fpriv)
i915_vma_close(vma);
+ }
+
+ vma = obj->vma_hashed;
+ if (vma && vma->ctx->file_priv == fpriv)
+ i915_vma_unlink_ctx(vma);
+
+ if (i915_gem_object_is_active(obj) &&
+ !i915_gem_object_has_active_reference(obj)) {
+ i915_gem_object_set_active_reference(obj);
+ i915_gem_object_get(obj);
+ }
mutex_unlock(&obj->base.dev->struct_mutex);
}
@@ -2640,7 +2614,7 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
break;
}
- i915_gem_object_put_unlocked(obj);
+ i915_gem_object_put(obj);
return ret;
}
@@ -2698,7 +2672,8 @@ __i915_gem_object_sync(struct drm_i915_gem_request *to,
*/
int
i915_gem_object_sync(struct drm_i915_gem_object *obj,
- struct drm_i915_gem_request *to)
+ struct drm_i915_gem_request *to,
+ bool write)
{
struct i915_gem_active *active;
unsigned long active_mask;
@@ -2710,7 +2685,7 @@ i915_gem_object_sync(struct drm_i915_gem_object *obj,
if (!active_mask)
return 0;
- if (obj->base.pending_write_domain) {
+ if (write) {
active = obj->last_read;
} else {
active_mask = 1;
@@ -2734,27 +2709,6 @@ i915_gem_object_sync(struct drm_i915_gem_object *obj,
return 0;
}
-static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj)
-{
- u32 old_write_domain, old_read_domains;
-
- /* Force a pagefault for domain tracking on next user access */
- i915_gem_release_mmap(obj);
-
- if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0)
- return;
-
- old_read_domains = obj->base.read_domains;
- old_write_domain = obj->base.write_domain;
-
- obj->base.read_domains &= ~I915_GEM_DOMAIN_GTT;
- obj->base.write_domain &= ~I915_GEM_DOMAIN_GTT;
-
- trace_i915_gem_object_change_domain(obj,
- old_read_domains,
- old_write_domain);
-}
-
static void __i915_vma_iounmap(struct i915_vma *vma)
{
GEM_BUG_ON(i915_vma_is_pinned(vma));
@@ -2772,6 +2726,8 @@ int i915_vma_unbind(struct i915_vma *vma)
unsigned long active;
int ret;
+ lockdep_assert_held(&obj->base.dev->struct_mutex);
+
/* First wait upon any activity as retiring the request may
* have side-effects such as unpinning or even unbinding this vma.
*/
@@ -2807,18 +2763,19 @@ int i915_vma_unbind(struct i915_vma *vma)
goto destroy;
GEM_BUG_ON(obj->bind_count == 0);
- GEM_BUG_ON(!obj->pages);
-
- if (i915_vma_is_ggtt(vma) &&
- vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) {
- i915_gem_object_finish_gtt(obj);
+ GEM_BUG_ON(!obj->mm.pages);
+ if (i915_vma_is_map_and_fenceable(vma)) {
/* release the fence reg _after_ flushing */
- ret = i915_gem_object_put_fence(obj);
+ ret = i915_vma_put_fence(vma);
if (ret)
return ret;
+ /* Force a pagefault for domain tracking on next user access */
+ i915_gem_release_mmap(obj);
+
__i915_vma_iounmap(vma);
+ vma->flags &= ~I915_VMA_CAN_FENCE;
}
if (likely(!vma->vm->closed)) {
@@ -2830,10 +2787,8 @@ int i915_vma_unbind(struct i915_vma *vma)
drm_mm_remove_node(&vma->node);
list_move_tail(&vma->vm_link, &vma->vm->unbound_list);
- if (i915_vma_is_ggtt(vma)) {
- if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) {
- obj->map_and_fenceable = false;
- } else if (vma->ggtt_view.pages) {
+ if (vma->ggtt_view.pages) {
+ if (vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL) {
sg_free_table(vma->ggtt_view.pages);
kfree(vma->ggtt_view.pages);
}
@@ -2842,9 +2797,11 @@ int i915_vma_unbind(struct i915_vma *vma)
/* Since the unbound list is global, only move to that list if
* no more VMAs exist. */
- if (--obj->bind_count == 0)
+ if (--obj->bind_count == 0) {
+ assert_rpm_release_all_mmaps(to_i915(obj->base.dev));
list_move_tail(&obj->global_list,
&to_i915(obj->base.dev)->mm.unbound_list);
+ }
/* And finally now the object is completely decoupled from this vma,
* we can drop its hold on the backing storage and allow it to be
@@ -2893,18 +2850,12 @@ static bool i915_gem_valid_gtt_space(struct i915_vma *vma,
if (vma->vm->mm.color_adjust == NULL)
return true;
- if (!drm_mm_node_allocated(gtt_space))
- return true;
-
- if (list_empty(>t_space->node_list))
- return true;
-
other = list_entry(gtt_space->node_list.prev, struct drm_mm_node, node_list);
- if (other->allocated && !other->hole_follows && other->color != cache_level)
+ if (other->allocated && !other->hole_size && other->color != cache_level)
return false;
other = list_entry(gtt_space->node_list.next, struct drm_mm_node, node_list);
- if (other->allocated && !gtt_space->hole_follows && other->color != cache_level)
+ if (other->allocated && !gtt_space->hole_size && other->color != cache_level)
return false;
return true;
@@ -2930,7 +2881,6 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
struct drm_i915_private *dev_priv = to_i915(vma->vm->dev);
struct drm_i915_gem_object *obj = vma->obj;
u64 start, end;
- u64 min_alignment;
int ret;
GEM_BUG_ON(vma->flags & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND));
@@ -2941,17 +2891,10 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
size = i915_gem_get_ggtt_size(dev_priv, size,
i915_gem_object_get_tiling(obj));
- min_alignment =
- i915_gem_get_ggtt_alignment(dev_priv, size,
- i915_gem_object_get_tiling(obj),
- flags & PIN_MAPPABLE);
- if (alignment == 0)
- alignment = min_alignment;
- if (alignment & (min_alignment - 1)) {
- DRM_DEBUG("Invalid object alignment requested %llu, minimum %llu\n",
- alignment, min_alignment);
- return -EINVAL;
- }
+ alignment = max(max(alignment, vma->display_alignment),
+ i915_gem_get_ggtt_alignment(dev_priv, size,
+ i915_gem_object_get_tiling(obj),
+ flags & PIN_MAPPABLE));
start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0;
@@ -2970,15 +2913,13 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
size, obj->base.size,
flags & PIN_MAPPABLE ? "mappable" : "total",
end);
- return -E2BIG;
+ return -ENOSPC;
}
- ret = i915_gem_object_get_pages(obj);
+ ret = i915_gem_object_pin_pages(obj);
if (ret)
return ret;
- i915_gem_object_pin_pages(obj);
-
if (flags & PIN_OFFSET_FIXED) {
u64 offset = flags & PIN_OFFSET_MASK;
if (offset & (alignment - 1) || offset > end - size) {
@@ -2991,22 +2932,23 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
vma->node.color = obj->cache_level;
ret = drm_mm_reserve_node(&vma->vm->mm, &vma->node);
if (ret) {
- ret = i915_gem_evict_for_vma(vma);
+ if (flags & PIN_NOEVICT)
+ goto err_unpin;
+
+ ret = i915_gem_evict_for_node(vma->vm, &vma->node, flags);
if (ret == 0)
ret = drm_mm_reserve_node(&vma->vm->mm, &vma->node);
if (ret)
goto err_unpin;
}
} else {
- u32 search_flag, alloc_flag;
+ unsigned mmflags;
- if (flags & PIN_HIGH) {
- search_flag = DRM_MM_SEARCH_BELOW;
- alloc_flag = DRM_MM_CREATE_TOP;
- } else {
- search_flag = DRM_MM_SEARCH_DEFAULT;
- alloc_flag = DRM_MM_CREATE_DEFAULT;
- }
+ mmflags = 0;
+ if (flags & PIN_HIGH)
+ mmflags = DRM_MM_INSERT_HIGH;
+ if (flags & PIN_MAPPABLE)
+ mmflags = DRM_MM_INSERT_LOW;
/* We only allocate in PAGE_SIZE/GTT_PAGE_SIZE (4096) chunks,
* so we know that we always have a minimum alignment of 4096.
@@ -3017,27 +2959,28 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
if (alignment <= 4096)
alignment = 0;
-search_free:
ret = drm_mm_insert_node_in_range_generic(&vma->vm->mm,
&vma->node,
size, alignment,
obj->cache_level,
start, end,
- search_flag,
- alloc_flag);
+ mmflags);
if (ret) {
- ret = i915_gem_evict_something(vma->vm, size, alignment,
- obj->cache_level,
- start, end,
- flags);
- if (ret == 0)
- goto search_free;
+ if (flags & PIN_NOEVICT)
+ goto err_unpin;
- goto err_unpin;
+ ret = i915_gem_evict_something(vma->vm, &vma->node,
+ size, alignment,
+ obj->cache_level,
+ start, end,
+ flags);
+ if (ret)
+ goto err_unpin;
}
}
GEM_BUG_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level));
+ assert_rpm_release_all_mmaps(dev_priv);
list_move_tail(&obj->global_list, &dev_priv->mm.bound_list);
list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
obj->bind_count++;
@@ -3057,7 +3000,7 @@ i915_gem_clflush_object(struct drm_i915_gem_object *obj,
* to GPU, and we can ignore the cache flush because it'll happen
* again at bind time.
*/
- if (obj->pages == NULL)
+ if (obj->mm.pages == NULL)
return false;
/*
@@ -3081,7 +3024,7 @@ i915_gem_clflush_object(struct drm_i915_gem_object *obj,
}
trace_i915_gem_object_clflush(obj);
- drm_clflush_sg(obj->pages);
+ drm_clflush_sg(obj->mm.pages);
obj->cache_dirty = false;
return true;
@@ -3091,51 +3034,78 @@ i915_gem_clflush_object(struct drm_i915_gem_object *obj,
static void
i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj)
{
- uint32_t old_write_domain;
+ struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
if (obj->base.write_domain != I915_GEM_DOMAIN_GTT)
return;
/* No actual flushing is required for the GTT write domain. Writes
- * to it immediately go to main memory as far as we know, so there's
+ * to it "immediately" go to main memory as far as we know, so there's
* no chipset flush. It also doesn't land in render cache.
*
* However, we do have to enforce the order so that all writes through
* the GTT land before any writes to the device, such as updates to
* the GATT itself.
+ *
+ * We also have to wait a bit for the writes to land from the GTT.
+ * An uncached read (i.e. mmio) seems to be ideal for the round-trip
+ * timing. This issue has only been observed when switching quickly
+ * between GTT writes and CPU reads from inside the kernel on recent hw,
+ * and it appears to only affect discrete GTT blocks (i.e. on LLC
+ * system agents we cannot reproduce this behaviour).
*/
wmb();
-
- old_write_domain = obj->base.write_domain;
- obj->base.write_domain = 0;
+ if (INTEL_GEN(dev_priv) >= 6 && !HAS_LLC(dev_priv))
+ POSTING_READ(RING_ACTHD(dev_priv->engine[RCS].mmio_base));
intel_fb_obj_flush(obj, false, ORIGIN_GTT);
+ obj->base.write_domain = 0;
trace_i915_gem_object_change_domain(obj,
obj->base.read_domains,
- old_write_domain);
+ I915_GEM_DOMAIN_GTT);
}
/** Flushes the CPU write domain for the object if it's dirty. */
static void
i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj)
{
- uint32_t old_write_domain;
-
if (obj->base.write_domain != I915_GEM_DOMAIN_CPU)
return;
if (i915_gem_clflush_object(obj, obj->pin_display))
i915_gem_chipset_flush(to_i915(obj->base.dev));
- old_write_domain = obj->base.write_domain;
- obj->base.write_domain = 0;
-
intel_fb_obj_flush(obj, false, ORIGIN_CPU);
+ obj->base.write_domain = 0;
trace_i915_gem_object_change_domain(obj,
obj->base.read_domains,
- old_write_domain);
+ I915_GEM_DOMAIN_CPU);
+}
+
+static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
+{
+ struct drm_i915_private *i915;
+ struct list_head *list;
+ struct i915_vma *vma;
+
+ list_for_each_entry(vma, &obj->vma_list, obj_link) {
+ if (!i915_vma_is_ggtt(vma))
+ break;
+
+ if (i915_vma_is_active(vma))
+ continue;
+
+ if (!drm_mm_node_allocated(&vma->node))
+ continue;
+
+ list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
+ }
+
+ i915 = to_i915(obj->base.dev);
+ list = obj->bind_count ? &i915->mm.bound_list : &i915->mm.unbound_list;
+ list_move_tail(&obj->global_list, list);
}
/**
@@ -3150,25 +3120,11 @@ int
i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
{
uint32_t old_write_domain, old_read_domains;
- struct i915_vma *vma;
int ret;
- ret = i915_gem_object_wait_rendering(obj, !write);
- if (ret)
- return ret;
-
- if (obj->base.write_domain == I915_GEM_DOMAIN_GTT)
- return 0;
+ lockdep_assert_held(&obj->base.dev->struct_mutex);
- /* Flush and acquire obj->pages so that we are coherent through
- * direct access in memory with previous cached writes through
- * shmemfs and that our cache domain tracking remains valid.
- * For example, if the obj->filp was moved to swap without us
- * being notified and releasing the pages, we would mistakenly
- * continue to assume that the obj remained out of the CPU cached
- * domain.
- */
- ret = i915_gem_object_get_pages(obj);
+ ret = i915_gem_object_wait_rendering(obj, !write);
if (ret)
return ret;
@@ -3187,12 +3143,12 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
/* It should now be out of any other write domains, and we can update
* the domain values for our changes.
*/
- BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
+ GEM_BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
if (write) {
obj->base.read_domains = I915_GEM_DOMAIN_GTT;
obj->base.write_domain = I915_GEM_DOMAIN_GTT;
- obj->dirty = 1;
+ i915_gem_object_set_dirty(obj);
}
trace_i915_gem_object_change_domain(obj,
@@ -3200,11 +3156,7 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
old_write_domain);
/* And bump the LRU for this access */
- vma = i915_gem_obj_to_ggtt(obj);
- if (vma &&
- drm_mm_node_allocated(&vma->node) &&
- !i915_vma_is_active(vma))
- list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
+ i915_gem_object_bump_inactive_ggtt(obj);
return 0;
}
@@ -3230,6 +3182,7 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
struct i915_vma *vma;
int ret = 0;
+ lockdep_assert_held(&obj->base.dev->struct_mutex);
if (obj->cache_level == cache_level)
goto out;
@@ -3295,9 +3248,11 @@ restart:
* dropped the fence as all snoopable access is
* supposed to be linear.
*/
- ret = i915_gem_object_put_fence(obj);
- if (ret)
- return ret;
+ list_for_each_entry(vma, &obj->vma_list, obj_link) {
+ ret = i915_vma_put_fence(vma);
+ if (ret)
+ return ret;
+ }
} else {
/* We either have incoherent backing store and
* so no GTT access or the architecture is fully
@@ -3360,14 +3315,13 @@ int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
break;
}
- i915_gem_object_put_unlocked(obj);
+ i915_gem_object_put(obj);
return 0;
}
int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
struct drm_file *file)
{
- struct drm_i915_private *dev_priv = to_i915(dev);
struct drm_i915_gem_caching *args = data;
struct drm_i915_gem_object *obj;
enum i915_cache_level level;
@@ -3396,11 +3350,9 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
return -EINVAL;
}
- intel_runtime_pm_get(dev_priv);
-
ret = i915_mutex_lock_interruptible(dev);
if (ret)
- goto rpm_put;
+ return ret;
obj = i915_gem_object_lookup(file, args->handle);
if (!obj) {
@@ -3409,13 +3361,9 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
}
ret = i915_gem_object_set_cache_level(obj, level);
-
i915_gem_object_put(obj);
unlock:
mutex_unlock(&dev->struct_mutex);
-rpm_put:
- intel_runtime_pm_put(dev_priv);
-
return ret;
}
@@ -3424,14 +3372,17 @@ rpm_put:
* Can be called from an uninterruptible phase (modesetting) and allows
* any flushes to be pipelined (for pageflips).
*/
-int
+struct i915_vma *
i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
u32 alignment,
const struct i915_ggtt_view *view)
{
+ struct i915_vma *vma;
u32 old_read_domains, old_write_domain;
int ret;
+ lockdep_assert_held(&obj->base.dev->struct_mutex);
+
/* Mark the pin_display early so that we account for the
* display coherency whilst setting up the cache domains.
*/
@@ -3448,19 +3399,31 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
*/
ret = i915_gem_object_set_cache_level(obj,
HAS_WT(obj->base.dev) ? I915_CACHE_WT : I915_CACHE_NONE);
- if (ret)
+ if (ret) {
+ vma = ERR_PTR(ret);
goto err_unpin_display;
+ }
/* As the user may map the buffer once pinned in the display plane
* (e.g. libkms for the bootup splash), we have to ensure that we
- * always use map_and_fenceable for all scanout buffers.
+ * always use map_and_fenceable for all scanout buffers. However,
+ * it may simply be too big to fit into mappable, in which case
+ * put it anyway and hope that userspace can cope (but always first
+ * try to preserve the existing ABI).
*/
- ret = i915_gem_object_ggtt_pin(obj, view, 0, alignment,
- view->type == I915_GGTT_VIEW_NORMAL ?
- PIN_MAPPABLE : 0);
- if (ret)
+ vma = ERR_PTR(-ENOSPC);
+ if (view->type == I915_GGTT_VIEW_NORMAL)
+ vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment,
+ PIN_MAPPABLE | PIN_NONBLOCK);
+ if (IS_ERR(vma))
+ vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, 0);
+ if (IS_ERR(vma))
goto err_unpin_display;
+ vma->display_alignment = max_t(u64, vma->display_alignment, alignment);
+
+ WARN_ON(obj->pin_display > i915_vma_pin_count(vma));
+
i915_gem_object_flush_cpu_write_domain(obj);
old_write_domain = obj->base.write_domain;
@@ -3476,23 +3439,29 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
old_read_domains,
old_write_domain);
- return 0;
+ return vma;
err_unpin_display:
obj->pin_display--;
- return ret;
+ return vma;
}
void
-i915_gem_object_unpin_from_display_plane(struct drm_i915_gem_object *obj,
- const struct i915_ggtt_view *view)
+i915_gem_object_unpin_from_display_plane(struct i915_vma *vma)
{
- if (WARN_ON(obj->pin_display == 0))
+ lockdep_assert_held(&vma->vm->dev->struct_mutex);
+ if (WARN_ON(vma->obj->pin_display == 0))
return;
- i915_gem_object_ggtt_unpin_view(obj, view);
+ if (--vma->obj->pin_display == 0)
+ vma->display_alignment = 0;
- obj->pin_display--;
+ /* Bump the LRU to try and avoid premature eviction whilst flipping */
+ if (!i915_vma_is_active(vma))
+ list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
+
+ i915_vma_unpin(vma);
+ WARN_ON(vma->obj->pin_display > i915_vma_pin_count(vma));
}
/**
@@ -3509,6 +3478,8 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
uint32_t old_write_domain, old_read_domains;
int ret;
+ lockdep_assert_held(&obj->base.dev->struct_mutex);
+
ret = i915_gem_object_wait_rendering(obj, !write);
if (ret)
return ret;
@@ -3531,7 +3502,7 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
/* It should now be out of any other write domains, and we can update
* the domain values for our changes.
*/
- BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0);
+ GEM_BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0);
/* If we're writing through the CPU, then the GPU read domains will
* need to be invalidated at next use.
@@ -3545,6 +3516,9 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
old_read_domains,
old_write_domain);
+ /* And bump the LRU for this access */
+ i915_gem_object_bump_inactive_ggtt(obj);
+
return 0;
}
@@ -3576,16 +3550,14 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
return -EIO;
spin_lock(&file_priv->mm.lock);
- list_for_each_entry(request, &file_priv->mm.request_list, client_list) {
+ list_for_each_entry(request, &file_priv->mm.request_list, client_link) {
if (time_after_eq(request->emitted_jiffies, recent_enough))
break;
- /*
- * Note that the request might not have been submitted yet.
- * In which case emitted_jiffies will be zero.
- */
- if (!request->emitted_jiffies)
- continue;
+ if (target) {
+ list_del(&target->client_link);
+ target->file_priv = NULL;
+ }
target = request;
}
@@ -3605,8 +3577,6 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
static bool
i915_vma_misplaced(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
{
- struct drm_i915_gem_object *obj = vma->obj;
-
if (!drm_mm_node_allocated(&vma->node))
return false;
@@ -3616,7 +3586,7 @@ i915_vma_misplaced(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
if (alignment && vma->node.start & (alignment - 1))
return true;
- if (flags & PIN_MAPPABLE && !obj->map_and_fenceable)
+ if (flags & PIN_MAPPABLE && !i915_vma_is_map_and_fenceable(vma))
return true;
if (flags & PIN_OFFSET_BIAS &&
@@ -3638,10 +3608,10 @@ void __i915_vma_set_map_and_fenceable(struct i915_vma *vma)
u32 fence_size, fence_alignment;
fence_size = i915_gem_get_ggtt_size(dev_priv,
- obj->base.size,
+ vma->size,
i915_gem_object_get_tiling(obj));
fence_alignment = i915_gem_get_ggtt_alignment(dev_priv,
- obj->base.size,
+ vma->size,
i915_gem_object_get_tiling(obj),
true);
@@ -3651,7 +3621,10 @@ void __i915_vma_set_map_and_fenceable(struct i915_vma *vma)
mappable = (vma->node.start + fence_size <=
dev_priv->ggtt.mappable_end);
- obj->map_and_fenceable = mappable && fenceable;
+ if (mappable && fenceable)
+ vma->flags |= I915_VMA_CAN_FENCE;
+ else
+ vma->flags &= ~I915_VMA_CAN_FENCE;
}
int __i915_vma_do_pin(struct i915_vma *vma,
@@ -3660,6 +3633,7 @@ int __i915_vma_do_pin(struct i915_vma *vma,
unsigned int bound = vma->flags;
int ret;
+ lockdep_assert_held(&vma->vm->dev->struct_mutex);
GEM_BUG_ON((flags & (PIN_GLOBAL | PIN_USER)) == 0);
GEM_BUG_ON((flags & PIN_GLOBAL) && !i915_vma_is_ggtt(vma));
@@ -3689,9 +3663,9 @@ err:
return ret;
}
-int
+struct i915_vma *
i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
- const struct i915_ggtt_view *view,
+ const struct i915_ggtt_view *ggtt_view,
u64 size,
u64 alignment,
u64 flags)
@@ -3699,43 +3673,39 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
struct i915_vma *vma;
int ret;
- if (!view)
- view = &i915_ggtt_view_normal;
+ lockdep_assert_held(&obj->base.dev->struct_mutex);
- vma = i915_gem_obj_lookup_or_create_ggtt_vma(obj, view);
+ vma = i915_gem_obj_lookup_or_create_vma(obj, NULL, ggtt_view);
if (IS_ERR(vma))
- return PTR_ERR(vma);
+ return vma;
if (i915_vma_misplaced(vma, size, alignment, flags)) {
if (flags & PIN_NONBLOCK &&
(i915_vma_is_pinned(vma) || i915_vma_is_active(vma)))
- return -ENOSPC;
+ return ERR_PTR(-ENOSPC);
WARN(i915_vma_is_pinned(vma),
"bo is already pinned in ggtt with incorrect alignment:"
- " offset=%08x %08x, req.alignment=%llx, req.map_and_fenceable=%d,"
- " obj->map_and_fenceable=%d\n",
+ " offset=%08x %08x, req.alignment=%llx,"
+ " req.map_and_fenceable=%d, vma->map_and_fenceable=%d\n",
upper_32_bits(vma->node.start),
lower_32_bits(vma->node.start),
alignment,
!!(flags & PIN_MAPPABLE),
- obj->map_and_fenceable);
+ i915_vma_is_map_and_fenceable(vma));
ret = i915_vma_unbind(vma);
if (ret)
- return ret;
+ return ERR_PTR(ret);
}
- return i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL);
-}
+ ret = i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL);
+ if (ret)
+ return ERR_PTR(ret);
-void
-i915_gem_object_ggtt_unpin_view(struct drm_i915_gem_object *obj,
- const struct i915_ggtt_view *view)
-{
- i915_vma_unpin(i915_gem_obj_to_ggtt_view(obj, view));
+ return vma;
}
-static __always_inline unsigned __busy_read_flag(unsigned int id)
+static __always_inline unsigned int __busy_read_flag(unsigned int id)
{
/* Note that we could alias engines in the execbuf API, but
* that would be very unwise as it prevents userspace from
@@ -3753,7 +3723,7 @@ static __always_inline unsigned int __busy_write_id(unsigned int id)
return id;
}
-static __always_inline unsigned
+static __always_inline unsigned int
__busy_set_if_active(const struct i915_gem_active *active,
unsigned int (*flag)(unsigned int id))
{
@@ -3770,19 +3740,34 @@ __busy_set_if_active(const struct i915_gem_active *active,
id = request->engine->exec_id;
- /* Check that the pointer wasn't reassigned and overwritten. */
+ /* Check that the pointer wasn't reassigned and overwritten.
+ *
+ * In __i915_gem_active_get_rcu(), we enforce ordering between
+ * the first rcu pointer dereference (imposing a
+ * read-dependency only on access through the pointer) and
+ * the second lockless access through the memory barrier
+ * following a successful atomic_inc_not_zero(). Here there
+ * is no such barrier, and so we must manually insert an
+ * explicit read barrier to ensure that the following
+ * access occurs after all the loads through the first
+ * pointer.
+ *
+ * The corresponding write barrier is part of
+ * rcu_assign_pointer().
+ */
+ smp_rmb();
if (request == rcu_access_pointer(active->request))
return flag(id);
} while (1);
}
-static inline unsigned
+static __always_inline unsigned int
busy_check_reader(const struct i915_gem_active *active)
{
return __busy_set_if_active(active, __busy_read_flag);
}
-static inline unsigned
+static __always_inline unsigned int
busy_check_writer(const struct i915_gem_active *active)
{
return __busy_set_if_active(active, __busy_write_id);
@@ -3847,7 +3832,7 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data,
rcu_read_unlock();
}
- i915_gem_object_put_unlocked(obj);
+ i915_gem_object_put(obj);
return 0;
}
@@ -3875,37 +3860,35 @@ i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
return -EINVAL;
}
- ret = i915_mutex_lock_interruptible(dev);
- if (ret)
- return ret;
-
obj = i915_gem_object_lookup(file_priv, args->handle);
- if (!obj) {
- ret = -ENOENT;
- goto unlock;
- }
+ if (!obj)
+ return -ENOENT;
+
+ ret = mutex_lock_interruptible(&obj->mm.lock);
+ if (ret)
+ goto err;
- if (obj->pages &&
+ if (obj->mm.pages &&
i915_gem_object_is_tiled(obj) &&
dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
- if (obj->madv == I915_MADV_WILLNEED)
- i915_gem_object_unpin_pages(obj);
+ if (obj->mm.madv == I915_MADV_WILLNEED)
+ atomic_dec(&obj->mm.pages_pin_count);
if (args->madv == I915_MADV_WILLNEED)
- i915_gem_object_pin_pages(obj);
+ atomic_inc(&obj->mm.pages_pin_count);
}
- if (obj->madv != __I915_MADV_PURGED)
- obj->madv = args->madv;
+ if (obj->mm.madv != __I915_MADV_PURGED)
+ obj->mm.madv = args->madv;
/* if the object is no longer attached, discard its backing storage */
- if (obj->madv == I915_MADV_DONTNEED && obj->pages == NULL)
+ if (obj->mm.madv == I915_MADV_DONTNEED && obj->mm.pages == NULL)
i915_gem_object_truncate(obj);
- args->retained = obj->madv != __I915_MADV_PURGED;
+ args->retained = obj->mm.madv != __I915_MADV_PURGED;
+ mutex_unlock(&obj->mm.lock);
+err:
i915_gem_object_put(obj);
-unlock:
- mutex_unlock(&dev->struct_mutex);
return ret;
}
@@ -3914,21 +3897,22 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
{
int i;
+ mutex_init(&obj->mm.lock);
+
INIT_LIST_HEAD(&obj->global_list);
for (i = 0; i < I915_NUM_ENGINES; i++)
init_request_active(&obj->last_read[i],
i915_gem_object_retire__read);
init_request_active(&obj->last_write,
i915_gem_object_retire__write);
- init_request_active(&obj->last_fence, NULL);
- INIT_LIST_HEAD(&obj->obj_exec_link);
INIT_LIST_HEAD(&obj->vma_list);
INIT_LIST_HEAD(&obj->batch_pool_link);
obj->ops = ops;
- obj->fence_reg = I915_FENCE_REG_NONE;
- obj->madv = I915_MADV_WILLNEED;
+ obj->mm.madv = I915_MADV_WILLNEED;
+ INIT_RADIX_TREE(&obj->mm.get_page.radix, GFP_KERNEL);
+ mutex_init(&obj->mm.get_page.lock);
i915_gem_info_add_obj(to_i915(obj->base.dev), obj->base.size);
}
@@ -4005,7 +3989,7 @@ static bool discard_backing_storage(struct drm_i915_gem_object *obj)
* back the contents from the GPU.
*/
- if (obj->madv != I915_MADV_WILLNEED)
+ if (obj->mm.madv != I915_MADV_WILLNEED)
return false;
if (obj->base.filp == NULL)
@@ -4021,16 +4005,11 @@ static bool discard_backing_storage(struct drm_i915_gem_object *obj)
return atomic_long_read(&obj->base.filp->f_count) == 1;
}
-void i915_gem_free_object(struct drm_gem_object *gem_obj)
+static void __i915_gem_free_objects(struct work_struct *work)
{
- struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
- struct drm_device *dev = obj->base.dev;
- struct drm_i915_private *dev_priv = to_i915(dev);
- struct i915_vma *vma, *next;
-
- intel_runtime_pm_get(dev_priv);
-
- trace_i915_gem_object_destroy(obj);
+ struct drm_i915_private *dev_priv =
+ container_of(work, struct drm_i915_private, mm.free_work);
+ struct llist_node *freed;
/* All file-owned VMA should have been released by this point through
* i915_gem_close_object(), or earlier by i915_gem_context_close().
@@ -4039,73 +4018,75 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
* the GTT either for the user or for scanout). Those VMA still need to
* unbound now.
*/
- list_for_each_entry_safe(vma, next, &obj->vma_list, obj_link) {
- GEM_BUG_ON(!i915_vma_is_ggtt(vma));
- GEM_BUG_ON(i915_vma_is_active(vma));
- vma->flags &= ~I915_VMA_PIN_MASK;
- i915_vma_close(vma);
- }
- GEM_BUG_ON(obj->bind_count);
- /* Stolen objects don't hold a ref, but do hold pin count. Fix that up
- * before progressing. */
- if (obj->stolen)
- i915_gem_object_unpin_pages(obj);
+ while ((freed = llist_del_all(&dev_priv->mm.free_list))) {
+ struct drm_i915_gem_object *obj, *on;
- WARN_ON(atomic_read(&obj->frontbuffer_bits));
+ mutex_lock(&dev_priv->drm.struct_mutex);
+ llist_for_each_entry(obj, freed, free_link) {
+ struct i915_vma *vma, *vn;
- if (obj->pages && obj->madv == I915_MADV_WILLNEED &&
- dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES &&
- i915_gem_object_is_tiled(obj))
- i915_gem_object_unpin_pages(obj);
+ trace_i915_gem_object_destroy(obj);
- if (WARN_ON(obj->pages_pin_count))
- obj->pages_pin_count = 0;
- if (discard_backing_storage(obj))
- obj->madv = I915_MADV_DONTNEED;
- i915_gem_object_put_pages(obj);
+ GEM_BUG_ON(i915_gem_object_is_active(obj));
+ list_for_each_entry_safe(vma, vn,
+ &obj->vma_list, obj_link) {
+ GEM_BUG_ON(!i915_vma_is_ggtt(vma));
+ GEM_BUG_ON(i915_vma_is_active(vma));
+ vma->flags &= ~I915_VMA_PIN_MASK;
+ i915_vma_close(vma);
+ }
- BUG_ON(obj->pages);
+ list_del(&obj->global_list);
+ }
+ mutex_unlock(&dev_priv->drm.struct_mutex);
- if (obj->base.import_attach)
- drm_prime_gem_destroy(&obj->base, NULL);
+ llist_for_each_entry_safe(obj, on, freed, free_link) {
+ GEM_BUG_ON(obj->bind_count);
+ GEM_BUG_ON(atomic_read(&obj->frontbuffer_bits));
- if (obj->ops->release)
- obj->ops->release(obj);
+ atomic_set(&obj->mm.pages_pin_count, 0);
+ __i915_gem_object_put_pages(obj);
+ GEM_BUG_ON(obj->mm.pages);
- drm_gem_object_release(&obj->base);
- i915_gem_info_remove_obj(dev_priv, obj->base.size);
+ if (obj->base.import_attach)
+ drm_prime_gem_destroy(&obj->base, NULL);
- kfree(obj->bit_17);
- i915_gem_object_free(obj);
+ if (obj->ops->release)
+ obj->ops->release(obj);
- intel_runtime_pm_put(dev_priv);
-}
+ __i915_gem_object_invalidate(obj);
+ drm_gem_object_release(&obj->base);
+ i915_gem_info_remove_obj(dev_priv, obj->base.size);
-struct i915_vma *i915_gem_obj_to_vma(struct drm_i915_gem_object *obj,
- struct i915_address_space *vm)
-{
- struct i915_vma *vma;
- list_for_each_entry(vma, &obj->vma_list, obj_link) {
- if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL &&
- vma->vm == vm)
- return vma;
+ kfree(obj->bit_17);
+ i915_gem_object_free(obj);
+ }
}
- return NULL;
}
-struct i915_vma *i915_gem_obj_to_ggtt_view(struct drm_i915_gem_object *obj,
- const struct i915_ggtt_view *view)
+void i915_gem_free_object(struct drm_gem_object *gem_obj)
{
- struct i915_vma *vma;
+ struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
+ struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
+
+ if (discard_backing_storage(obj))
+ obj->mm.madv = I915_MADV_DONTNEED;
- GEM_BUG_ON(!view);
+ if (llist_add(&obj->free_link, &dev_priv->mm.free_list))
+ queue_work(dev_priv->wq, &dev_priv->mm.free_work);
+}
- list_for_each_entry(vma, &obj->vma_list, obj_link)
- if (i915_vma_is_ggtt(vma) &&
- i915_ggtt_view_equal(&vma->ggtt_view, view))
- return vma;
- return NULL;
+void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj)
+{
+ if (!obj)
+ return;
+
+ GEM_BUG_ON(i915_gem_object_has_active_reference(obj));
+ if (i915_gem_object_is_active(obj))
+ i915_gem_object_set_active_reference(obj);
+ else
+ i915_gem_object_put(obj);
}
int i915_gem_suspend(struct drm_device *dev)
@@ -4141,6 +4122,7 @@ int i915_gem_suspend(struct drm_device *dev)
cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
cancel_delayed_work_sync(&dev_priv->gt.retire_work);
flush_delayed_work(&dev_priv->gt.idle_work);
+ flush_work(&dev_priv->mm.free_work);
/* Assert that we sucessfully flushed all the work and
* reset the GPU back to its idle, low power state.
@@ -4331,7 +4313,9 @@ int i915_gem_init(struct drm_device *dev)
*/
intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
- i915_gem_init_userptr(dev_priv);
+ ret = i915_gem_init_userptr(dev_priv);
+ if (ret)
+ goto out_unlock;
ret = i915_gem_init_ggtt(dev_priv);
if (ret)
@@ -4383,6 +4367,7 @@ void
i915_gem_load_init_fences(struct drm_i915_private *dev_priv)
{
struct drm_device *dev = &dev_priv->drm;
+ int i;
if (INTEL_INFO(dev_priv)->gen >= 7 && !IS_VALLEYVIEW(dev_priv) &&
!IS_CHERRYVIEW(dev_priv))
@@ -4398,6 +4383,13 @@ i915_gem_load_init_fences(struct drm_i915_private *dev_priv)
I915_READ(vgtif_reg(avail_rs.fence_num));
/* Initialize fence registers to zero */
+ for (i = 0; i < dev_priv->num_fence_regs; i++) {
+ struct drm_i915_fence_reg *fence = &dev_priv->fence_regs[i];
+
+ fence->i915 = dev_priv;
+ fence->id = i;
+ list_add_tail(&fence->lru_list, &dev_priv->mm.fence_list);
+ }
i915_gem_restore_fences(dev);
i915_gem_detect_bit_6_swizzle(dev);
@@ -4428,13 +4420,13 @@ i915_gem_load_init(struct drm_device *dev)
NULL);
INIT_LIST_HEAD(&dev_priv->context_list);
+ INIT_WORK(&dev_priv->mm.free_work, __i915_gem_free_objects);
+ init_llist_head(&dev_priv->mm.free_list);
INIT_LIST_HEAD(&dev_priv->mm.unbound_list);
INIT_LIST_HEAD(&dev_priv->mm.bound_list);
INIT_LIST_HEAD(&dev_priv->mm.fence_list);
for (i = 0; i < I915_NUM_ENGINES; i++)
init_engine_lists(&dev_priv->engine[i]);
- for (i = 0; i < I915_MAX_NUM_FENCES; i++)
- INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list);
INIT_DELAYED_WORK(&dev_priv->gt.retire_work,
i915_gem_retire_work_handler);
INIT_DELAYED_WORK(&dev_priv->gt.idle_work,
@@ -4444,8 +4436,6 @@ i915_gem_load_init(struct drm_device *dev)
dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL;
- INIT_LIST_HEAD(&dev_priv->mm.fence_list);
-
init_waitqueue_head(&dev_priv->pending_flip_queue);
dev_priv->mm.interruptible = true;
@@ -4503,7 +4493,7 @@ void i915_gem_release(struct drm_device *dev, struct drm_file *file)
* file_priv.
*/
spin_lock(&file_priv->mm.lock);
- list_for_each_entry(request, &file_priv->mm.request_list, client_list)
+ list_for_each_entry(request, &file_priv->mm.request_list, client_link)
request->file_priv = NULL;
spin_unlock(&file_priv->mm.lock);
@@ -4575,112 +4565,6 @@ void i915_gem_track_fb(struct drm_i915_gem_object *old,
}
}
-/* All the new VM stuff */
-u64 i915_gem_obj_offset(struct drm_i915_gem_object *o,
- struct i915_address_space *vm)
-{
- struct drm_i915_private *dev_priv = to_i915(o->base.dev);
- struct i915_vma *vma;
-
- WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base);
-
- list_for_each_entry(vma, &o->vma_list, obj_link) {
- if (i915_vma_is_ggtt(vma) &&
- vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL)
- continue;
- if (vma->vm == vm)
- return vma->node.start;
- }
-
- WARN(1, "%s vma for this object not found.\n",
- i915_is_ggtt(vm) ? "global" : "ppgtt");
- return -1;
-}
-
-u64 i915_gem_obj_ggtt_offset_view(struct drm_i915_gem_object *o,
- const struct i915_ggtt_view *view)
-{
- struct i915_vma *vma;
-
- list_for_each_entry(vma, &o->vma_list, obj_link)
- if (i915_vma_is_ggtt(vma) &&
- i915_ggtt_view_equal(&vma->ggtt_view, view))
- return vma->node.start;
-
- WARN(1, "global vma for this object not found. (view=%u)\n", view->type);
- return -1;
-}
-
-bool i915_gem_obj_bound(struct drm_i915_gem_object *o,
- struct i915_address_space *vm)
-{
- struct i915_vma *vma;
-
- list_for_each_entry(vma, &o->vma_list, obj_link) {
- if (i915_vma_is_ggtt(vma) &&
- vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL)
- continue;
- if (vma->vm == vm && drm_mm_node_allocated(&vma->node))
- return true;
- }
-
- return false;
-}
-
-bool i915_gem_obj_ggtt_bound_view(struct drm_i915_gem_object *o,
- const struct i915_ggtt_view *view)
-{
- struct i915_vma *vma;
-
- list_for_each_entry(vma, &o->vma_list, obj_link)
- if (i915_vma_is_ggtt(vma) &&
- i915_ggtt_view_equal(&vma->ggtt_view, view) &&
- drm_mm_node_allocated(&vma->node))
- return true;
-
- return false;
-}
-
-unsigned long i915_gem_obj_ggtt_size(struct drm_i915_gem_object *o)
-{
- struct i915_vma *vma;
-
- GEM_BUG_ON(list_empty(&o->vma_list));
-
- list_for_each_entry(vma, &o->vma_list, obj_link) {
- if (i915_vma_is_ggtt(vma) &&
- vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL)
- return vma->node.size;
- }
-
- return 0;
-}
-
-bool i915_gem_obj_is_pinned(struct drm_i915_gem_object *obj)
-{
- struct i915_vma *vma;
- list_for_each_entry(vma, &obj->vma_list, obj_link)
- if (i915_vma_is_pinned(vma))
- return true;
-
- return false;
-}
-
-/* Like i915_gem_object_get_page(), but mark the returned page dirty */
-struct page *
-i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj, int n)
-{
- struct page *page;
-
- /* Only default objects have per-page dirty tracking */
- if (WARN_ON(!i915_gem_object_has_struct_page(obj)))
- return NULL;
-
- page = i915_gem_object_get_page(obj, n);
- set_page_dirty(page);
- return page;
-}
-
/* Allocate a new GEM object and fill it with the supplied data */
struct drm_i915_gem_object *
i915_gem_object_create_from_data(struct drm_device *dev,
@@ -4699,14 +4583,13 @@ i915_gem_object_create_from_data(struct drm_device *dev,
if (ret)
goto fail;
- ret = i915_gem_object_get_pages(obj);
+ ret = i915_gem_object_pin_pages(obj);
if (ret)
goto fail;
- i915_gem_object_pin_pages(obj);
- sg = obj->pages;
+ sg = obj->mm.pages;
bytes = sg_copy_from_buffer(sg->sgl, sg->nents, (void *)data, size);
- obj->dirty = 1; /* Backing store is now out of date */
+ i915_gem_object_set_dirty(obj); /* Backing store is now out of date */
i915_gem_object_unpin_pages(obj);
if (WARN_ON(bytes != size)) {
@@ -4721,3 +4604,115 @@ fail:
i915_gem_object_put(obj);
return ERR_PTR(ret);
}
+
+static struct scatterlist *
+__i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
+ unsigned long n, unsigned int *offset)
+{
+ struct scatterlist *sg = obj->mm.pages->sgl;
+ int idx = 0;
+
+ while (idx + __sg_page_count(sg) <= n) {
+ idx += __sg_page_count(sg++);
+ if (unlikely(sg_is_chain(sg)))
+ sg = sg_chain_ptr(sg);
+ }
+
+ *offset = n - idx;
+ return sg;
+}
+
+struct scatterlist *
+i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
+ unsigned long n,
+ unsigned int *offset)
+{
+ struct i915_gem_object_page_iter *iter = &obj->mm.get_page;
+ struct scatterlist *sg;
+
+ GEM_BUG_ON(n >= obj->base.size >> PAGE_SHIFT);
+ GEM_BUG_ON(atomic_read(&obj->mm.pages_pin_count) == 0);
+
+ if (n < READ_ONCE(iter->sg_idx))
+ goto lookup;
+
+ mutex_lock(&iter->lock);
+ if (n >= iter->sg_idx &&
+ n < iter->sg_idx + __sg_page_count(iter->sg_pos)) {
+ sg = iter->sg_pos;
+ *offset = n - iter->sg_idx;
+ mutex_unlock(&iter->lock);
+ return sg;
+ }
+
+ while (iter->sg_idx <= n) {
+ unsigned long exception;
+ unsigned int count, i;
+
+ radix_tree_insert(&iter->radix,
+ iter->sg_idx,
+ iter->sg_pos);
+
+ exception =
+ RADIX_TREE_EXCEPTIONAL_ENTRY |
+ iter->sg_idx << RADIX_TREE_EXCEPTIONAL_SHIFT;
+ count = __sg_page_count(iter->sg_pos);
+ for (i = 1; i < count; i++)
+ radix_tree_insert(&iter->radix,
+ iter->sg_idx + i,
+ (void *)exception);
+
+ iter->sg_idx += count;
+ iter->sg_pos = __sg_next(iter->sg_pos);
+ }
+ mutex_unlock(&iter->lock);
+
+lookup:
+ rcu_read_lock();
+ sg = radix_tree_lookup(&iter->radix, n);
+ rcu_read_unlock();
+
+ if (unlikely(!sg))
+ return __i915_gem_object_get_sg(obj, n, offset);
+
+ *offset = 0;
+ if (unlikely(radix_tree_exception(sg))) {
+ unsigned long base =
+ (unsigned long)sg >> RADIX_TREE_EXCEPTIONAL_SHIFT;
+ sg = radix_tree_lookup(&iter->radix, base);
+ *offset = n - base;
+ }
+ return sg;
+}
+
+struct page *
+i915_gem_object_get_page(struct drm_i915_gem_object *obj, unsigned long n)
+{
+ struct scatterlist *sg;
+ unsigned int offset;
+
+ sg = i915_gem_object_get_sg(obj, n, &offset);
+ return nth_page(sg_page(sg), offset);
+}
+
+/* Like i915_gem_object_get_page(), but mark the returned page dirty */
+struct page *
+i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj,
+ unsigned long n)
+{
+ struct page *page = i915_gem_object_get_page(obj, n);
+ if (!i915_gem_object_is_dirty(obj))
+ set_page_dirty(page);
+ return page;
+}
+
+dma_addr_t
+i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj,
+ unsigned long n)
+{
+ struct scatterlist *sg;
+ unsigned int offset;
+
+ sg = i915_gem_object_get_sg(obj, n, &offset);
+ return sg_dma_address(sg) + (offset << PAGE_SHIFT);
+}
diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.c b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
index ed989596d9a3..6b656822bb3a 100644
--- a/drivers/gpu/drm/i915/i915_gem_batch_pool.c
+++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
@@ -73,7 +73,7 @@ void i915_gem_batch_pool_fini(struct i915_gem_batch_pool *pool)
list_for_each_entry_safe(obj, next,
&pool->cache_list[n],
batch_pool_link)
- i915_gem_object_put(obj);
+ __i915_gem_object_release_unless_active(obj);
INIT_LIST_HEAD(&pool->cache_list[n]);
}
@@ -97,9 +97,9 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool,
size_t size)
{
struct drm_i915_gem_object *obj = NULL;
- struct drm_i915_gem_object *tmp, *next;
+ struct drm_i915_gem_object *tmp;
struct list_head *list;
- int n;
+ int n, ret;
lockdep_assert_held(&pool->engine->i915->drm.struct_mutex);
@@ -112,19 +112,12 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool,
n = ARRAY_SIZE(pool->cache_list) - 1;
list = &pool->cache_list[n];
- list_for_each_entry_safe(tmp, next, list, batch_pool_link) {
+ list_for_each_entry(tmp, list, batch_pool_link) {
/* The batches are strictly LRU ordered */
if (!i915_gem_active_is_idle(&tmp->last_read[pool->engine->id],
&tmp->base.dev->struct_mutex))
break;
- /* While we're looping, do some clean up */
- if (tmp->madv == __I915_MADV_PURGED) {
- list_del(&tmp->batch_pool_link);
- i915_gem_object_put(tmp);
- continue;
- }
-
if (tmp->base.size >= size) {
obj = tmp;
break;
@@ -132,20 +125,16 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool,
}
if (obj == NULL) {
- int ret;
-
- obj = i915_gem_object_create(&pool->engine->i915->drm, size);
+ obj = i915_gem_object_create_internal(&pool->engine->i915->drm,
+ size);
if (IS_ERR(obj))
return obj;
-
- ret = i915_gem_object_get_pages(obj);
- if (ret)
- return ERR_PTR(ret);
-
- obj->madv = I915_MADV_DONTNEED;
}
+ ret = i915_gem_object_pin_pages(obj);
+ if (ret)
+ return ERR_PTR(ret);
+
list_move_tail(&obj->batch_pool_link, list);
- i915_gem_object_pin_pages(obj);
return obj;
}
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index bb72af5320b0..ecef433fe396 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -85,6 +85,7 @@
*
*/
+#include <linux/log2.h>
#include <drm/drmP.h>
#include <drm/i915_drm.h>
#include "i915_drv.h"
@@ -92,20 +93,8 @@
#define ALL_L3_SLICES(dev) (1 << NUM_L3_SLICES(dev)) - 1
-/* This is a HW constraint. The value below is the largest known requirement
- * I've seen in a spec to date, and that was a workaround for a non-shipping
- * part. It should be safe to decrease this, but it's more future proof as is.
- */
-#define GEN6_CONTEXT_ALIGN (64<<10)
-#define GEN7_CONTEXT_ALIGN 4096
-
-static size_t get_context_alignment(struct drm_i915_private *dev_priv)
-{
- if (IS_GEN6(dev_priv))
- return GEN6_CONTEXT_ALIGN;
-
- return GEN7_CONTEXT_ALIGN;
-}
+/* Initial size (as log2) to preallocate the handle->object hashtable */
+#define VMA_HT_BITS 2u /* 4 x 2 pointers, 64 bytes minimum */
static int get_context_size(struct drm_i915_private *dev_priv)
{
@@ -134,6 +123,64 @@ static int get_context_size(struct drm_i915_private *dev_priv)
return ret;
}
+static void resize_vma_ht(struct work_struct *work)
+{
+ struct i915_gem_context *ctx =
+ container_of(work, typeof(*ctx), vma.resize);
+ unsigned int size, bits, new_bits, i;
+ struct hlist_head *new_ht;
+
+ bits = 1 + ilog2(4*ctx->vma.ht_count/3);
+ new_bits = min_t(unsigned int,
+ max(bits, VMA_HT_BITS),
+ sizeof(unsigned int)*8);
+ if (new_bits == ctx->vma.ht_bits)
+ goto out;
+
+ new_ht = kzalloc(sizeof(*new_ht)<<new_bits, GFP_KERNEL | __GFP_NOWARN);
+ if (!new_ht)
+ new_ht = vzalloc(sizeof(*new_ht)<<new_bits);
+ if (!new_ht)
+ /* pretend the resize suceeded and stop calling us for a bit! */
+ goto out;
+
+ size = 1 << ctx->vma.ht_bits;
+ for (i = 0; i < size; i++) {
+ struct i915_vma *vma;
+ struct hlist_node *tmp;
+
+ hlist_for_each_entry_safe(vma, tmp, &ctx->vma.ht[i], ctx_node)
+ hlist_add_head(&vma->ctx_node,
+ &new_ht[hash_32(vma->ctx_handle,
+ new_bits)]);
+ }
+ kvfree(ctx->vma.ht);
+ ctx->vma.ht = new_ht;
+ ctx->vma.ht_bits = new_bits;
+ smp_wmb();
+out:
+ ctx->vma.ht_size = 1 << bits;
+}
+
+static void decouple_vma(struct i915_gem_context *ctx)
+{
+ unsigned int i, size;
+
+ if (ctx->vma.ht_size & 1)
+ cancel_work_sync(&ctx->vma.resize);
+
+ size = 1 << ctx->vma.ht_bits;
+ for (i = 0; i < size; i++) {
+ struct i915_vma *vma;
+
+ hlist_for_each_entry(vma, &ctx->vma.ht[i], ctx_node) {
+ vma->obj->vma_hashed = NULL;
+ vma->ctx = NULL;
+ }
+ }
+ kvfree(ctx->vma.ht);
+}
+
void i915_gem_context_free(struct kref *ctx_ref)
{
struct i915_gem_context *ctx = container_of(ctx_ref, typeof(*ctx), ref);
@@ -143,6 +190,7 @@ void i915_gem_context_free(struct kref *ctx_ref)
trace_i915_context_free(ctx);
GEM_BUG_ON(!ctx->closed);
+ decouple_vma(ctx);
i915_ppgtt_put(ctx->ppgtt);
for (i = 0; i < I915_NUM_ENGINES; i++) {
@@ -155,9 +203,11 @@ void i915_gem_context_free(struct kref *ctx_ref)
if (ce->ring)
intel_ring_free(ce->ring);
- i915_gem_object_put(ce->state);
+ __i915_gem_object_release_unless_active(ce->state->obj);
}
+ put_pid(ctx->pid);
+
list_del(&ctx->link);
ida_simple_remove(&ctx->i915->context_hw_ida, ctx->hw_id);
@@ -278,16 +328,36 @@ __create_hw_context(struct drm_device *dev,
list_add_tail(&ctx->link, &dev_priv->context_list);
ctx->i915 = dev_priv;
- ctx->ggtt_alignment = get_context_alignment(dev_priv);
+ ctx->vma.ht_bits = VMA_HT_BITS;
+ ctx->vma.ht_size = 1 << ctx->vma.ht_bits;
+ ctx->vma.ht = kzalloc(sizeof(*ctx->vma.ht)*ctx->vma.ht_size,
+ GFP_KERNEL);
+ if (!ctx->vma.ht)
+ goto err_out;
+
+ INIT_WORK(&ctx->vma.resize, resize_vma_ht);
if (dev_priv->hw_context_size) {
- struct drm_i915_gem_object *obj =
- i915_gem_alloc_context_obj(dev, dev_priv->hw_context_size);
+ struct drm_i915_gem_object *obj;
+ struct i915_vma *vma;
+
+ obj = i915_gem_alloc_context_obj(dev,
+ dev_priv->hw_context_size);
if (IS_ERR(obj)) {
ret = PTR_ERR(obj);
goto err_out;
}
- ctx->engine[RCS].state = obj;
+
+ vma = i915_gem_obj_lookup_or_create_vma(obj,
+ &dev_priv->ggtt.base,
+ NULL);
+ if (IS_ERR(vma)) {
+ i915_gem_object_put(obj);
+ ret = PTR_ERR(vma);
+ goto err_out;
+ }
+
+ ctx->engine[RCS].state = vma;
}
/* Default context will never have a file_priv */
@@ -300,6 +370,9 @@ __create_hw_context(struct drm_device *dev,
ret = DEFAULT_CONTEXT_HANDLE;
ctx->file_priv = file_priv;
+ if (file_priv)
+ ctx->pid = get_task_pid(current, PIDTYPE_PID);
+
ctx->user_handle = ret;
/* NB: Mark all slices as needing a remap so that when the context first
* loads it will restore whatever remap state already exists. If there
@@ -399,7 +472,7 @@ static void i915_gem_context_unpin(struct i915_gem_context *ctx,
struct intel_context *ce = &ctx->engine[engine->id];
if (ce->state)
- i915_gem_object_ggtt_unpin(ce->state);
+ i915_vma_unpin(ce->state);
i915_gem_context_put(ctx);
}
@@ -572,17 +645,6 @@ mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags)
0;
int len, ret;
- /* w/a: If Flush TLB Invalidation Mode is enabled, driver must do a TLB
- * invalidation prior to MI_SET_CONTEXT. On GEN6 we don't set the value
- * explicitly, so we rely on the value at ring init, stored in
- * itlb_before_ctx_switch.
- */
- if (IS_GEN6(dev_priv)) {
- ret = engine->emit_flush(req, EMIT_INVALIDATE);
- if (ret)
- return ret;
- }
-
/* These flags are for resource streamer on HSW+ */
if (IS_HASWELL(dev_priv) || INTEL_GEN(dev_priv) >= 8)
flags |= (HSW_MI_RS_SAVE_STATE_EN | HSW_MI_RS_RESTORE_STATE_EN);
@@ -620,9 +682,7 @@ mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags)
intel_ring_emit(ring, MI_NOOP);
intel_ring_emit(ring, MI_SET_CONTEXT);
- intel_ring_emit(ring,
- i915_gem_obj_ggtt_offset(req->ctx->engine[RCS].state) |
- flags);
+ intel_ring_emit(ring, req->ctx->engine[RCS].state->node.start | flags);
/*
* w/a: MI_SET_CONTEXT must always be followed by MI_NOOP
* WaMiSetContext_Hang:snb,ivb,vlv
@@ -651,7 +711,7 @@ mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags)
MI_STORE_REGISTER_MEM |
MI_SRM_LRM_GLOBAL_GTT);
intel_ring_emit_reg(ring, last_reg);
- intel_ring_emit(ring, engine->scratch.gtt_offset);
+ intel_ring_emit(ring, engine->scratch->node.start);
intel_ring_emit(ring, MI_NOOP);
}
intel_ring_emit(ring, MI_ARB_ON_OFF | MI_ARB_ENABLE);
@@ -708,10 +768,10 @@ static inline bool skip_rcs_switch(struct i915_hw_ppgtt *ppgtt,
}
static bool
-needs_pd_load_pre(struct i915_hw_ppgtt *ppgtt,
- struct intel_engine_cs *engine,
- struct i915_gem_context *to)
+needs_pd_load_pre(struct i915_hw_ppgtt *ppgtt, struct intel_engine_cs *engine)
{
+ struct i915_hw_ppgtt *last_ppgtt;
+
if (!ppgtt)
return false;
@@ -720,7 +780,9 @@ needs_pd_load_pre(struct i915_hw_ppgtt *ppgtt,
return true;
/* Same context without new entries, skip */
- if (engine->last_context == to &&
+ last_ppgtt =
+ engine->last_context->ppgtt ?: engine->i915->mm.aliasing_ppgtt;
+ if (last_ppgtt == ppgtt &&
!(intel_engine_flag(engine) & ppgtt->pd_dirty_rings))
return false;
@@ -750,6 +812,20 @@ needs_pd_load_post(struct i915_hw_ppgtt *ppgtt,
return false;
}
+static void flush_cpu_writes(struct drm_i915_gem_object *obj)
+{
+ if (obj->base.write_domain == 0)
+ return;
+
+ if (obj->base.write_domain & I915_GEM_DOMAIN_CPU) {
+ if (i915_gem_clflush_object(obj, false))
+ i915_gem_chipset_flush(to_i915(obj->base.dev));
+ }
+
+ wmb();
+ obj->base.write_domain = 0;
+}
+
static int do_rcs_switch(struct drm_i915_gem_request *req)
{
struct i915_gem_context *to = req->ctx;
@@ -763,8 +839,7 @@ static int do_rcs_switch(struct drm_i915_gem_request *req)
return 0;
/* Trying to pin first makes error handling easier. */
- ret = i915_gem_object_ggtt_pin(to->engine[RCS].state, NULL, 0,
- to->ggtt_alignment, 0);
+ ret = i915_vma_pin(to->engine[RCS].state, 0, 0, PIN_GLOBAL);
if (ret)
return ret;
@@ -778,18 +853,11 @@ static int do_rcs_switch(struct drm_i915_gem_request *req)
from = engine->last_context;
/*
- * Clear this page out of any CPU caches for coherent swap-in/out. Note
- * that thanks to write = false in this call and us not setting any gpu
- * write domains when putting a context object onto the active list
- * (when switching away from it), this won't block.
- *
- * XXX: We need a real interface to do this instead of trickery.
+ * Clear this page out of any CPU caches for coherent swap-in/out.
*/
- ret = i915_gem_object_set_to_gtt_domain(to->engine[RCS].state, false);
- if (ret)
- goto unpin_out;
+ flush_cpu_writes(to->engine[RCS].state->obj);
- if (needs_pd_load_pre(ppgtt, engine, to)) {
+ if (needs_pd_load_pre(ppgtt, engine)) {
/* Older GENs and non render rings still want the load first,
* "PP_DCLV followed by PP_DIR_BASE register through Load
* Register Immediate commands in Ring Buffer before submitting
@@ -797,7 +865,7 @@ static int do_rcs_switch(struct drm_i915_gem_request *req)
trace_switch_mm(engine, to);
ret = ppgtt->switch_mm(ppgtt, req);
if (ret)
- goto unpin_out;
+ goto unpin_vma;
}
if (!to->engine[RCS].initialised || i915_gem_context_is_default(to))
@@ -814,7 +882,7 @@ static int do_rcs_switch(struct drm_i915_gem_request *req)
if (to != from || (hw_flags & MI_FORCE_RESTORE)) {
ret = mi_set_context(req, hw_flags);
if (ret)
- goto unpin_out;
+ goto unpin_vma;
}
/* The backing object for the context is done after switching to the
@@ -824,8 +892,6 @@ static int do_rcs_switch(struct drm_i915_gem_request *req)
* MI_SET_CONTEXT instead of when the next seqno has completed.
*/
if (from != NULL) {
- struct drm_i915_gem_object *obj = from->engine[RCS].state;
-
/* As long as MI_SET_CONTEXT is serializing, ie. it flushes the
* whole damn pipeline, we don't need to explicitly mark the
* object dirty. The only exception is that the context must be
@@ -833,11 +899,10 @@ static int do_rcs_switch(struct drm_i915_gem_request *req)
* able to defer doing this until we know the object would be
* swapped, but there is no way to do that yet.
*/
- obj->base.read_domains = I915_GEM_DOMAIN_INSTRUCTION;
- i915_vma_move_to_active(i915_gem_obj_to_ggtt(obj), req, 0);
-
+ i915_vma_move_to_active(from->engine[RCS].state, req, 0);
/* obj is kept alive until the next request by its active ref */
- i915_gem_object_ggtt_unpin(obj);
+ i915_vma_unpin(from->engine[RCS].state);
+
i915_gem_context_put(from);
}
engine->last_context = i915_gem_context_get(to);
@@ -882,8 +947,8 @@ static int do_rcs_switch(struct drm_i915_gem_request *req)
return 0;
-unpin_out:
- i915_gem_object_ggtt_unpin(to->engine[RCS].state);
+unpin_vma:
+ i915_vma_unpin(to->engine[RCS].state);
return ret;
}
@@ -913,7 +978,7 @@ int i915_switch_context(struct drm_i915_gem_request *req)
struct i915_hw_ppgtt *ppgtt =
to->ppgtt ?: req->i915->mm.aliasing_ppgtt;
- if (needs_pd_load_pre(ppgtt, engine, to)) {
+ if (needs_pd_load_pre(ppgtt, engine)) {
int ret;
trace_switch_mm(engine, to);
@@ -955,7 +1020,7 @@ int i915_gem_switch_to_kernel_context(struct drm_i915_private *dev_priv)
return PTR_ERR(req);
ret = i915_switch_context(req);
- i915_add_request_no_flush(req);
+ i915_add_request(req);
if (ret)
return ret;
}
diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
index c60a8d5bbad0..2e62241f1e37 100644
--- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
@@ -44,51 +44,42 @@ static struct sg_table *i915_gem_map_dma_buf(struct dma_buf_attachment *attachme
struct scatterlist *src, *dst;
int ret, i;
- ret = i915_mutex_lock_interruptible(obj->base.dev);
+ ret = i915_gem_object_pin_pages(obj);
if (ret)
goto err;
- ret = i915_gem_object_get_pages(obj);
- if (ret)
- goto err_unlock;
-
- i915_gem_object_pin_pages(obj);
-
/* Copy sg so that we make an independent mapping */
st = kmalloc(sizeof(struct sg_table), GFP_KERNEL);
if (st == NULL) {
ret = -ENOMEM;
- goto err_unpin;
+ goto err_put_pages;
}
- ret = sg_alloc_table(st, obj->pages->nents, GFP_KERNEL);
+ ret = sg_alloc_table(st, obj->mm.pages->nents, GFP_KERNEL);
if (ret)
goto err_free;
- src = obj->pages->sgl;
+ src = obj->mm.pages->sgl;
dst = st->sgl;
- for (i = 0; i < obj->pages->nents; i++) {
+ for (i = 0; i < obj->mm.pages->nents; i++) {
sg_set_page(dst, sg_page(src), src->length, 0);
dst = sg_next(dst);
src = sg_next(src);
}
if (!dma_map_sg(attachment->dev, st->sgl, st->nents, dir)) {
- ret =-ENOMEM;
+ ret = -ENOMEM;
goto err_free_sg;
}
- mutex_unlock(&obj->base.dev->struct_mutex);
return st;
err_free_sg:
sg_free_table(st);
err_free:
kfree(st);
-err_unpin:
+err_put_pages:
i915_gem_object_unpin_pages(obj);
-err_unlock:
- mutex_unlock(&obj->base.dev->struct_mutex);
err:
return ERR_PTR(ret);
}
@@ -103,36 +94,21 @@ static void i915_gem_unmap_dma_buf(struct dma_buf_attachment *attachment,
sg_free_table(sg);
kfree(sg);
- mutex_lock(&obj->base.dev->struct_mutex);
i915_gem_object_unpin_pages(obj);
- mutex_unlock(&obj->base.dev->struct_mutex);
}
static void *i915_gem_dmabuf_vmap(struct dma_buf *dma_buf)
{
struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf);
- struct drm_device *dev = obj->base.dev;
- void *addr;
- int ret;
- ret = i915_mutex_lock_interruptible(dev);
- if (ret)
- return ERR_PTR(ret);
-
- addr = i915_gem_object_pin_map(obj);
- mutex_unlock(&dev->struct_mutex);
-
- return addr;
+ return i915_gem_object_pin_map(obj, !HAS_LLC(obj->base.dev));
}
static void i915_gem_dmabuf_vunmap(struct dma_buf *dma_buf, void *vaddr)
{
struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf);
- struct drm_device *dev = obj->base.dev;
- mutex_lock(&dev->struct_mutex);
i915_gem_object_unpin_map(obj);
- mutex_unlock(&dev->struct_mutex);
}
static void *i915_gem_dmabuf_kmap_atomic(struct dma_buf *dma_buf, unsigned long page_num)
@@ -179,15 +155,22 @@ static int i915_gem_begin_cpu_access(struct dma_buf *dma_buf, enum dma_data_dire
{
struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf);
struct drm_device *dev = obj->base.dev;
- int ret;
bool write = (direction == DMA_BIDIRECTIONAL || direction == DMA_TO_DEVICE);
+ int ret;
- ret = i915_mutex_lock_interruptible(dev);
+ ret = i915_gem_object_pin_pages(obj);
if (ret)
return ret;
+ ret = i915_mutex_lock_interruptible(dev);
+ if (ret)
+ goto err;
+
ret = i915_gem_object_set_to_cpu_domain(obj, write);
mutex_unlock(&dev->struct_mutex);
+
+err:
+ i915_gem_object_unpin_pages(obj);
return ret;
}
@@ -197,13 +180,19 @@ static int i915_gem_end_cpu_access(struct dma_buf *dma_buf, enum dma_data_direct
struct drm_device *dev = obj->base.dev;
int ret;
- ret = i915_mutex_lock_interruptible(dev);
+ ret = i915_gem_object_pin_pages(obj);
if (ret)
return ret;
+ ret = i915_mutex_lock_interruptible(dev);
+ if (ret)
+ goto err;
+
ret = i915_gem_object_set_to_gtt_domain(obj, false);
mutex_unlock(&dev->struct_mutex);
+err:
+ i915_gem_object_unpin_pages(obj);
return ret;
}
@@ -291,22 +280,18 @@ struct dma_buf *i915_gem_prime_export(struct drm_device *dev,
return dma_buf;
}
-static int i915_gem_object_get_pages_dmabuf(struct drm_i915_gem_object *obj)
+static struct sg_table *
+i915_gem_object_get_pages_dmabuf(struct drm_i915_gem_object *obj)
{
- struct sg_table *sg;
-
- sg = dma_buf_map_attachment(obj->base.import_attach, DMA_BIDIRECTIONAL);
- if (IS_ERR(sg))
- return PTR_ERR(sg);
-
- obj->pages = sg;
- return 0;
+ return dma_buf_map_attachment(obj->base.import_attach,
+ DMA_BIDIRECTIONAL);
}
-static void i915_gem_object_put_pages_dmabuf(struct drm_i915_gem_object *obj)
+static void i915_gem_object_put_pages_dmabuf(struct drm_i915_gem_object *obj,
+ struct sg_table *pages)
{
- dma_buf_unmap_attachment(obj->base.import_attach,
- obj->pages, DMA_BIDIRECTIONAL);
+ dma_buf_unmap_attachment(obj->base.import_attach, pages,
+ DMA_BIDIRECTIONAL);
}
static const struct drm_i915_gem_object_ops i915_gem_object_dmabuf_ops = {
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
index f76c06e92677..7e27a59f5069 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -26,6 +26,8 @@
*
*/
+#include <linux/random.h>
+
#include <drm/drmP.h>
#include <drm/i915_drm.h>
@@ -46,17 +48,32 @@ gpu_is_idle(struct drm_i915_private *dev_priv)
return true;
}
-static bool
-mark_free(struct i915_vma *vma, struct list_head *unwind)
+static u64
+randomize_offset(u64 start, u64 end, u64 len, u64 align)
{
- if (i915_vma_is_pinned(vma))
- return false;
+ u64 range = end - len - start;
+ u64 addr;
+
+ if (align == 0)
+ align = 4096;
+
+ if (range) {
+ if (sizeof(unsigned long) == sizeof(u64)) {
+ addr = get_random_long();
+ } else {
+ addr = get_random_int();
+ if (range > 0xffffffff) {
+ addr <<= 32;
+ addr |= get_random_int();
+ }
+ }
- if (WARN_ON(!list_empty(&vma->exec_list)))
- return false;
+ div64_u64_rem(addr, range, &addr);
+ } else {
+ addr = 0;
+ }
- list_add(&vma->exec_list, unwind);
- return drm_mm_scan_add_block(&vma->node);
+ return (addr + start) & -align;
}
/**
@@ -84,12 +101,14 @@ mark_free(struct i915_vma *vma, struct list_head *unwind)
*/
int
i915_gem_evict_something(struct i915_address_space *vm,
- u64 min_size, u64 alignment,
+ struct drm_mm_node *node,
+ u64 size, u64 alignment,
unsigned cache_level,
u64 start, u64 end,
unsigned flags)
{
struct drm_i915_private *dev_priv = to_i915(vm->dev);
+ struct drm_mm_scan scan;
struct list_head eviction_list;
struct list_head *phases[] = {
&vm->inactive_list,
@@ -99,7 +118,24 @@ i915_gem_evict_something(struct i915_address_space *vm,
struct i915_vma *vma, *next;
int ret;
- trace_i915_gem_evict(vm, min_size, alignment, flags);
+ trace_i915_gem_evict(vm, size, alignment, flags);
+ lockdep_assert_held(&vm->dev->struct_mutex);
+
+ if (flags & PIN_MAPPABLE) {
+ node->start = randomize_offset(start, end, size, alignment);
+ node->size = size;
+ node->color = cache_level;
+ if (node->start + size <= end) {
+ ret = i915_gem_evict_for_node(vm, node, flags);
+ if (ret == 0)
+ ret = drm_mm_reserve_node(&vm->mm, node);
+ if (ret == 0)
+ return 0;
+ }
+ }
+
+ if (flags & PIN_GLOBAL)
+ i915_gem_retire_requests(dev_priv);
/*
* The goal is to evict objects and amalgamate space in LRU order.
@@ -114,12 +150,9 @@ i915_gem_evict_something(struct i915_address_space *vm,
* On each list, the oldest objects lie at the HEAD with the freshest
* object on the TAIL.
*/
- if (start != 0 || end != vm->total) {
- drm_mm_init_scan_with_range(&vm->mm, min_size,
- alignment, cache_level,
- start, end);
- } else
- drm_mm_init_scan(&vm->mm, min_size, alignment, cache_level);
+ drm_mm_init_scan_with_range(&scan, size, alignment, cache_level,
+ start, end,
+ flags & PIN_HIGH ? DRM_MM_INSERT_HIGH : 0);
if (flags & PIN_NONBLOCK)
phases[1] = NULL;
@@ -128,17 +161,29 @@ search_again:
INIT_LIST_HEAD(&eviction_list);
phase = phases;
do {
- list_for_each_entry(vma, *phase, vm_link)
- if (mark_free(vma, &eviction_list))
+ list_for_each_entry(vma, *phase, vm_link) {
+ if (vma->node.start >= end)
+ continue;
+
+ if (vma->node.start + vma->node.size <= start)
+ continue;
+
+ if (i915_vma_is_pinned(vma))
+ continue;
+
+ if (flags & PIN_NOFAULT && vma->obj->fault_mappable)
+ continue;
+
+ list_add(&vma->evict_link, &eviction_list);
+ if (drm_mm_scan_add_block(&scan, &vma->node))
goto found;
+ }
} while (*++phase);
/* Nothing found, clean up and bail out! */
- list_for_each_entry_safe(vma, next, &eviction_list, exec_list) {
- ret = drm_mm_scan_remove_block(&vma->node);
+ list_for_each_entry_safe(vma, next, &eviction_list, evict_link) {
+ ret = drm_mm_scan_remove_block(&scan, &vma->node);
BUG_ON(ret);
-
- INIT_LIST_HEAD(&vma->exec_list);
}
/* Can we unpin some objects such as idle hw contents,
@@ -157,6 +202,11 @@ search_again:
return intel_has_pending_fb_unpin(vm->dev) ? -EAGAIN : -ENOSPC;
}
+ /* This is quite time consuming with large GTT */
+ cond_resched();
+ if (signal_pending(current))
+ return -ERESTARTSYS;
+
/* Not everything in the GGTT is tracked via vma (otherwise we
* could evict as required with minimal stalling) so we are forced
* to idle the GPU and explicitly retire outstanding requests in
@@ -172,6 +222,13 @@ search_again:
return ret;
i915_gem_retire_requests(dev_priv);
+
+ if (drm_mm_insert_node_in_range_generic(&vm->mm, node,
+ size, alignment, cache_level,
+ start, end,
+ flags & PIN_HIGH ? DRM_MM_INSERT_HIGH : 0) == 0)
+ return 0;
+
goto search_again;
found:
@@ -181,64 +238,106 @@ found:
* calling unbind (which may remove the active reference
* of any of our objects, thus corrupting the list).
*/
- list_for_each_entry_safe(vma, next, &eviction_list, exec_list) {
- if (drm_mm_scan_remove_block(&vma->node))
+ list_for_each_entry_safe(vma, next, &eviction_list, evict_link) {
+ if (drm_mm_scan_remove_block(&scan, &vma->node))
__i915_vma_pin(vma);
else
- list_del_init(&vma->exec_list);
+ list_del(&vma->evict_link);
}
/* Unbinding will emit any required flushes */
- while (!list_empty(&eviction_list)) {
- vma = list_first_entry(&eviction_list,
- struct i915_vma,
- exec_list);
-
- list_del_init(&vma->exec_list);
+ ret = 0;
+ list_for_each_entry_safe(vma, next, &eviction_list, evict_link) {
__i915_vma_unpin(vma);
if (ret == 0)
ret = i915_vma_unbind(vma);
}
+ if (ret == 0)
+ ret = drm_mm_insert_node_in_range_generic(&vm->mm, node,
+ size, alignment,
+ cache_level,
+ start, end,
+ DRM_MM_INSERT_EVICT);
return ret;
}
-int
-i915_gem_evict_for_vma(struct i915_vma *target)
+int i915_gem_evict_for_node(struct i915_address_space *vm,
+ struct drm_mm_node *target,
+ unsigned int flags)
{
- struct drm_mm_node *node, *next;
+ struct list_head eviction_list;
+ struct drm_mm_node *node;
+ u64 start = target->start;
+ u64 end = start + target->size - 1;
+ struct i915_vma *vma, *next;
+ bool check_snoop;
+ int ret = 0;
+
+ trace_i915_gem_evict_node(vm, target, flags);
+ lockdep_assert_held(&vm->dev->struct_mutex);
+
+ check_snoop = vm->mm.color_adjust;
+ if (check_snoop) {
+ if (start > vm->start)
+ start -= 4096;
+ if (end < vm->start + vm->total)
+ end += 4096;
+ }
- list_for_each_entry_safe(node, next,
- &target->vm->mm.head_node.node_list,
- node_list) {
- struct i915_vma *vma;
- int ret;
+ node = drm_mm_interval_first(&vm->mm, start, end);
+ if (!node)
+ return 0;
- if (node->start + node->size <= target->node.start)
- continue;
- if (node->start >= target->node.start + target->node.size)
+ INIT_LIST_HEAD(&eviction_list);
+ vma = container_of(node, typeof(*vma), node);
+ list_for_each_entry_from(vma,
+ &vm->mm.head_node.node_list,
+ node.node_list) {
+ if (vma->node.start > end)
break;
- vma = container_of(node, typeof(*vma), node);
+ if (check_snoop) {
+ if (vma->node.start + vma->node.size == target->start) {
+ if (vma->node.color == target->color)
+ continue;
+ }
+ if (vma->node.start == target->start + target->size) {
+ if (vma->node.color == target->color)
+ continue;
+ }
+ }
- if (i915_vma_is_pinned(vma)) {
- if (!vma->exec_entry || i915_vma_pin_count(vma) > 1)
- /* Object is pinned for some other use */
- return -EBUSY;
+ if (vma->node.color == -1) {
+ ret = -ENOSPC;
+ break;
+ }
- /* We need to evict a buffer in the same batch */
- if (vma->exec_entry->flags & EXEC_OBJECT_PINNED)
- /* Overlapping fixed objects in the same batch */
- return -EINVAL;
+ if (flags & PIN_NONBLOCK &&
+ (i915_vma_is_pinned(vma) || i915_vma_is_active(vma))) {
+ ret = -ENOSPC;
+ break;
+ }
- return -ENOSPC;
+ /* Overlap of objects in the same batch? */
+ if (i915_vma_is_pinned(vma)) {
+ ret = -ENOSPC;
+ if (vma->exec_entry &&
+ vma->exec_entry->flags & EXEC_OBJECT_PINNED)
+ ret = -EINVAL;
+ break;
}
- ret = i915_vma_unbind(vma);
- if (ret)
- return ret;
+ __i915_vma_pin(vma);
+ list_add(&vma->evict_link, &eviction_list);
+ }
+
+ list_for_each_entry_safe(vma, next, &eviction_list, evict_link) {
+ __i915_vma_unpin(vma);
+ if (ret == 0)
+ ret = i915_vma_unbind(vma);
}
- return 0;
+ return ret;
}
/**
@@ -255,34 +354,47 @@ i915_gem_evict_for_vma(struct i915_vma *target)
* To clarify: This is for freeing up virtual address space, not for freeing
* memory in e.g. the shrinker.
*/
-int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle)
+int i915_gem_evict_vm(struct i915_address_space *vm)
{
- struct i915_vma *vma, *next;
- int ret;
+ struct list_head *phases[] = {
+ &vm->inactive_list,
+ &vm->active_list,
+ NULL
+ }, **phase;
+ struct list_head eviction_list;
+ struct i915_vma *vma, *vn;
+ int ret = 0;
- WARN_ON(!mutex_is_locked(&vm->dev->struct_mutex));
trace_i915_gem_evict_vm(vm);
+ lockdep_assert_held(&vm->dev->struct_mutex);
- if (do_idle) {
- struct drm_i915_private *dev_priv = to_i915(vm->dev);
-
- if (i915_is_ggtt(vm)) {
- ret = i915_gem_switch_to_kernel_context(dev_priv);
- if (ret)
- return ret;
- }
-
- ret = i915_gem_wait_for_idle(dev_priv, true);
+ /* Switch back to the default context in order to unpin
+ * the existing context objects. However, such objects only
+ * pin themselves inside the global GTT and performing the
+ * switch otherwise is ineffective.
+ */
+ if (i915_is_ggtt(vm)) {
+ ret = i915_gem_switch_to_kernel_context(to_i915(vm->dev));
if (ret)
return ret;
-
- i915_gem_retire_requests(dev_priv);
- WARN_ON(!list_empty(&vm->active_list));
}
- list_for_each_entry_safe(vma, next, &vm->inactive_list, vm_link)
- if (!i915_vma_is_pinned(vma))
- WARN_ON(i915_vma_unbind(vma));
+ INIT_LIST_HEAD(&eviction_list);
+ phase = phases;
+ do {
+ list_for_each_entry_safe(vma, vn, *phase, vm_link) {
+ if (i915_vma_is_pinned(vma))
+ continue;
+
+ __i915_vma_pin(vma);
+ list_add(&vma->evict_link, &eviction_list);
+ }
+ } while (*++phase);
- return 0;
+ list_for_each_entry_safe(vma, vn, &eviction_list, evict_link) {
+ __i915_vma_unpin(vma);
+ if (ret == 0)
+ ret = i915_vma_unbind(vma);
+ }
+ return ret;
}
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index c494b79ded20..f72e8e68e394 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -28,6 +28,7 @@
#include <linux/dma_remapping.h>
#include <linux/reservation.h>
+#include <linux/sync_file.h>
#include <linux/uaccess.h>
#include <drm/drmP.h>
@@ -39,455 +40,903 @@
#include "intel_drv.h"
#include "intel_frontbuffer.h"
-#define __EXEC_OBJECT_HAS_PIN (1<<31)
-#define __EXEC_OBJECT_HAS_FENCE (1<<30)
-#define __EXEC_OBJECT_NEEDS_MAP (1<<29)
-#define __EXEC_OBJECT_NEEDS_BIAS (1<<28)
+#define DBG_USE_CPU_RELOC 0 /* force relocations to use the CPU write path */
+
+#define __EXEC_OBJECT_HAS_PIN BIT(31)
+#define __EXEC_OBJECT_HAS_FENCE BIT(30)
+#define __EXEC_OBJECT_NEEDS_MAP BIT(29)
+#define __EXEC_OBJECT_NEEDS_BIAS BIT(28)
#define __EXEC_OBJECT_INTERNAL_FLAGS (0xf<<28) /* all of the above */
+#define __EB_RESERVED (__EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_FENCE)
+
+#define __EXEC_HAS_RELOC BIT(31)
+#define __EXEC_VALIDATED BIT(30)
+#define UPDATE PIN_OFFSET_FIXED
#define BATCH_OFFSET_BIAS (256*1024)
-struct i915_execbuffer_params {
- struct drm_device *dev;
- struct drm_file *file;
- struct i915_vma *batch;
- u32 dispatch_flags;
- u32 args_batch_start_offset;
- struct intel_engine_cs *engine;
- struct i915_gem_context *ctx;
- struct drm_i915_gem_request *request;
+struct i915_execbuffer {
+ struct drm_i915_private *i915;
+ struct drm_file *file;
+ struct drm_i915_gem_execbuffer2 *args;
+ struct drm_i915_gem_exec_object2 *exec;
+ struct intel_engine_cs *engine;
+ struct i915_gem_context *ctx;
+ struct i915_address_space *vm;
+ struct i915_vma *batch;
+ struct drm_i915_gem_request *request;
+ struct list_head unbound;
+ struct list_head relocs;
+ struct reloc_cache {
+ struct drm_mm_node node;
+ unsigned long vaddr;
+ unsigned int page;
+ bool use_64bit_reloc;
+ bool has_llc;
+ bool has_fence;
+ } reloc_cache;
+ u64 invalid_flags;
+ u32 context_flags;
+ u32 dispatch_flags;
+ int lut_mask;
+ struct hlist_head *buckets;
};
-struct eb_vmas {
- struct list_head vmas;
- int and;
- union {
- struct i915_vma *lut[0];
- struct hlist_head buckets[0];
- };
-};
+#define to_ptr(T, x) ((T *)(uintptr_t)(x))
-static struct eb_vmas *
-eb_create(struct drm_i915_gem_execbuffer2 *args)
+/* Used to convert any address to canonical form.
+ * Starting from gen8, some commands (e.g. STATE_BASE_ADDRESS,
+ * MI_LOAD_REGISTER_MEM and others, see Broadwell PRM Vol2a) require the
+ * addresses to be in a canonical form:
+ * "GraphicsAddress[63:48] are ignored by the HW and assumed to be in correct
+ * canonical form [63:48] == [47]."
+ */
+#define GEN8_HIGH_ADDRESS_BIT 47
+static inline u64 gen8_canonical_addr(u64 address)
{
- struct eb_vmas *eb = NULL;
-
- if (args->flags & I915_EXEC_HANDLE_LUT) {
- unsigned size = args->buffer_count;
- size *= sizeof(struct i915_vma *);
- size += sizeof(struct eb_vmas);
- eb = kmalloc(size, GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY);
- }
-
- if (eb == NULL) {
- unsigned size = args->buffer_count;
- unsigned count = PAGE_SIZE / sizeof(struct hlist_head) / 2;
- BUILD_BUG_ON_NOT_POWER_OF_2(PAGE_SIZE / sizeof(struct hlist_head));
- while (count > 2*size)
- count >>= 1;
- eb = kzalloc(count*sizeof(struct hlist_head) +
- sizeof(struct eb_vmas),
- GFP_TEMPORARY);
- if (eb == NULL)
- return eb;
-
- eb->and = count - 1;
- } else
- eb->and = -args->buffer_count;
-
- INIT_LIST_HEAD(&eb->vmas);
- return eb;
+ return sign_extend64(address, GEN8_HIGH_ADDRESS_BIT);
}
-static void
-eb_reset(struct eb_vmas *eb)
+static inline u64 gen8_noncanonical_addr(u64 address)
{
- if (eb->and >= 0)
- memset(eb->buckets, 0, (eb->and+1)*sizeof(struct hlist_head));
+ return address & ((1ULL << (GEN8_HIGH_ADDRESS_BIT + 1)) - 1);
}
-static struct i915_vma *
-eb_get_batch(struct eb_vmas *eb)
+static int
+eb_create(struct i915_execbuffer *eb)
{
- struct i915_vma *vma = list_entry(eb->vmas.prev, typeof(*vma), exec_list);
+ if ((eb->args->flags & I915_EXEC_HANDLE_LUT) == 0) {
+ unsigned int size = 1 + ilog2(eb->args->buffer_count);
- /*
- * SNA is doing fancy tricks with compressing batch buffers, which leads
- * to negative relocation deltas. Usually that works out ok since the
- * relocate address is still positive, except when the batch is placed
- * very low in the GTT. Ensure this doesn't happen.
- *
- * Note that actual hangs have only been observed on gen7, but for
- * paranoia do it everywhere.
- */
- if ((vma->exec_entry->flags & EXEC_OBJECT_PINNED) == 0)
- vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS;
+ do {
+ eb->buckets = kzalloc(sizeof(struct hlist_head) << size,
+ GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY);
+ if (eb->buckets)
+ break;
+ } while (--size);
- return vma;
+ if (unlikely(!eb->buckets)) {
+ eb->buckets = kzalloc(sizeof(struct hlist_head),
+ GFP_TEMPORARY);
+ if (unlikely(!eb->buckets))
+ return -ENOMEM;
+ }
+
+ eb->lut_mask = size;
+ } else
+ eb->lut_mask = -eb->args->buffer_count;
+
+ return 0;
}
-static int
-eb_lookup_vmas(struct eb_vmas *eb,
- struct drm_i915_gem_exec_object2 *exec,
- const struct drm_i915_gem_execbuffer2 *args,
- struct i915_address_space *vm,
- struct drm_file *file)
+static bool
+eb_vma_misplaced(const struct drm_i915_gem_exec_object2 *entry,
+ const struct i915_vma *vma)
{
- struct drm_i915_gem_object *obj;
- struct list_head objects;
- int i, ret;
+ if ((entry->flags & __EXEC_OBJECT_HAS_PIN) == 0)
+ return true;
- INIT_LIST_HEAD(&objects);
- spin_lock(&file->table_lock);
- /* Grab a reference to the object and release the lock so we can lookup
- * or create the VMA without using GFP_ATOMIC */
- for (i = 0; i < args->buffer_count; i++) {
- obj = to_intel_bo(idr_find(&file->object_idr, exec[i].handle));
- if (obj == NULL) {
- spin_unlock(&file->table_lock);
- DRM_DEBUG("Invalid object handle %d at index %d\n",
- exec[i].handle, i);
- ret = -ENOENT;
- goto err;
- }
+ if (vma->node.size < entry->pad_to_size)
+ return true;
- if (!list_empty(&obj->obj_exec_link)) {
- spin_unlock(&file->table_lock);
- DRM_DEBUG("Object %p [handle %d, index %d] appears more than once in object list\n",
- obj, exec[i].handle, i);
- ret = -EINVAL;
- goto err;
- }
+ if (entry->alignment && vma->node.start & (entry->alignment - 1))
+ return true;
- i915_gem_object_get(obj);
- list_add_tail(&obj->obj_exec_link, &objects);
- }
- spin_unlock(&file->table_lock);
+ if (entry->flags & EXEC_OBJECT_PINNED &&
+ vma->node.start != entry->offset)
+ return true;
- i = 0;
- while (!list_empty(&objects)) {
- struct i915_vma *vma;
+ if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS &&
+ vma->node.start < BATCH_OFFSET_BIAS)
+ return true;
- obj = list_first_entry(&objects,
- struct drm_i915_gem_object,
- obj_exec_link);
+ if ((entry->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) == 0 &&
+ (vma->node.start + vma->node.size - 1) >> 32)
+ return true;
- /*
- * NOTE: We can leak any vmas created here when something fails
- * later on. But that's no issue since vma_unbind can deal with
- * vmas which are not actually bound. And since only
- * lookup_or_create exists as an interface to get at the vma
- * from the (obj, vm) we don't run the risk of creating
- * duplicated vmas for the same vm.
- */
- vma = i915_gem_obj_lookup_or_create_vma(obj, vm);
- if (IS_ERR(vma)) {
- DRM_DEBUG("Failed to lookup VMA\n");
- ret = PTR_ERR(vma);
- goto err;
- }
+ return false;
+}
- /* Transfer ownership from the objects list to the vmas list. */
- list_add_tail(&vma->exec_list, &eb->vmas);
- list_del_init(&obj->obj_exec_link);
+static void
+eb_pin_vma(struct i915_execbuffer *eb,
+ struct drm_i915_gem_exec_object2 *entry,
+ struct i915_vma *vma)
+{
+ u64 flags;
- vma->exec_entry = &exec[i];
- if (eb->and < 0) {
- eb->lut[i] = vma;
- } else {
- uint32_t handle = args->flags & I915_EXEC_HANDLE_LUT ? i : exec[i].handle;
- vma->exec_handle = handle;
- hlist_add_head(&vma->exec_node,
- &eb->buckets[handle & eb->and]);
- }
- ++i;
- }
+ flags = entry->offset & PIN_OFFSET_MASK;
+ if (vma->node.size && flags != vma->node.start)
+ flags = vma->node.start | PIN_NOEVICT;
- return 0;
+ flags |= PIN_USER | PIN_NONBLOCK | PIN_OFFSET_FIXED;
+ if (unlikely(entry->flags & EXEC_OBJECT_NEEDS_GTT))
+ flags |= PIN_GLOBAL;
+ if (unlikely(i915_vma_pin(vma, 0, 0, flags)))
+ return;
-err:
- while (!list_empty(&objects)) {
- obj = list_first_entry(&objects,
- struct drm_i915_gem_object,
- obj_exec_link);
- list_del_init(&obj->obj_exec_link);
- i915_gem_object_put(obj);
+ if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
+ if (unlikely(i915_vma_get_fence(vma))) {
+ i915_vma_unpin(vma);
+ return;
+ }
+
+ if (i915_vma_pin_fence(vma))
+ entry->flags |= __EXEC_OBJECT_HAS_FENCE;
}
- /*
- * Objects already transfered to the vmas list will be unreferenced by
- * eb_destroy.
- */
- return ret;
+ entry->flags |= __EXEC_OBJECT_HAS_PIN;
}
-static struct i915_vma *eb_get_vma(struct eb_vmas *eb, unsigned long handle)
+static inline void
+__eb_unreserve_vma(struct i915_vma *vma,
+ const struct drm_i915_gem_exec_object2 *entry)
{
- if (eb->and < 0) {
- if (handle >= -eb->and)
- return NULL;
- return eb->lut[handle];
- } else {
- struct hlist_head *head;
- struct i915_vma *vma;
+ GEM_BUG_ON((entry->flags & __EXEC_OBJECT_HAS_PIN) == 0);
- head = &eb->buckets[handle & eb->and];
- hlist_for_each_entry(vma, head, exec_node) {
- if (vma->exec_handle == handle)
- return vma;
- }
- return NULL;
+ if (unlikely(entry->flags & __EXEC_OBJECT_HAS_FENCE))
+ i915_vma_unpin_fence(vma);
+
+ __i915_vma_unpin(vma);
+}
+
+static inline void
+eb_unreserve_vma(struct i915_vma *vma,
+ struct drm_i915_gem_exec_object2 *entry)
+{
+ if (entry->flags & __EXEC_OBJECT_HAS_PIN) {
+ __eb_unreserve_vma(vma, entry);
+ entry->flags &= ~__EB_RESERVED;
}
}
-static void
-i915_gem_execbuffer_unreserve_vma(struct i915_vma *vma)
+static int
+eb_add_vma(struct i915_execbuffer *eb,
+ struct drm_i915_gem_exec_object2 *entry,
+ struct i915_vma *vma)
{
- struct drm_i915_gem_exec_object2 *entry;
- struct drm_i915_gem_object *obj = vma->obj;
+ int ret;
- if (!drm_mm_node_allocated(&vma->node))
- return;
+ GEM_BUG_ON(i915_vma_is_closed(vma));
- entry = vma->exec_entry;
+ if ((eb->args->flags & __EXEC_VALIDATED) == 0) {
+ if (unlikely(entry->flags & eb->invalid_flags))
+ return -EINVAL;
- if (entry->flags & __EXEC_OBJECT_HAS_FENCE)
- i915_gem_object_unpin_fence(obj);
+ if (unlikely(entry->alignment && !is_power_of_2(entry->alignment)))
+ return -EINVAL;
- if (entry->flags & __EXEC_OBJECT_HAS_PIN)
- __i915_vma_unpin(vma);
+ /* Offset can be used as input (EXEC_OBJECT_PINNED), reject
+ * any non-page-aligned or non-canonical addresses.
+ */
+ if (entry->flags & EXEC_OBJECT_PINNED) {
+ if (unlikely(entry->offset !=
+ gen8_canonical_addr(entry->offset & PAGE_MASK)))
+ return -EINVAL;
+ }
- entry->flags &= ~(__EXEC_OBJECT_HAS_FENCE | __EXEC_OBJECT_HAS_PIN);
-}
+ /* From drm_mm perspective address space is continuous,
+ * so from this point we're always using non-canonical
+ * form internally.
+ */
+ entry->offset = gen8_noncanonical_addr(entry->offset);
-static void eb_destroy(struct eb_vmas *eb)
-{
- while (!list_empty(&eb->vmas)) {
- struct i915_vma *vma;
+ /* pad_to_size was once a reserved field, so sanitize it */
+ if (entry->flags & EXEC_OBJECT_PAD_TO_SIZE) {
+ if (unlikely(offset_in_page(entry->pad_to_size)))
+ return -EINVAL;
+ } else {
+ entry->pad_to_size = 0;
+ }
- vma = list_first_entry(&eb->vmas,
- struct i915_vma,
- exec_list);
- list_del_init(&vma->exec_list);
- i915_gem_execbuffer_unreserve_vma(vma);
- i915_gem_object_put(vma->obj);
+ if (unlikely(vma->exec_entry)) {
+ DRM_DEBUG("Object [handle %d, index %d] appears more than once in object list\n",
+ entry->handle, (int)(entry - eb->exec));
+ return -EINVAL;
+ }
}
- kfree(eb);
-}
-static inline int use_cpu_reloc(struct drm_i915_gem_object *obj)
-{
- return (HAS_LLC(obj->base.dev) ||
- obj->base.write_domain == I915_GEM_DOMAIN_CPU ||
- obj->cache_level != I915_CACHE_NONE);
-}
+ vma->exec_entry = entry;
+ entry->rsvd2 = (uintptr_t)vma;
-/* Used to convert any address to canonical form.
- * Starting from gen8, some commands (e.g. STATE_BASE_ADDRESS,
- * MI_LOAD_REGISTER_MEM and others, see Broadwell PRM Vol2a) require the
- * addresses to be in a canonical form:
- * "GraphicsAddress[63:48] are ignored by the HW and assumed to be in correct
- * canonical form [63:48] == [47]."
- */
-#define GEN8_HIGH_ADDRESS_BIT 47
-static inline uint64_t gen8_canonical_addr(uint64_t address)
-{
- return sign_extend64(address, GEN8_HIGH_ADDRESS_BIT);
-}
+ if (eb->lut_mask >= 0) {
+ vma->exec_handle = entry->handle;
+ hlist_add_head(&vma->exec_node,
+ &eb->buckets[hash_32(entry->handle,
+ eb->lut_mask)]);
+ }
-static inline uint64_t gen8_noncanonical_addr(uint64_t address)
-{
- return address & ((1ULL << (GEN8_HIGH_ADDRESS_BIT + 1)) - 1);
+ if (entry->relocation_count)
+ list_add_tail(&vma->reloc_link, &eb->relocs);
+
+ if (!eb->reloc_cache.has_fence) {
+ entry->flags &= ~EXEC_OBJECT_NEEDS_FENCE;
+ } else {
+ if (entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
+ i915_gem_object_is_tiled(vma->obj))
+ entry->flags |= EXEC_OBJECT_NEEDS_GTT | __EXEC_OBJECT_NEEDS_MAP;
+ }
+
+ if ((entry->flags & EXEC_OBJECT_PINNED) == 0)
+ entry->flags |= eb->context_flags;
+
+ ret = 0;
+ eb_pin_vma(eb, entry, vma);
+ if (eb_vma_misplaced(entry, vma)) {
+ eb_unreserve_vma(vma, entry);
+
+ list_add_tail(&vma->exec_link, &eb->unbound);
+ if (drm_mm_node_allocated(&vma->node))
+ ret = i915_vma_unbind(vma);
+ } else {
+ if (entry->offset != vma->node.start) {
+ entry->offset = vma->node.start | UPDATE;
+ eb->args->flags |= __EXEC_HAS_RELOC;
+ }
+ }
+ return ret;
}
-static inline uint64_t
-relocation_target(struct drm_i915_gem_relocation_entry *reloc,
- uint64_t target_offset)
+static inline int use_cpu_reloc(const struct reloc_cache *cache,
+ const struct drm_i915_gem_object *obj)
{
- return gen8_canonical_addr((int)reloc->delta + target_offset);
+ if (!i915_gem_object_has_struct_page(obj))
+ return false;
+
+ if (DBG_USE_CPU_RELOC)
+ return DBG_USE_CPU_RELOC > 0;
+
+ return (cache->has_llc ||
+ obj->base.write_domain == I915_GEM_DOMAIN_CPU ||
+ obj->cache_level != I915_CACHE_NONE);
}
static int
-relocate_entry_cpu(struct drm_i915_gem_object *obj,
- struct drm_i915_gem_relocation_entry *reloc,
- uint64_t target_offset)
+eb_reserve_vma(struct i915_execbuffer *eb, struct i915_vma *vma)
{
- struct drm_device *dev = obj->base.dev;
- uint32_t page_offset = offset_in_page(reloc->offset);
- uint64_t delta = relocation_target(reloc, target_offset);
- char *vaddr;
+ struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
+ u64 flags;
int ret;
- ret = i915_gem_object_set_to_cpu_domain(obj, true);
- if (ret)
- return ret;
+ flags = PIN_USER | PIN_NONBLOCK;
+ if (entry->flags & EXEC_OBJECT_NEEDS_GTT)
+ flags |= PIN_GLOBAL;
+
+ if (!drm_mm_node_allocated(&vma->node)) {
+ /* Wa32bitGeneralStateOffset & Wa32bitInstructionBaseOffset,
+ * limit address to the first 4GBs for unflagged objects.
+ */
+ if ((entry->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) == 0)
+ flags |= PIN_ZONE_4G;
- vaddr = kmap_atomic(i915_gem_object_get_dirty_page(obj,
- reloc->offset >> PAGE_SHIFT));
- *(uint32_t *)(vaddr + page_offset) = lower_32_bits(delta);
+ if (entry->flags & __EXEC_OBJECT_NEEDS_MAP)
+ flags |= PIN_MAPPABLE;
- if (INTEL_INFO(dev)->gen >= 8) {
- page_offset = offset_in_page(page_offset + sizeof(uint32_t));
+ if (entry->flags & EXEC_OBJECT_PINNED) {
+ flags |= entry->offset | PIN_OFFSET_FIXED;
+ /* force overlapping PINNED checks */
+ flags &= ~PIN_NONBLOCK;
+ } else if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS)
+ flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS;
+ }
- if (page_offset == 0) {
- kunmap_atomic(vaddr);
- vaddr = kmap_atomic(i915_gem_object_get_dirty_page(obj,
- (reloc->offset + sizeof(uint32_t)) >> PAGE_SHIFT));
- }
+ ret = i915_vma_pin(vma, entry->pad_to_size, entry->alignment, flags);
+ if (ret)
+ return ret;
- *(uint32_t *)(vaddr + page_offset) = upper_32_bits(delta);
+ if (entry->offset != vma->node.start) {
+ entry->offset = vma->node.start | UPDATE;
+ eb->args->flags |= __EXEC_HAS_RELOC;
}
+ entry->flags |= __EXEC_OBJECT_HAS_PIN;
+
+ if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
+ ret = i915_vma_get_fence(vma);
+ if (ret)
+ return ret;
- kunmap_atomic(vaddr);
+ if (i915_vma_pin_fence(vma))
+ entry->flags |= __EXEC_OBJECT_HAS_FENCE;
+ }
+ GEM_BUG_ON(eb_vma_misplaced(entry, vma));
return 0;
}
-static int
-relocate_entry_gtt(struct drm_i915_gem_object *obj,
- struct drm_i915_gem_relocation_entry *reloc,
- uint64_t target_offset)
+static int eb_reserve(struct i915_execbuffer *eb)
{
- struct drm_device *dev = obj->base.dev;
- struct drm_i915_private *dev_priv = to_i915(dev);
- struct i915_ggtt *ggtt = &dev_priv->ggtt;
- uint64_t delta = relocation_target(reloc, target_offset);
- uint64_t offset;
- void __iomem *reloc_page;
+ const unsigned int count = eb->args->buffer_count;
+ struct list_head last;
+ struct i915_vma *vma;
+ unsigned int i, pass;
int ret;
- ret = i915_gem_object_set_to_gtt_domain(obj, true);
- if (ret)
- return ret;
-
- ret = i915_gem_object_put_fence(obj);
- if (ret)
- return ret;
+ /* Attempt to pin all of the buffers into the GTT.
+ * This is done in 3 phases:
+ *
+ * 1a. Unbind all objects that do not match the GTT constraints for
+ * the execbuffer (fenceable, mappable, alignment etc).
+ * 1b. Increment pin count for already bound objects.
+ * 2. Bind new objects.
+ * 3. Decrement pin count.
+ *
+ * This avoid unnecessary unbinding of later objects in order to make
+ * room for the earlier objects *unless* we need to defragment.
+ */
- /* Map the page containing the relocation we're going to perform. */
- offset = i915_gem_obj_ggtt_offset(obj);
- offset += reloc->offset;
- reloc_page = io_mapping_map_atomic_wc(ggtt->mappable,
- offset & PAGE_MASK);
- iowrite32(lower_32_bits(delta), reloc_page + offset_in_page(offset));
-
- if (INTEL_INFO(dev)->gen >= 8) {
- offset += sizeof(uint32_t);
-
- if (offset_in_page(offset) == 0) {
- io_mapping_unmap_atomic(reloc_page);
- reloc_page =
- io_mapping_map_atomic_wc(ggtt->mappable,
- offset);
+ pass = 0;
+ ret = 0;
+ do {
+ list_for_each_entry(vma, &eb->unbound, exec_link) {
+ ret = eb_reserve_vma(eb, vma);
+ if (ret)
+ break;
}
+ if (ret != -ENOSPC || pass++)
+ return ret;
- iowrite32(upper_32_bits(delta),
- reloc_page + offset_in_page(offset));
- }
+ /* Resort *all* the objects into priority order */
+ INIT_LIST_HEAD(&eb->unbound);
+ INIT_LIST_HEAD(&last);
+ for (i = 0; i < count; i++) {
+ struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
- io_mapping_unmap_atomic(reloc_page);
+ vma = to_ptr(struct i915_vma, entry->rsvd2);
+ eb_unreserve_vma(vma, entry);
- return 0;
+ if (entry->flags & EXEC_OBJECT_PINNED)
+ list_add(&vma->exec_link, &eb->unbound);
+ else if (entry->flags & __EXEC_OBJECT_NEEDS_MAP)
+ list_add_tail(&vma->exec_link, &eb->unbound);
+ else
+ list_add_tail(&vma->exec_link, &last);
+ }
+ list_splice_tail(&last, &eb->unbound);
+
+ /* Too fragmented, unbind everything and retry */
+ ret = i915_gem_evict_vm(eb->vm);
+ if (ret)
+ return ret;
+ } while (1);
}
-static void
-clflush_write32(void *addr, uint32_t value)
+static inline struct hlist_head *
+ht_head(const struct i915_gem_context *ctx, u32 handle)
{
- /* This is not a fast path, so KISS. */
- drm_clflush_virt_range(addr, sizeof(uint32_t));
- *(uint32_t *)addr = value;
- drm_clflush_virt_range(addr, sizeof(uint32_t));
+ return &ctx->vma.ht[hash_32(handle, ctx->vma.ht_bits)];
}
-static int
-relocate_entry_clflush(struct drm_i915_gem_object *obj,
- struct drm_i915_gem_relocation_entry *reloc,
- uint64_t target_offset)
+static int eb_batch_index(const struct i915_execbuffer *eb)
{
- struct drm_device *dev = obj->base.dev;
- uint32_t page_offset = offset_in_page(reloc->offset);
- uint64_t delta = relocation_target(reloc, target_offset);
- char *vaddr;
- int ret;
-
- ret = i915_gem_object_set_to_gtt_domain(obj, true);
- if (ret)
- return ret;
-
- vaddr = kmap_atomic(i915_gem_object_get_dirty_page(obj,
- reloc->offset >> PAGE_SHIFT));
- clflush_write32(vaddr + page_offset, lower_32_bits(delta));
+ if (eb->args->flags & I915_EXEC_BATCH_FIRST)
+ return 0;
+ else
+ return eb->args->buffer_count - 1;
+}
- if (INTEL_INFO(dev)->gen >= 8) {
- page_offset = offset_in_page(page_offset + sizeof(uint32_t));
+static int eb_select_context(struct i915_execbuffer *eb)
+{
+ struct i915_gem_context *ctx;
- if (page_offset == 0) {
- kunmap_atomic(vaddr);
- vaddr = kmap_atomic(i915_gem_object_get_dirty_page(obj,
- (reloc->offset + sizeof(uint32_t)) >> PAGE_SHIFT));
- }
+ ctx = i915_gem_context_lookup(eb->file->driver_priv, eb->args->rsvd1);
+ if (unlikely(IS_ERR(ctx)))
+ return PTR_ERR(ctx);
- clflush_write32(vaddr + page_offset, upper_32_bits(delta));
+ if (unlikely(ctx->hang_stats.banned)) {
+ DRM_DEBUG("Context %u tried to submit while banned\n",
+ ctx->user_handle);
+ return -EIO;
}
- kunmap_atomic(vaddr);
+ eb->ctx = ctx;
+ eb->vm = ctx->ppgtt ? &ctx->ppgtt->base : &eb->i915->ggtt.base;
+
+ eb->context_flags = 0;
+ if (ctx->flags & CONTEXT_NO_ZEROMAP)
+ eb->context_flags |= __EXEC_OBJECT_NEEDS_BIAS;
return 0;
}
-static bool object_is_idle(struct drm_i915_gem_object *obj)
+static int
+eb_lookup_vmas(struct i915_execbuffer *eb)
{
- unsigned long active = i915_gem_object_get_active(obj);
- int idx;
+ const int count = eb->args->buffer_count;
+ struct i915_vma *vma;
+ struct idr *idr;
+ int slow_pass = -1;
+ int i, ret;
- for_each_active(active, idx) {
- if (!i915_gem_active_is_idle(&obj->last_read[idx],
- &obj->base.dev->struct_mutex))
- return false;
- }
+ INIT_LIST_HEAD(&eb->relocs);
+ INIT_LIST_HEAD(&eb->unbound);
- return true;
-}
+ ret = eb_select_context(eb);
+ if (unlikely(ret))
+ return ret;
-static int
-i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
- struct eb_vmas *eb,
- struct drm_i915_gem_relocation_entry *reloc)
+ if (unlikely(eb->ctx->vma.ht_size & 1))
+ flush_work(&eb->ctx->vma.resize);
+ for (i = 0; i < count; i++) {
+ eb->exec[i].rsvd2 = 0;
+
+ hlist_for_each_entry(vma,
+ ht_head(eb->ctx, eb->exec[i].handle),
+ ctx_node) {
+ if (vma->ctx_handle != eb->exec[i].handle)
+ continue;
+
+ ret = eb_add_vma(eb, &eb->exec[i], vma);
+ if (unlikely(ret))
+ return ret;
+
+ goto next_vma;
+ }
+
+ if (slow_pass < 0)
+ slow_pass = i;
+next_vma: ;
+ }
+
+ if (slow_pass < 0)
+ goto out;
+
+ spin_lock(&eb->file->table_lock);
+ /* Grab a reference to the object and release the lock so we can lookup
+ * or create the VMA without using GFP_ATOMIC */
+ idr = &eb->file->object_idr;
+ for (i = slow_pass; i < count; i++) {
+ struct drm_i915_gem_object *obj;
+
+ if (eb->exec[i].rsvd2)
+ continue;
+
+ obj = to_intel_bo(idr_find(idr, eb->exec[i].handle));
+ if (unlikely(!obj)) {
+ spin_unlock(&eb->file->table_lock);
+ DRM_DEBUG("Invalid object handle %d at index %d\n",
+ eb->exec[i].handle, i);
+ ret = -ENOENT;
+ goto err;
+ }
+
+ eb->exec[i].rsvd2 = 1 | (uintptr_t)obj;
+ }
+ spin_unlock(&eb->file->table_lock);
+
+ for (i = slow_pass; i < count; i++) {
+ struct drm_i915_gem_object *obj;
+
+ if ((eb->exec[i].rsvd2 & 1) == 0)
+ continue;
+
+ /*
+ * NOTE: We can leak any vmas created here when something fails
+ * later on. But that's no issue since vma_unbind can deal with
+ * vmas which are not actually bound. And since only
+ * lookup_or_create exists as an interface to get at the vma
+ * from the (obj, vm) we don't run the risk of creating
+ * duplicated vmas for the same vm.
+ */
+ obj = to_ptr(typeof(*obj), eb->exec[i].rsvd2 & ~1);
+ vma = i915_gem_obj_lookup_or_create_vma(obj, eb->vm, NULL);
+ if (unlikely(IS_ERR(vma))) {
+ DRM_DEBUG("Failed to lookup VMA\n");
+ ret = PTR_ERR(vma);
+ goto err;
+ }
+
+ /* First come, first served */
+ if (!vma->ctx) {
+ vma->ctx = eb->ctx;
+ vma->ctx_handle = eb->exec[i].handle;
+ hlist_add_head(&vma->ctx_node,
+ ht_head(eb->ctx, eb->exec[i].handle));
+ eb->ctx->vma.ht_count++;
+ if (i915_vma_is_ggtt(vma)) {
+ GEM_BUG_ON(obj->vma_hashed);
+ obj->vma_hashed = vma;
+ }
+ }
+
+ ret = eb_add_vma(eb, &eb->exec[i], vma);
+ if (unlikely(ret))
+ goto err;
+ }
+ if (4*eb->ctx->vma.ht_count > 3*eb->ctx->vma.ht_size ||
+ 4*eb->ctx->vma.ht_count < eb->ctx->vma.ht_size) {
+ eb->ctx->vma.ht_size |= 1;
+ queue_work(system_highpri_wq, &eb->ctx->vma.resize);
+ }
+
+out:
+ /* take note of the batch buffer before we might reorder the lists */
+ i = eb_batch_index(eb);
+ eb->batch = to_ptr(struct i915_vma, eb->exec[i].rsvd2);
+
+ /*
+ * SNA is doing fancy tricks with compressing batch buffers, which leads
+ * to negative relocation deltas. Usually that works out ok since the
+ * relocate address is still positive, except when the batch is placed
+ * very low in the GTT. Ensure this doesn't happen.
+ *
+ * Note that actual hangs have only been observed on gen7, but for
+ * paranoia do it everywhere.
+ */
+ if ((eb->exec[i].flags & EXEC_OBJECT_PINNED) == 0)
+ eb->exec[i].flags |= __EXEC_OBJECT_NEEDS_BIAS;
+ if (eb->reloc_cache.has_fence)
+ eb->exec[i].flags |= EXEC_OBJECT_NEEDS_FENCE;
+
+ eb->args->flags |= __EXEC_VALIDATED;
+ return eb_reserve(eb);
+
+err:
+ for (i = slow_pass; i < count; i++) {
+ if (eb->exec[i].rsvd2 & 1)
+ eb->exec[i].rsvd2 = 0;
+ }
+ return ret;
+}
+
+static struct i915_vma *
+eb_get_vma(const struct i915_execbuffer *eb, unsigned long handle)
+{
+ if (eb->lut_mask < 0) {
+ if (handle >= -eb->lut_mask)
+ return NULL;
+ return to_ptr(struct i915_vma, eb->exec[handle].rsvd2);
+ } else {
+ struct hlist_head *head;
+ struct i915_vma *vma;
+
+ head = &eb->buckets[hash_32(handle, eb->lut_mask)];
+ hlist_for_each_entry(vma, head, exec_node) {
+ if (vma->exec_handle == handle)
+ return vma;
+ }
+ return NULL;
+ }
+}
+
+static void
+eb_reset(const struct i915_execbuffer *eb)
+{
+ const unsigned int count = eb->args->buffer_count;
+ unsigned int i;
+
+ for (i = 0; i < count; i++) {
+ struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
+ struct i915_vma *vma = to_ptr(struct i915_vma, entry->rsvd2);
+
+ eb_unreserve_vma(vma, entry);
+ vma->exec_entry = NULL;
+ }
+
+ if (eb->lut_mask >= 0)
+ memset(eb->buckets, 0,
+ (1<<eb->lut_mask)*sizeof(struct hlist_head));
+}
+
+static void eb_release_vma(const struct i915_execbuffer *eb)
+{
+ const unsigned int count = eb->args->buffer_count;
+ unsigned int i;
+
+ if (!eb->exec)
+ return;
+
+ for (i = 0; i < count; i++) {
+ struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
+ struct i915_vma *vma = to_ptr(struct i915_vma, entry->rsvd2);
+
+ if (!vma || !vma->exec_entry)
+ continue;
+
+ GEM_BUG_ON(vma->exec_entry != entry);
+ if (entry->flags & __EXEC_OBJECT_HAS_PIN)
+ __eb_unreserve_vma(vma, entry);
+ vma->exec_entry = NULL;
+ }
+}
+
+static void eb_destroy(const struct i915_execbuffer *eb)
+{
+ if (eb->lut_mask >= 0)
+ kfree(eb->buckets);
+}
+
+static inline u64
+relocation_target(const struct drm_i915_gem_relocation_entry *reloc,
+ const struct i915_vma *target)
+{
+ return gen8_canonical_addr((int)reloc->delta + target->node.start);
+}
+
+static void reloc_cache_init(struct reloc_cache *cache,
+ struct drm_i915_private *i915)
+{
+ cache->page = -1;
+ cache->vaddr = 0;
+ cache->has_llc = HAS_LLC(i915);
+ cache->has_fence = INTEL_GEN(i915) < 4;
+ cache->use_64bit_reloc = INTEL_GEN(i915) >= 8;
+ cache->node.allocated = false;
+}
+
+static inline void *unmask_page(unsigned long p)
+{
+ return (void *)(uintptr_t)(p & PAGE_MASK);
+}
+
+static inline unsigned int unmask_flags(unsigned long p)
+{
+ return p & ~PAGE_MASK;
+}
+
+#define KMAP 0x4
+
+static inline struct i915_ggtt *cache_to_ggtt(struct reloc_cache *cache)
+{
+ struct drm_i915_private *i915 =
+ container_of(cache, struct i915_execbuffer, reloc_cache)->i915;
+ return &i915->ggtt;
+}
+
+static void reloc_cache_reset(struct reloc_cache *cache)
+{
+ void *vaddr;
+
+ if (!cache->vaddr)
+ return;
+
+ vaddr = unmask_page(cache->vaddr);
+ if (cache->vaddr & KMAP) {
+ if (cache->vaddr & CLFLUSH_AFTER)
+ mb();
+
+ kunmap_atomic(vaddr);
+ i915_gem_object_unpin_pages((struct drm_i915_gem_object *)cache->node.mm);
+ } else {
+ wmb();
+ io_mapping_unmap_atomic(vaddr);
+ if (cache->node.allocated) {
+ struct i915_ggtt *ggtt = cache_to_ggtt(cache);
+
+ ggtt->base.clear_range(&ggtt->base,
+ cache->node.start,
+ cache->node.size,
+ true);
+ drm_mm_remove_node(&cache->node);
+ } else
+ i915_vma_unpin((struct i915_vma *)cache->node.mm);
+ }
+
+ cache->vaddr = 0;
+ cache->page = -1;
+}
+
+static void *reloc_kmap(struct drm_i915_gem_object *obj,
+ struct reloc_cache *cache,
+ int page)
+{
+ void *vaddr;
+
+ if (cache->vaddr) {
+ kunmap_atomic(unmask_page(cache->vaddr));
+ } else {
+ unsigned int flushes;
+ int ret;
+
+ ret = i915_gem_obj_prepare_shmem_write(obj, &flushes);
+ if (ret)
+ return ERR_PTR(ret);
+
+ cache->vaddr = flushes | KMAP;
+ cache->node.mm = (void *)obj;
+ if (flushes)
+ mb();
+ }
+
+ vaddr = kmap_atomic(i915_gem_object_get_dirty_page(obj, page));
+ cache->vaddr = unmask_flags(cache->vaddr) | (unsigned long)vaddr;
+ cache->page = page;
+
+ return vaddr;
+}
+
+static void *reloc_iomap(struct drm_i915_gem_object *obj,
+ struct reloc_cache *cache,
+ int page)
+{
+ struct i915_ggtt *ggtt = cache_to_ggtt(cache);
+ unsigned long offset;
+ void *vaddr;
+
+ if (cache->node.allocated) {
+ wmb();
+ ggtt->base.insert_page(&ggtt->base,
+ i915_gem_object_get_dma_address(obj, page),
+ cache->node.start, I915_CACHE_NONE, 0);
+ cache->page = page;
+ return unmask_page(cache->vaddr);
+ }
+
+ if (cache->vaddr) {
+ io_mapping_unmap_atomic(unmask_page(cache->vaddr));
+ } else {
+ struct i915_vma *vma;
+ int ret;
+
+ if (use_cpu_reloc(cache, obj))
+ return NULL;
+
+ ret = i915_gem_object_set_to_gtt_domain(obj, true);
+ if (ret)
+ return ERR_PTR(ret);
+
+ vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
+ PIN_MAPPABLE | PIN_NONBLOCK);
+ if (IS_ERR(vma)) {
+ memset(&cache->node, 0, sizeof(cache->node));
+ ret = drm_mm_insert_node_in_range_generic
+ (&ggtt->base.mm, &cache->node,
+ 4096, 0, -1,
+ 0, ggtt->mappable_end,
+ DRM_MM_INSERT_LOW);
+ if (ret)
+ return ERR_PTR(ret);
+ } else {
+ ret = i915_vma_put_fence(vma);
+ if (ret) {
+ i915_vma_unpin(vma);
+ return ERR_PTR(ret);
+ }
+
+ cache->node.start = vma->node.start;
+ cache->node.mm = (void *)vma;
+ }
+ }
+
+ offset = cache->node.start;
+ if (cache->node.allocated) {
+ ggtt->base.insert_page(&ggtt->base,
+ i915_gem_object_get_dma_address(obj, page),
+ offset, I915_CACHE_NONE, 0);
+ } else {
+ offset += page << PAGE_SHIFT;
+ }
+
+ vaddr = io_mapping_map_atomic_wc(&ggtt->mappable, offset);
+ cache->page = page;
+ cache->vaddr = (unsigned long)vaddr;
+
+ return vaddr;
+}
+
+static void *reloc_vaddr(struct drm_i915_gem_object *obj,
+ struct reloc_cache *cache,
+ int page)
+{
+ void *vaddr;
+
+ if (cache->page == page) {
+ vaddr = unmask_page(cache->vaddr);
+ } else {
+ vaddr = NULL;
+ if ((cache->vaddr & KMAP) == 0)
+ vaddr = reloc_iomap(obj, cache, page);
+ if (!vaddr)
+ vaddr = reloc_kmap(obj, cache, page);
+ }
+
+ return vaddr;
+}
+
+static void clflush_write32(u32 *addr, u32 value, unsigned int flushes)
+{
+ if (unlikely(flushes & (CLFLUSH_BEFORE | CLFLUSH_AFTER))) {
+ if (flushes & CLFLUSH_BEFORE) {
+ clflushopt(addr);
+ mb();
+ }
+
+ *addr = value;
+
+ /* Writes to the same cacheline are serialised by the CPU
+ * (including clflush). On the write path, we only require
+ * that it hits memory in an orderly fashion and place
+ * mb barriers at the start and end of the relocation phase
+ * to ensure ordering of clflush wrt to the system.
+ */
+ if (flushes & CLFLUSH_AFTER)
+ clflushopt(addr);
+ } else
+ *addr = value;
+}
+
+static u64
+relocate_entry(struct drm_i915_gem_object *obj,
+ const struct drm_i915_gem_relocation_entry *reloc,
+ struct reloc_cache *cache,
+ const struct i915_vma *target)
+{
+ u64 offset = reloc->offset;
+ u64 target_offset = relocation_target(reloc, target);
+ bool wide = cache->use_64bit_reloc;
+ void *vaddr;
+
+repeat:
+ vaddr = reloc_vaddr(obj, cache, offset >> PAGE_SHIFT);
+ if (IS_ERR(vaddr))
+ return PTR_ERR(vaddr);
+
+ clflush_write32(vaddr + offset_in_page(offset),
+ lower_32_bits(target_offset),
+ cache->vaddr);
+
+ if (wide) {
+ offset += sizeof(u32);
+ target_offset >>= 32;
+ wide = false;
+ goto repeat;
+ }
+
+ return gen8_canonical_addr(target->node.start) | 1;
+}
+
+static bool object_is_idle(struct drm_i915_gem_object *obj)
+{
+ unsigned long active = i915_gem_object_get_active(obj);
+ int idx;
+
+ for_each_active(active, idx) {
+ if (!i915_gem_active_is_idle(&obj->last_read[idx],
+ &obj->base.dev->struct_mutex))
+ return false;
+ }
+
+ return true;
+}
+
+static u64
+eb_relocate_entry(struct i915_execbuffer *eb,
+ const struct i915_vma *vma,
+ const struct drm_i915_gem_relocation_entry *reloc)
{
- struct drm_device *dev = obj->base.dev;
- struct drm_gem_object *target_obj;
- struct drm_i915_gem_object *target_i915_obj;
- struct i915_vma *target_vma;
- uint64_t target_offset;
+ struct i915_vma *target;
int ret;
/* we've already hold a reference to all valid objects */
- target_vma = eb_get_vma(eb, reloc->target_handle);
- if (unlikely(target_vma == NULL))
+ target = eb_get_vma(eb, reloc->target_handle);
+ if (unlikely(!target))
return -ENOENT;
- target_i915_obj = target_vma->obj;
- target_obj = &target_vma->obj->base;
-
- target_offset = gen8_canonical_addr(target_vma->node.start);
-
- /* Sandybridge PPGTT errata: We need a global gtt mapping for MI and
- * pipe_control writes because the gpu doesn't properly redirect them
- * through the ppgtt for non_secure batchbuffers. */
- if (unlikely(IS_GEN6(dev) &&
- reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION)) {
- ret = i915_vma_bind(target_vma, target_i915_obj->cache_level,
- PIN_GLOBAL);
- if (WARN_ONCE(ret, "Unexpected failure to bind target VMA!"))
- return ret;
- }
/* Validate that the target is in a valid r/w GPU domain */
if (unlikely(reloc->write_domain & (reloc->write_domain - 1))) {
DRM_DEBUG("reloc with multiple write domains: "
- "obj %p target %d offset %d "
+ "target %d offset %d "
"read %08x write %08x",
- obj, reloc->target_handle,
+ reloc->target_handle,
(int) reloc->offset,
reloc->read_domains,
reloc->write_domain);
@@ -496,131 +945,143 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
if (unlikely((reloc->write_domain | reloc->read_domains)
& ~I915_GEM_GPU_DOMAINS)) {
DRM_DEBUG("reloc with read/write non-GPU domains: "
- "obj %p target %d offset %d "
+ "target %d offset %d "
"read %08x write %08x",
- obj, reloc->target_handle,
+ reloc->target_handle,
(int) reloc->offset,
reloc->read_domains,
reloc->write_domain);
return -EINVAL;
}
- target_obj->pending_read_domains |= reloc->read_domains;
- target_obj->pending_write_domain |= reloc->write_domain;
+ if (reloc->write_domain) {
+ target->exec_entry->flags |= EXEC_OBJECT_WRITE;
+
+ /* Sandybridge PPGTT errata: We need a global gtt mapping
+ * for MI and pipe_control writes because the gpu doesn't
+ * properly redirect them through the ppgtt for non_secure
+ * batchbuffers.
+ */
+ if (reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION &&
+ IS_GEN6(eb->i915)) {
+ ret = i915_vma_bind(target, target->obj->cache_level,
+ PIN_GLOBAL);
+ if (WARN_ONCE(ret,
+ "Unexpected failure to bind target VMA!"))
+ return ret;
+ }
+ }
/* If the relocation already has the right value in it, no
* more work needs to be done.
*/
- if (target_offset == reloc->presumed_offset)
+ if (gen8_canonical_addr(target->node.start) == reloc->presumed_offset)
return 0;
/* Check that the relocation address is valid... */
if (unlikely(reloc->offset >
- obj->base.size - (INTEL_INFO(dev)->gen >= 8 ? 8 : 4))) {
+ vma->size - (eb->reloc_cache.use_64bit_reloc ? 8 : 4))) {
DRM_DEBUG("Relocation beyond object bounds: "
- "obj %p target %d offset %d size %d.\n",
- obj, reloc->target_handle,
- (int) reloc->offset,
- (int) obj->base.size);
+ "target %d offset %d size %d.\n",
+ reloc->target_handle,
+ (int)reloc->offset,
+ (int)vma->size);
return -EINVAL;
}
if (unlikely(reloc->offset & 3)) {
DRM_DEBUG("Relocation not 4-byte aligned: "
- "obj %p target %d offset %d.\n",
- obj, reloc->target_handle,
- (int) reloc->offset);
+ "target %d offset %d.\n",
+ reloc->target_handle,
+ (int)reloc->offset);
return -EINVAL;
}
/* We can't wait for rendering with pagefaults disabled */
- if (pagefault_disabled() && !object_is_idle(obj))
- return -EFAULT;
-
- if (use_cpu_reloc(obj))
- ret = relocate_entry_cpu(obj, reloc, target_offset);
- else if (obj->map_and_fenceable)
- ret = relocate_entry_gtt(obj, reloc, target_offset);
- else if (static_cpu_has(X86_FEATURE_CLFLUSH))
- ret = relocate_entry_clflush(obj, reloc, target_offset);
- else {
- WARN_ONCE(1, "Impossible case in relocation handling\n");
- ret = -ENODEV;
- }
-
- if (ret)
- return ret;
+ if (pagefault_disabled() && !object_is_idle(vma->obj))
+ return -EBUSY;
/* and update the user's relocation entry */
- reloc->presumed_offset = target_offset;
-
- return 0;
+ return relocate_entry(vma->obj, reloc, &eb->reloc_cache, target);
}
-static int
-i915_gem_execbuffer_relocate_vma(struct i915_vma *vma,
- struct eb_vmas *eb)
+static int eb_relocate_vma(struct i915_execbuffer *eb,
+ const struct i915_vma *vma)
{
#define N_RELOC(x) ((x) / sizeof(struct drm_i915_gem_relocation_entry))
- struct drm_i915_gem_relocation_entry stack_reloc[N_RELOC(512)];
- struct drm_i915_gem_relocation_entry __user *user_relocs;
- struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
- int remain, ret;
-
- user_relocs = u64_to_user_ptr(entry->relocs_ptr);
+ struct drm_i915_gem_relocation_entry stack[N_RELOC(512)];
+ struct drm_i915_gem_relocation_entry __user *urelocs;
+ const struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
+ unsigned int remain;
+ urelocs = u64_to_user_ptr(entry->relocs_ptr);
remain = entry->relocation_count;
- while (remain) {
- struct drm_i915_gem_relocation_entry *r = stack_reloc;
- int count = remain;
- if (count > ARRAY_SIZE(stack_reloc))
- count = ARRAY_SIZE(stack_reloc);
- remain -= count;
+ if (unlikely(remain > ULONG_MAX / sizeof(*urelocs)))
+ return -EINVAL;
- if (__copy_from_user_inatomic(r, user_relocs, count*sizeof(r[0])))
- return -EFAULT;
+ /*
+ * We must check that the entire relocation array is safe
+ * to read. However, if the array is not writable the user loses
+ * the updated relocation values.
+ */
- do {
- u64 offset = r->presumed_offset;
+ do {
+ struct drm_i915_gem_relocation_entry *r = stack;
+ unsigned int count =
+ min_t(unsigned int, remain, ARRAY_SIZE(stack));
- ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, r);
- if (ret)
- return ret;
+ if (__copy_from_user_inatomic(r, urelocs, count*sizeof(r[0]))) {
+ remain = -EFAULT;
+ goto out;
+ }
- if (r->presumed_offset != offset &&
- __put_user(r->presumed_offset, &user_relocs->presumed_offset)) {
- return -EFAULT;
+ remain -= count;
+ do {
+ u64 offset = eb_relocate_entry(eb, vma, r);
+
+ if (offset == 0) {
+ } else if ((s64)offset < 0) {
+ remain = (s64)offset;
+ goto out;
+ } else {
+ __put_user(offset & ~1,
+ &urelocs[r-stack].presumed_offset);
}
-
- user_relocs++;
- r++;
- } while (--count);
- }
-
- return 0;
+ } while (r++, --count);
+ urelocs += ARRAY_SIZE(stack);
+ } while (remain);
+out:
+ reloc_cache_reset(&eb->reloc_cache);
+ return remain;
#undef N_RELOC
}
static int
-i915_gem_execbuffer_relocate_vma_slow(struct i915_vma *vma,
- struct eb_vmas *eb,
- struct drm_i915_gem_relocation_entry *relocs)
+eb_relocate_vma_slow(struct i915_execbuffer *eb,
+ const struct i915_vma *vma)
{
const struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
- int i, ret;
+ struct drm_i915_gem_relocation_entry *relocs =
+ to_ptr(typeof(*relocs), entry->relocs_ptr);
+ unsigned int i;
+ int ret;
for (i = 0; i < entry->relocation_count; i++) {
- ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, &relocs[i]);
- if (ret)
- return ret;
- }
+ u64 offset = eb_relocate_entry(eb, vma, &relocs[i]);
- return 0;
+ if ((s64)offset < 0) {
+ ret = (s64)offset;
+ goto err;
+ }
+ }
+ ret = 0;
+err:
+ reloc_cache_reset(&eb->reloc_cache);
+ return ret;
}
-static int
-i915_gem_execbuffer_relocate(struct eb_vmas *eb)
+static int eb_relocate(struct i915_execbuffer *eb)
{
- struct i915_vma *vma;
+ const struct i915_vma *vma;
int ret = 0;
/* This is the fast path and we cannot handle a pagefault whilst
@@ -631,412 +1092,456 @@ i915_gem_execbuffer_relocate(struct eb_vmas *eb)
* lockdep complains vehemently.
*/
pagefault_disable();
- list_for_each_entry(vma, &eb->vmas, exec_list) {
- ret = i915_gem_execbuffer_relocate_vma(vma, eb);
- if (ret)
- break;
+ list_for_each_entry(vma, &eb->relocs, reloc_link) {
+retry:
+ ret = eb_relocate_vma(eb, vma);
+ if (ret == 0)
+ continue;
+
+ if (ret == -EBUSY) {
+ pagefault_enable();
+ ret = i915_gem_object_wait_rendering(vma->obj, false);
+ pagefault_disable();
+ if (ret == 0)
+ goto retry;
+ }
+ break;
}
pagefault_enable();
return ret;
}
-static bool only_mappable_for_reloc(unsigned int flags)
+static int check_relocations(const struct drm_i915_gem_exec_object2 *entry)
{
- return (flags & (EXEC_OBJECT_NEEDS_FENCE | __EXEC_OBJECT_NEEDS_MAP)) ==
- __EXEC_OBJECT_NEEDS_MAP;
-}
+ const unsigned long relocs_max =
+ ULONG_MAX / sizeof(struct drm_i915_gem_relocation_entry);
+ const char __user *addr, *end;
+ unsigned long size;
+ unsigned int nreloc;
+ char c;
+
+ nreloc = entry->relocation_count;
+ if (nreloc == 0)
+ return 0;
-static int
-i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
- struct intel_engine_cs *engine,
- bool *need_reloc)
+ if (nreloc > relocs_max)
+ return -EINVAL;
+
+ addr = u64_to_user_ptr(entry->relocs_ptr);
+ size = nreloc * sizeof(struct drm_i915_gem_relocation_entry);
+ if (!access_ok(VERIFY_WRITE, addr, size))
+ return -EFAULT;
+
+ end = addr + size;
+ for (; addr < end; addr += PAGE_SIZE) {
+ int ret = __get_user(c, addr);
+ if (ret)
+ return ret;
+ }
+ return __get_user(c, end - 1);
+}
+
+static int
+eb_copy_relocations(const struct i915_execbuffer *eb)
{
- struct drm_i915_gem_object *obj = vma->obj;
- struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
- uint64_t flags;
+ const unsigned int count = eb->args->buffer_count;
+ unsigned int i;
int ret;
- flags = PIN_USER;
- if (entry->flags & EXEC_OBJECT_NEEDS_GTT)
- flags |= PIN_GLOBAL;
-
- if (!drm_mm_node_allocated(&vma->node)) {
- /* Wa32bitGeneralStateOffset & Wa32bitInstructionBaseOffset,
- * limit address to the first 4GBs for unflagged objects.
- */
- if ((entry->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) == 0)
- flags |= PIN_ZONE_4G;
- if (entry->flags & __EXEC_OBJECT_NEEDS_MAP)
- flags |= PIN_GLOBAL | PIN_MAPPABLE;
- if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS)
- flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS;
- if (entry->flags & EXEC_OBJECT_PINNED)
- flags |= entry->offset | PIN_OFFSET_FIXED;
- if ((flags & PIN_MAPPABLE) == 0)
- flags |= PIN_HIGH;
- }
-
- ret = i915_vma_pin(vma,
- entry->pad_to_size,
- entry->alignment,
- flags);
- if ((ret == -ENOSPC || ret == -E2BIG) &&
- only_mappable_for_reloc(entry->flags))
- ret = i915_vma_pin(vma,
- entry->pad_to_size,
- entry->alignment,
- flags & ~PIN_MAPPABLE);
- if (ret)
- return ret;
+ for (i = 0; i < count; i++) {
+ struct drm_i915_gem_relocation_entry __user *urelocs;
+ struct drm_i915_gem_relocation_entry *relocs;
+ unsigned int nreloc = eb->exec[i].relocation_count, j;
+ unsigned long size;
- entry->flags |= __EXEC_OBJECT_HAS_PIN;
+ if (nreloc == 0)
+ continue;
- if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
- ret = i915_gem_object_get_fence(obj);
+ ret = check_relocations(&eb->exec[i]);
if (ret)
- return ret;
+ goto err;
- if (i915_gem_object_pin_fence(obj))
- entry->flags |= __EXEC_OBJECT_HAS_FENCE;
- }
+ urelocs = u64_to_user_ptr(eb->exec[i].relocs_ptr);
+ size = nreloc * sizeof(*relocs);
- if (entry->offset != vma->node.start) {
- entry->offset = vma->node.start;
- *need_reloc = true;
- }
+ relocs = drm_malloc_gfp(size, 1, GFP_TEMPORARY);
+ if (!relocs) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ /* copy_from_user is limited to 4GiB */
+ j = 0;
+ do {
+ u32 len = min_t(u64, 1ull<<31, size);
+
+ if (__copy_from_user(relocs + j, urelocs + j, len)) {
+ ret = -EFAULT;
+ goto err;
+ }
- if (entry->flags & EXEC_OBJECT_WRITE) {
- obj->base.pending_read_domains = I915_GEM_DOMAIN_RENDER;
- obj->base.pending_write_domain = I915_GEM_DOMAIN_RENDER;
+ size -= len;
+ BUILD_BUG_ON_NOT_POWER_OF_2(sizeof(*relocs));
+ j += len / sizeof(*relocs);
+ } while (size);
+
+ /* As we do not update the known relocation offsets after
+ * relocating (due to the complexities in lock handling),
+ * we need to mark them as invalid now so that we force the
+ * relocation processing next time. Just in case the target
+ * object is evicted and then rebound into its old
+ * presumed_offset before the next execbuffer - if that
+ * happened we would make the mistake of assuming that the
+ * relocations were valid.
+ */
+ user_access_begin();
+ for (j = 0; j < nreloc; j++) {
+ if (unsafe_put_user(-1, &urelocs[j].presumed_offset))
+ break;
+ }
+ user_access_end();
+
+ eb->exec[i].relocs_ptr = (uintptr_t)relocs;
}
return 0;
+
+err:
+ while (i--) {
+ struct drm_i915_gem_relocation_entry *relocs =
+ to_ptr(typeof(*relocs), eb->exec[i].relocs_ptr);
+ if (eb->exec[i].relocation_count)
+ drm_free_large(relocs);
+ }
+ return ret;
}
-static bool
-need_reloc_mappable(struct i915_vma *vma)
+static int eb_prefault_relocations(const struct i915_execbuffer *eb)
{
- struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
-
- if (entry->relocation_count == 0)
- return false;
-
- if (!i915_vma_is_ggtt(vma))
- return false;
+ const unsigned int count = eb->args->buffer_count;
+ unsigned int i;
- /* See also use_cpu_reloc() */
- if (HAS_LLC(vma->obj->base.dev))
- return false;
+ for (i = 0; i < count; i++) {
+ int ret;
- if (vma->obj->base.write_domain == I915_GEM_DOMAIN_CPU)
- return false;
+ ret = check_relocations(&eb->exec[i]);
+ if (ret)
+ return ret;
+ }
- return true;
+ return 0;
}
-static bool
-eb_vma_misplaced(struct i915_vma *vma)
+static int eb_relocate_slow(struct i915_execbuffer *eb)
{
- struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
- struct drm_i915_gem_object *obj = vma->obj;
+ struct drm_device *dev = &eb->i915->drm;
+ bool have_copy = false;
+ const struct i915_vma *vma;
+ int ret = 0;
- WARN_ON(entry->flags & __EXEC_OBJECT_NEEDS_MAP &&
- !i915_vma_is_ggtt(vma));
+repeat:
+ if (signal_pending(current))
+ return -ERESTARTSYS;
- if (entry->alignment &&
- vma->node.start & (entry->alignment - 1))
- return true;
+ /* We may process another execbuffer during the unlock... */
+ eb_reset(eb);
+ mutex_unlock(&dev->struct_mutex);
- if (vma->node.size < entry->pad_to_size)
- return true;
+ if (ret == 0 && likely(!i915.prefault_disable)) {
+ ret = eb_prefault_relocations(eb);
+ } else if (!have_copy) {
+ ret = eb_copy_relocations(eb);
+ have_copy = true;
+ } else {
+ cond_resched();
+ ret = 0;
+ }
+ if (ret) {
+ mutex_lock(&dev->struct_mutex);
+ return ret;
+ }
- if (entry->flags & EXEC_OBJECT_PINNED &&
- vma->node.start != entry->offset)
- return true;
+ /* A frequent cause for EAGAIN are currently unavailable client pages */
+ flush_workqueue(eb->i915->mm.userptr_wq);
- if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS &&
- vma->node.start < BATCH_OFFSET_BIAS)
- return true;
+ ret = i915_mutex_lock_interruptible(dev);
+ if (ret) {
+ mutex_lock(&dev->struct_mutex);
+ goto err;
+ }
- /* avoid costly ping-pong once a batch bo ended up non-mappable */
- if (entry->flags & __EXEC_OBJECT_NEEDS_MAP && !obj->map_and_fenceable)
- return !only_mappable_for_reloc(entry->flags);
+ /* reacquire the objects */
+ ret = eb_lookup_vmas(eb);
+ if (ret)
+ goto err;
- if ((entry->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) == 0 &&
- (vma->node.start + vma->node.size - 1) >> 32)
- return true;
+ list_for_each_entry(vma, &eb->relocs, reloc_link) {
+ if (!have_copy) {
+ pagefault_disable();
+ ret = eb_relocate_vma(eb, vma);
+ pagefault_enable();
+ if (ret)
+ goto repeat;
+ } else {
+ ret = eb_relocate_vma_slow(eb, vma);
+ if (ret)
+ goto err;
+ }
+ }
- return false;
-}
+ /* Leave the user relocations as are, this is the painfully slow path,
+ * and we want to avoid the complication of dropping the lock whilst
+ * having buffers reserved in the aperture and so causing spurious
+ * ENOSPC for random operations.
+ */
-static int
-i915_gem_execbuffer_reserve(struct intel_engine_cs *engine,
- struct list_head *vmas,
- struct i915_gem_context *ctx,
- bool *need_relocs)
-{
- struct drm_i915_gem_object *obj;
- struct i915_vma *vma;
- struct i915_address_space *vm;
- struct list_head ordered_vmas;
- struct list_head pinned_vmas;
- bool has_fenced_gpu_access = INTEL_GEN(engine->i915) < 4;
- int retry;
+err:
+ if (ret == -EAGAIN)
+ goto repeat;
- vm = list_first_entry(vmas, struct i915_vma, exec_list)->vm;
+ if (have_copy) {
+ const unsigned int count = eb->args->buffer_count;
+ unsigned int i;
- INIT_LIST_HEAD(&ordered_vmas);
- INIT_LIST_HEAD(&pinned_vmas);
- while (!list_empty(vmas)) {
- struct drm_i915_gem_exec_object2 *entry;
- bool need_fence, need_mappable;
+ for (i = 0; i < count; i++) {
+ const struct drm_i915_gem_exec_object2 *entry =
+ &eb->exec[i];
+ struct drm_i915_gem_relocation_entry *relocs;
- vma = list_first_entry(vmas, struct i915_vma, exec_list);
- obj = vma->obj;
- entry = vma->exec_entry;
-
- if (ctx->flags & CONTEXT_NO_ZEROMAP)
- entry->flags |= __EXEC_OBJECT_NEEDS_BIAS;
-
- if (!has_fenced_gpu_access)
- entry->flags &= ~EXEC_OBJECT_NEEDS_FENCE;
- need_fence =
- entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
- i915_gem_object_is_tiled(obj);
- need_mappable = need_fence || need_reloc_mappable(vma);
-
- if (entry->flags & EXEC_OBJECT_PINNED)
- list_move_tail(&vma->exec_list, &pinned_vmas);
- else if (need_mappable) {
- entry->flags |= __EXEC_OBJECT_NEEDS_MAP;
- list_move(&vma->exec_list, &ordered_vmas);
- } else
- list_move_tail(&vma->exec_list, &ordered_vmas);
+ if (entry->relocation_count == 0)
+ continue;
- obj->base.pending_read_domains = I915_GEM_GPU_DOMAINS & ~I915_GEM_DOMAIN_COMMAND;
- obj->base.pending_write_domain = 0;
+ relocs = to_ptr(typeof(*relocs), entry->relocs_ptr);
+ drm_free_large(relocs);
+ }
}
- list_splice(&ordered_vmas, vmas);
- list_splice(&pinned_vmas, vmas);
- /* Attempt to pin all of the buffers into the GTT.
- * This is done in 3 phases:
- *
- * 1a. Unbind all objects that do not match the GTT constraints for
- * the execbuffer (fenceable, mappable, alignment etc).
- * 1b. Increment pin count for already bound objects.
- * 2. Bind new objects.
- * 3. Decrement pin count.
- *
- * This avoid unnecessary unbinding of later objects in order to make
- * room for the earlier objects *unless* we need to defragment.
+ return ret;
+}
+
+static void eb_export_fence(struct drm_i915_gem_object *obj,
+ struct drm_i915_gem_request *req,
+ unsigned int flags)
+{
+ struct reservation_object *resv;
+
+ resv = i915_gem_object_get_dmabuf_resv(obj);
+ if (!resv)
+ return;
+
+ /* Ignore errors from failing to allocate the new fence, we can't
+ * handle an error right now. Worst case should be missed
+ * synchronisation leading to rendering corruption.
*/
- retry = 0;
- do {
- int ret = 0;
+ ww_mutex_lock(&resv->lock, NULL);
+ if (flags & EXEC_OBJECT_WRITE)
+ reservation_object_add_excl_fence(resv, &req->fence);
+ else if (reservation_object_reserve_shared(resv) == 0)
+ reservation_object_add_shared_fence(resv, &req->fence);
+ ww_mutex_unlock(&resv->lock);
+}
- /* Unbind any ill-fitting objects or pin. */
- list_for_each_entry(vma, vmas, exec_list) {
- if (!drm_mm_node_allocated(&vma->node))
- continue;
+static unsigned int eb_other_engines(struct i915_execbuffer *eb)
+{
+ unsigned int mask;
- if (eb_vma_misplaced(vma))
- ret = i915_vma_unbind(vma);
- else
- ret = i915_gem_execbuffer_reserve_vma(vma,
- engine,
- need_relocs);
- if (ret)
- goto err;
- }
+ mask = ~intel_engine_flag(eb->engine) & I915_BO_ACTIVE_MASK;
+ mask <<= I915_BO_ACTIVE_SHIFT;
- /* Bind fresh objects */
- list_for_each_entry(vma, vmas, exec_list) {
- if (drm_mm_node_allocated(&vma->node))
- continue;
+ return mask;
+}
- ret = i915_gem_execbuffer_reserve_vma(vma, engine,
- need_relocs);
- if (ret)
- goto err;
- }
+static int
+__eb_sync(struct drm_i915_gem_request *to,
+ struct drm_i915_gem_request *from)
+{
+ int idx, ret;
-err:
- if (ret != -ENOSPC || retry++)
- return ret;
+ if (to->engine == from->engine)
+ return 0;
- /* Decrement pin count for bound objects */
- list_for_each_entry(vma, vmas, exec_list)
- i915_gem_execbuffer_unreserve_vma(vma);
+ idx = intel_engine_sync_index(from->engine, to->engine);
+ if (from->fence.seqno <= from->engine->semaphore.sync_seqno[idx])
+ return 0;
- ret = i915_gem_evict_vm(vm, true);
+ trace_i915_gem_ring_sync_to(to, from);
+ if (!i915.semaphores) {
+ if (!i915_spin_request(from, TASK_INTERRUPTIBLE, 2)) {
+ ret = kfence_await_dma_fence(&to->submit, &from->fence,
+ GFP_KERNEL);
+ if (ret < 0)
+ return ret;
+ }
+ } else {
+ ret = to->engine->semaphore.sync_to(to, from);
if (ret)
return ret;
- } while (1);
+ }
+
+ from->engine->semaphore.sync_seqno[idx] = from->fence.seqno;
+ return 0;
}
static int
-i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
- struct drm_i915_gem_execbuffer2 *args,
- struct drm_file *file,
- struct intel_engine_cs *engine,
- struct eb_vmas *eb,
- struct drm_i915_gem_exec_object2 *exec,
- struct i915_gem_context *ctx)
+eb_sync(struct drm_i915_gem_object *obj,
+ struct drm_i915_gem_request *to,
+ bool write)
{
- struct drm_i915_gem_relocation_entry *reloc;
- struct i915_address_space *vm;
- struct i915_vma *vma;
- bool need_relocs;
- int *reloc_offset;
- int i, total, ret;
- unsigned count = args->buffer_count;
-
- vm = list_first_entry(&eb->vmas, struct i915_vma, exec_list)->vm;
+ struct i915_gem_active *active;
+ unsigned long active_mask;
+ int idx;
- /* We may process another execbuffer during the unlock... */
- while (!list_empty(&eb->vmas)) {
- vma = list_first_entry(&eb->vmas, struct i915_vma, exec_list);
- list_del_init(&vma->exec_list);
- i915_gem_execbuffer_unreserve_vma(vma);
- i915_gem_object_put(vma->obj);
+ if (write) {
+ active_mask = i915_gem_object_get_active(obj);
+ active = obj->last_read;
+ } else {
+ active_mask = 1;
+ active = &obj->last_write;
}
- mutex_unlock(&dev->struct_mutex);
+ for_each_active(active_mask, idx) {
+ struct drm_i915_gem_request *request;
+ int ret;
- total = 0;
- for (i = 0; i < count; i++)
- total += exec[i].relocation_count;
+ request = i915_gem_active_peek(&active[idx],
+ &obj->base.dev->struct_mutex);
+ if (!request)
+ continue;
- reloc_offset = drm_malloc_ab(count, sizeof(*reloc_offset));
- reloc = drm_malloc_ab(total, sizeof(*reloc));
- if (reloc == NULL || reloc_offset == NULL) {
- drm_free_large(reloc);
- drm_free_large(reloc_offset);
- mutex_lock(&dev->struct_mutex);
- return -ENOMEM;
+ ret = __eb_sync(to, request);
+ if (ret)
+ return ret;
}
- total = 0;
- for (i = 0; i < count; i++) {
- struct drm_i915_gem_relocation_entry __user *user_relocs;
- u64 invalid_offset = (u64)-1;
- int j;
+ return 0;
+}
- user_relocs = u64_to_user_ptr(exec[i].relocs_ptr);
+static int eb_add_reservations(struct reservation_object *resv,
+ struct drm_i915_gem_request *req,
+ unsigned flags)
+{
+ struct fence *fence;
+ int ret;
- if (copy_from_user(reloc+total, user_relocs,
- exec[i].relocation_count * sizeof(*reloc))) {
- ret = -EFAULT;
- mutex_lock(&dev->struct_mutex);
- goto err;
- }
+ rcu_read_lock();
- /* As we do not update the known relocation offsets after
- * relocating (due to the complexities in lock handling),
- * we need to mark them as invalid now so that we force the
- * relocation processing next time. Just in case the target
- * object is evicted and then rebound into its old
- * presumed_offset before the next execbuffer - if that
- * happened we would make the mistake of assuming that the
- * relocations were valid.
- */
- for (j = 0; j < exec[i].relocation_count; j++) {
- if (__copy_to_user(&user_relocs[j].presumed_offset,
- &invalid_offset,
- sizeof(invalid_offset))) {
- ret = -EFAULT;
- mutex_lock(&dev->struct_mutex);
- goto err;
- }
- }
+ if (flags & EXEC_OBJECT_WRITE) {
+ struct reservation_object_list *shared;
+ unsigned count, i;
- reloc_offset[i] = total;
- total += exec[i].relocation_count;
- }
+ shared = rcu_dereference(resv->fence);
+ count = shared ? shared->shared_count : 0;
+ for (i = 0; i < count; i++) {
+ struct fence *fence =
+ rcu_dereference(shared->shared[i]);
- ret = i915_mutex_lock_interruptible(dev);
- if (ret) {
- mutex_lock(&dev->struct_mutex);
- goto err;
- }
+ if (fence_is_i915(fence))
+ continue;
- /* reacquire the objects */
- eb_reset(eb);
- ret = eb_lookup_vmas(eb, exec, args, vm, file);
- if (ret)
- goto err;
+ if (!fence_get_rcu(fence))
+ continue;
- need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0;
- ret = i915_gem_execbuffer_reserve(engine, &eb->vmas, ctx,
- &need_relocs);
- if (ret)
- goto err;
+ ret = kfence_await_dma_fence(&req->submit, fence,
+ GFP_NOWAIT | __GFP_NOWARN);
+ fence_put(fence);
+ if (ret < 0)
+ goto err;
+ }
+ }
- list_for_each_entry(vma, &eb->vmas, exec_list) {
- int offset = vma->exec_entry - exec;
- ret = i915_gem_execbuffer_relocate_vma_slow(vma, eb,
- reloc + reloc_offset[offset]);
- if (ret)
+ fence = rcu_dereference(resv->fence_excl);
+ if (fence && !fence_is_i915(fence) && fence_get_rcu(fence)) {
+ ret = kfence_await_dma_fence(&req->submit, fence,
+ GFP_NOWAIT | __GFP_NOWARN);
+ fence_put(fence);
+ if (ret < 0)
goto err;
}
- /* Leave the user relocations as are, this is the painfully slow path,
- * and we want to avoid the complication of dropping the lock whilst
- * having buffers reserved in the aperture and so causing spurious
- * ENOSPC for random operations.
- */
-
+ ret = 0;
err:
- drm_free_large(reloc);
- drm_free_large(reloc_offset);
+ rcu_read_unlock();
return ret;
}
-static unsigned int eb_other_engines(struct drm_i915_gem_request *req)
+static int
+eb_move_to_gpu(struct i915_execbuffer *eb)
{
- unsigned int mask;
+ const unsigned int other_rings = eb_other_engines(eb);
+ const unsigned int count = eb->args->buffer_count;
+ bool flush_chipset = false;
+ struct drm_i915_gem_object *obj;
+ struct i915_vma *vma;
+ unsigned int i;
+ int ret;
- mask = ~intel_engine_flag(req->engine) & I915_BO_ACTIVE_MASK;
- mask <<= I915_BO_ACTIVE_SHIFT;
+ INIT_LIST_HEAD(&eb->relocs);
+
+ for (i = 0; i < count; i++) {
+ const struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
+ struct reservation_object *resv;
+
+ vma = to_ptr(struct i915_vma, entry->rsvd2);
+ obj = vma->obj;
+ resv = i915_gem_object_get_dmabuf_resv(obj);
+
+ if ((entry->flags & EXEC_OBJECT_ASYNC) == 0) {
+ if (obj->flags & other_rings) {
+ ret = eb_sync(obj, eb->request,
+ entry->flags & EXEC_OBJECT_WRITE);
+ if (ret)
+ return ret;
+ }
+
+ if (resv) {
+ ret = eb_add_reservations(resv,
+ eb->request,
+ entry->flags);
+ if (ret)
+ return ret;
+ }
+ }
+
+ if (obj->base.write_domain) {
+ if (obj->base.write_domain & I915_GEM_DOMAIN_CPU)
+ flush_chipset |= i915_gem_clflush_object(obj, false);
+
+ obj->base.write_domain = 0;
+ }
- return mask;
-}
+ if (entry->flags & EXEC_OBJECT_WRITE)
+ obj->base.read_domains = 0;
+ obj->base.read_domains |= I915_GEM_GPU_DOMAINS;
-static int
-i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req,
- struct list_head *vmas)
-{
- const unsigned int other_rings = eb_other_engines(req);
- struct i915_vma *vma;
- uint32_t flush_domains = 0;
- bool flush_chipset = false;
- int ret;
+ i915_vma_move_to_active(vma, eb->request, entry->flags);
- list_for_each_entry(vma, vmas, exec_list) {
- struct drm_i915_gem_object *obj = vma->obj;
+ __eb_unreserve_vma(vma, entry);
+ vma->exec_entry = NULL;
- if (obj->flags & other_rings) {
- ret = i915_gem_object_sync(obj, req);
- if (ret)
- return ret;
+ if (resv) {
+ list_add_tail(&vma->reloc_link, &eb->relocs);
+ vma->exec_entry = (void *)(uintptr_t)entry->flags;
}
- if (obj->base.write_domain & I915_GEM_DOMAIN_CPU)
- flush_chipset |= i915_gem_clflush_object(obj, false);
+ }
+ eb->exec = NULL;
- flush_domains |= obj->base.write_domain;
+ list_for_each_entry(vma, &eb->relocs, reloc_link) {
+ eb_export_fence(vma->obj,
+ eb->request,
+ (uintptr_t)vma->exec_entry);
+ vma->exec_entry = NULL;
}
if (flush_chipset)
- i915_gem_chipset_flush(req->engine->i915);
+ i915_gem_chipset_flush(eb->i915);
- if (flush_domains & I915_GEM_DOMAIN_GTT)
- wmb();
+ /* Make sure (untracked) CPU relocs/parsing are flushed */
+ wmb();
/* Unconditionally invalidate GPU caches and TLBs. */
- return req->engine->emit_flush(req, EMIT_INVALIDATE);
+ return eb->engine->emit_flush(eb->request, EMIT_INVALIDATE);
}
static bool
@@ -1062,106 +1567,6 @@ i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
return true;
}
-static int
-validate_exec_list(struct drm_device *dev,
- struct drm_i915_gem_exec_object2 *exec,
- int count)
-{
- unsigned relocs_total = 0;
- unsigned relocs_max = UINT_MAX / sizeof(struct drm_i915_gem_relocation_entry);
- unsigned invalid_flags;
- int i;
-
- /* INTERNAL flags must not overlap with external ones */
- BUILD_BUG_ON(__EXEC_OBJECT_INTERNAL_FLAGS & ~__EXEC_OBJECT_UNKNOWN_FLAGS);
-
- invalid_flags = __EXEC_OBJECT_UNKNOWN_FLAGS;
- if (USES_FULL_PPGTT(dev))
- invalid_flags |= EXEC_OBJECT_NEEDS_GTT;
-
- for (i = 0; i < count; i++) {
- char __user *ptr = u64_to_user_ptr(exec[i].relocs_ptr);
- int length; /* limited by fault_in_pages_readable() */
-
- if (exec[i].flags & invalid_flags)
- return -EINVAL;
-
- /* Offset can be used as input (EXEC_OBJECT_PINNED), reject
- * any non-page-aligned or non-canonical addresses.
- */
- if (exec[i].flags & EXEC_OBJECT_PINNED) {
- if (exec[i].offset !=
- gen8_canonical_addr(exec[i].offset & PAGE_MASK))
- return -EINVAL;
-
- /* From drm_mm perspective address space is continuous,
- * so from this point we're always using non-canonical
- * form internally.
- */
- exec[i].offset = gen8_noncanonical_addr(exec[i].offset);
- }
-
- if (exec[i].alignment && !is_power_of_2(exec[i].alignment))
- return -EINVAL;
-
- /* pad_to_size was once a reserved field, so sanitize it */
- if (exec[i].flags & EXEC_OBJECT_PAD_TO_SIZE) {
- if (offset_in_page(exec[i].pad_to_size))
- return -EINVAL;
- } else {
- exec[i].pad_to_size = 0;
- }
-
- /* First check for malicious input causing overflow in
- * the worst case where we need to allocate the entire
- * relocation tree as a single array.
- */
- if (exec[i].relocation_count > relocs_max - relocs_total)
- return -EINVAL;
- relocs_total += exec[i].relocation_count;
-
- length = exec[i].relocation_count *
- sizeof(struct drm_i915_gem_relocation_entry);
- /*
- * We must check that the entire relocation array is safe
- * to read, but since we may need to update the presumed
- * offsets during execution, check for full write access.
- */
- if (!access_ok(VERIFY_WRITE, ptr, length))
- return -EFAULT;
-
- if (likely(!i915.prefault_disable)) {
- if (fault_in_multipages_readable(ptr, length))
- return -EFAULT;
- }
- }
-
- return 0;
-}
-
-static struct i915_gem_context *
-i915_gem_validate_context(struct drm_device *dev, struct drm_file *file,
- struct intel_engine_cs *engine, const u32 ctx_id)
-{
- struct i915_gem_context *ctx = NULL;
- struct i915_ctx_hang_stats *hs;
-
- if (engine->id != RCS && ctx_id != DEFAULT_CONTEXT_HANDLE)
- return ERR_PTR(-EINVAL);
-
- ctx = i915_gem_context_lookup(file->driver_priv, ctx_id);
- if (IS_ERR(ctx))
- return ctx;
-
- hs = &ctx->hang_stats;
- if (hs->banned) {
- DRM_DEBUG("Context %u tried to submit while banned\n", ctx_id);
- return ERR_PTR(-EIO);
- }
-
- return ctx;
-}
-
void i915_vma_move_to_active(struct i915_vma *vma,
struct drm_i915_gem_request *req,
unsigned int flags)
@@ -1171,90 +1576,29 @@ void i915_vma_move_to_active(struct i915_vma *vma,
GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
- obj->dirty = 1; /* be paranoid */
-
- /* Add a reference if we're newly entering the active list.
- * The order in which we add operations to the retirement queue is
+ /* The order in which we add operations to the retirement queue is
* vital here: mark_active adds to the start of the callback list,
* such that subsequent callbacks are called first. Therefore we
* add the active reference first and queue for it to be dropped
* *last*.
*/
- if (!i915_gem_object_is_active(obj))
- i915_gem_object_get(obj);
+ i915_gem_object_set_dirty(obj); /* be paranoid */
i915_gem_object_set_active(obj, idx);
i915_gem_active_set(&obj->last_read[idx], req);
if (flags & EXEC_OBJECT_WRITE) {
i915_gem_active_set(&obj->last_write, req);
-
intel_fb_obj_invalidate(obj, ORIGIN_CS);
-
- /* update for the implicit flush after a batch */
- obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
}
- if (flags & EXEC_OBJECT_NEEDS_FENCE) {
- i915_gem_active_set(&obj->last_fence, req);
- if (flags & __EXEC_OBJECT_HAS_FENCE) {
- struct drm_i915_private *dev_priv = req->i915;
-
- list_move_tail(&dev_priv->fence_regs[obj->fence_reg].lru_list,
- &dev_priv->mm.fence_list);
- }
- }
+ if (flags & EXEC_OBJECT_NEEDS_FENCE)
+ i915_gem_active_set(&vma->last_fence, req);
i915_vma_set_active(vma, idx);
i915_gem_active_set(&vma->last_read[idx], req);
list_move_tail(&vma->vm_link, &vma->vm->active_list);
}
-static void eb_export_fence(struct drm_i915_gem_object *obj,
- struct drm_i915_gem_request *req,
- unsigned int flags)
-{
- struct reservation_object *resv;
-
- resv = i915_gem_object_get_dmabuf_resv(obj);
- if (!resv)
- return;
-
- /* Ignore errors from failing to allocate the new fence, we can't
- * handle an error right now. Worst case should be missed
- * synchronisation leading to rendering corruption.
- */
- ww_mutex_lock(&resv->lock, NULL);
- if (flags & EXEC_OBJECT_WRITE)
- reservation_object_add_excl_fence(resv, &req->fence);
- else if (reservation_object_reserve_shared(resv) == 0)
- reservation_object_add_shared_fence(resv, &req->fence);
- ww_mutex_unlock(&resv->lock);
-}
-
-static void
-i915_gem_execbuffer_move_to_active(struct list_head *vmas,
- struct drm_i915_gem_request *req)
-{
- struct i915_vma *vma;
-
- list_for_each_entry(vma, vmas, exec_list) {
- struct drm_i915_gem_object *obj = vma->obj;
- u32 old_read = obj->base.read_domains;
- u32 old_write = obj->base.write_domain;
-
- obj->base.write_domain = obj->base.pending_write_domain;
- if (obj->base.write_domain)
- vma->exec_entry->flags |= EXEC_OBJECT_WRITE;
- else
- obj->base.pending_read_domains |= obj->base.read_domains;
- obj->base.read_domains = obj->base.pending_read_domains;
-
- i915_vma_move_to_active(vma, req, vma->exec_entry->flags);
- eb_export_fence(obj, req, vma->exec_entry->flags);
- trace_i915_gem_object_change_domain(obj, old_read, old_write);
- }
-}
-
static int
i915_reset_gen7_sol_offsets(struct drm_i915_gem_request *req)
{
@@ -1266,163 +1610,163 @@ i915_reset_gen7_sol_offsets(struct drm_i915_gem_request *req)
return -EINVAL;
}
- ret = intel_ring_begin(req, 4 * 3);
+ ret = intel_ring_begin(req, 4 * 2 + 2);
if (ret)
return ret;
+ intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(4));
for (i = 0; i < 4; i++) {
- intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
intel_ring_emit_reg(ring, GEN7_SO_WRITE_OFFSET(i));
intel_ring_emit(ring, 0);
}
-
+ intel_ring_emit(ring, MI_NOOP);
intel_ring_advance(ring);
return 0;
}
-static struct i915_vma*
-i915_gem_execbuffer_parse(struct intel_engine_cs *engine,
- struct drm_i915_gem_exec_object2 *shadow_exec_entry,
- struct drm_i915_gem_object *batch_obj,
- struct eb_vmas *eb,
- u32 batch_start_offset,
- u32 batch_len,
- bool is_master)
+static struct i915_vma *eb_parse(struct i915_execbuffer *eb, bool is_master)
{
struct drm_i915_gem_object *shadow_batch_obj;
struct i915_vma *vma;
int ret;
- shadow_batch_obj = i915_gem_batch_pool_get(&engine->batch_pool,
- PAGE_ALIGN(batch_len));
+ shadow_batch_obj = i915_gem_batch_pool_get(&eb->engine->batch_pool,
+ PAGE_ALIGN(eb->args->batch_len));
if (IS_ERR(shadow_batch_obj))
return ERR_CAST(shadow_batch_obj);
- ret = intel_engine_cmd_parser(engine,
- batch_obj,
+ ret = intel_engine_cmd_parser(eb->engine,
+ eb->batch->obj,
shadow_batch_obj,
- batch_start_offset,
- batch_len,
+ eb->args->batch_start_offset,
+ eb->args->batch_len,
is_master);
- if (ret)
+ if (ret) {
+ if (ret == -EACCES) /* unhandled chained batch */
+ vma = NULL;
+ else
+ vma = ERR_PTR(ret);
goto err;
+ }
- ret = i915_gem_object_ggtt_pin(shadow_batch_obj, NULL, 0, 0, 0);
- if (ret)
+ vma = i915_gem_object_ggtt_pin(shadow_batch_obj, NULL, 0, 0, 0);
+ if (IS_ERR(vma)) {
+ ret = PTR_ERR(vma);
goto err;
+ }
- i915_gem_object_unpin_pages(shadow_batch_obj);
-
- memset(shadow_exec_entry, 0, sizeof(*shadow_exec_entry));
-
- vma = i915_gem_obj_to_ggtt(shadow_batch_obj);
- vma->exec_entry = shadow_exec_entry;
+ vma->exec_entry =
+ memset(&eb->exec[eb->args->buffer_count++],
+ 0, sizeof(*vma->exec_entry));
vma->exec_entry->flags = __EXEC_OBJECT_HAS_PIN;
- i915_gem_object_get(shadow_batch_obj);
- list_add_tail(&vma->exec_list, &eb->vmas);
-
- return vma;
+ vma->exec_entry->rsvd2 = (uintptr_t)vma;
err:
i915_gem_object_unpin_pages(shadow_batch_obj);
- if (ret == -EACCES) /* unhandled chained batch */
- return NULL;
- else
- return ERR_PTR(ret);
+ return vma;
+}
+
+static void
+add_to_client(struct drm_i915_gem_request *req,
+ struct drm_file *file)
+{
+ req->file_priv = file->driver_priv;
+ list_add_tail(&req->client_link, &req->file_priv->mm.request_list);
}
static int
-execbuf_submit(struct i915_execbuffer_params *params,
- struct drm_i915_gem_execbuffer2 *args,
- struct list_head *vmas)
+eb_set_constants_offset(struct i915_execbuffer *eb)
{
- struct drm_i915_private *dev_priv = params->request->i915;
- u64 exec_start, exec_len;
- int instp_mode;
- u32 instp_mask;
+ struct drm_i915_private *dev_priv = eb->i915;
+ struct intel_ring *ring;
+ u32 mode, mask;
int ret;
- ret = i915_gem_execbuffer_move_to_gpu(params->request, vmas);
- if (ret)
- return ret;
-
- ret = i915_switch_context(params->request);
- if (ret)
- return ret;
-
- instp_mode = args->flags & I915_EXEC_CONSTANTS_MASK;
- instp_mask = I915_EXEC_CONSTANTS_MASK;
- switch (instp_mode) {
+ mode = eb->args->flags & I915_EXEC_CONSTANTS_MASK;
+ switch (mode) {
case I915_EXEC_CONSTANTS_REL_GENERAL:
case I915_EXEC_CONSTANTS_ABSOLUTE:
case I915_EXEC_CONSTANTS_REL_SURFACE:
- if (instp_mode != 0 && params->engine->id != RCS) {
- DRM_DEBUG("non-0 rel constants mode on non-RCS\n");
- return -EINVAL;
- }
-
- if (instp_mode != dev_priv->relative_constants_mode) {
- if (INTEL_INFO(dev_priv)->gen < 4) {
- DRM_DEBUG("no rel constants on pre-gen4\n");
- return -EINVAL;
- }
-
- if (INTEL_INFO(dev_priv)->gen > 5 &&
- instp_mode == I915_EXEC_CONSTANTS_REL_SURFACE) {
- DRM_DEBUG("rel surface constants mode invalid on gen5+\n");
- return -EINVAL;
- }
-
- /* The HW changed the meaning on this bit on gen6 */
- if (INTEL_INFO(dev_priv)->gen >= 6)
- instp_mask &= ~I915_EXEC_CONSTANTS_REL_SURFACE;
- }
break;
default:
- DRM_DEBUG("execbuf with unknown constants: %d\n", instp_mode);
+ DRM_DEBUG("execbuf with unknown constants: %d\n", mode);
return -EINVAL;
}
- if (params->engine->id == RCS &&
- instp_mode != dev_priv->relative_constants_mode) {
- struct intel_ring *ring = params->request->ring;
-
- ret = intel_ring_begin(params->request, 4);
- if (ret)
- return ret;
+ if (mode == dev_priv->relative_constants_mode)
+ return 0;
- intel_ring_emit(ring, MI_NOOP);
- intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
- intel_ring_emit_reg(ring, INSTPM);
- intel_ring_emit(ring, instp_mask << 16 | instp_mode);
- intel_ring_advance(ring);
+ if (eb->engine->id != RCS) {
+ DRM_DEBUG("non-0 rel constants mode on non-RCS\n");
+ return -EINVAL;
+ }
- dev_priv->relative_constants_mode = instp_mode;
+ if (INTEL_GEN(dev_priv) < 4) {
+ DRM_DEBUG("no rel constants on pre-gen4\n");
+ return -EINVAL;
}
- if (args->flags & I915_EXEC_GEN7_SOL_RESET) {
- ret = i915_reset_gen7_sol_offsets(params->request);
- if (ret)
- return ret;
+ if (INTEL_GEN(dev_priv) > 5 &&
+ mode == I915_EXEC_CONSTANTS_REL_SURFACE) {
+ DRM_DEBUG("rel surface constants mode invalid on gen5+\n");
+ return -EINVAL;
}
- exec_len = args->batch_len;
- exec_start = params->batch->node.start +
- params->args_batch_start_offset;
+ /* The HW changed the meaning on this bit on gen6 */
+ mask = I915_EXEC_CONSTANTS_MASK;
+ if (INTEL_GEN(dev_priv) >= 6)
+ mask &= ~I915_EXEC_CONSTANTS_REL_SURFACE;
+
+ ret = intel_ring_begin(eb->request, 4);
+ if (ret)
+ return ret;
+
+ ring = eb->request->ring;
+ intel_ring_emit(ring, MI_NOOP);
+ intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
+ intel_ring_emit_reg(ring, INSTPM);
+ intel_ring_emit(ring, mask << 16 | mode);
+ intel_ring_advance(ring);
+
+ dev_priv->relative_constants_mode = mode;
+
+ return 0;
+}
+
+static int
+eb_submit(struct i915_execbuffer *eb)
+{
+ int ret;
+
+ ret = eb_move_to_gpu(eb);
+ if (ret)
+ return ret;
- if (exec_len == 0)
- exec_len = params->batch->size;
+ ret = i915_switch_context(eb->request);
+ if (ret)
+ return ret;
- ret = params->engine->emit_bb_start(params->request,
- exec_start, exec_len,
- params->dispatch_flags);
+ ret = eb_set_constants_offset(eb);
if (ret)
return ret;
- trace_i915_gem_ring_dispatch(params->request, params->dispatch_flags);
+ if (eb->args->flags & I915_EXEC_GEN7_SOL_RESET) {
+ ret = i915_reset_gen7_sol_offsets(eb->request);
+ if (ret)
+ return ret;
+ }
+
+ ret = eb->engine->emit_bb_start(eb->request,
+ eb->batch->node.start +
+ eb->args->batch_start_offset,
+ eb->args->batch_len,
+ eb->dispatch_flags);
+ if (ret)
+ return ret;
- i915_gem_execbuffer_move_to_active(vmas, params->request);
+ trace_i915_gem_ring_dispatch(eb->request, eb->dispatch_flags);
+ add_to_client(eb->request, eb->file);
return 0;
}
@@ -1507,148 +1851,160 @@ eb_select_engine(struct drm_i915_private *dev_priv,
return engine;
}
+static int eb_add_fence(struct i915_execbuffer *eb, struct fence *fence)
+{
+ struct fence_array *array;
+ int ret, i;
+
+ if (fence_is_i915(fence))
+ return __eb_sync(eb->request, to_request(fence));
+
+ if (!fence_is_array(fence))
+ return kfence_await_dma_fence(&eb->request->submit, fence,
+ GFP_KERNEL);
+
+ array = to_fence_array(fence);
+ for (i = 0; i < array->num_fences; i++) {
+ if (fence_is_i915(fence))
+ ret = __eb_sync(eb->request, to_request(fence));
+ else
+ ret = kfence_await_dma_fence(&eb->request->submit,
+ fence, GFP_KERNEL);
+ if (ret < 0)
+ return ret;
+ }
+
+ return 0;
+}
+
static int
-i915_gem_do_execbuffer(struct drm_device *dev, void *data,
+i915_gem_do_execbuffer(struct drm_device *dev,
struct drm_file *file,
struct drm_i915_gem_execbuffer2 *args,
struct drm_i915_gem_exec_object2 *exec)
{
- struct drm_i915_private *dev_priv = to_i915(dev);
- struct i915_ggtt *ggtt = &dev_priv->ggtt;
- struct eb_vmas *eb;
- struct drm_i915_gem_exec_object2 shadow_exec_entry;
- struct intel_engine_cs *engine;
- struct i915_gem_context *ctx;
- struct i915_address_space *vm;
- struct i915_execbuffer_params params_master; /* XXX: will be removed later */
- struct i915_execbuffer_params *params = ¶ms_master;
- const u32 ctx_id = i915_execbuffer2_get_context_id(*args);
- u32 dispatch_flags;
+ struct i915_execbuffer eb;
+ struct fence *in_fence = NULL;
+ struct sync_file *out_fence = NULL;
+ int out_fence_fd = -1;
int ret;
- bool need_relocs;
+
+ BUILD_BUG_ON(__EXEC_OBJECT_INTERNAL_FLAGS & ~__EXEC_OBJECT_UNKNOWN_FLAGS);
if (!i915_gem_check_execbuffer(args))
return -EINVAL;
- ret = validate_exec_list(dev, exec, args->buffer_count);
- if (ret)
- return ret;
-
- dispatch_flags = 0;
+ eb.i915 = to_i915(dev);
+ eb.file = file;
+ eb.args = args;
+ if ((args->flags & I915_EXEC_NO_RELOC) == 0)
+ args->flags |= __EXEC_HAS_RELOC;
+ eb.exec = NULL;
+ eb.invalid_flags = __EXEC_OBJECT_UNKNOWN_FLAGS;
+ if (USES_FULL_PPGTT(eb.i915))
+ eb.invalid_flags |= EXEC_OBJECT_NEEDS_GTT;
+ reloc_cache_init(&eb.reloc_cache, eb.i915);
+
+ eb.dispatch_flags = 0;
if (args->flags & I915_EXEC_SECURE) {
if (!drm_is_current_master(file) || !capable(CAP_SYS_ADMIN))
return -EPERM;
- dispatch_flags |= I915_DISPATCH_SECURE;
+ eb.dispatch_flags |= I915_DISPATCH_SECURE;
}
if (args->flags & I915_EXEC_IS_PINNED)
- dispatch_flags |= I915_DISPATCH_PINNED;
+ eb.dispatch_flags |= I915_DISPATCH_PINNED;
- engine = eb_select_engine(dev_priv, file, args);
- if (!engine)
+ eb.engine = eb_select_engine(eb.i915, file, args);
+ if (!eb.engine)
return -EINVAL;
- if (args->buffer_count < 1) {
- DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count);
- return -EINVAL;
- }
-
if (args->flags & I915_EXEC_RESOURCE_STREAMER) {
- if (!HAS_RESOURCE_STREAMER(dev)) {
+ if (!HAS_RESOURCE_STREAMER(eb.i915)) {
DRM_DEBUG("RS is only allowed for Haswell, Gen8 and above\n");
return -EINVAL;
}
- if (engine->id != RCS) {
+ if (eb.engine->id != RCS) {
DRM_DEBUG("RS is not available on %s\n",
- engine->name);
+ eb.engine->name);
return -EINVAL;
}
- dispatch_flags |= I915_DISPATCH_RS;
+ eb.dispatch_flags |= I915_DISPATCH_RS;
}
+ if (eb_create(&eb))
+ return -ENOMEM;
+
/* Take a local wakeref for preparing to dispatch the execbuf as
* we expect to access the hardware fairly frequently in the
* process. Upon first dispatch, we acquire another prolonged
* wakeref that we hold until the GPU has been idle for at least
* 100ms.
*/
- intel_runtime_pm_get(dev_priv);
-
- ret = i915_mutex_lock_interruptible(dev);
- if (ret)
- goto pre_mutex_err;
+ intel_runtime_pm_get(eb.i915);
- ctx = i915_gem_validate_context(dev, file, engine, ctx_id);
- if (IS_ERR(ctx)) {
- mutex_unlock(&dev->struct_mutex);
- ret = PTR_ERR(ctx);
- goto pre_mutex_err;
+ if (args->flags & I915_EXEC_FENCE_IN) {
+ in_fence = sync_file_get_fence(lower_32_bits(args->rsvd2));
+ if (!in_fence) {
+ ret = -EINVAL;
+ goto pre_mutex_err;
+ }
}
- i915_gem_context_get(ctx);
-
- if (ctx->ppgtt)
- vm = &ctx->ppgtt->base;
- else
- vm = &ggtt->base;
-
- memset(¶ms_master, 0x00, sizeof(params_master));
-
- eb = eb_create(args);
- if (eb == NULL) {
- i915_gem_context_put(ctx);
- mutex_unlock(&dev->struct_mutex);
- ret = -ENOMEM;
- goto pre_mutex_err;
+ if (args->flags & I915_EXEC_FENCE_OUT) {
+ out_fence_fd = get_unused_fd_flags(O_CLOEXEC);
+ if (out_fence_fd < 0) {
+ ret = out_fence_fd;
+ goto pre_mutex_err;
+ }
}
- /* Look up object handles */
- ret = eb_lookup_vmas(eb, exec, args, vm, file);
+ ret = i915_mutex_lock_interruptible(dev);
if (ret)
- goto err;
-
- /* take note of the batch buffer before we might reorder the lists */
- params->batch = eb_get_batch(eb);
+ goto pre_mutex_err;
- /* Move the objects en-masse into the GTT, evicting if necessary. */
- need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0;
- ret = i915_gem_execbuffer_reserve(engine, &eb->vmas, ctx,
- &need_relocs);
+ /* Look up object handles */
+ eb.exec = exec;
+ ret = eb_lookup_vmas(&eb);
if (ret)
goto err;
/* The objects are in their final locations, apply the relocations. */
- if (need_relocs)
- ret = i915_gem_execbuffer_relocate(eb);
- if (ret) {
- if (ret == -EFAULT) {
- ret = i915_gem_execbuffer_relocate_slow(dev, args, file,
- engine,
- eb, exec, ctx);
- BUG_ON(!mutex_is_locked(&dev->struct_mutex));
- }
- if (ret)
+ if (args->flags & __EXEC_HAS_RELOC && !list_empty(&eb.relocs)) {
+ ret = eb_relocate(&eb);
+ if (ret == -EAGAIN || ret == -EFAULT)
+ ret = eb_relocate_slow(&eb);
+ if (ret) {
+ /* If the user expects the execobject.offset and
+ * reloc.presumed_offset to be an exact match,
+ * as for using NO_RELOC, then we cannot update
+ * the execobject.offset until we have completed
+ * relocation.
+ */
+ if (args->flags & I915_EXEC_NO_RELOC)
+ args->flags &= ~__EXEC_HAS_RELOC;
goto err;
+ }
}
/* Set the pending read domains for the batch buffer to COMMAND */
- if (params->batch->obj->base.pending_write_domain) {
+ if (eb.batch->exec_entry->flags & EXEC_OBJECT_WRITE) {
DRM_DEBUG("Attempting to use self-modifying batch buffer\n");
ret = -EINVAL;
goto err;
}
+ if (args->batch_start_offset > eb.batch->size ||
+ args->batch_len > eb.batch->size - args->batch_start_offset) {
+ DRM_DEBUG("Attempting to use out-of-bounds batch\n");
+ ret = -EINVAL;
+ goto err;
+ }
- params->args_batch_start_offset = args->batch_start_offset;
- if (intel_engine_needs_cmd_parser(engine) && args->batch_len) {
+ if (intel_engine_needs_cmd_parser(eb.engine) && args->batch_len) {
struct i915_vma *vma;
- vma = i915_gem_execbuffer_parse(engine, &shadow_exec_entry,
- params->batch->obj,
- eb,
- args->batch_start_offset,
- args->batch_len,
- drm_is_current_master(file));
+ vma = eb_parse(&eb, drm_is_current_master(file));
if (IS_ERR(vma)) {
ret = PTR_ERR(vma);
goto err;
@@ -1664,19 +2020,17 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
* specifically don't want that set on batches the
* command parser has accepted.
*/
- dispatch_flags |= I915_DISPATCH_SECURE;
- params->args_batch_start_offset = 0;
- params->batch = vma;
+ eb.dispatch_flags |= I915_DISPATCH_SECURE;
+ eb.args->batch_start_offset = 0;
+ eb.batch = vma;
}
}
- params->batch->obj->base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND;
-
/* snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
* batch" bit. Hence we need to pin secure batches into the global gtt.
* hsw should have this fixed, but bdw mucks it up again. */
- if (dispatch_flags & I915_DISPATCH_SECURE) {
- struct drm_i915_gem_object *obj = params->batch->obj;
+ if (eb.dispatch_flags & I915_DISPATCH_SECURE) {
+ struct i915_vma *vma;
/*
* So on first glance it looks freaky that we pin the batch here
@@ -1688,39 +2042,60 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
* fitting due to fragmentation.
* So this is actually safe.
*/
- ret = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 0);
- if (ret)
+ vma = i915_gem_object_ggtt_pin(eb.batch->obj, NULL,
+ 0, 0, 0);
+ if (IS_ERR(vma)) {
+ ret = PTR_ERR(vma);
goto err;
+ }
- params->batch = i915_gem_obj_to_ggtt(obj);
+ eb.batch = vma;
}
+ if (args->batch_len == 0)
+ args->batch_len = eb.batch->size - eb.args->batch_start_offset;
+
/* Allocate a request for this batch buffer nice and early. */
- params->request = i915_gem_request_alloc(engine, ctx);
- if (IS_ERR(params->request)) {
- ret = PTR_ERR(params->request);
+ eb.request = i915_gem_request_alloc(eb.engine, eb.ctx);
+ if (IS_ERR(eb.request)) {
+ ret = PTR_ERR(eb.request);
goto err_batch_unpin;
}
- ret = i915_gem_request_add_to_client(params->request, file);
- if (ret)
- goto err_request;
+ if (in_fence) {
+ ret = eb_add_fence(&eb, in_fence);
+ if (ret < 0)
+ goto err_request;
+ }
- /*
- * Save assorted stuff away to pass through to *_submission().
- * NB: This data should be 'persistent' and not local as it will
- * kept around beyond the duration of the IOCTL once the GPU
- * scheduler arrives.
+ if (out_fence_fd != -1) {
+ out_fence = sync_file_create(fence_get(&eb.request->fence));
+ if (!out_fence) {
+ ret = -ENOMEM;
+ goto err_request;
+ }
+ }
+
+ /* Whilst this request exists, batch_obj will be on the
+ * active_list, and so will hold the active reference. Only when this
+ * request is retired will the the batch_obj be moved onto the
+ * inactive_list and lose its active reference. Hence we do not need
+ * to explicitly hold another reference here.
*/
- params->dev = dev;
- params->file = file;
- params->engine = engine;
- params->dispatch_flags = dispatch_flags;
- params->ctx = ctx;
+ eb.request->batch = eb.batch;
- ret = execbuf_submit(params, args, &eb->vmas);
+ ret = eb_submit(&eb);
err_request:
- __i915_add_request(params->request, params->batch->obj, ret == 0);
+ __i915_add_request(eb.request, ret == 0);
+ if (out_fence) {
+ if (ret == 0) {
+ fd_install(out_fence_fd, out_fence->file);
+ args->rsvd2 &= ~(u64)0xffffffff << 32;
+ args->rsvd2 |= (u64)out_fence_fd << 32;
+ out_fence_fd = -1;
+ } else
+ fput(out_fence->file);
+ }
err_batch_unpin:
/*
@@ -1729,19 +2104,20 @@ err_batch_unpin:
* needs to be adjusted to also track the ggtt batch vma properly as
* active.
*/
- if (dispatch_flags & I915_DISPATCH_SECURE)
- i915_vma_unpin(params->batch);
+ if (eb.dispatch_flags & I915_DISPATCH_SECURE)
+ i915_vma_unpin(eb.batch);
err:
/* the request owns the ref now */
- i915_gem_context_put(ctx);
- eb_destroy(eb);
-
+ eb_release_vma(&eb);
mutex_unlock(&dev->struct_mutex);
-
pre_mutex_err:
+ if (out_fence_fd != -1)
+ put_unused_fd(out_fence_fd);
+ fence_put(in_fence);
/* intel_gpu_busy should also get a ref, so it will free when the device
* is really idle. */
- intel_runtime_pm_put(dev_priv);
+ intel_runtime_pm_put(eb.i915);
+ eb_destroy(&eb);
return ret;
}
@@ -1765,8 +2141,12 @@ i915_gem_execbuffer(struct drm_device *dev, void *data,
}
/* Copy in the exec list from userland */
- exec_list = drm_malloc_ab(sizeof(*exec_list), args->buffer_count);
- exec2_list = drm_malloc_ab(sizeof(*exec2_list), args->buffer_count);
+ exec_list = drm_malloc_gfp(args->buffer_count,
+ sizeof(*exec_list),
+ __GFP_NOWARN | GFP_TEMPORARY);
+ exec2_list = drm_malloc_gfp(args->buffer_count + 1,
+ sizeof(*exec2_list),
+ __GFP_NOWARN | GFP_TEMPORARY);
if (exec_list == NULL || exec2_list == NULL) {
DRM_DEBUG("Failed to allocate exec list for %d buffers\n",
args->buffer_count);
@@ -1791,7 +2171,7 @@ i915_gem_execbuffer(struct drm_device *dev, void *data,
exec2_list[i].relocs_ptr = exec_list[i].relocs_ptr;
exec2_list[i].alignment = exec_list[i].alignment;
exec2_list[i].offset = exec_list[i].offset;
- if (INTEL_INFO(dev)->gen < 4)
+ if (INTEL_GEN(dev) < 4)
exec2_list[i].flags = EXEC_OBJECT_NEEDS_FENCE;
else
exec2_list[i].flags = 0;
@@ -1808,25 +2188,23 @@ i915_gem_execbuffer(struct drm_device *dev, void *data,
exec2.flags = I915_EXEC_RENDER;
i915_execbuffer2_set_context_id(exec2, 0);
- ret = i915_gem_do_execbuffer(dev, data, file, &exec2, exec2_list);
- if (!ret) {
+ ret = i915_gem_do_execbuffer(dev, file, &exec2, exec2_list);
+ if (exec2.flags & __EXEC_HAS_RELOC) {
struct drm_i915_gem_exec_object __user *user_exec_list =
u64_to_user_ptr(args->buffers_ptr);
/* Copy the new buffer offsets back to the user's exec list. */
for (i = 0; i < args->buffer_count; i++) {
+ if ((exec2_list[i].offset & UPDATE) == 0)
+ continue;
+
exec2_list[i].offset =
- gen8_canonical_addr(exec2_list[i].offset);
- ret = __copy_to_user(&user_exec_list[i].offset,
- &exec2_list[i].offset,
- sizeof(user_exec_list[i].offset));
- if (ret) {
- ret = -EFAULT;
- DRM_DEBUG("failed to copy %d exec entries "
- "back to user (%d)\n",
- args->buffer_count, ret);
+ gen8_canonical_addr(exec2_list[i].offset & PIN_OFFSET_MASK);
+ exec2_list[i].offset &= PIN_OFFSET_MASK;
+ if (__copy_to_user(&user_exec_list[i].offset,
+ &exec2_list[i].offset,
+ sizeof(user_exec_list[i].offset)))
break;
- }
}
}
@@ -1840,59 +2218,50 @@ i915_gem_execbuffer2(struct drm_device *dev, void *data,
struct drm_file *file)
{
struct drm_i915_gem_execbuffer2 *args = data;
- struct drm_i915_gem_exec_object2 *exec2_list = NULL;
+ struct drm_i915_gem_exec_object2 *exec2_list;
int ret;
if (args->buffer_count < 1 ||
- args->buffer_count > UINT_MAX / sizeof(*exec2_list)) {
+ args->buffer_count >= UINT_MAX / sizeof(*exec2_list)) {
DRM_DEBUG("execbuf2 with %d buffers\n", args->buffer_count);
return -EINVAL;
}
- if (args->rsvd2 != 0) {
- DRM_DEBUG("dirty rvsd2 field\n");
- return -EINVAL;
- }
-
- exec2_list = drm_malloc_gfp(args->buffer_count,
+ exec2_list = drm_malloc_gfp(args->buffer_count + 1,
sizeof(*exec2_list),
- GFP_TEMPORARY);
+ __GFP_NOWARN | GFP_TEMPORARY);
if (exec2_list == NULL) {
DRM_DEBUG("Failed to allocate exec list for %d buffers\n",
args->buffer_count);
return -ENOMEM;
}
- ret = copy_from_user(exec2_list,
- u64_to_user_ptr(args->buffers_ptr),
- sizeof(*exec2_list) * args->buffer_count);
- if (ret != 0) {
- DRM_DEBUG("copy %d exec entries failed %d\n",
- args->buffer_count, ret);
+ if (copy_from_user(exec2_list,
+ u64_to_user_ptr(args->buffers_ptr),
+ sizeof(*exec2_list) * args->buffer_count)) {
+ DRM_DEBUG("copy %d exec entries failed\n", args->buffer_count);
drm_free_large(exec2_list);
return -EFAULT;
}
- ret = i915_gem_do_execbuffer(dev, data, file, args, exec2_list);
- if (!ret) {
+ ret = i915_gem_do_execbuffer(dev, file, args, exec2_list);
+ if (args->flags & __EXEC_HAS_RELOC) {
/* Copy the new buffer offsets back to the user's exec list. */
struct drm_i915_gem_exec_object2 __user *user_exec_list =
- u64_to_user_ptr(args->buffers_ptr);
+ u64_to_user_ptr(args->buffers_ptr);
int i;
+ user_access_begin();
for (i = 0; i < args->buffer_count; i++) {
+ if ((exec2_list[i].offset & UPDATE) == 0)
+ continue;
+
exec2_list[i].offset =
- gen8_canonical_addr(exec2_list[i].offset);
- ret = __copy_to_user(&user_exec_list[i].offset,
- &exec2_list[i].offset,
- sizeof(user_exec_list[i].offset));
- if (ret) {
- ret = -EFAULT;
- DRM_DEBUG("failed to copy %d exec entries "
- "back to user\n",
- args->buffer_count);
+ gen8_canonical_addr(exec2_list[i].offset & PIN_OFFSET_MASK);
+ if (unsafe_put_user(exec2_list[i].offset,
+ &user_exec_list[i].offset))
break;
- }
}
+ user_access_end();
}
drm_free_large(exec2_list);
diff --git a/drivers/gpu/drm/i915/i915_gem_fence.c b/drivers/gpu/drm/i915/i915_gem_fence.c
index 9e8173fe2a09..34d38817b7a9 100644
--- a/drivers/gpu/drm/i915/i915_gem_fence.c
+++ b/drivers/gpu/drm/i915/i915_gem_fence.c
@@ -55,87 +55,82 @@
* CPU ptes into GTT mmaps (not the GTT ptes themselves) as needed.
*/
-static void i965_write_fence_reg(struct drm_device *dev, int reg,
- struct drm_i915_gem_object *obj)
+static void i965_write_fence_reg(struct drm_i915_fence_reg *fence,
+ struct i915_vma *vma)
{
- struct drm_i915_private *dev_priv = to_i915(dev);
i915_reg_t fence_reg_lo, fence_reg_hi;
int fence_pitch_shift;
+ u64 val;
- if (INTEL_INFO(dev)->gen >= 6) {
- fence_reg_lo = FENCE_REG_GEN6_LO(reg);
- fence_reg_hi = FENCE_REG_GEN6_HI(reg);
+ if (INTEL_INFO(fence->i915)->gen >= 6) {
+ fence_reg_lo = FENCE_REG_GEN6_LO(fence->id);
+ fence_reg_hi = FENCE_REG_GEN6_HI(fence->id);
fence_pitch_shift = GEN6_FENCE_PITCH_SHIFT;
+
} else {
- fence_reg_lo = FENCE_REG_965_LO(reg);
- fence_reg_hi = FENCE_REG_965_HI(reg);
+ fence_reg_lo = FENCE_REG_965_LO(fence->id);
+ fence_reg_hi = FENCE_REG_965_HI(fence->id);
fence_pitch_shift = I965_FENCE_PITCH_SHIFT;
}
- /* To w/a incoherency with non-atomic 64-bit register updates,
- * we split the 64-bit update into two 32-bit writes. In order
- * for a partial fence not to be evaluated between writes, we
- * precede the update with write to turn off the fence register,
- * and only enable the fence as the last step.
- *
- * For extra levels of paranoia, we make sure each step lands
- * before applying the next step.
- */
- I915_WRITE(fence_reg_lo, 0);
- POSTING_READ(fence_reg_lo);
-
- if (obj) {
- u32 size = i915_gem_obj_ggtt_size(obj);
- unsigned int tiling = i915_gem_object_get_tiling(obj);
- unsigned int stride = i915_gem_object_get_stride(obj);
- uint64_t val;
-
- /* Adjust fence size to match tiled area */
- if (tiling != I915_TILING_NONE) {
- uint32_t row_size = stride *
- (tiling == I915_TILING_Y ? 32 : 8);
- size = (size / row_size) * row_size;
- }
-
- val = (uint64_t)((i915_gem_obj_ggtt_offset(obj) + size - 4096) &
- 0xfffff000) << 32;
- val |= i915_gem_obj_ggtt_offset(obj) & 0xfffff000;
- val |= (uint64_t)((stride / 128) - 1) << fence_pitch_shift;
- if (tiling == I915_TILING_Y)
+ if (vma) {
+ unsigned int tiling = i915_gem_object_get_tiling(vma->obj);
+ unsigned int tiling_y = tiling == I915_TILING_Y;
+ unsigned int stride = i915_gem_object_get_stride(vma->obj);
+ u32 row_size = stride * (tiling_y ? 32 : 8);
+ u32 size = (u32)vma->node.size / row_size * row_size;
+
+ val = ((vma->node.start + size - 4096) & 0xfffff000) << 32;
+ val |= vma->node.start & 0xfffff000;
+ val |= (u64)((stride / 128) - 1) << fence_pitch_shift;
+ if (tiling_y)
val |= 1 << I965_FENCE_TILING_Y_SHIFT;
val |= I965_FENCE_REG_VALID;
+ } else
+ val = 0;
+
+ if (1) {
+ struct drm_i915_private *dev_priv = fence->i915;
- I915_WRITE(fence_reg_hi, val >> 32);
- POSTING_READ(fence_reg_hi);
+ /* To w/a incoherency with non-atomic 64-bit register updates,
+ * we split the 64-bit update into two 32-bit writes. In order
+ * for a partial fence not to be evaluated between writes, we
+ * precede the update with write to turn off the fence register,
+ * and only enable the fence as the last step.
+ *
+ * For extra levels of paranoia, we make sure each step lands
+ * before applying the next step.
+ */
+ I915_WRITE(fence_reg_lo, 0);
+ POSTING_READ(fence_reg_lo);
- I915_WRITE(fence_reg_lo, val);
+ I915_WRITE(fence_reg_hi, upper_32_bits(val));
+ I915_WRITE(fence_reg_lo, lower_32_bits(val));
POSTING_READ(fence_reg_lo);
- } else {
- I915_WRITE(fence_reg_hi, 0);
- POSTING_READ(fence_reg_hi);
}
}
-static void i915_write_fence_reg(struct drm_device *dev, int reg,
- struct drm_i915_gem_object *obj)
+static void i915_write_fence_reg(struct drm_i915_fence_reg *fence,
+ struct i915_vma *vma)
{
- struct drm_i915_private *dev_priv = to_i915(dev);
u32 val;
- if (obj) {
- u32 size = i915_gem_obj_ggtt_size(obj);
- unsigned int tiling = i915_gem_object_get_tiling(obj);
- unsigned int stride = i915_gem_object_get_stride(obj);
+ if (vma) {
+ unsigned int tiling = i915_gem_object_get_tiling(vma->obj);
+ unsigned int tiling_y = tiling == I915_TILING_Y;
+ unsigned int stride = i915_gem_object_get_stride(vma->obj);
int pitch_val;
int tile_width;
- WARN((i915_gem_obj_ggtt_offset(obj) & ~I915_FENCE_START_MASK) ||
- (size & -size) != size ||
- (i915_gem_obj_ggtt_offset(obj) & (size - 1)),
- "object 0x%08llx [fenceable? %d] not 1M or pot-size (0x%08x) aligned\n",
- i915_gem_obj_ggtt_offset(obj), obj->map_and_fenceable, size);
+ WARN((vma->node.start & ~I915_FENCE_START_MASK) ||
+ !is_power_of_2(vma->node.size) ||
+ (vma->node.start & (vma->node.size - 1)),
+ "object 0x%08lx [fenceable? %d] not 1M or pot-size (0x%08lx) aligned\n",
+ (long)vma->node.start,
+ i915_vma_is_map_and_fenceable(vma),
+ (long)vma->node.size);
- if (tiling == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev))
+ if (tiling_y && HAS_128_BYTE_Y_TILING(fence->i915))
tile_width = 128;
else
tile_width = 512;
@@ -144,139 +139,141 @@ static void i915_write_fence_reg(struct drm_device *dev, int reg,
pitch_val = stride / tile_width;
pitch_val = ffs(pitch_val) - 1;
- val = i915_gem_obj_ggtt_offset(obj);
- if (tiling == I915_TILING_Y)
+ val = vma->node.start;
+ if (tiling_y)
val |= 1 << I830_FENCE_TILING_Y_SHIFT;
- val |= I915_FENCE_SIZE_BITS(size);
+ val |= I915_FENCE_SIZE_BITS(vma->node.size);
val |= pitch_val << I830_FENCE_PITCH_SHIFT;
val |= I830_FENCE_REG_VALID;
} else
val = 0;
- I915_WRITE(FENCE_REG(reg), val);
- POSTING_READ(FENCE_REG(reg));
+ if (1) {
+ struct drm_i915_private *dev_priv = fence->i915;
+ i915_reg_t reg = FENCE_REG(fence->id);
+
+ I915_WRITE(reg, val);
+ POSTING_READ(reg);
+ }
}
-static void i830_write_fence_reg(struct drm_device *dev, int reg,
- struct drm_i915_gem_object *obj)
+static void i830_write_fence_reg(struct drm_i915_fence_reg *fence,
+ struct i915_vma *vma)
{
- struct drm_i915_private *dev_priv = to_i915(dev);
- uint32_t val;
+ u32 val;
- if (obj) {
- u32 size = i915_gem_obj_ggtt_size(obj);
- unsigned int tiling = i915_gem_object_get_tiling(obj);
- unsigned int stride = i915_gem_object_get_stride(obj);
+ if (vma) {
+ unsigned int tiling = i915_gem_object_get_tiling(vma->obj);
+ unsigned int stride = i915_gem_object_get_stride(vma->obj);
uint32_t pitch_val;
- WARN((i915_gem_obj_ggtt_offset(obj) & ~I830_FENCE_START_MASK) ||
- (size & -size) != size ||
- (i915_gem_obj_ggtt_offset(obj) & (size - 1)),
- "object 0x%08llx not 512K or pot-size 0x%08x aligned\n",
- i915_gem_obj_ggtt_offset(obj), size);
+ WARN((vma->node.start & ~I830_FENCE_START_MASK) ||
+ !is_power_of_2(vma->node.size) ||
+ (vma->node.start & (vma->node.size - 1)),
+ "object 0x%08lx not 512K or pot-size 0x%08lx aligned\n",
+ (long)vma->node.start, (long)vma->node.size);
pitch_val = stride / 128;
pitch_val = ffs(pitch_val) - 1;
- val = i915_gem_obj_ggtt_offset(obj);
+ val = vma->node.start;
if (tiling == I915_TILING_Y)
val |= 1 << I830_FENCE_TILING_Y_SHIFT;
- val |= I830_FENCE_SIZE_BITS(size);
+ val |= I830_FENCE_SIZE_BITS(vma->node.size);
val |= pitch_val << I830_FENCE_PITCH_SHIFT;
val |= I830_FENCE_REG_VALID;
} else
val = 0;
- I915_WRITE(FENCE_REG(reg), val);
- POSTING_READ(FENCE_REG(reg));
-}
+ if (1) {
+ struct drm_i915_private *dev_priv = fence->i915;
+ i915_reg_t reg = FENCE_REG(fence->id);
-inline static bool i915_gem_object_needs_mb(struct drm_i915_gem_object *obj)
-{
- return obj && obj->base.read_domains & I915_GEM_DOMAIN_GTT;
+ I915_WRITE(reg, val);
+ POSTING_READ(reg);
+ }
}
-static void i915_gem_write_fence(struct drm_device *dev, int reg,
- struct drm_i915_gem_object *obj)
+static void fence_write(struct drm_i915_fence_reg *fence,
+ struct i915_vma *vma)
{
- struct drm_i915_private *dev_priv = to_i915(dev);
-
- /* Ensure that all CPU reads are completed before installing a fence
- * and all writes before removing the fence.
+ /* Previous access through the fence register is marshalled by
+ * the mb() inside the fault handlers (i915_gem_release_mmaps)
+ * and explicitly managed for internal users.
*/
- if (i915_gem_object_needs_mb(dev_priv->fence_regs[reg].obj))
- mb();
-
- WARN(obj &&
- (!i915_gem_object_get_stride(obj) ||
- !i915_gem_object_get_tiling(obj)),
- "bogus fence setup with stride: 0x%x, tiling mode: %i\n",
- i915_gem_object_get_stride(obj),
- i915_gem_object_get_tiling(obj));
-
- if (IS_GEN2(dev))
- i830_write_fence_reg(dev, reg, obj);
- else if (IS_GEN3(dev))
- i915_write_fence_reg(dev, reg, obj);
- else if (INTEL_INFO(dev)->gen >= 4)
- i965_write_fence_reg(dev, reg, obj);
-
- /* And similarly be paranoid that no direct access to this region
- * is reordered to before the fence is installed.
+
+ if (IS_GEN2(fence->i915))
+ i830_write_fence_reg(fence, vma);
+ else if (IS_GEN3(fence->i915))
+ i915_write_fence_reg(fence, vma);
+ else
+ i965_write_fence_reg(fence, vma);
+
+ /* Access through the fenced region afterwards is
+ * ordered by the posting reads whilst writing the registers.
*/
- if (i915_gem_object_needs_mb(obj))
- mb();
-}
-static inline int fence_number(struct drm_i915_private *dev_priv,
- struct drm_i915_fence_reg *fence)
-{
- return fence - dev_priv->fence_regs;
+ fence->dirty = false;
}
-static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
- struct drm_i915_fence_reg *fence,
- bool enable)
+static int fence_update(struct drm_i915_fence_reg *fence,
+ struct i915_vma *vma)
{
- struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
- int reg = fence_number(dev_priv, fence);
+ int ret;
- i915_gem_write_fence(obj->base.dev, reg, enable ? obj : NULL);
+ if (vma) {
+ if (!i915_vma_is_map_and_fenceable(vma))
+ return -EINVAL;
- if (enable) {
- obj->fence_reg = reg;
- fence->obj = obj;
- list_move_tail(&fence->lru_list, &dev_priv->mm.fence_list);
- } else {
- obj->fence_reg = I915_FENCE_REG_NONE;
- fence->obj = NULL;
- list_del_init(&fence->lru_list);
+ if (WARN(!i915_gem_object_get_stride(vma->obj) ||
+ !i915_gem_object_get_tiling(vma->obj),
+ "bogus fence setup with stride: 0x%x, tiling mode: %i\n",
+ i915_gem_object_get_stride(vma->obj),
+ i915_gem_object_get_tiling(vma->obj)))
+ return -EINVAL;
+
+ ret = i915_gem_active_retire(&vma->last_fence,
+ &vma->obj->base.dev->struct_mutex);
+ if (ret)
+ return ret;
}
- obj->fence_dirty = false;
-}
-static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj)
-{
- if (i915_gem_object_is_tiled(obj))
- i915_gem_release_mmap(obj);
+ if (fence->vma) {
+ ret = i915_gem_active_retire(&fence->vma->last_fence,
+ &fence->vma->obj->base.dev->struct_mutex);
+ if (ret)
+ return ret;
+ }
- /* As we do not have an associated fence register, we will force
- * a tiling change if we ever need to acquire one.
- */
- obj->fence_dirty = false;
- obj->fence_reg = I915_FENCE_REG_NONE;
-}
+ if (fence->vma && fence->vma != vma) {
+ /* Ensure that all userspace CPU access is completed before
+ * stealing the fence.
+ */
+ i915_gem_release_mmap(fence->vma->obj);
-static int
-i915_gem_object_wait_fence(struct drm_i915_gem_object *obj)
-{
- return i915_gem_active_retire(&obj->last_fence,
- &obj->base.dev->struct_mutex);
+ fence->vma->fence = NULL;
+ fence->vma = NULL;
+
+ list_move(&fence->lru_list, &fence->i915->mm.fence_list);
+ }
+
+ fence_write(fence, vma);
+
+ if (vma) {
+ if (fence->vma != vma) {
+ vma->fence = fence;
+ fence->vma = vma;
+ }
+
+ list_move_tail(&fence->lru_list, &fence->i915->mm.fence_list);
+ }
+
+ return 0;
}
/**
- * i915_gem_object_put_fence - force-remove fence for an object
- * @obj: object to map through a fence reg
+ * i915_vma_put_fence - force-remove fence for a VMA
+ * @vma: vma to map linearly (not through a fence reg)
*
* This function force-removes any fence from the given object, which is useful
* if the kernel wants to do untiled GTT access.
@@ -286,70 +283,40 @@ i915_gem_object_wait_fence(struct drm_i915_gem_object *obj)
* 0 on success, negative error code on failure.
*/
int
-i915_gem_object_put_fence(struct drm_i915_gem_object *obj)
+i915_vma_put_fence(struct i915_vma *vma)
{
- struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
- struct drm_i915_fence_reg *fence;
- int ret;
-
- ret = i915_gem_object_wait_fence(obj);
- if (ret)
- return ret;
+ struct drm_i915_fence_reg *fence = vma->fence;
- if (obj->fence_reg == I915_FENCE_REG_NONE)
+ if (!fence)
return 0;
- fence = &dev_priv->fence_regs[obj->fence_reg];
-
if (WARN_ON(fence->pin_count))
return -EBUSY;
- i915_gem_object_fence_lost(obj);
- i915_gem_object_update_fence(obj, fence, false);
-
- return 0;
+ return fence_update(fence, NULL);
}
-static struct drm_i915_fence_reg *
-i915_find_fence_reg(struct drm_device *dev)
+static struct drm_i915_fence_reg *fence_find(struct drm_i915_private *dev_priv)
{
- struct drm_i915_private *dev_priv = to_i915(dev);
- struct drm_i915_fence_reg *reg, *avail;
- int i;
-
- /* First try to find a free reg */
- avail = NULL;
- for (i = 0; i < dev_priv->num_fence_regs; i++) {
- reg = &dev_priv->fence_regs[i];
- if (!reg->obj)
- return reg;
-
- if (!reg->pin_count)
- avail = reg;
- }
-
- if (avail == NULL)
- goto deadlock;
+ struct drm_i915_fence_reg *fence;
- /* None available, try to steal one or wait for a user to finish */
- list_for_each_entry(reg, &dev_priv->mm.fence_list, lru_list) {
- if (reg->pin_count)
+ list_for_each_entry(fence, &dev_priv->mm.fence_list, lru_list) {
+ if (fence->pin_count)
continue;
- return reg;
+ return fence;
}
-deadlock:
/* Wait for completion of pending flips which consume fences */
- if (intel_has_pending_fb_unpin(dev))
+ if (intel_has_pending_fb_unpin(&dev_priv->drm))
return ERR_PTR(-EAGAIN);
return ERR_PTR(-EDEADLK);
}
/**
- * i915_gem_object_get_fence - set up fencing for an object
- * @obj: object to map through a fence reg
+ * i915_vma_get_fence - set up fencing for a vma
+ * @vma: vma to map through a fence reg
*
* When mapping objects through the GTT, userspace wants to be able to write
* to them without having to worry about swizzling if the object is tiled.
@@ -366,103 +333,27 @@ deadlock:
* 0 on success, negative error code on failure.
*/
int
-i915_gem_object_get_fence(struct drm_i915_gem_object *obj)
+i915_vma_get_fence(struct i915_vma *vma)
{
- struct drm_device *dev = obj->base.dev;
- struct drm_i915_private *dev_priv = to_i915(dev);
- bool enable = i915_gem_object_is_tiled(obj);
- struct drm_i915_fence_reg *reg;
- int ret;
-
- /* Have we updated the tiling parameters upon the object and so
- * will need to serialise the write to the associated fence register?
- */
- if (obj->fence_dirty) {
- ret = i915_gem_object_wait_fence(obj);
- if (ret)
- return ret;
- }
+ struct drm_i915_fence_reg *fence;
+ struct i915_vma *set = i915_gem_object_is_tiled(vma->obj) ? vma : NULL;
/* Just update our place in the LRU if our fence is getting reused. */
- if (obj->fence_reg != I915_FENCE_REG_NONE) {
- reg = &dev_priv->fence_regs[obj->fence_reg];
- if (!obj->fence_dirty) {
- list_move_tail(®->lru_list,
- &dev_priv->mm.fence_list);
+ if (vma->fence) {
+ fence = vma->fence;
+ if (!fence->dirty) {
+ list_move_tail(&fence->lru_list,
+ &fence->i915->mm.fence_list);
return 0;
}
- } else if (enable) {
- if (WARN_ON(!obj->map_and_fenceable))
- return -EINVAL;
-
- reg = i915_find_fence_reg(dev);
- if (IS_ERR(reg))
- return PTR_ERR(reg);
-
- if (reg->obj) {
- struct drm_i915_gem_object *old = reg->obj;
-
- ret = i915_gem_object_wait_fence(old);
- if (ret)
- return ret;
-
- i915_gem_object_fence_lost(old);
- }
+ } else if (set) {
+ fence = fence_find(to_i915(vma->vm->dev));
+ if (IS_ERR(fence))
+ return PTR_ERR(fence);
} else
return 0;
- i915_gem_object_update_fence(obj, reg, enable);
-
- return 0;
-}
-
-/**
- * i915_gem_object_pin_fence - pin fencing state
- * @obj: object to pin fencing for
- *
- * This pins the fencing state (whether tiled or untiled) to make sure the
- * object is ready to be used as a scanout target. Fencing status must be
- * synchronize first by calling i915_gem_object_get_fence():
- *
- * The resulting fence pin reference must be released again with
- * i915_gem_object_unpin_fence().
- *
- * Returns:
- *
- * True if the object has a fence, false otherwise.
- */
-bool
-i915_gem_object_pin_fence(struct drm_i915_gem_object *obj)
-{
- if (obj->fence_reg != I915_FENCE_REG_NONE) {
- struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
- struct i915_vma *ggtt_vma = i915_gem_obj_to_ggtt(obj);
-
- WARN_ON(!ggtt_vma ||
- dev_priv->fence_regs[obj->fence_reg].pin_count >
- i915_vma_pin_count(ggtt_vma));
- dev_priv->fence_regs[obj->fence_reg].pin_count++;
- return true;
- } else
- return false;
-}
-
-/**
- * i915_gem_object_unpin_fence - unpin fencing state
- * @obj: object to unpin fencing for
- *
- * This releases the fence pin reference acquired through
- * i915_gem_object_pin_fence. It will handle both objects with and without an
- * attached fence correctly, callers do not need to distinguish this.
- */
-void
-i915_gem_object_unpin_fence(struct drm_i915_gem_object *obj)
-{
- if (obj->fence_reg != I915_FENCE_REG_NONE) {
- struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
- WARN_ON(dev_priv->fence_regs[obj->fence_reg].pin_count <= 0);
- dev_priv->fence_regs[obj->fence_reg].pin_count--;
- }
+ return fence_update(fence, set);
}
/**
@@ -484,12 +375,7 @@ void i915_gem_restore_fences(struct drm_device *dev)
* Commit delayed tiling changes if we have an object still
* attached to the fence, otherwise just clear the fence.
*/
- if (reg->obj) {
- i915_gem_object_update_fence(reg->obj, reg,
- i915_gem_object_get_tiling(reg->obj));
- } else {
- i915_gem_write_fence(dev, i, NULL);
- }
+ fence_write(reg, reg->vma);
}
}
@@ -745,7 +631,8 @@ i915_gem_swizzle_page(struct page *page)
* by swapping them out and back in again).
*/
void
-i915_gem_object_do_bit_17_swizzle(struct drm_i915_gem_object *obj)
+i915_gem_object_do_bit_17_swizzle(struct drm_i915_gem_object *obj,
+ struct sg_table *pages)
{
struct sgt_iter sgt_iter;
struct page *page;
@@ -755,10 +642,9 @@ i915_gem_object_do_bit_17_swizzle(struct drm_i915_gem_object *obj)
return;
i = 0;
- for_each_sgt_page(page, sgt_iter, obj->pages) {
+ for_each_sgt_page(page, sgt_iter, pages) {
char new_bit_17 = page_to_phys(page) >> 17;
- if ((new_bit_17 & 0x1) !=
- (test_bit(i, obj->bit_17) != 0)) {
+ if ((new_bit_17 & 0x1) != (test_bit(i, obj->bit_17) != 0)) {
i915_gem_swizzle_page(page);
set_page_dirty(page);
}
@@ -775,15 +661,15 @@ i915_gem_object_do_bit_17_swizzle(struct drm_i915_gem_object *obj)
* be called before the backing storage can be unpinned.
*/
void
-i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj)
+i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj,
+ struct sg_table *pages)
{
struct sgt_iter sgt_iter;
struct page *page;
- int page_count = obj->base.size >> PAGE_SHIFT;
int i;
if (obj->bit_17 == NULL) {
- obj->bit_17 = kcalloc(BITS_TO_LONGS(page_count),
+ obj->bit_17 = kcalloc(BITS_TO_LONGS(obj->base.size >> PAGE_SHIFT),
sizeof(long), GFP_KERNEL);
if (obj->bit_17 == NULL) {
DRM_ERROR("Failed to allocate memory for bit 17 "
@@ -794,7 +680,7 @@ i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj)
i = 0;
- for_each_sgt_page(page, sgt_iter, obj->pages) {
+ for_each_sgt_page(page, sgt_iter, pages) {
if (page_to_phys(page) & (1 << 17))
__set_bit(i, obj->bit_17);
else
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 18c7c9644761..0b7e7a6bb65c 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -32,6 +32,8 @@
#include "i915_trace.h"
#include "intel_drv.h"
+#define I915_GFP_DMA (GFP_KERNEL | __GFP_HIGHMEM)
+
/**
* DOC: Global GTT views
*
@@ -110,6 +112,16 @@ const struct i915_ggtt_view i915_ggtt_view_rotated = {
.type = I915_GGTT_VIEW_ROTATED,
};
+static const struct rhashtable_params vma_ht_params = {
+ .head_offset = offsetof(struct i915_vma, obj_rht),
+ .key_len = sizeof(struct i915_address_space *),
+ .key_offset = offsetof(struct i915_vma, vm),
+ .hashfn = jhash,
+ .nulls_base = 1U << RHT_BASE_SHIFT,
+ .automatic_shrinking = true,
+ .nelem_hint = 2,
+};
+
int intel_sanitize_enable_ppgtt(struct drm_i915_private *dev_priv,
int enable_ppgtt)
{
@@ -174,7 +186,7 @@ static int ppgtt_bind_vma(struct i915_vma *vma,
if (vma->obj->gt_ro)
pte_flags |= PTE_READ_ONLY;
- vma->vm->insert_entries(vma->vm, vma->obj->pages, vma->node.start,
+ vma->vm->insert_entries(vma->vm, vma->obj->mm.pages, vma->node.start,
cache_level, pte_flags);
return 0;
@@ -343,7 +355,7 @@ static int __setup_page_dma(struct drm_device *dev,
static int setup_page_dma(struct drm_device *dev, struct i915_page_dma *p)
{
- return __setup_page_dma(dev, p, GFP_KERNEL);
+ return __setup_page_dma(dev, p, I915_GFP_DMA);
}
static void cleanup_page_dma(struct drm_device *dev, struct i915_page_dma *p)
@@ -405,33 +417,18 @@ static void fill_page_dma_32(struct drm_device *dev, struct i915_page_dma *p,
fill_page_dma(dev, p, v);
}
-static struct i915_page_scratch *alloc_scratch_page(struct drm_device *dev)
+static int
+setup_scratch_page(struct drm_device *dev,
+ struct i915_page_dma *scratch,
+ gfp_t gfp)
{
- struct i915_page_scratch *sp;
- int ret;
-
- sp = kzalloc(sizeof(*sp), GFP_KERNEL);
- if (sp == NULL)
- return ERR_PTR(-ENOMEM);
-
- ret = __setup_page_dma(dev, px_base(sp), GFP_DMA32 | __GFP_ZERO);
- if (ret) {
- kfree(sp);
- return ERR_PTR(ret);
- }
-
- set_pages_uc(px_page(sp), 1);
-
- return sp;
+ return __setup_page_dma(dev, scratch, gfp | __GFP_ZERO);
}
-static void free_scratch_page(struct drm_device *dev,
- struct i915_page_scratch *sp)
+static void cleanup_scratch_page(struct drm_device *dev,
+ struct i915_page_dma *scratch)
{
- set_pages_wb(px_page(sp), 1);
-
- cleanup_px(dev, sp);
- kfree(sp);
+ cleanup_page_dma(dev, scratch);
}
static struct i915_page_table *alloc_pt(struct drm_device *dev)
@@ -477,7 +474,7 @@ static void gen8_initialize_pt(struct i915_address_space *vm,
{
gen8_pte_t scratch_pte;
- scratch_pte = gen8_pte_encode(px_dma(vm->scratch_page),
+ scratch_pte = gen8_pte_encode(vm->scratch_page.daddr,
I915_CACHE_LLC, true);
fill_px(vm->dev, pt, scratch_pte);
@@ -488,9 +485,9 @@ static void gen6_initialize_pt(struct i915_address_space *vm,
{
gen6_pte_t scratch_pte;
- WARN_ON(px_dma(vm->scratch_page) == 0);
+ WARN_ON(vm->scratch_page.daddr == 0);
- scratch_pte = vm->pte_encode(px_dma(vm->scratch_page),
+ scratch_pte = vm->pte_encode(vm->scratch_page.daddr,
I915_CACHE_LLC, true, 0);
fill32_px(vm->dev, pt, scratch_pte);
@@ -774,7 +771,7 @@ static void gen8_ppgtt_clear_range(struct i915_address_space *vm,
bool use_scratch)
{
struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
- gen8_pte_t scratch_pte = gen8_pte_encode(px_dma(vm->scratch_page),
+ gen8_pte_t scratch_pte = gen8_pte_encode(vm->scratch_page.daddr,
I915_CACHE_LLC, use_scratch);
if (!USES_FULL_48BIT_PPGTT(vm->dev)) {
@@ -794,42 +791,46 @@ static void gen8_ppgtt_clear_range(struct i915_address_space *vm,
static void
gen8_ppgtt_insert_pte_entries(struct i915_address_space *vm,
struct i915_page_directory_pointer *pdp,
- struct sg_page_iter *sg_iter,
+ struct sgt_iter *sgt_iter,
uint64_t start,
enum i915_cache_level cache_level)
{
struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
+ unsigned int pdpe = gen8_pdpe_index(start);
+ unsigned int pde = gen8_pde_index(start);
+ unsigned int pte = gen8_pte_index(start);
+ struct i915_page_directory *pd;
+ gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, true);
gen8_pte_t *pt_vaddr;
- unsigned pdpe = gen8_pdpe_index(start);
- unsigned pde = gen8_pde_index(start);
- unsigned pte = gen8_pte_index(start);
- pt_vaddr = NULL;
+ pd = pdp->page_directory[pdpe];
+ pt_vaddr = kmap_px(pd->page_table[pde]);
- while (__sg_page_iter_next(sg_iter)) {
- if (pt_vaddr == NULL) {
- struct i915_page_directory *pd = pdp->page_directory[pdpe];
- struct i915_page_table *pt = pd->page_table[pde];
- pt_vaddr = kmap_px(pt);
+ do {
+ pt_vaddr[pte] = pte_encode | (sgt_iter->dma + sgt_iter->curr);
+ sgt_iter->curr += PAGE_SIZE;
+ if (sgt_iter->curr >= sgt_iter->max) {
+ *sgt_iter = __sgt_iter(__sg_next(sgt_iter->sgp), true);
+ if (sgt_iter->dma == 0)
+ break;
}
- pt_vaddr[pte] =
- gen8_pte_encode(sg_page_iter_dma_address(sg_iter),
- cache_level, true);
if (++pte == GEN8_PTES) {
- kunmap_px(ppgtt, pt_vaddr);
- pt_vaddr = NULL;
if (++pde == I915_PDES) {
if (++pdpe == I915_PDPES_PER_PDP(vm->dev))
break;
+
+ pd = pdp->page_directory[pdpe];
pde = 0;
}
+
+ kunmap_px(ppgtt, pt_vaddr);
+ pt_vaddr = kmap_px(pd->page_table[pde]);
pte = 0;
}
- }
+ } while (1);
- if (pt_vaddr)
- kunmap_px(ppgtt, pt_vaddr);
+ kunmap_px(ppgtt, pt_vaddr);
}
static void gen8_ppgtt_insert_entries(struct i915_address_space *vm,
@@ -839,22 +840,19 @@ static void gen8_ppgtt_insert_entries(struct i915_address_space *vm,
u32 unused)
{
struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
- struct sg_page_iter sg_iter;
-
- __sg_page_iter_start(&sg_iter, pages->sgl, sg_nents(pages->sgl), 0);
+ struct sgt_iter sgt_iter = __sgt_iter(pages->sgl, true);
if (!USES_FULL_48BIT_PPGTT(vm->dev)) {
- gen8_ppgtt_insert_pte_entries(vm, &ppgtt->pdp, &sg_iter, start,
+ gen8_ppgtt_insert_pte_entries(vm, &ppgtt->pdp, &sgt_iter, start,
cache_level);
} else {
struct i915_page_directory_pointer *pdp;
- uint64_t pml4e;
- uint64_t length = (uint64_t)pages->orig_nents << PAGE_SHIFT;
+ u64 length = (u64)pages->orig_nents << PAGE_SHIFT;
+ u64 pml4e;
- gen8_for_each_pml4e(pdp, &ppgtt->pml4, start, length, pml4e) {
- gen8_ppgtt_insert_pte_entries(vm, pdp, &sg_iter,
+ gen8_for_each_pml4e(pdp, &ppgtt->pml4, start, length, pml4e)
+ gen8_ppgtt_insert_pte_entries(vm, pdp, &sgt_iter,
start, cache_level);
- }
}
}
@@ -880,9 +878,9 @@ static int gen8_init_scratch(struct i915_address_space *vm)
struct drm_device *dev = vm->dev;
int ret;
- vm->scratch_page = alloc_scratch_page(dev);
- if (IS_ERR(vm->scratch_page))
- return PTR_ERR(vm->scratch_page);
+ ret = setup_scratch_page(dev, &vm->scratch_page, I915_GFP_DMA);
+ if (ret)
+ return ret;
vm->scratch_pt = alloc_pt(dev);
if (IS_ERR(vm->scratch_pt)) {
@@ -916,7 +914,7 @@ free_pd:
free_pt:
free_pt(dev, vm->scratch_pt);
free_scratch_page:
- free_scratch_page(dev, vm->scratch_page);
+ cleanup_scratch_page(dev, &vm->scratch_page);
return ret;
}
@@ -960,7 +958,7 @@ static void gen8_free_scratch(struct i915_address_space *vm)
free_pdp(dev, vm->scratch_pdp);
free_pd(dev, vm->scratch_pd);
free_pt(dev, vm->scratch_pt);
- free_scratch_page(dev, vm->scratch_page);
+ cleanup_scratch_page(dev, &vm->scratch_page);
}
static void gen8_ppgtt_cleanup_3lvl(struct drm_device *dev,
@@ -1457,7 +1455,7 @@ static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
struct i915_address_space *vm = &ppgtt->base;
uint64_t start = ppgtt->base.start;
uint64_t length = ppgtt->base.total;
- gen8_pte_t scratch_pte = gen8_pte_encode(px_dma(vm->scratch_page),
+ gen8_pte_t scratch_pte = gen8_pte_encode(vm->scratch_page.daddr,
I915_CACHE_LLC, true);
if (!USES_FULL_48BIT_PPGTT(vm->dev)) {
@@ -1574,7 +1572,7 @@ static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
uint32_t pte, pde;
uint32_t start = ppgtt->base.start, length = ppgtt->base.total;
- scratch_pte = vm->pte_encode(px_dma(vm->scratch_page),
+ scratch_pte = vm->pte_encode(vm->scratch_page.daddr,
I915_CACHE_LLC, true, 0);
gen6_for_each_pde(unused, &ppgtt->pd, start, length, pde) {
@@ -1799,7 +1797,7 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
unsigned first_pte = first_entry % GEN6_PTES;
unsigned last_pte, i;
- scratch_pte = vm->pte_encode(px_dma(vm->scratch_page),
+ scratch_pte = vm->pte_encode(vm->scratch_page.daddr,
I915_CACHE_LLC, true, 0);
while (num_entries) {
@@ -1828,28 +1826,22 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
unsigned first_entry = start >> PAGE_SHIFT;
unsigned act_pt = first_entry / GEN6_PTES;
- unsigned act_pte = first_entry % GEN6_PTES;
- gen6_pte_t *pt_vaddr = NULL;
+ unsigned act_pte = first_entry % GEN6_PTES - 1;
+ u32 pte_encode = vm->pte_encode(0, cache_level, true, flags);
struct sgt_iter sgt_iter;
+ gen6_pte_t *pt_vaddr;
dma_addr_t addr;
+ pt_vaddr = kmap_px(ppgtt->pd.page_table[act_pt]);
for_each_sgt_dma(addr, sgt_iter, pages) {
- if (pt_vaddr == NULL)
- pt_vaddr = kmap_px(ppgtt->pd.page_table[act_pt]);
-
- pt_vaddr[act_pte] =
- vm->pte_encode(addr, cache_level, true, flags);
-
if (++act_pte == GEN6_PTES) {
kunmap_px(ppgtt, pt_vaddr);
- pt_vaddr = NULL;
- act_pt++;
+ pt_vaddr = kmap_px(ppgtt->pd.page_table[++act_pt]);
act_pte = 0;
}
+ pt_vaddr[act_pte] = pte_encode | GEN6_PTE_ADDR_ENCODE(addr);
}
-
- if (pt_vaddr)
- kunmap_px(ppgtt, pt_vaddr);
+ kunmap_px(ppgtt, pt_vaddr);
}
static int gen6_alloc_va_range(struct i915_address_space *vm,
@@ -1945,14 +1937,15 @@ unwind_out:
static int gen6_init_scratch(struct i915_address_space *vm)
{
struct drm_device *dev = vm->dev;
+ int ret;
- vm->scratch_page = alloc_scratch_page(dev);
- if (IS_ERR(vm->scratch_page))
- return PTR_ERR(vm->scratch_page);
+ ret = setup_scratch_page(dev, &vm->scratch_page, I915_GFP_DMA);
+ if (ret)
+ return ret;
vm->scratch_pt = alloc_pt(dev);
if (IS_ERR(vm->scratch_pt)) {
- free_scratch_page(dev, vm->scratch_page);
+ cleanup_scratch_page(dev, &vm->scratch_page);
return PTR_ERR(vm->scratch_pt);
}
@@ -1966,7 +1959,7 @@ static void gen6_free_scratch(struct i915_address_space *vm)
struct drm_device *dev = vm->dev;
free_pt(dev, vm->scratch_pt);
- free_scratch_page(dev, vm->scratch_page);
+ cleanup_scratch_page(dev, &vm->scratch_page);
}
static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
@@ -1992,7 +1985,6 @@ static int gen6_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt)
struct drm_device *dev = ppgtt->base.dev;
struct drm_i915_private *dev_priv = to_i915(dev);
struct i915_ggtt *ggtt = &dev_priv->ggtt;
- bool retried = false;
int ret;
/* PPGTT PDEs reside in the GGTT and consists of 512 entries. The
@@ -2005,29 +1997,19 @@ static int gen6_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt)
if (ret)
return ret;
-alloc:
- ret = drm_mm_insert_node_in_range_generic(&ggtt->base.mm,
- &ppgtt->node, GEN6_PD_SIZE,
- GEN6_PD_ALIGN, 0,
- 0, ggtt->base.total,
- DRM_MM_TOPDOWN);
- if (ret == -ENOSPC && !retried) {
- ret = i915_gem_evict_something(&ggtt->base,
+ ret = drm_mm_insert_node_in_range_generic(&ggtt->base.mm, &ppgtt->node,
+ GEN6_PD_SIZE, GEN6_PD_ALIGN,
+ -1, 0, ggtt->base.total,
+ DRM_MM_INSERT_HIGH);
+ if (ret == -ENOSPC) {
+ ret = i915_gem_evict_something(&ggtt->base, &ppgtt->node,
GEN6_PD_SIZE, GEN6_PD_ALIGN,
- I915_CACHE_NONE,
- 0, ggtt->base.total,
+ -1, 0, ggtt->base.total,
0);
if (ret)
goto err_out;
-
- retried = true;
- goto alloc;
}
- if (ret)
- goto err_out;
-
-
if (ppgtt->node.start < ggtt->mappable_end)
DRM_DEBUG("Forced to use aperture for PDEs\n");
@@ -2297,14 +2279,15 @@ void i915_gem_suspend_gtt_mappings(struct drm_device *dev)
i915_ggtt_flush(dev_priv);
}
-int i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj)
+int i915_gem_gtt_prepare_pages(struct drm_i915_gem_object *obj,
+ struct sg_table *pages)
{
- if (!dma_map_sg(&obj->base.dev->pdev->dev,
- obj->pages->sgl, obj->pages->nents,
- PCI_DMA_BIDIRECTIONAL))
- return -ENOSPC;
+ if (dma_map_sg(&obj->base.dev->pdev->dev,
+ pages->sgl, pages->nents,
+ PCI_DMA_BIDIRECTIONAL))
+ return 0;
- return 0;
+ return -ENOSPC;
}
static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
@@ -2327,16 +2310,11 @@ static void gen8_ggtt_insert_page(struct i915_address_space *vm,
gen8_pte_t __iomem *pte =
(gen8_pte_t __iomem *)dev_priv->ggtt.gsm +
(offset >> PAGE_SHIFT);
- int rpm_atomic_seq;
-
- rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
gen8_set_pte(pte, gen8_pte_encode(addr, level, true));
I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
POSTING_READ(GFX_FLSH_CNTL_GEN6);
-
- assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
}
static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
@@ -2348,38 +2326,22 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
struct sgt_iter sgt_iter;
gen8_pte_t __iomem *gtt_entries;
- gen8_pte_t gtt_entry;
+ gen8_pte_t pte_encode = gen8_pte_encode(0, level, true);
dma_addr_t addr;
- int rpm_atomic_seq;
- int i = 0;
-
- rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
- gtt_entries = (gen8_pte_t __iomem *)ggtt->gsm + (start >> PAGE_SHIFT);
+ gtt_entries = (gen8_pte_t __iomem *)ggtt->gsm;
+ gtt_entries += start >> PAGE_SHIFT;
+ for_each_sgt_dma(addr, sgt_iter, st)
+ gen8_set_pte(gtt_entries++, pte_encode | addr);
- for_each_sgt_dma(addr, sgt_iter, st) {
- gtt_entry = gen8_pte_encode(addr, level, true);
- gen8_set_pte(>t_entries[i++], gtt_entry);
- }
-
- /*
- * XXX: This serves as a posting read to make sure that the PTE has
- * actually been updated. There is some concern that even though
- * registers and PTEs are within the same BAR that they are potentially
- * of NUMA access patterns. Therefore, even with the way we assume
- * hardware should work, we must keep this posting read for paranoia.
- */
- if (i != 0)
- WARN_ON(readq(>t_entries[i-1]) != gtt_entry);
+ wmb();
/* This next bit makes the above posting read even more important. We
* want to flush the TLBs only after we're certain all the PTE updates
* have finished.
*/
I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
- POSTING_READ(GFX_FLSH_CNTL_GEN6);
-
- assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
+ wmb();
}
struct insert_entries {
@@ -2418,16 +2380,11 @@ static void gen6_ggtt_insert_page(struct i915_address_space *vm,
gen6_pte_t __iomem *pte =
(gen6_pte_t __iomem *)dev_priv->ggtt.gsm +
(offset >> PAGE_SHIFT);
- int rpm_atomic_seq;
-
- rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
iowrite32(vm->pte_encode(addr, level, true, flags), pte);
I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
POSTING_READ(GFX_FLSH_CNTL_GEN6);
-
- assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
}
/*
@@ -2443,39 +2400,22 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
{
struct drm_i915_private *dev_priv = to_i915(vm->dev);
struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
- struct sgt_iter sgt_iter;
- gen6_pte_t __iomem *gtt_entries;
- gen6_pte_t gtt_entry;
+ gen6_pte_t __iomem *entries = (gen6_pte_t __iomem *)ggtt->gsm;
+ unsigned i = start >> PAGE_SHIFT;
+ struct sgt_iter iter;
dma_addr_t addr;
- int rpm_atomic_seq;
- int i = 0;
- rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
-
- gtt_entries = (gen6_pte_t __iomem *)ggtt->gsm + (start >> PAGE_SHIFT);
-
- for_each_sgt_dma(addr, sgt_iter, st) {
- gtt_entry = vm->pte_encode(addr, level, true, flags);
- iowrite32(gtt_entry, >t_entries[i++]);
- }
-
- /* XXX: This serves as a posting read to make sure that the PTE has
- * actually been updated. There is some concern that even though
- * registers and PTEs are within the same BAR that they are potentially
- * of NUMA access patterns. Therefore, even with the way we assume
- * hardware should work, we must keep this posting read for paranoia.
- */
- if (i != 0)
- WARN_ON(readl(>t_entries[i-1]) != gtt_entry);
+ for_each_sgt_dma(addr, iter, st)
+ iowrite32(vm->pte_encode(addr, level, true, flags),
+ &entries[i++]);
+ wmb();
/* This next bit makes the above posting read even more important. We
* want to flush the TLBs only after we're certain all the PTE updates
* have finished.
*/
I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
- POSTING_READ(GFX_FLSH_CNTL_GEN6);
-
- assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
+ wmb();
}
static void nop_clear_range(struct i915_address_space *vm,
@@ -2490,7 +2430,6 @@ static void gen8_ggtt_clear_range(struct i915_address_space *vm,
uint64_t length,
bool use_scratch)
{
- struct drm_i915_private *dev_priv = to_i915(vm->dev);
struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
unsigned first_entry = start >> PAGE_SHIFT;
unsigned num_entries = length >> PAGE_SHIFT;
@@ -2498,23 +2437,18 @@ static void gen8_ggtt_clear_range(struct i915_address_space *vm,
(gen8_pte_t __iomem *)ggtt->gsm + first_entry;
const int max_entries = ggtt_total_entries(ggtt) - first_entry;
int i;
- int rpm_atomic_seq;
-
- rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
if (WARN(num_entries > max_entries,
"First entry = %d; Num entries = %d (max=%d)\n",
first_entry, num_entries, max_entries))
num_entries = max_entries;
- scratch_pte = gen8_pte_encode(px_dma(vm->scratch_page),
+ scratch_pte = gen8_pte_encode(vm->scratch_page.daddr,
I915_CACHE_LLC,
use_scratch);
for (i = 0; i < num_entries; i++)
gen8_set_pte(>t_base[i], scratch_pte);
readl(gtt_base);
-
- assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
}
static void gen6_ggtt_clear_range(struct i915_address_space *vm,
@@ -2522,7 +2456,6 @@ static void gen6_ggtt_clear_range(struct i915_address_space *vm,
uint64_t length,
bool use_scratch)
{
- struct drm_i915_private *dev_priv = to_i915(vm->dev);
struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
unsigned first_entry = start >> PAGE_SHIFT;
unsigned num_entries = length >> PAGE_SHIFT;
@@ -2530,23 +2463,18 @@ static void gen6_ggtt_clear_range(struct i915_address_space *vm,
(gen6_pte_t __iomem *)ggtt->gsm + first_entry;
const int max_entries = ggtt_total_entries(ggtt) - first_entry;
int i;
- int rpm_atomic_seq;
-
- rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
if (WARN(num_entries > max_entries,
"First entry = %d; Num entries = %d (max=%d)\n",
first_entry, num_entries, max_entries))
num_entries = max_entries;
- scratch_pte = vm->pte_encode(px_dma(vm->scratch_page),
+ scratch_pte = vm->pte_encode(vm->scratch_page.daddr,
I915_CACHE_LLC, use_scratch, 0);
for (i = 0; i < num_entries; i++)
iowrite32(scratch_pte, >t_base[i]);
readl(gtt_base);
-
- assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
}
static void i915_ggtt_insert_page(struct i915_address_space *vm,
@@ -2555,16 +2483,10 @@ static void i915_ggtt_insert_page(struct i915_address_space *vm,
enum i915_cache_level cache_level,
u32 unused)
{
- struct drm_i915_private *dev_priv = to_i915(vm->dev);
unsigned int flags = (cache_level == I915_CACHE_NONE) ?
AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
- int rpm_atomic_seq;
-
- rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
intel_gtt_insert_page(addr, offset >> PAGE_SHIFT, flags);
-
- assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
}
static void i915_ggtt_insert_entries(struct i915_address_space *vm,
@@ -2572,17 +2494,11 @@ static void i915_ggtt_insert_entries(struct i915_address_space *vm,
uint64_t start,
enum i915_cache_level cache_level, u32 unused)
{
- struct drm_i915_private *dev_priv = to_i915(vm->dev);
unsigned int flags = (cache_level == I915_CACHE_NONE) ?
AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
- int rpm_atomic_seq;
-
- rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
intel_gtt_insert_sg_entries(pages, start >> PAGE_SHIFT, flags);
- assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
-
}
static void i915_ggtt_clear_range(struct i915_address_space *vm,
@@ -2590,37 +2506,33 @@ static void i915_ggtt_clear_range(struct i915_address_space *vm,
uint64_t length,
bool unused)
{
- struct drm_i915_private *dev_priv = to_i915(vm->dev);
- unsigned first_entry = start >> PAGE_SHIFT;
- unsigned num_entries = length >> PAGE_SHIFT;
- int rpm_atomic_seq;
-
- rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
-
- intel_gtt_clear_range(first_entry, num_entries);
-
- assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
+ intel_gtt_clear_range(start >> PAGE_SHIFT, length >> PAGE_SHIFT);
}
static int ggtt_bind_vma(struct i915_vma *vma,
enum i915_cache_level cache_level,
u32 flags)
{
+ struct drm_i915_private *i915 = to_i915(vma->vm->dev);
struct drm_i915_gem_object *obj = vma->obj;
- u32 pte_flags = 0;
- int ret;
+ u32 pte_flags;
- ret = i915_get_ggtt_vma_pages(vma);
- if (ret)
- return ret;
+ if (!vma->ggtt_view.pages) {
+ int ret = i915_get_ggtt_vma_pages(vma);
+ if (ret)
+ return ret;
+ }
/* Currently applicable only to VLV */
+ pte_flags = 0;
if (obj->gt_ro)
pte_flags |= PTE_READ_ONLY;
+ intel_runtime_pm_get(i915);
vma->vm->insert_entries(vma->vm, vma->ggtt_view.pages,
vma->node.start,
cache_level, pte_flags);
+ intel_runtime_pm_get(i915);
/*
* Without aliasing PPGTT there's no difference between
@@ -2636,29 +2548,31 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma,
enum i915_cache_level cache_level,
u32 flags)
{
+ struct drm_i915_private *i915 = to_i915(vma->vm->dev);
u32 pte_flags;
- int ret;
- ret = i915_get_ggtt_vma_pages(vma);
- if (ret)
- return ret;
+ if (!vma->ggtt_view.pages) {
+ int ret = i915_get_ggtt_vma_pages(vma);
+ if (ret)
+ return ret;
+ }
/* Currently applicable only to VLV */
pte_flags = 0;
if (vma->obj->gt_ro)
pte_flags |= PTE_READ_ONLY;
-
if (flags & I915_VMA_GLOBAL_BIND) {
+ intel_runtime_pm_get(i915);
vma->vm->insert_entries(vma->vm,
vma->ggtt_view.pages,
vma->node.start,
cache_level, pte_flags);
+ intel_runtime_pm_put(i915);
}
if (flags & I915_VMA_LOCAL_BIND) {
- struct i915_hw_ppgtt *appgtt =
- to_i915(vma->vm->dev)->mm.aliasing_ppgtt;
+ struct i915_hw_ppgtt *appgtt = i915->mm.aliasing_ppgtt;
appgtt->base.insert_entries(&appgtt->base,
vma->ggtt_view.pages,
vma->node.start,
@@ -2670,13 +2584,17 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma,
static void ggtt_unbind_vma(struct i915_vma *vma)
{
- struct i915_hw_ppgtt *appgtt = to_i915(vma->vm->dev)->mm.aliasing_ppgtt;
+ struct drm_i915_private *i915 = to_i915(vma->vm->dev);
+ struct i915_hw_ppgtt *appgtt = i915->mm.aliasing_ppgtt;
const u64 size = min(vma->size, vma->node.size);
- if (vma->flags & I915_VMA_GLOBAL_BIND)
+ if (vma->flags & I915_VMA_GLOBAL_BIND) {
+ intel_runtime_pm_get(i915);
vma->vm->clear_range(vma->vm,
vma->node.start, size,
true);
+ intel_runtime_pm_put(i915);
+ }
if (vma->flags & I915_VMA_LOCAL_BIND && appgtt)
appgtt->base.clear_range(&appgtt->base,
@@ -2684,7 +2602,8 @@ static void ggtt_unbind_vma(struct i915_vma *vma)
true);
}
-void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj)
+void i915_gem_gtt_finish_pages(struct drm_i915_gem_object *obj,
+ struct sg_table *pages)
{
struct drm_device *dev = obj->base.dev;
struct drm_i915_private *dev_priv = to_i915(dev);
@@ -2698,7 +2617,8 @@ void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj)
}
}
- dma_unmap_sg(&dev->pdev->dev, obj->pages->sgl, obj->pages->nents,
+ dma_unmap_sg(&obj->base.dev->pdev->dev,
+ pages->sgl, pages->nents,
PCI_DMA_BIDIRECTIONAL);
}
@@ -2737,6 +2657,15 @@ int i915_gem_init_ggtt(struct drm_i915_private *dev_priv)
if (ret)
return ret;
+ /* Reserve a mappable slot for our lockless error capture */
+ ret = drm_mm_insert_node_in_range_generic(&ggtt->base.mm,
+ &ggtt->gpu_error,
+ 4096, 0, -1,
+ 0, ggtt->mappable_end,
+ 0);
+ if (ret)
+ return ret;
+
/* Clear any non-preallocated blocks */
drm_mm_for_each_hole(entry, &ggtt->base.mm, hole_start, hole_end) {
DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n",
@@ -2795,7 +2724,6 @@ void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv)
if (dev_priv->mm.aliasing_ppgtt) {
struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
-
ppgtt->base.cleanup(&ppgtt->base);
kfree(ppgtt);
}
@@ -2805,6 +2733,7 @@ void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv)
if (drm_mm_initialized(&ggtt->base.mm)) {
intel_vgt_deballoon(dev_priv);
+ drm_mm_remove_node(&ggtt->gpu_error);
drm_mm_takedown(&ggtt->base.mm);
list_del(&ggtt->base.global_link);
}
@@ -2812,7 +2741,7 @@ void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv)
ggtt->base.cleanup(&ggtt->base);
arch_phys_wc_del(ggtt->mtrr);
- io_mapping_free(ggtt->mappable);
+ io_mapping_fini(&ggtt->mappable);
}
static unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl)
@@ -2896,8 +2825,8 @@ static size_t gen9_get_stolen_size(u16 gen9_gmch_ctl)
static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size)
{
struct pci_dev *pdev = ggtt->base.dev->pdev;
- struct i915_page_scratch *scratch_page;
phys_addr_t phys_addr;
+ int ret;
/* For Modern GENs the PTEs and register space are split in the BAR */
phys_addr = pci_resource_start(pdev, 0) + pci_resource_len(pdev, 0) / 2;
@@ -2918,16 +2847,16 @@ static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size)
return -ENOMEM;
}
- scratch_page = alloc_scratch_page(ggtt->base.dev);
- if (IS_ERR(scratch_page)) {
+ ret = setup_scratch_page(ggtt->base.dev,
+ &ggtt->base.scratch_page,
+ GFP_DMA32);
+ if (ret) {
DRM_ERROR("Scratch setup failed\n");
/* iounmap will also get called at remove, but meh */
iounmap(ggtt->gsm);
- return PTR_ERR(scratch_page);
+ return ret;
}
- ggtt->base.scratch_page = scratch_page;
-
return 0;
}
@@ -3009,7 +2938,7 @@ static void gen6_gmch_remove(struct i915_address_space *vm)
struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
iounmap(ggtt->gsm);
- free_scratch_page(vm->dev, vm->scratch_page);
+ cleanup_scratch_page(vm->dev, &vm->scratch_page);
}
static int gen8_gmch_probe(struct i915_ggtt *ggtt)
@@ -3210,9 +3139,9 @@ int i915_ggtt_init_hw(struct drm_i915_private *dev_priv)
if (!HAS_LLC(dev_priv))
ggtt->base.mm.color_adjust = i915_gtt_color_adjust;
- ggtt->mappable =
- io_mapping_create_wc(ggtt->mappable_base, ggtt->mappable_end);
- if (!ggtt->mappable) {
+ if (!io_mapping_init_wc(&dev_priv->ggtt.mappable,
+ dev_priv->ggtt.mappable_base,
+ dev_priv->ggtt.mappable_end)) {
ret = -EIO;
goto out_gtt_cleanup;
}
@@ -3258,8 +3187,8 @@ void i915_gem_restore_gtt_mappings(struct drm_device *dev)
/* Cache flush objects bound into GGTT and rebind them. */
list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
list_for_each_entry(vma, &obj->vma_list, obj_link) {
- if (vma->vm != &ggtt->base)
- continue;
+ if (!i915_vma_is_ggtt(vma))
+ break;
WARN_ON(i915_vma_bind(vma, obj->cache_level,
PIN_UPDATE));
@@ -3323,6 +3252,7 @@ void i915_vma_destroy(struct i915_vma *vma)
GEM_BUG_ON(vma->node.allocated);
GEM_BUG_ON(i915_vma_is_active(vma));
GEM_BUG_ON(!i915_vma_is_closed(vma));
+ GEM_BUG_ON(vma->fence);
list_del(&vma->vm_link);
if (!i915_vma_is_ggtt(vma))
@@ -3331,12 +3261,43 @@ void i915_vma_destroy(struct i915_vma *vma)
kmem_cache_free(to_i915(vma->obj->base.dev)->vmas, vma);
}
+void i915_vma_unlink_ctx(struct i915_vma *vma)
+{
+ struct i915_gem_context *ctx = vma->ctx;
+
+ if (ctx->vma.ht_size & 1) {
+ cancel_work_sync(&ctx->vma.resize);
+ ctx->vma.ht_size &= ~1;
+ }
+
+ __hlist_del(&vma->ctx_node);
+ ctx->vma.ht_count--;
+
+ if (i915_vma_is_ggtt(vma))
+ vma->obj->vma_hashed = NULL;
+ vma->ctx = NULL;
+}
+
void i915_vma_close(struct i915_vma *vma)
{
GEM_BUG_ON(i915_vma_is_closed(vma));
vma->flags |= I915_VMA_CLOSED;
+ if (vma->ctx)
+ i915_vma_unlink_ctx(vma);
+
list_del_init(&vma->obj_link);
+ if (!i915_vma_is_ggtt(vma) && vma->obj->vma_ht) {
+ rhashtable_remove_fast(vma->obj->vma_ht,
+ &vma->obj_rht,
+ vma_ht_params);
+ if (atomic_read(&vma->obj->vma_ht->nelems) == 0) {
+ rhashtable_destroy(vma->obj->vma_ht);
+ kfree(vma->obj->vma_ht);
+ vma->obj->vma_ht = NULL;
+ }
+ }
+
if (!i915_vma_is_active(vma) && !i915_vma_is_pinned(vma))
WARN_ON(i915_vma_unbind(vma));
}
@@ -3346,29 +3307,27 @@ __i915_gem_vma_create(struct drm_i915_gem_object *obj,
struct i915_address_space *vm,
const struct i915_ggtt_view *view)
{
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
struct i915_vma *vma;
int i;
- GEM_BUG_ON(vm->closed);
+ if (vm == NULL)
+ vm = &i915->ggtt.base;
- if (WARN_ON(i915_is_ggtt(vm) != !!view))
- return ERR_PTR(-EINVAL);
+ GEM_BUG_ON(vm->closed);
- vma = kmem_cache_zalloc(to_i915(obj->base.dev)->vmas, GFP_KERNEL);
+ vma = kmem_cache_zalloc(i915->vmas, GFP_KERNEL);
if (vma == NULL)
return ERR_PTR(-ENOMEM);
- INIT_LIST_HEAD(&vma->obj_link);
- INIT_LIST_HEAD(&vma->exec_list);
for (i = 0; i < ARRAY_SIZE(vma->last_read); i++)
init_request_active(&vma->last_read[i], i915_vma_retire);
- list_add(&vma->vm_link, &vm->unbound_list);
+ init_request_active(&vma->last_fence, NULL);
vma->vm = vm;
vma->obj = obj;
vma->size = obj->base.size;
- if (i915_is_ggtt(vm)) {
- vma->flags |= I915_VMA_GGTT;
+ if (view) {
vma->ggtt_view = *view;
if (view->type == I915_GGTT_VIEW_PARTIAL) {
vma->size = view->params.partial.size;
@@ -3378,46 +3337,106 @@ __i915_gem_vma_create(struct drm_i915_gem_object *obj,
intel_rotation_info_size(&view->params.rotated);
vma->size <<= PAGE_SHIFT;
}
+ }
+
+ if (i915_is_ggtt(vm)) {
+ vma->flags |= I915_VMA_GGTT;
+ list_add(&vma->obj_link, &obj->vma_list);
} else {
+ if (obj->vma_ht == NULL) {
+ struct i915_vma *last;
+
+ last = NULL;
+ if (!list_empty(&obj->vma_list))
+ last = list_last_entry(&obj->vma_list,
+ struct i915_vma, obj_link);
+ if (last != NULL && !i915_vma_is_ggtt(last)) {
+ obj->vma_ht = kmalloc(sizeof(struct rhashtable),
+ GFP_KERNEL);
+ if (obj->vma_ht == NULL) {
+ kmem_cache_free(i915->vmas, vma);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ if (rhashtable_init(obj->vma_ht, &vma_ht_params)) {
+ kfree(obj->vma_ht);
+ obj->vma_ht = NULL;
+ kmem_cache_free(i915->vmas, vma);
+ return ERR_PTR(-ENOMEM);
+ }
+ rhashtable_insert_fast(obj->vma_ht,
+ &last->obj_rht,
+ vma_ht_params);
+ }
+ }
+
+ if (obj->vma_ht)
+ rhashtable_insert_fast(obj->vma_ht,
+ &vma->obj_rht,
+ vma_ht_params);
i915_ppgtt_get(i915_vm_to_ppgtt(vm));
+ list_add_tail(&vma->obj_link, &obj->vma_list);
}
- list_add_tail(&vma->obj_link, &obj->vma_list);
-
+ list_add(&vma->vm_link, &vm->unbound_list);
return vma;
}
+static inline bool vma_matches(struct i915_vma *vma,
+ const struct i915_ggtt_view *view)
+{
+ if (!view)
+ return vma->ggtt_view.type == 0;
+
+ if (vma->ggtt_view.type != view->type)
+ return false;
+
+ return memcmp(&vma->ggtt_view.params,
+ &view->params,
+ sizeof(view->params)) == 0;
+}
+
struct i915_vma *
-i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj,
- struct i915_address_space *vm)
+i915_gem_obj_to_vma(struct drm_i915_gem_object *obj,
+ struct i915_address_space *vm,
+ const struct i915_ggtt_view *view)
{
struct i915_vma *vma;
- vma = i915_gem_obj_to_vma(obj, vm);
- if (!vma)
- vma = __i915_gem_vma_create(obj, vm,
- i915_is_ggtt(vm) ? &i915_ggtt_view_normal : NULL);
+ if (vm == NULL || i915_is_ggtt(vm)) {
+ list_for_each_entry(vma, &obj->vma_list, obj_link) {
+ if (!i915_vma_is_ggtt(vma))
+ break;
- return vma;
+ if (vma_matches(vma, view))
+ return vma;
+ }
+ } else if (obj->vma_ht) {
+ return rhashtable_lookup_fast(obj->vma_ht, vm, vma_ht_params);
+ } else if (!list_empty(&obj->vma_list)) {
+ vma = list_last_entry(&obj->vma_list,
+ struct i915_vma, obj_link);
+ if (vma->vm == vm)
+ return vma;
+ }
+
+ return NULL;
}
struct i915_vma *
-i915_gem_obj_lookup_or_create_ggtt_vma(struct drm_i915_gem_object *obj,
- const struct i915_ggtt_view *view)
+i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj,
+ struct i915_address_space *vm,
+ const struct i915_ggtt_view *view)
{
- struct drm_device *dev = obj->base.dev;
- struct drm_i915_private *dev_priv = to_i915(dev);
- struct i915_ggtt *ggtt = &dev_priv->ggtt;
- struct i915_vma *vma = i915_gem_obj_to_ggtt_view(obj, view);
-
- GEM_BUG_ON(!view);
+ struct i915_vma *vma;
+ lockdep_assert_held(&obj->base.dev->struct_mutex);
+ vma = i915_gem_obj_to_vma(obj, vm, view);
if (!vma)
- vma = __i915_gem_vma_create(obj, &ggtt->base, view);
+ vma = __i915_gem_vma_create(obj, vm, view);
GEM_BUG_ON(i915_vma_is_closed(vma));
return vma;
-
}
static struct scatterlist *
@@ -3448,7 +3467,7 @@ rotate_pages(const dma_addr_t *in, unsigned int offset,
return sg;
}
-static struct sg_table *
+noinline static struct sg_table *
intel_rotate_fb_obj_pages(struct intel_rotation_info *rot_info,
struct drm_i915_gem_object *obj)
{
@@ -3488,7 +3507,7 @@ intel_rotate_fb_obj_pages(struct intel_rotation_info *rot_info,
/* Populate source page list from the object. */
i = 0;
- for_each_sgt_dma(dma_addr, sgt_iter, obj->pages)
+ for_each_sgt_dma(dma_addr, sgt_iter, obj->mm.pages)
page_addr_list[i++] = dma_addr;
GEM_BUG_ON(i != n_pages);
@@ -3538,7 +3557,7 @@ err_st_alloc:
return ERR_PTR(ret);
}
-static struct sg_table *
+noinline static struct sg_table *
intel_partial_pages(const struct i915_ggtt_view *view,
struct drm_i915_gem_object *obj)
{
@@ -3557,7 +3576,7 @@ intel_partial_pages(const struct i915_ggtt_view *view,
sg = st->sgl;
st->nents = 0;
- for_each_sg_page(obj->pages->sgl, &obj_sg_iter, obj->pages->nents,
+ for_each_sg_page(obj->mm.pages->sgl, &obj_sg_iter, obj->mm.pages->nents,
view->params.partial.offset)
{
if (st->nents >= view->params.partial.size)
@@ -3582,34 +3601,36 @@ err_st_alloc:
static int
i915_get_ggtt_vma_pages(struct i915_vma *vma)
{
- int ret = 0;
+ int ret;
- if (vma->ggtt_view.pages)
+ switch (vma->ggtt_view.type) {
+ case I915_GGTT_VIEW_NORMAL:
+ vma->ggtt_view.pages = vma->obj->mm.pages;
return 0;
- if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL)
- vma->ggtt_view.pages = vma->obj->pages;
- else if (vma->ggtt_view.type == I915_GGTT_VIEW_ROTATED)
+ case I915_GGTT_VIEW_ROTATED:
vma->ggtt_view.pages =
intel_rotate_fb_obj_pages(&vma->ggtt_view.params.rotated, vma->obj);
- else if (vma->ggtt_view.type == I915_GGTT_VIEW_PARTIAL)
+ break;
+
+ case I915_GGTT_VIEW_PARTIAL:
vma->ggtt_view.pages =
intel_partial_pages(&vma->ggtt_view, vma->obj);
- else
+ break;
+
+ default:
WARN_ONCE(1, "GGTT view %u not implemented!\n",
vma->ggtt_view.type);
+ return -EINVAL;
+ }
- if (!vma->ggtt_view.pages) {
- DRM_ERROR("Failed to get pages for GGTT view type %u!\n",
- vma->ggtt_view.type);
- ret = -EINVAL;
- } else if (IS_ERR(vma->ggtt_view.pages)) {
+ ret = 0;
+ if (IS_ERR(vma->ggtt_view.pages)) {
ret = PTR_ERR(vma->ggtt_view.pages);
vma->ggtt_view.pages = NULL;
DRM_ERROR("Failed to get pages for VMA view type %u (%d)!\n",
vma->ggtt_view.type, ret);
}
-
return ret;
}
@@ -3669,7 +3690,11 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma)
void __iomem *ptr;
lockdep_assert_held(&vma->vm->dev->struct_mutex);
- if (WARN_ON(!vma->obj->map_and_fenceable))
+
+ /* Access through the GTT requires the device to be awake. */
+ assert_rpm_wakelock_held(to_i915(vma->vm->dev));
+
+ if (WARN_ON(!i915_vma_is_map_and_fenceable(vma)))
return IO_ERR_PTR(-ENODEV);
GEM_BUG_ON(!i915_vma_is_ggtt(vma));
@@ -3677,7 +3702,7 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma)
ptr = vma->iomap;
if (ptr == NULL) {
- ptr = io_mapping_map_wc(i915_vm_to_ggtt(vma->vm)->mappable,
+ ptr = io_mapping_map_wc(&i915_vm_to_ggtt(vma->vm)->mappable,
vma->node.start,
vma->node.size);
if (ptr == NULL)
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index cc56206a1600..da71fdfd97ce 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -35,10 +35,17 @@
#define __I915_GEM_GTT_H__
#include <linux/io-mapping.h>
+#include <linux/rhashtable.h>
#include "i915_gem_request.h"
+#define I915_FENCE_REG_NONE -1
+#define I915_MAX_NUM_FENCES 32
+/* 32 fences + sign bit for FENCE_REG_NONE */
+#define I915_MAX_NUM_FENCE_BITS 6
+
struct drm_i915_file_private;
+struct drm_i915_fence_reg;
typedef uint32_t gen6_pte_t;
typedef uint64_t gen8_pte_t;
@@ -165,6 +172,7 @@ struct i915_ggtt_view {
extern const struct i915_ggtt_view i915_ggtt_view_normal;
extern const struct i915_ggtt_view i915_ggtt_view_rotated;
+struct i915_gem_context;
enum i915_cache_level;
/**
@@ -179,8 +187,9 @@ struct i915_vma {
struct drm_mm_node node;
struct drm_i915_gem_object *obj;
struct i915_address_space *vm;
+ struct drm_i915_fence_reg *fence;
void __iomem *iomap;
- u64 size;
+ u64 size, display_alignment;
unsigned int flags;
/**
@@ -201,11 +210,13 @@ struct i915_vma {
#define I915_VMA_LOCAL_BIND BIT(7)
#define I915_VMA_BIND_MASK (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND | I915_VMA_PIN_OVERFLOW)
-#define I915_VMA_GGTT BIT(8)
-#define I915_VMA_CLOSED BIT(9)
+#define I915_VMA_GGTT BIT(8)
+#define I915_VMA_CAN_FENCE BIT(9)
+#define I915_VMA_CLOSED BIT(10)
unsigned int active;
struct i915_gem_active last_read[I915_NUM_ENGINES];
+ struct i915_gem_active last_fence;
/**
* Support different GGTT views into the same object.
@@ -220,16 +231,24 @@ struct i915_vma {
struct list_head vm_link;
struct list_head obj_link; /* Link in the object's VMA list */
+ struct hlist_node obj_node;
+ struct rhash_head obj_rht;
/** This vma's place in the batchbuffer or on the eviction list */
- struct list_head exec_list;
+ struct list_head exec_link;
+ struct list_head reloc_link;
+ struct list_head evict_link;
/**
* Used for performing relocations during execbuffer insertion.
*/
struct hlist_node exec_node;
- unsigned long exec_handle;
struct drm_i915_gem_exec_object2 *exec_entry;
+ u32 exec_handle;
+
+ struct i915_gem_context *ctx;
+ struct hlist_node ctx_node;
+ u32 ctx_handle;
};
static inline bool i915_vma_is_ggtt(const struct i915_vma *vma)
@@ -237,6 +256,11 @@ static inline bool i915_vma_is_ggtt(const struct i915_vma *vma)
return vma->flags & I915_VMA_GGTT;
}
+static inline bool i915_vma_is_map_and_fenceable(const struct i915_vma *vma)
+{
+ return vma->flags & I915_VMA_CAN_FENCE;
+}
+
static inline bool i915_vma_is_closed(const struct i915_vma *vma)
{
return vma->flags & I915_VMA_CLOSED;
@@ -286,10 +310,6 @@ struct i915_page_dma {
#define px_page(px) (px_base(px)->page)
#define px_dma(px) (px_base(px)->daddr)
-struct i915_page_scratch {
- struct i915_page_dma base;
-};
-
struct i915_page_table {
struct i915_page_dma base;
@@ -335,7 +355,7 @@ struct i915_address_space {
bool closed;
- struct i915_page_scratch *scratch_page;
+ struct i915_page_dma scratch_page;
struct i915_page_table *scratch_pt;
struct i915_page_directory *scratch_pd;
struct i915_page_directory_pointer *scratch_pdp; /* GEN8+ & 48b PPGTT */
@@ -413,13 +433,13 @@ struct i915_address_space {
*/
struct i915_ggtt {
struct i915_address_space base;
+ struct io_mapping mappable; /* Mapping to our CPU mappable region */
size_t stolen_size; /* Total size of stolen memory */
size_t stolen_usable_size; /* Total size minus BIOS reserved */
size_t stolen_reserved_base;
size_t stolen_reserved_size;
u64 mappable_end; /* End offset that we can CPU map */
- struct io_mapping *mappable; /* Mapping to our CPU mappable region */
phys_addr_t mappable_base; /* PA of our GMADR */
/** "Graphics Stolen Memory" holds the global PTEs */
@@ -428,6 +448,8 @@ struct i915_ggtt {
bool do_idle_maps;
int mtrr;
+
+ struct drm_mm_node gpu_error;
};
struct i915_hw_ppgtt {
@@ -443,7 +465,6 @@ struct i915_hw_ppgtt {
gen6_pte_t __iomem *pd_addr;
- int (*enable)(struct i915_hw_ppgtt *ppgtt);
int (*switch_mm)(struct i915_hw_ppgtt *ppgtt,
struct drm_i915_gem_request *req);
void (*debug_dump)(struct i915_hw_ppgtt *ppgtt, struct seq_file *m);
@@ -605,27 +626,17 @@ void i915_check_and_clear_faults(struct drm_i915_private *dev_priv);
void i915_gem_suspend_gtt_mappings(struct drm_device *dev);
void i915_gem_restore_gtt_mappings(struct drm_device *dev);
-int __must_check i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj);
-void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj);
-
-static inline bool
-i915_ggtt_view_equal(const struct i915_ggtt_view *a,
- const struct i915_ggtt_view *b)
-{
- if (WARN_ON(!a || !b))
- return false;
-
- if (a->type != b->type)
- return false;
- if (a->type != I915_GGTT_VIEW_NORMAL)
- return !memcmp(&a->params, &b->params, sizeof(a->params));
- return true;
-}
+int __must_check i915_gem_gtt_prepare_pages(struct drm_i915_gem_object *obj,
+ struct sg_table *pages);
+void i915_gem_gtt_finish_pages(struct drm_i915_gem_object *obj,
+ struct sg_table *pages);
/* Flags used by pin/bind&friends. */
#define PIN_NONBLOCK BIT(0)
#define PIN_MAPPABLE BIT(1)
#define PIN_ZONE_4G BIT(2)
+#define PIN_NOFAULT BIT(3)
+#define PIN_NOEVICT BIT(4)
#define PIN_MBZ BIT(5) /* I915_VMA_PIN_OVERFLOW */
#define PIN_GLOBAL BIT(6) /* I915_VMA_GLOBAL_BIND */
diff --git a/drivers/gpu/drm/i915/i915_gem_internal.c b/drivers/gpu/drm/i915/i915_gem_internal.c
new file mode 100644
index 000000000000..56e89a212905
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_gem_internal.c
@@ -0,0 +1,157 @@
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include <drm/drmP.h>
+#include <drm/i915_drm.h>
+#include "i915_drv.h"
+
+static void __i915_gem_object_free_pages(struct sg_table *st)
+{
+ struct sgt_iter iter;
+ struct page *page;
+
+ for_each_sgt_page(page, iter, st)
+ put_page(page);
+
+ sg_free_table(st);
+ kfree(st);
+}
+
+static struct sg_table *
+i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj)
+{
+ const unsigned int npages = obj->base.size / PAGE_SIZE;
+ struct sg_table *st;
+ struct scatterlist *sg;
+ unsigned long last_pfn = 0; /* suppress gcc warning */
+ gfp_t gfp;
+ int i;
+
+ st = kmalloc(sizeof(*st), GFP_KERNEL);
+ if (!st)
+ return ERR_PTR(-ENOMEM);
+
+ if (sg_alloc_table(st, npages, GFP_KERNEL)) {
+ kfree(st);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ sg = st->sgl;
+ st->nents = 0;
+
+ gfp = GFP_KERNEL | __GFP_HIGHMEM;
+ gfp |= __GFP_NORETRY | __GFP_NOWARN;
+ gfp &= ~(__GFP_IO | __GFP_RECLAIM);
+ for (i = 0; i < npages; i++) {
+ struct page *page;
+
+ page = alloc_page(gfp);
+ if (!page) {
+ i915_gem_shrink_all(to_i915(obj->base.dev));
+ page = alloc_page(GFP_KERNEL | __GFP_HIGHMEM);
+ if (!page)
+ goto err;
+ }
+
+#ifdef CONFIG_SWIOTLB
+ if (swiotlb_nr_tbl()) {
+ st->nents++;
+ sg_set_page(sg, page, PAGE_SIZE, 0);
+ sg = sg_next(sg);
+ continue;
+ }
+#endif
+ if (!i || page_to_pfn(page) != last_pfn + 1) {
+ if (i)
+ sg = sg_next(sg);
+ st->nents++;
+ sg_set_page(sg, page, PAGE_SIZE, 0);
+ } else {
+ sg->length += PAGE_SIZE;
+ }
+ last_pfn = page_to_pfn(page);
+ }
+#ifdef CONFIG_SWIOTLB
+ if (!swiotlb_nr_tbl())
+#endif
+ sg_mark_end(sg);
+
+ if (i915_gem_gtt_prepare_pages(obj, st))
+ goto err;
+
+ obj->mm.madv = I915_MADV_DONTNEED;
+ return st;
+
+err:
+ sg_mark_end(sg);
+ __i915_gem_object_free_pages(st);
+ return ERR_PTR(-ENOMEM);
+}
+
+static void i915_gem_object_put_pages_internal(struct drm_i915_gem_object *obj,
+ struct sg_table *pages)
+{
+ i915_gem_gtt_finish_pages(obj, pages);
+ __i915_gem_object_free_pages(pages);
+
+ i915_gem_object_clear_dirty(obj);
+ obj->mm.madv = I915_MADV_WILLNEED;
+}
+
+static const struct drm_i915_gem_object_ops i915_gem_object_internal_ops = {
+ .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE,
+ .get_pages = i915_gem_object_get_pages_internal,
+ .put_pages = i915_gem_object_put_pages_internal,
+};
+
+/**
+ * Creates a new object that wraps some internal memory for private use.
+ * This object is not backed by swappable storage, and as such its contents
+ * are volatile and only valid whilst pinned. If the object is reaped by the
+ * shrinker, its pages and data will be discarded. Equally, it is not a full
+ * GEM object and so not valid for access from userspace. This makes it useful
+ * for hardware interfaces like ringbuffers (which are pinned from the time
+ * the request is written to the time the hardware stops accessing it), but
+ * not for contexts (which need to be preserved when not active for later
+ * reuse).
+ */
+struct drm_i915_gem_object *
+i915_gem_object_create_internal(struct drm_device *dev,
+ unsigned int size)
+{
+ struct drm_i915_gem_object *obj;
+
+ obj = i915_gem_object_alloc(dev);
+ if (!obj)
+ return ERR_PTR(-ENOMEM);
+
+ drm_gem_private_object_init(dev, &obj->base, size);
+ i915_gem_object_init(obj, &i915_gem_object_internal_ops);
+
+ obj->base.write_domain = I915_GEM_DOMAIN_CPU;
+ obj->base.read_domains = I915_GEM_DOMAIN_CPU;
+ obj->cache_level = HAS_LLC(dev) ? I915_CACHE_LLC : I915_CACHE_NONE;
+
+ return obj;
+}
diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c
index 57fd767a2d79..828d04cb04ca 100644
--- a/drivers/gpu/drm/i915/i915_gem_render_state.c
+++ b/drivers/gpu/drm/i915/i915_gem_render_state.c
@@ -28,18 +28,19 @@
#include "i915_drv.h"
#include "intel_renderstate.h"
-struct render_state {
+struct intel_render_state {
const struct intel_renderstate_rodata *rodata;
- struct drm_i915_gem_object *obj;
- u64 ggtt_offset;
- u32 aux_batch_size;
- u32 aux_batch_offset;
+ struct i915_vma *vma;
+ u32 batch_offset;
+ u32 batch_size;
+ u32 aux_offset;
+ u32 aux_size;
};
static const struct intel_renderstate_rodata *
-render_state_get_rodata(const struct drm_i915_gem_request *req)
+render_state_get_rodata(const struct intel_engine_cs *engine)
{
- switch (INTEL_GEN(req->i915)) {
+ switch (INTEL_GEN(engine->i915)) {
case 6:
return &gen6_null_state;
case 7:
@@ -71,28 +72,28 @@ render_state_get_rodata(const struct drm_i915_gem_request *req)
(batch)[(i)++] = (val); \
} while(0)
-static int render_state_setup(struct render_state *so)
+static int render_state_setup(struct intel_render_state *so,
+ struct drm_i915_private *i915)
{
- struct drm_device *dev = so->obj->base.dev;
const struct intel_renderstate_rodata *rodata = so->rodata;
- const bool has_64bit_reloc = INTEL_GEN(dev) >= 8;
+ const bool has_64bit_reloc = INTEL_GEN(i915) >= 8;
+ struct drm_i915_gem_object *obj = so->vma->obj;
unsigned int i = 0, reloc_index = 0;
- struct page *page;
+ unsigned needs_clflush;
u32 *d;
int ret;
- ret = i915_gem_object_set_to_cpu_domain(so->obj, true);
+ ret = i915_gem_obj_prepare_shmem_write(obj, &needs_clflush);
if (ret)
return ret;
- page = i915_gem_object_get_dirty_page(so->obj, 0);
- d = kmap(page);
+ d = kmap_atomic(i915_gem_object_get_dirty_page(obj, 0));
while (i < rodata->batch_items) {
u32 s = rodata->batch[i];
if (i * 4 == rodata->reloc[reloc_index]) {
- u64 r = s + so->ggtt_offset;
+ u64 r = s + so->vma->node.start,
s = lower_32_bits(r);
if (has_64bit_reloc) {
if (i + 1 >= rodata->batch_items ||
@@ -111,12 +112,15 @@ static int render_state_setup(struct render_state *so)
d[i++] = s;
}
+ so->batch_offset = so->vma->node.start;
+ so->batch_size = rodata->batch_items * sizeof(u32);
+
while (i % CACHELINE_DWORDS)
OUT_BATCH(d, i, MI_NOOP);
- so->aux_batch_offset = i * sizeof(u32);
+ so->aux_offset = i * sizeof(u32);
- if (HAS_POOLED_EU(dev)) {
+ if (HAS_POOLED_EU(i915)) {
/*
* We always program 3x6 pool config but depending upon which
* subslice is disabled HW drops down to appropriate config
@@ -144,83 +148,130 @@ static int render_state_setup(struct render_state *so)
}
OUT_BATCH(d, i, MI_BATCH_BUFFER_END);
- so->aux_batch_size = (i * sizeof(u32)) - so->aux_batch_offset;
-
+ so->aux_size = i * sizeof(u32) - so->aux_offset;
+ so->aux_offset += so->batch_offset;
/*
* Since we are sending length, we need to strictly conform to
* all requirements. For Gen2 this must be a multiple of 8.
*/
- so->aux_batch_size = ALIGN(so->aux_batch_size, 8);
+ so->aux_size = ALIGN(so->aux_size, 8);
- kunmap(page);
-
- ret = i915_gem_object_set_to_gtt_domain(so->obj, false);
- if (ret)
- return ret;
+ if (needs_clflush)
+ drm_clflush_virt_range(d, i*sizeof(u32));
+ ret = i915_gem_object_set_to_gtt_domain(obj, false);
if (rodata->reloc[reloc_index] != -1) {
DRM_ERROR("only %d relocs resolved\n", reloc_index);
- return -EINVAL;
+ ret = -EINVAL;
}
- return 0;
-
err_out:
- kunmap(page);
+ kunmap_atomic(d);
+ i915_gem_object_unpin_pages(obj);
return ret;
}
#undef OUT_BATCH
-int i915_gem_render_state_init(struct drm_i915_gem_request *req)
+int i915_gem_render_state_init(struct intel_engine_cs *engine)
{
- struct render_state so;
+ struct intel_render_state *so;
+ const struct intel_renderstate_rodata *rodata;
+ struct drm_i915_gem_object *obj;
int ret;
- if (WARN_ON(req->engine->id != RCS))
- return -ENOENT;
+ if (engine->id != RCS)
+ return 0;
- so.rodata = render_state_get_rodata(req);
- if (!so.rodata)
+ rodata = render_state_get_rodata(engine);
+ if (!rodata)
return 0;
- if (so.rodata->batch_items * 4 > 4096)
+ if (rodata->batch_items * 4 > 4096)
return -EINVAL;
- so.obj = i915_gem_object_create(&req->i915->drm, 4096);
- if (IS_ERR(so.obj))
- return PTR_ERR(so.obj);
+ so = kmalloc(sizeof(*so), GFP_KERNEL);
+ if (!so)
+ return -ENOMEM;
- ret = i915_gem_object_ggtt_pin(so.obj, NULL, 0, 0, 0);
- if (ret)
+ obj = i915_gem_object_create_internal(&engine->i915->drm, 4096);
+ if (IS_ERR(obj)) {
+ ret = PTR_ERR(obj);
+ goto err_free;
+ }
+
+ so->vma = i915_gem_obj_lookup_or_create_vma(obj,
+ &engine->i915->ggtt.base,
+ NULL);
+ if (IS_ERR(so->vma)) {
+ ret = PTR_ERR(so->vma);
goto err_obj;
+ }
+
+ so->rodata = rodata;
+ engine->render_state = so;
+ return 0;
+
+err_obj:
+ i915_gem_object_put(obj);
+err_free:
+ kfree(so);
+ return ret;
+}
+
+int i915_gem_render_state_emit(struct drm_i915_gem_request *req)
+{
+ struct intel_render_state *so;
+ int ret;
- so.ggtt_offset = i915_gem_obj_ggtt_offset(so.obj);
+ so = req->engine->render_state;
+ if (!so)
+ return 0;
+
+ /* Recreate the page after shrinking */
+ if (!so->vma->obj->mm.pages)
+ so->batch_offset = -1;
- ret = render_state_setup(&so);
+ ret = i915_vma_pin(so->vma, 0, 0, PIN_GLOBAL);
if (ret)
- goto err_unpin;
+ return ret;
+
+ if (so->vma->node.start != so->batch_offset) {
+ ret = render_state_setup(so, req->i915);
+ if (ret)
+ goto err_unpin;
+ }
- ret = req->engine->emit_bb_start(req, so.ggtt_offset,
- so.rodata->batch_items * 4,
+ ret = req->engine->emit_bb_start(req,
+ so->batch_offset, so->batch_size,
I915_DISPATCH_SECURE);
if (ret)
goto err_unpin;
- if (so.aux_batch_size > 8) {
+ if (so->aux_size > 8) {
ret = req->engine->emit_bb_start(req,
- (so.ggtt_offset +
- so.aux_batch_offset),
- so.aux_batch_size,
+ so->aux_offset, so->aux_size,
I915_DISPATCH_SECURE);
if (ret)
goto err_unpin;
}
- i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), req, 0);
+ i915_vma_move_to_active(so->vma, req, 0);
err_unpin:
- i915_gem_object_ggtt_unpin(so.obj);
-err_obj:
- i915_gem_object_put(so.obj);
+ i915_vma_unpin(so->vma);
return ret;
}
+
+void i915_gem_render_state_fini(struct intel_engine_cs *engine)
+{
+ struct intel_render_state *so;
+
+ so = engine->render_state;
+ if (so == NULL)
+ return;
+
+ __i915_gem_object_release_unless_active(so->vma->obj);
+ kfree(so);
+
+ engine->render_state = NULL;
+}
diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.h b/drivers/gpu/drm/i915/i915_gem_render_state.h
index c44fca8599bb..87481845799d 100644
--- a/drivers/gpu/drm/i915/i915_gem_render_state.h
+++ b/drivers/gpu/drm/i915/i915_gem_render_state.h
@@ -24,8 +24,10 @@
#ifndef _I915_GEM_RENDER_STATE_H_
#define _I915_GEM_RENDER_STATE_H_
-#include <linux/types.h>
+struct drm_i915_gem_request;
-int i915_gem_render_state_init(struct drm_i915_gem_request *req);
+int i915_gem_render_state_init(struct intel_engine_cs *engine);
+int i915_gem_render_state_emit(struct drm_i915_gem_request *req);
+void i915_gem_render_state_fini(struct intel_engine_cs *engine);
#endif /* _I915_GEM_RENDER_STATE_H_ */
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 6a1661643d3d..41f7ca2eb806 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -115,48 +115,21 @@ const struct fence_ops i915_fence_ops = {
.timeline_value_str = i915_fence_timeline_value_str,
};
-int i915_gem_request_add_to_client(struct drm_i915_gem_request *req,
- struct drm_file *file)
-{
- struct drm_i915_private *dev_private;
- struct drm_i915_file_private *file_priv;
-
- WARN_ON(!req || !file || req->file_priv);
-
- if (!req || !file)
- return -EINVAL;
-
- if (req->file_priv)
- return -EINVAL;
-
- dev_private = req->i915;
- file_priv = file->driver_priv;
-
- spin_lock(&file_priv->mm.lock);
- req->file_priv = file_priv;
- list_add_tail(&req->client_list, &file_priv->mm.request_list);
- spin_unlock(&file_priv->mm.lock);
-
- req->pid = get_pid(task_pid(current));
-
- return 0;
-}
-
static inline void
i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
{
- struct drm_i915_file_private *file_priv = request->file_priv;
+ struct drm_i915_file_private *file_priv;
+ file_priv = request->file_priv;
if (!file_priv)
return;
spin_lock(&file_priv->mm.lock);
- list_del(&request->client_list);
- request->file_priv = NULL;
+ if (request->file_priv) {
+ list_del(&request->client_link);
+ request->file_priv = NULL;
+ }
spin_unlock(&file_priv->mm.lock);
-
- put_pid(request->pid);
- request->pid = NULL;
}
void i915_gem_retire_noop(struct i915_gem_active *active,
@@ -218,7 +191,6 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request)
request->engine);
}
- i915_gem_context_put(request->ctx);
i915_gem_request_put(request);
}
@@ -319,6 +291,19 @@ static int i915_gem_get_seqno(struct drm_i915_private *dev_priv, u32 *seqno)
return 0;
}
+static int __kfence_call submit_notify(struct kfence *fence)
+{
+ struct drm_i915_gem_request *request =
+ container_of(fence, typeof(*request), submit);
+
+ if (kfence_done(fence))
+ i915_gem_request_put(request);
+ else
+ request->engine->submit_request(request);
+
+ return NOTIFY_DONE;
+}
+
/**
* i915_gem_request_alloc - allocate a request structure
*
@@ -337,7 +322,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
{
struct drm_i915_private *dev_priv = engine->i915;
unsigned int reset_counter = i915_reset_counter(&dev_priv->gpu_error);
- struct drm_i915_gem_request *req;
+ struct drm_i915_gem_request *req, *prev;
u32 seqno;
int ret;
@@ -355,7 +340,35 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
if (req && i915_gem_request_completed(req))
i915_gem_request_retire(req);
- req = kmem_cache_zalloc(dev_priv->requests, GFP_KERNEL);
+ /* Beware: Dragons be flying overhead.
+ *
+ * We use RCU to look up requests in flight. The lookups may
+ * race with the request being allocated from the slab freelist.
+ * That is the request we are writing to here, may be in the process
+ * of being read by __i915_gem_active_get_request_rcu(). As such,
+ * we have to be very careful when overwriting the contents. During
+ * the RCU lookup, we change chase the request->engine pointer,
+ * read the request->fence.seqno and increment the reference count.
+ *
+ * The reference count is incremented atomically. If it is zero,
+ * the lookup knows the request is unallocated and complete. Otherwise,
+ * it is either still in use, or has been reallocated and reset
+ * with fence_init(). This increment is safe for release as we check
+ * that the request we have a reference to and matches the active
+ * request.
+ *
+ * Before we increment the refcount, we chase the request->engine
+ * pointer. We must not call kmem_cache_zalloc() or else we set
+ * that pointer to NULL and cause a crash during the lookup. If
+ * we see the request is completed (based on the value of the
+ * old engine and seqno), the lookup is complete and reports NULL.
+ * If we decide the request is not completed (new engine or seqno),
+ * then we grab a reference and double check that it is still the
+ * active request - which it won't be and restart the lookup.
+ *
+ * Do not use kmem_cache_zalloc() here!
+ */
+ req = kmem_cache_alloc(dev_priv->requests, GFP_KERNEL);
if (!req)
return ERR_PTR(-ENOMEM);
@@ -373,7 +386,13 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
INIT_LIST_HEAD(&req->active_list);
req->i915 = dev_priv;
req->engine = engine;
- req->ctx = i915_gem_context_get(ctx);
+ req->ctx = ctx;
+
+ /* No zalloc, must clear what we need by hand */
+ req->signaling.wait.tsk = NULL;
+ req->previous_context = NULL;
+ req->file_priv = NULL;
+ req->batch = NULL;
/*
* Reserve space in the ring buffer for all the commands required to
@@ -384,17 +403,36 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
*/
req->reserved_space = MIN_SPACE_FOR_ADD_REQUEST;
+ kfence_init(&req->submit, submit_notify);
+ i915_gem_request_get(req);
+
if (i915.enable_execlists)
ret = intel_logical_ring_alloc_request_extras(req);
else
ret = intel_ring_alloc_request_extras(req);
if (ret)
- goto err_ctx;
+ goto err;
+
+ /* Record the position of the start of the request so that
+ * should we detect the updated seqno part-way through the
+ * GPU processing the request, we never over-estimate the
+ * position of the head.
+ */
+ req->head = req->ring->tail;
+
+ prev = i915_gem_active_peek(&engine->last_request,
+ &req->i915->drm.struct_mutex);
+ if (prev) {
+ ret = kfence_await_kfence(&req->submit, &prev->submit,
+ GFP_KERNEL);
+ if (ret < 0) {
+ i915_add_request(req);
+ return ERR_PTR(ret);
+ }
+ }
return req;
-err_ctx:
- i915_gem_context_put(ctx);
err:
kmem_cache_free(dev_priv->requests, req);
return ERR_PTR(ret);
@@ -426,22 +464,14 @@ static void i915_gem_mark_busy(const struct intel_engine_cs *engine)
* request is not being tracked for completion but the work itself is
* going to happen on the hardware. This would be a Bad Thing(tm).
*/
-void __i915_add_request(struct drm_i915_gem_request *request,
- struct drm_i915_gem_object *obj,
- bool flush_caches)
+void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
{
- struct intel_engine_cs *engine;
- struct intel_ring *ring;
+ struct intel_engine_cs *engine = request->engine;
+ struct intel_ring *ring = request->ring;
u32 request_start;
u32 reserved_tail;
int ret;
- if (WARN_ON(!request))
- return;
-
- engine = request->engine;
- ring = request->ring;
-
/*
* To ensure that this call will not fail, space for its emissions
* should already have been reserved in the ring buffer. Let the ring
@@ -467,16 +497,6 @@ void __i915_add_request(struct drm_i915_gem_request *request,
trace_i915_gem_request_add(request);
- request->head = request_start;
-
- /* Whilst this request exists, batch_obj will be on the
- * active_list, and so will hold the active reference. Only when this
- * request is retired will the the batch_obj be moved onto the
- * inactive_list and lose its active reference. Hence we do not need
- * to explicitly hold another reference here.
- */
- request->batch_obj = obj;
-
/* Seal the request and mark it as pending execution. Note that
* we may inspect this state, without holding any locks, during
* hangcheck. Hence we apply the barrier to ensure that we do not
@@ -489,10 +509,10 @@ void __i915_add_request(struct drm_i915_gem_request *request,
list_add_tail(&request->link, &engine->request_list);
list_add_tail(&request->ring_link, &ring->request_list);
- /* Record the position of the start of the request so that
+ /* Record the position of the start of the breadcrumb so that
* should we detect the updated seqno part-way through the
* GPU processing the request, we never over-estimate the
- * position of the head.
+ * position of the ring's HEAD.
*/
request->postfix = ring->tail;
@@ -510,7 +530,9 @@ void __i915_add_request(struct drm_i915_gem_request *request,
reserved_tail, ret);
i915_gem_mark_busy(engine);
- engine->submit_request(request);
+
+ kfence_complete(&request->submit);
+ kfence_put(&request->submit);
}
static unsigned long local_clock_us(unsigned int *cpu)
@@ -731,16 +753,18 @@ complete:
return ret;
}
-static void engine_retire_requests(struct intel_engine_cs *engine)
+static bool engine_retire_requests(struct intel_engine_cs *engine)
{
struct drm_i915_gem_request *request, *next;
list_for_each_entry_safe(request, next, &engine->request_list, link) {
if (!i915_gem_request_completed(request))
- break;
+ return false;
i915_gem_request_retire(request);
}
+
+ return true;
}
void i915_gem_retire_requests(struct drm_i915_private *dev_priv)
@@ -754,9 +778,8 @@ void i915_gem_retire_requests(struct drm_i915_private *dev_priv)
GEM_BUG_ON(!dev_priv->gt.awake);
- for_each_engine(engine, dev_priv) {
- engine_retire_requests(engine);
- if (!intel_engine_is_active(engine))
+ for_each_engine_masked(engine, dev_priv, dev_priv->gt.active_engines) {
+ if (engine_retire_requests(engine))
dev_priv->gt.active_engines &= ~intel_engine_flag(engine);
}
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index 3496e28785e7..ead83dd55784 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -26,6 +26,7 @@
#define I915_GEM_REQUEST_H
#include <linux/fence.h>
+#include <linux/kfence.h>
#include "i915_gem.h"
@@ -51,6 +52,13 @@ struct intel_signal_node {
* emission time to be associated with the request for tracking how far ahead
* of the GPU the submission is.
*
+ * When modifying this structure be very aware that we perform a lockless
+ * RCU lookup of it that may race against reallocation of the struct
+ * from the slab freelist. We intentionally do not zero the structure on
+ * allocation so that the lookup can use the dangling pointers (and is
+ * cogniscent that those pointers may be wrong). Instead, everything that
+ * needs to be initialised must be done so explicitly.
+ *
* The requests are reference counted.
*/
struct drm_i915_gem_request {
@@ -75,6 +83,8 @@ struct drm_i915_gem_request {
struct intel_ring *ring;
struct intel_signal_node signaling;
+ struct kfence submit;
+
/** GEM sequence number associated with the previous request,
* when the HWS breadcrumb is equal to this the GPU is processing
* this request.
@@ -94,6 +104,9 @@ struct drm_i915_gem_request {
/** Position in the ringbuffer of the end of the whole request */
u32 tail;
+ /** Position in the ringbuffer after the end of the whole request */
+ u32 wa_tail;
+
/** Preallocate space in the ringbuffer for the emitting the request */
u32 reserved_space;
@@ -111,7 +124,7 @@ struct drm_i915_gem_request {
/** Batch buffer related to this request if any (used for
* error state dump only).
*/
- struct drm_i915_gem_object *batch_obj;
+ struct i915_vma *batch;
struct list_head active_list;
/** Time at which this request was emitted, in jiffies. */
@@ -125,32 +138,10 @@ struct drm_i915_gem_request {
struct drm_i915_file_private *file_priv;
/** file_priv list entry for this request */
- struct list_head client_list;
-
- /** process identifier submitting this request */
- struct pid *pid;
-
- /**
- * The ELSP only accepts two elements at a time, so we queue
- * context/tail pairs on a given queue (ring->execlist_queue) until the
- * hardware is available. The queue serves a double purpose: we also use
- * it to keep track of the up to 2 contexts currently in the hardware
- * (usually one in execution and the other queued up by the GPU): We
- * only remove elements from the head of the queue when the hardware
- * informs us that an element has been completed.
- *
- * All accesses to the queue are mediated by a spinlock
- * (ring->execlist_lock).
- */
+ struct list_head client_link;
- /** Execlist link in the submission queue.*/
+ /** Link in the execlist submission queue, guarded by execlist_lock. */
struct list_head execlist_link;
-
- /** Execlists no. of times this request has been sent to the ELSP */
- int elsp_submitted;
-
- /** Execlists context hardware id. */
- unsigned int ctx_hw_id;
};
extern const struct fence_ops i915_fence_ops;
@@ -163,8 +154,6 @@ static inline bool fence_is_i915(struct fence *fence)
struct drm_i915_gem_request * __must_check
i915_gem_request_alloc(struct intel_engine_cs *engine,
struct i915_gem_context *ctx);
-int i915_gem_request_add_to_client(struct drm_i915_gem_request *req,
- struct drm_file *file);
void i915_gem_request_retire_upto(struct drm_i915_gem_request *req);
static inline u32
@@ -218,13 +207,9 @@ static inline void i915_gem_request_assign(struct drm_i915_gem_request **pdst,
*pdst = src;
}
-void __i915_add_request(struct drm_i915_gem_request *req,
- struct drm_i915_gem_object *batch_obj,
- bool flush_caches);
+void __i915_add_request(struct drm_i915_gem_request *req, bool flush_caches);
#define i915_add_request(req) \
- __i915_add_request(req, NULL, true)
-#define i915_add_request_no_flush(req) \
- __i915_add_request(req, NULL, false)
+ __i915_add_request(req, false)
struct intel_rps_client;
#define NO_WAITBOOST ERR_PTR(-1)
@@ -263,11 +248,14 @@ i915_gem_request_completed(const struct drm_i915_gem_request *req)
bool __i915_spin_request(const struct drm_i915_gem_request *request,
int state, unsigned long timeout_us);
+
static inline bool i915_spin_request(const struct drm_i915_gem_request *request,
int state, unsigned long timeout_us)
{
- return (i915_gem_request_started(request) &&
- __i915_spin_request(request, state, timeout_us));
+ if (!i915_gem_request_started(request))
+ return false;
+
+ return __i915_spin_request(request, state, timeout_us);
}
/* We treat requests as fences. This is not be to confused with our
@@ -465,6 +453,10 @@ __i915_gem_active_get_rcu(const struct i915_gem_active *active)
* just report the active tracker is idle. If the new request is
* incomplete, then we acquire a reference on it and check that
* it remained the active request.
+ *
+ * It is then imperative that we do not zero the request on
+ * reallocation, so that we can chase the dangling pointers!
+ * See i915_gem_request_alloc().
*/
do {
struct drm_i915_gem_request *request;
@@ -497,6 +489,9 @@ __i915_gem_active_get_rcu(const struct i915_gem_active *active)
* incremented) then the following read for rcu_access_pointer()
* must occur after the atomic operation and so confirm
* that this request is the one currently being tracked.
+ *
+ * The corresponding write barrier is part of
+ * rcu_assign_pointer().
*/
if (!request || request == rcu_access_pointer(active->request))
return rcu_pointer_handoff(request);
diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
index b80802b35353..b4a9341661e8 100644
--- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
@@ -48,6 +48,19 @@ static bool mutex_is_locked_by(struct mutex *mutex, struct task_struct *task)
#endif
}
+static bool i915_gem_shrinker_lock(struct drm_device *dev, bool *unlock)
+{
+ if (!mutex_trylock(&dev->struct_mutex)) {
+ if (!mutex_is_locked_by(&dev->struct_mutex, current))
+ return false;
+
+ *unlock = false;
+ } else
+ *unlock = true;
+
+ return true;
+}
+
static bool any_vma_pinned(struct drm_i915_gem_object *obj)
{
struct i915_vma *vma;
@@ -66,6 +79,9 @@ static bool swap_available(void)
static bool can_release_pages(struct drm_i915_gem_object *obj)
{
+ if (obj->mm.pages == NULL)
+ return false;
+
/* Only shmemfs objects are backed by swap */
if (!obj->base.filp)
return false;
@@ -78,7 +94,7 @@ static bool can_release_pages(struct drm_i915_gem_object *obj)
* to the GPU, simply unbinding from the GPU is not going to succeed
* in releasing our pin count on the pages themselves.
*/
- if (obj->pages_pin_count > obj->bind_count)
+ if (atomic_read(&obj->mm.pages_pin_count) > obj->bind_count)
return false;
if (any_vma_pinned(obj))
@@ -88,7 +104,14 @@ static bool can_release_pages(struct drm_i915_gem_object *obj)
* discard the contents (because the user has marked them as being
* purgeable) or if we can move their contents out to swap.
*/
- return swap_available() || obj->madv == I915_MADV_DONTNEED;
+ return swap_available() || obj->mm.madv == I915_MADV_DONTNEED;
+}
+
+static int unsafe_drop_pages(struct drm_i915_gem_object *obj)
+{
+ if (i915_gem_object_unbind(obj) == 0)
+ __i915_gem_object_put_pages(obj);
+ return obj->mm.pages == NULL;
}
/**
@@ -128,6 +151,10 @@ i915_gem_shrink(struct drm_i915_private *dev_priv,
{ NULL, 0 },
}, *phase;
unsigned long count = 0;
+ bool unlock;
+
+ if (!i915_gem_shrinker_lock(&dev_priv->drm, &unlock))
+ return 0;
trace_i915_gem_shrink(dev_priv, target, flags);
i915_gem_retire_requests(dev_priv);
@@ -173,30 +200,38 @@ i915_gem_shrink(struct drm_i915_private *dev_priv,
typeof(*obj),
global_list))) {
list_move_tail(&obj->global_list, &still_in_list);
+ if (obj->mm.pages == NULL) {
+ list_del_init(&obj->global_list);
+ continue;
+ }
+
+ if (!can_release_pages(obj))
+ continue;
if (flags & I915_SHRINK_PURGEABLE &&
- obj->madv != I915_MADV_DONTNEED)
+ obj->mm.madv != I915_MADV_DONTNEED)
continue;
if (flags & I915_SHRINK_VMAPS &&
- !is_vmalloc_addr(obj->mapping))
+ !is_vmalloc_addr(obj->mm.mapping))
continue;
if ((flags & I915_SHRINK_ACTIVE) == 0 &&
i915_gem_object_is_active(obj))
continue;
- if (!can_release_pages(obj))
- continue;
-
- i915_gem_object_get(obj);
-
- /* For the unbound phase, this should be a no-op! */
- i915_gem_object_unbind(obj);
- if (i915_gem_object_put_pages(obj) == 0)
+ if (kref_get_unless_zero(&obj->base.refcount)) {
+ if (unsafe_drop_pages(obj)) {
+ __i915_gem_object_invalidate(obj);
+ count += obj->base.size >> PAGE_SHIFT;
+ }
+ i915_gem_object_put(obj);
+ } else if (atomic_read(&obj->mm.pages_pin_count) == 0) {
+ __i915_gem_object_put_pages(obj);
+ GEM_BUG_ON(obj->mm.pages);
+ __i915_gem_object_invalidate(obj);
count += obj->base.size >> PAGE_SHIFT;
-
- i915_gem_object_put(obj);
+ }
}
list_splice(&still_in_list, phase->list);
}
@@ -205,6 +240,9 @@ i915_gem_shrink(struct drm_i915_private *dev_priv,
intel_runtime_pm_put(dev_priv);
i915_gem_retire_requests(dev_priv);
+ if (unlock)
+ mutex_unlock(&dev_priv->drm.struct_mutex);
+
/* expedite the RCU grace period to free some request slabs */
synchronize_rcu_expedited();
@@ -238,19 +276,6 @@ unsigned long i915_gem_shrink_all(struct drm_i915_private *dev_priv)
return freed;
}
-static bool i915_gem_shrinker_lock(struct drm_device *dev, bool *unlock)
-{
- if (!mutex_trylock(&dev->struct_mutex)) {
- if (!mutex_is_locked_by(&dev->struct_mutex, current))
- return false;
-
- *unlock = false;
- } else
- *unlock = true;
-
- return true;
-}
-
static unsigned long
i915_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc)
{
@@ -304,6 +329,12 @@ i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc)
sc->nr_to_scan - freed,
I915_SHRINK_BOUND |
I915_SHRINK_UNBOUND);
+ if (freed < sc->nr_to_scan && current_is_kswapd())
+ freed += i915_gem_shrink(dev_priv,
+ sc->nr_to_scan - freed,
+ I915_SHRINK_ACTIVE |
+ I915_SHRINK_BOUND |
+ I915_SHRINK_PURGEABLE);
if (unlock)
mutex_unlock(&dev->struct_mutex);
@@ -373,12 +404,18 @@ i915_gem_shrinker_oom(struct notifier_block *nb, unsigned long event, void *ptr)
*/
unbound = bound = unevictable = 0;
list_for_each_entry(obj, &dev_priv->mm.unbound_list, global_list) {
+ if (obj->mm.pages == NULL)
+ continue;
+
if (!can_release_pages(obj))
unevictable += obj->base.size >> PAGE_SHIFT;
else
unbound += obj->base.size >> PAGE_SHIFT;
}
list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
+ if (obj->mm.pages == NULL)
+ continue;
+
if (!can_release_pages(obj))
unevictable += obj->base.size >> PAGE_SHIFT;
else
diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c
index 13279610eeec..67e7c9998244 100644
--- a/drivers/gpu/drm/i915/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/i915_gem_stolen.c
@@ -63,8 +63,7 @@ int i915_gem_stolen_insert_node_in_range(struct drm_i915_private *dev_priv,
mutex_lock(&dev_priv->mm.stolen_lock);
ret = drm_mm_insert_node_in_range(&dev_priv->mm.stolen, node, size,
- alignment, start, end,
- DRM_MM_SEARCH_DEFAULT);
+ alignment, start, end, 0);
mutex_unlock(&dev_priv->mm.stolen_lock);
return ret;
@@ -533,20 +532,22 @@ i915_pages_create_for_stolen(struct drm_device *dev,
return st;
}
-static int i915_gem_object_get_pages_stolen(struct drm_i915_gem_object *obj)
+static struct sg_table *
+i915_gem_object_get_pages_stolen(struct drm_i915_gem_object *obj)
{
- BUG();
- return -EINVAL;
+ return i915_pages_create_for_stolen(obj->base.dev,
+ obj->stolen->start,
+ obj->stolen->size);
}
-static void i915_gem_object_put_pages_stolen(struct drm_i915_gem_object *obj)
+static void i915_gem_object_put_pages_stolen(struct drm_i915_gem_object *obj,
+ struct sg_table *pages)
{
/* Should only be called during free */
- sg_free_table(obj->pages);
- kfree(obj->pages);
+ sg_free_table(pages);
+ kfree(pages);
}
-
static void
i915_gem_object_release_stolen(struct drm_i915_gem_object *obj)
{
@@ -577,20 +578,13 @@ _i915_gem_object_create_stolen(struct drm_device *dev,
drm_gem_private_object_init(dev, &obj->base, stolen->size);
i915_gem_object_init(obj, &i915_gem_object_stolen_ops);
- obj->pages = i915_pages_create_for_stolen(dev,
- stolen->start, stolen->size);
- if (obj->pages == NULL)
- goto cleanup;
-
- obj->get_page.sg = obj->pages->sgl;
- obj->get_page.last = 0;
-
- i915_gem_object_pin_pages(obj);
obj->stolen = stolen;
-
obj->base.read_domains = I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT;
obj->cache_level = HAS_LLC(dev) ? I915_CACHE_LLC : I915_CACHE_NONE;
+ if (i915_gem_object_pin_pages(obj))
+ goto cleanup;
+
return obj;
cleanup:
@@ -685,10 +679,14 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_device *dev,
if (gtt_offset == I915_GTT_OFFSET_NONE)
return obj;
- vma = i915_gem_obj_lookup_or_create_vma(obj, &ggtt->base);
+ ret = i915_gem_object_pin_pages(obj);
+ if (ret)
+ goto err;
+
+ vma = i915_gem_obj_lookup_or_create_vma(obj, &ggtt->base, NULL);
if (IS_ERR(vma)) {
ret = PTR_ERR(vma);
- goto err;
+ goto err_pages;
}
/* To simplify the initialisation sequence between KMS and GTT,
@@ -702,19 +700,19 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_device *dev,
ret = drm_mm_reserve_node(&ggtt->base.mm, &vma->node);
if (ret) {
DRM_DEBUG_KMS("failed to allocate stolen GTT space\n");
- goto err;
+ goto err_pages;
}
+ obj->bind_count++;
vma->flags |= I915_VMA_GLOBAL_BIND;
__i915_vma_set_map_and_fenceable(vma);
list_move_tail(&vma->vm_link, &ggtt->base.inactive_list);
- obj->bind_count++;
-
- list_add_tail(&obj->global_list, &dev_priv->mm.bound_list);
- i915_gem_object_pin_pages(obj);
+ list_move_tail(&obj->global_list, &dev_priv->mm.bound_list);
return obj;
+err_pages:
+ i915_gem_object_unpin_pages(obj);
err:
i915_gem_object_put(obj);
return NULL;
diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
index f4b984de83b5..b3512cf9c921 100644
--- a/drivers/gpu/drm/i915/i915_gem_tiling.c
+++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
@@ -116,37 +116,58 @@ i915_tiling_ok(struct drm_device *dev, int stride, int size, int tiling_mode)
return true;
}
-/* Is the current GTT allocation valid for the change in tiling? */
-static bool
-i915_gem_object_fence_ok(struct drm_i915_gem_object *obj, int tiling_mode)
+static bool i915_vma_fence_ok(struct i915_vma *vma, int tiling_mode)
{
- struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
+ struct drm_i915_private *dev_priv = to_i915(vma->vm->dev);
u32 size;
- if (tiling_mode == I915_TILING_NONE)
- return true;
-
- if (INTEL_GEN(dev_priv) >= 4)
+ if (!i915_vma_is_map_and_fenceable(vma))
return true;
- if (IS_GEN3(dev_priv)) {
- if (i915_gem_obj_ggtt_offset(obj) & ~I915_FENCE_START_MASK)
+ if (INTEL_GEN(dev_priv) == 3) {
+ if (vma->node.start & ~I915_FENCE_START_MASK)
return false;
} else {
- if (i915_gem_obj_ggtt_offset(obj) & ~I830_FENCE_START_MASK)
+ if (vma->node.start & ~I830_FENCE_START_MASK)
return false;
}
- size = i915_gem_get_ggtt_size(dev_priv, obj->base.size, tiling_mode);
- if (i915_gem_obj_ggtt_size(obj) != size)
+ size = i915_gem_get_ggtt_size(dev_priv, vma->size, tiling_mode);
+ if (vma->node.size < size)
return false;
- if (i915_gem_obj_ggtt_offset(obj) & (size - 1))
+ if (vma->node.start & (size - 1))
return false;
return true;
}
+/* Is the current GTT allocation valid for the change in tiling? */
+static int
+i915_gem_object_fence_ok(struct drm_i915_gem_object *obj, int tiling_mode)
+{
+ struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
+ struct i915_vma *vma;
+ int ret;
+
+ if (tiling_mode == I915_TILING_NONE)
+ return 0;
+
+ if (INTEL_GEN(dev_priv) >= 4)
+ return 0;
+
+ list_for_each_entry(vma, &obj->vma_list, obj_link) {
+ if (i915_vma_fence_ok(vma, tiling_mode))
+ continue;
+
+ ret = i915_vma_unbind(vma);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
/**
* i915_gem_set_tiling - IOCTL handler to set tiling mode
* @dev: DRM device
@@ -179,14 +200,12 @@ i915_gem_set_tiling(struct drm_device *dev, void *data,
if (!i915_tiling_ok(dev,
args->stride, obj->base.size, args->tiling_mode)) {
- i915_gem_object_put_unlocked(obj);
+ i915_gem_object_put(obj);
return -EINVAL;
}
- intel_runtime_pm_get(dev_priv);
-
mutex_lock(&dev->struct_mutex);
- if (obj->pin_display || obj->framebuffer_references) {
+ if (obj->pin_display || atomic_read(&obj->framebuffer_references)) {
ret = -EBUSY;
goto err;
}
@@ -234,25 +253,27 @@ i915_gem_set_tiling(struct drm_device *dev, void *data,
* has to also include the unfenced register the GPU uses
* whilst executing a fenced command for an untiled object.
*/
- if (obj->map_and_fenceable &&
- !i915_gem_object_fence_ok(obj, args->tiling_mode))
- ret = i915_vma_unbind(i915_gem_obj_to_ggtt(obj));
-
+ ret = i915_gem_object_fence_ok(obj, args->tiling_mode);
if (ret == 0) {
- if (obj->pages &&
- obj->madv == I915_MADV_WILLNEED &&
+ struct i915_vma *vma;
+
+ mutex_lock(&obj->mm.lock);
+ if (obj->mm.pages &&
+ obj->mm.madv == I915_MADV_WILLNEED &&
dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
if (args->tiling_mode == I915_TILING_NONE)
- i915_gem_object_unpin_pages(obj);
+ atomic_dec(&obj->mm.pages_pin_count);
if (!i915_gem_object_is_tiled(obj))
- i915_gem_object_pin_pages(obj);
+ atomic_inc(&obj->mm.pages_pin_count);
}
+ mutex_unlock(&obj->mm.lock);
- obj->fence_dirty =
- !i915_gem_active_is_idle(&obj->last_fence,
- &dev->struct_mutex) ||
- obj->fence_reg != I915_FENCE_REG_NONE;
+ list_for_each_entry(vma, &obj->vma_list, obj_link) {
+ if (!vma->fence)
+ continue;
+ vma->fence->dirty = true;
+ }
obj->tiling_and_stride =
args->stride | args->tiling_mode;
@@ -279,8 +300,6 @@ err:
i915_gem_object_put(obj);
mutex_unlock(&dev->struct_mutex);
- intel_runtime_pm_put(dev_priv);
-
return ret;
}
@@ -334,6 +353,6 @@ i915_gem_get_tiling(struct drm_device *dev, void *data,
if (args->swizzle_mode == I915_BIT_6_SWIZZLE_9_10_17)
args->swizzle_mode = I915_BIT_6_SWIZZLE_9_10;
- i915_gem_object_put_unlocked(obj);
+ i915_gem_object_put(obj);
return 0;
}
diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
index 57218cca7e05..4a00c7673bed 100644
--- a/drivers/gpu/drm/i915/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
@@ -80,14 +80,16 @@ static void cancel_userptr(struct work_struct *work)
wait_rendering(obj);
mutex_lock(&dev->struct_mutex);
+
/* Cancel any active worker and force us to re-evaluate gup */
obj->userptr.work = NULL;
- if (obj->pages != NULL) {
- /* We are inside a kthread context and can't be interrupted */
- WARN_ON(i915_gem_object_unbind(obj));
- WARN_ON(i915_gem_object_put_pages(obj));
- }
+ /* We are inside a kthread context and can't be interrupted */
+ if (i915_gem_object_unbind(obj) == 0)
+ __i915_gem_object_put_pages(obj);
+ WARN_ONCE(obj->mm.pages,
+ "Failed to release pages: bind_count=%d, pages_pin_count=%d\n",
+ obj->bind_count, atomic_read(&obj->mm.pages_pin_count));
i915_gem_object_put(obj);
mutex_unlock(&dev->struct_mutex);
@@ -376,7 +378,7 @@ __i915_mm_struct_free(struct kref *kref)
mutex_unlock(&mm->i915->mm_lock);
INIT_WORK(&mm->work, __i915_mm_struct_free__worker);
- schedule_work(&mm->work);
+ queue_work(mm->i915->mm.userptr_wq, &mm->work);
}
static void
@@ -436,24 +438,25 @@ err:
return ret;
}
-static int
+static struct sg_table *
__i915_gem_userptr_set_pages(struct drm_i915_gem_object *obj,
struct page **pvec, int num_pages)
{
+ struct sg_table *pages;
int ret;
- ret = st_set_pages(&obj->pages, pvec, num_pages);
+ ret = st_set_pages(&pages, pvec, num_pages);
if (ret)
- return ret;
+ return ERR_PTR(ret);
- ret = i915_gem_gtt_prepare_object(obj);
+ ret = i915_gem_gtt_prepare_pages(obj, pages);
if (ret) {
- sg_free_table(obj->pages);
- kfree(obj->pages);
- obj->pages = NULL;
+ sg_free_table(pages);
+ kfree(pages);
+ return ERR_PTR(ret);
}
- return ret;
+ return pages;
}
static int
@@ -497,7 +500,6 @@ __i915_gem_userptr_get_pages_worker(struct work_struct *_work)
{
struct get_pages_work *work = container_of(_work, typeof(*work), work);
struct drm_i915_gem_object *obj = work->obj;
- struct drm_device *dev = obj->base.dev;
const int npages = obj->base.size >> PAGE_SHIFT;
struct page **pvec;
int pinned, ret;
@@ -529,35 +531,31 @@ __i915_gem_userptr_get_pages_worker(struct work_struct *_work)
}
}
- mutex_lock(&dev->struct_mutex);
+ mutex_lock(&obj->mm.lock);
if (obj->userptr.work == &work->work) {
+ struct sg_table *pages = ERR_PTR(ret);
+
if (pinned == npages) {
- ret = __i915_gem_userptr_set_pages(obj, pvec, npages);
- if (ret == 0) {
- list_add_tail(&obj->global_list,
- &to_i915(dev)->mm.unbound_list);
- obj->get_page.sg = obj->pages->sgl;
- obj->get_page.last = 0;
+ pages = __i915_gem_userptr_set_pages(obj, pvec, npages);
+ if (!IS_ERR(pages)) {
+ __i915_gem_object_set_pages(obj, pages);
pinned = 0;
+ pages = NULL;
}
}
- obj->userptr.work = ERR_PTR(ret);
- if (ret)
- __i915_gem_userptr_set_active(obj, false);
+ obj->userptr.work = ERR_CAST(pages);
}
-
- obj->userptr.workers--;
- i915_gem_object_put(obj);
- mutex_unlock(&dev->struct_mutex);
+ mutex_unlock(&obj->mm.lock);
release_pages(pvec, pinned, 0);
drm_free_large(pvec);
+ i915_gem_object_put(obj);
put_task_struct(work->task);
kfree(work);
}
-static int
+static struct sg_table *
__i915_gem_userptr_get_pages_schedule(struct drm_i915_gem_object *obj,
bool *active)
{
@@ -582,15 +580,11 @@ __i915_gem_userptr_get_pages_schedule(struct drm_i915_gem_object *obj,
* that error back to this function through
* obj->userptr.work = ERR_PTR.
*/
- if (obj->userptr.workers >= I915_GEM_USERPTR_MAX_WORKERS)
- return -EAGAIN;
-
work = kmalloc(sizeof(*work), GFP_KERNEL);
if (work == NULL)
- return -ENOMEM;
+ return ERR_PTR(-ENOMEM);
obj->userptr.work = &work->work;
- obj->userptr.workers++;
work->obj = i915_gem_object_get(obj);
@@ -598,17 +592,18 @@ __i915_gem_userptr_get_pages_schedule(struct drm_i915_gem_object *obj,
get_task_struct(work->task);
INIT_WORK(&work->work, __i915_gem_userptr_get_pages_worker);
- schedule_work(&work->work);
+ queue_work(to_i915(obj->base.dev)->mm.userptr_wq, &work->work);
*active = true;
- return -EAGAIN;
+ return ERR_PTR(-EAGAIN);
}
-static int
+static struct sg_table *
i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
{
const int num_pages = obj->base.size >> PAGE_SHIFT;
struct page **pvec;
+ struct sg_table *pages;
int pinned, ret;
bool active;
@@ -628,20 +623,19 @@ i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
* to the vma (discard or cloning) which should prevent the more
* egregious cases from causing harm.
*/
- if (IS_ERR(obj->userptr.work)) {
- /* active flag will have been dropped already by the worker */
- ret = PTR_ERR(obj->userptr.work);
- obj->userptr.work = NULL;
- return ret;
- }
- if (obj->userptr.work)
+
+ if (obj->userptr.work) {
/* active flag should still be held for the pending work */
- return -EAGAIN;
+ if (IS_ERR(obj->userptr.work))
+ return ERR_CAST(obj->userptr.work);
+ else
+ return ERR_PTR(-EAGAIN);
+ }
/* Let the mmu-notifier know that we have begun and need cancellation */
ret = __i915_gem_userptr_set_active(obj, true);
if (ret)
- return ret;
+ return ERR_PTR(ret);
pvec = NULL;
pinned = 0;
@@ -650,7 +644,7 @@ i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
GFP_TEMPORARY);
if (pvec == NULL) {
__i915_gem_userptr_set_active(obj, false);
- return -ENOMEM;
+ return ERR_PTR(-ENOMEM);
}
pinned = __get_user_pages_fast(obj->userptr.ptr, num_pages,
@@ -659,21 +653,22 @@ i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
active = false;
if (pinned < 0)
- ret = pinned, pinned = 0;
+ pages = ERR_PTR(pinned), pinned = 0;
else if (pinned < num_pages)
- ret = __i915_gem_userptr_get_pages_schedule(obj, &active);
+ pages = __i915_gem_userptr_get_pages_schedule(obj, &active);
else
- ret = __i915_gem_userptr_set_pages(obj, pvec, num_pages);
- if (ret) {
+ pages = __i915_gem_userptr_set_pages(obj, pvec, num_pages);
+ if (IS_ERR(pages)) {
__i915_gem_userptr_set_active(obj, active);
release_pages(pvec, pinned, 0);
}
drm_free_large(pvec);
- return ret;
+ return pages;
}
static void
-i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj)
+i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj,
+ struct sg_table *pages)
{
struct sgt_iter sgt_iter;
struct page *page;
@@ -681,22 +676,22 @@ i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj)
BUG_ON(obj->userptr.work != NULL);
__i915_gem_userptr_set_active(obj, false);
- if (obj->madv != I915_MADV_WILLNEED)
- obj->dirty = 0;
+ if (obj->mm.madv != I915_MADV_WILLNEED)
+ i915_gem_object_clear_dirty(obj);
- i915_gem_gtt_finish_object(obj);
+ i915_gem_gtt_finish_pages(obj, pages);
- for_each_sgt_page(page, sgt_iter, obj->pages) {
- if (obj->dirty)
+ for_each_sgt_page(page, sgt_iter, pages) {
+ if (i915_gem_object_is_dirty(obj))
set_page_dirty(page);
mark_page_accessed(page);
put_page(page);
}
- obj->dirty = 0;
+ i915_gem_object_clear_dirty(obj);
- sg_free_table(obj->pages);
- kfree(obj->pages);
+ sg_free_table(pages);
+ kfree(pages);
}
static void
@@ -815,7 +810,7 @@ i915_gem_userptr_ioctl(struct drm_device *dev, void *data, struct drm_file *file
ret = drm_gem_handle_create(file, &obj->base, &handle);
/* drop reference from allocate - handle holds it now */
- i915_gem_object_put_unlocked(obj);
+ i915_gem_object_put(obj);
if (ret)
return ret;
@@ -823,8 +818,20 @@ i915_gem_userptr_ioctl(struct drm_device *dev, void *data, struct drm_file *file
return 0;
}
-void i915_gem_init_userptr(struct drm_i915_private *dev_priv)
+int i915_gem_init_userptr(struct drm_i915_private *dev_priv)
{
mutex_init(&dev_priv->mm_lock);
hash_init(dev_priv->mm_structs);
+
+ dev_priv->mm.userptr_wq =
+ alloc_workqueue("i915-userptr-acquire", WQ_HIGHPRI, 0);
+ if (!dev_priv->mm.userptr_wq)
+ return -ENOMEM;
+
+ return 0;
+}
+
+void i915_gem_cleanup_userptr(struct drm_i915_private *dev_priv)
+{
+ destroy_workqueue(dev_priv->mm.userptr_wq);
}
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index eecb87063c88..53324be858d4 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -28,6 +28,8 @@
*/
#include <generated/utsrelease.h>
+#include <linux/stop_machine.h>
+#include <linux/zlib.h>
#include "i915_drv.h"
static const char *engine_str(int engine)
@@ -42,16 +44,6 @@ static const char *engine_str(int engine)
}
}
-static const char *pin_flag(int pinned)
-{
- if (pinned > 0)
- return " P";
- else if (pinned < 0)
- return " p";
- else
- return "";
-}
-
static const char *tiling_flag(int tiling)
{
switch (tiling) {
@@ -189,7 +181,7 @@ static void print_error_buffers(struct drm_i915_error_state_buf *m,
{
int i;
- err_printf(m, " %s [%d]:\n", name, count);
+ err_printf(m, "%s [%d]:\n", name, count);
while (count--) {
err_printf(m, " %08x_%08x %8u %02x %02x [ ",
@@ -202,7 +194,6 @@ static void print_error_buffers(struct drm_i915_error_state_buf *m,
err_printf(m, "%02x ", err->rseqno[i]);
err_printf(m, "] %02x", err->wseqno);
- err_puts(m, pin_flag(err->pinned));
err_puts(m, tiling_flag(err->tiling));
err_puts(m, dirty_flag(err->dirty));
err_puts(m, purgeable_flag(err->purgeable));
@@ -247,12 +238,22 @@ static void error_print_engine(struct drm_i915_error_state_buf *m,
err_printf(m, " HEAD: 0x%08x\n", ee->head);
err_printf(m, " TAIL: 0x%08x\n", ee->tail);
err_printf(m, " CTL: 0x%08x\n", ee->ctl);
+ err_printf(m, " MODE: 0x%08x [idle? %d]\n",
+ ee->mode, !!(ee->mode & MODE_IDLE));
err_printf(m, " HWS: 0x%08x\n", ee->hws);
err_printf(m, " ACTHD: 0x%08x %08x\n",
(u32)(ee->acthd>>32), (u32)ee->acthd);
err_printf(m, " IPEIR: 0x%08x\n", ee->ipeir);
err_printf(m, " IPEHR: 0x%08x\n", ee->ipehr);
err_printf(m, " INSTDONE: 0x%08x\n", ee->instdone);
+ if (ee->batchbuffer) {
+ u64 start = ee->batchbuffer->gtt_offset;
+ u64 end = start + ee->batchbuffer->gtt_size;
+
+ err_printf(m, " batch: [0x%08x %08x, 0x%08x %08x]\n",
+ upper_32_bits(start), lower_32_bits(start),
+ upper_32_bits(end), lower_32_bits(end));
+ }
if (INTEL_GEN(m->i915) >= 4) {
err_printf(m, " BBADDR: 0x%08x %08x\n",
(u32)(ee->bbaddr>>32), (u32)ee->bbaddr);
@@ -309,18 +310,58 @@ void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...)
va_end(args);
}
+static bool
+ascii85_encode(u32 in, char *out)
+{
+ int i;
+
+ if (in == 0)
+ return false;
+
+ out[5] = '\0';
+ for (i = 5; i--; ) {
+ out[i] = '!' + in % 85;
+ in /= 85;
+ }
+
+ return true;
+}
+
static void print_error_obj(struct drm_i915_error_state_buf *m,
+ struct intel_engine_cs *engine,
+ const char *name,
struct drm_i915_error_object *obj)
{
- int page, offset, elt;
+ char out[6];
+ int page;
+
+ if (!obj)
+ return;
+
+ if (name) {
+ err_printf(m, "%s --- %s gtt_offset = 0x%08x_%08x\n",
+ engine ? engine->name : "global", name,
+ upper_32_bits(obj->gtt_offset),
+ lower_32_bits(obj->gtt_offset));
+ }
+
+ err_puts(m, ":"); /* indicate compressed data */
+ for (page = 0; page < obj->page_count; page++) {
+ int i, len;
+
+ len = PAGE_SIZE;
+ if (page == obj->page_count - 1)
+ len -= obj->unused;
+ len = (len + 3) / 4;
- for (page = offset = 0; page < obj->page_count; page++) {
- for (elt = 0; elt < PAGE_SIZE/4; elt++) {
- err_printf(m, "%08x : %08x\n", offset,
- obj->pages[page][elt]);
- offset += 4;
+ for (i = 0; i < len; i++) {
+ if (ascii85_encode(obj->pages[page][i], out))
+ err_puts(m, out);
+ else
+ err_puts(m, "z");
}
}
+ err_puts(m, "\n");
}
int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
@@ -330,8 +371,8 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
struct drm_i915_private *dev_priv = to_i915(dev);
struct drm_i915_error_state *error = error_priv->error;
struct drm_i915_error_object *obj;
- int i, j, offset, elt;
int max_hangcheck_score;
+ int i, j;
if (!error) {
err_printf(m, "no error state collected\n");
@@ -414,18 +455,25 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
error_print_engine(m, &error->engine[i]);
}
- for (i = 0; i < error->vm_count; i++) {
- err_printf(m, "vm[%d]\n", i);
+ for (i = 0; i < I915_NUM_ENGINES; i++) {
+ if (!error->active_vm[i])
+ break;
- print_error_buffers(m, "Active",
+ err_printf(m, "Active vm[%d]\n", i);
+ for (j = 0; j < I915_NUM_ENGINES; j++) {
+ if (error->engine[j].vm == error->active_vm[i])
+ err_printf(m, " %s\n",
+ dev_priv->engine[j].name);
+ }
+ print_error_buffers(m, " Buffers",
error->active_bo[i],
error->active_bo_count[i]);
-
- print_error_buffers(m, "Pinned",
- error->pinned_bo[i],
- error->pinned_bo_count[i]);
}
+ print_error_buffers(m, "Pinned (global)",
+ error->pinned_bo,
+ error->pinned_bo_count);
+
for (i = 0; i < ARRAY_SIZE(error->engine); i++) {
struct drm_i915_error_engine *ee = &error->engine[i];
@@ -439,15 +487,7 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
err_printf(m, " --- gtt_offset = 0x%08x %08x\n",
upper_32_bits(obj->gtt_offset),
lower_32_bits(obj->gtt_offset));
- print_error_obj(m, obj);
- }
-
- obj = ee->wa_batchbuffer;
- if (obj) {
- err_printf(m, "%s (w/a) --- gtt_offset = 0x%08x\n",
- dev_priv->engine[i].name,
- lower_32_bits(obj->gtt_offset));
- print_error_obj(m, obj);
+ print_error_obj(m, &dev_priv->engine[i], NULL, obj);
}
if (ee->num_requests) {
@@ -455,9 +495,11 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
dev_priv->engine[i].name,
ee->num_requests);
for (j = 0; j < ee->num_requests; j++) {
- err_printf(m, " seqno 0x%08x, emitted %ld, tail 0x%08x\n",
+ err_printf(m, " pid %d, seqno 0x%08x, emitted %ld, head 0x%08x tail 0x%08x\n",
+ ee->requests[j].pid,
ee->requests[j].seqno,
ee->requests[j].jiffies,
+ ee->requests[j].head,
ee->requests[j].tail);
}
}
@@ -474,77 +516,23 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
}
}
- if ((obj = ee->ringbuffer)) {
- err_printf(m, "%s --- ringbuffer = 0x%08x\n",
- dev_priv->engine[i].name,
- lower_32_bits(obj->gtt_offset));
- print_error_obj(m, obj);
- }
+ print_error_obj(m, &dev_priv->engine[i],
+ "ringbuffer", ee->ringbuffer);
- if ((obj = ee->hws_page)) {
- u64 hws_offset = obj->gtt_offset;
- u32 *hws_page = &obj->pages[0][0];
+ print_error_obj(m, &dev_priv->engine[i],
+ "HW Status", ee->hws_page);
- if (i915.enable_execlists) {
- hws_offset += LRC_PPHWSP_PN * PAGE_SIZE;
- hws_page = &obj->pages[LRC_PPHWSP_PN][0];
- }
- err_printf(m, "%s --- HW Status = 0x%08llx\n",
- dev_priv->engine[i].name, hws_offset);
- offset = 0;
- for (elt = 0; elt < PAGE_SIZE/16; elt += 4) {
- err_printf(m, "[%04x] %08x %08x %08x %08x\n",
- offset,
- hws_page[elt],
- hws_page[elt+1],
- hws_page[elt+2],
- hws_page[elt+3]);
- offset += 16;
- }
- }
+ print_error_obj(m, &dev_priv->engine[i],
+ "HW context", ee->ctx);
- obj = ee->wa_ctx;
- if (obj) {
- u64 wa_ctx_offset = obj->gtt_offset;
- u32 *wa_ctx_page = &obj->pages[0][0];
- struct intel_engine_cs *engine = &dev_priv->engine[RCS];
- u32 wa_ctx_size = (engine->wa_ctx.indirect_ctx.size +
- engine->wa_ctx.per_ctx.size);
-
- err_printf(m, "%s --- WA ctx batch buffer = 0x%08llx\n",
- dev_priv->engine[i].name, wa_ctx_offset);
- offset = 0;
- for (elt = 0; elt < wa_ctx_size; elt += 4) {
- err_printf(m, "[%04x] %08x %08x %08x %08x\n",
- offset,
- wa_ctx_page[elt + 0],
- wa_ctx_page[elt + 1],
- wa_ctx_page[elt + 2],
- wa_ctx_page[elt + 3]);
- offset += 16;
- }
- }
+ print_error_obj(m, &dev_priv->engine[i],
+ "WA context", ee->wa_ctx);
- if ((obj = ee->ctx)) {
- err_printf(m, "%s --- HW Context = 0x%08x\n",
- dev_priv->engine[i].name,
- lower_32_bits(obj->gtt_offset));
- print_error_obj(m, obj);
- }
+ print_error_obj(m, &dev_priv->engine[i],
+ "WA batchbuffer", ee->wa_batchbuffer);
}
- if ((obj = error->semaphore_obj)) {
- err_printf(m, "Semaphore page = 0x%08x\n",
- lower_32_bits(obj->gtt_offset));
- for (elt = 0; elt < PAGE_SIZE/16; elt += 4) {
- err_printf(m, "[%04x] %08x %08x %08x %08x\n",
- elt * 4,
- obj->pages[0][elt],
- obj->pages[0][elt+1],
- obj->pages[0][elt+2],
- obj->pages[0][elt+3]);
- }
- }
+ print_error_obj(m, NULL, "Semaphores", error->semaphore_obj);
if (error->overlay)
intel_overlay_print_error_state(m, error->overlay);
@@ -599,7 +587,7 @@ static void i915_error_object_free(struct drm_i915_error_object *obj)
return;
for (page = 0; page < obj->page_count; page++)
- kfree(obj->pages[page]);
+ free_page((unsigned long)obj->pages[page]);
kfree(obj);
}
@@ -626,121 +614,118 @@ static void i915_error_state_free(struct kref *error_ref)
i915_error_object_free(error->semaphore_obj);
- for (i = 0; i < error->vm_count; i++)
+ for (i = 0; i < ARRAY_SIZE(error->active_bo); i++)
kfree(error->active_bo[i]);
-
- kfree(error->active_bo);
- kfree(error->active_bo_count);
kfree(error->pinned_bo);
- kfree(error->pinned_bo_count);
+
kfree(error->overlay);
kfree(error->display);
kfree(error);
}
-static struct drm_i915_error_object *
-i915_error_object_create(struct drm_i915_private *dev_priv,
- struct drm_i915_gem_object *src,
- struct i915_address_space *vm)
+static int compress_page(struct z_stream_s *zstream,
+ void *src,
+ struct drm_i915_error_object *dst)
{
- struct i915_ggtt *ggtt = &dev_priv->ggtt;
- struct drm_i915_error_object *dst;
- struct i915_vma *vma = NULL;
- int num_pages;
- bool use_ggtt;
- int i = 0;
- u64 reloc_offset;
-
- if (src == NULL || src->pages == NULL)
- return NULL;
-
- num_pages = src->base.size >> PAGE_SHIFT;
+ zstream->next_in = src;
+ zstream->avail_in = PAGE_SIZE;
- dst = kmalloc(sizeof(*dst) + num_pages * sizeof(u32 *), GFP_ATOMIC);
- if (dst == NULL)
- return NULL;
-
- if (i915_gem_obj_bound(src, vm))
- dst->gtt_offset = i915_gem_obj_offset(src, vm);
- else
- dst->gtt_offset = -1;
+ do {
+ if (zstream->avail_out == 0) {
+ unsigned long page;
- reloc_offset = dst->gtt_offset;
- if (i915_is_ggtt(vm))
- vma = i915_gem_obj_to_ggtt(src);
- use_ggtt = (src->cache_level == I915_CACHE_NONE &&
- vma && (vma->flags & I915_VMA_GLOBAL_BIND) &&
- reloc_offset + num_pages * PAGE_SIZE <= ggtt->mappable_end);
+ page = __get_free_page(GFP_ATOMIC | __GFP_NOWARN);
+ if (!page)
+ return -ENOMEM;
- /* Cannot access stolen address directly, try to use the aperture */
- if (src->stolen) {
- use_ggtt = true;
-
- if (!(vma && vma->flags & I915_VMA_GLOBAL_BIND))
- goto unwind;
+ dst->pages[dst->page_count++] = (void *)page;
- reloc_offset = i915_gem_obj_ggtt_offset(src);
- if (reloc_offset + num_pages * PAGE_SIZE > ggtt->mappable_end)
- goto unwind;
- }
+ zstream->next_out = (void *)page;
+ zstream->avail_out = PAGE_SIZE;
+ }
- /* Cannot access snooped pages through the aperture */
- if (use_ggtt && src->cache_level != I915_CACHE_NONE &&
- !HAS_LLC(dev_priv))
- goto unwind;
+ if (zlib_deflate(zstream, Z_SYNC_FLUSH) != Z_OK)
+ return -EIO;
- dst->page_count = num_pages;
- while (num_pages--) {
- unsigned long flags;
- void *d;
+#if 0
+ /* XXX fallback to uncompressed if we increases size? */
+ if (zstream->total_out > zstream->total_in)
+ return -E2BIG;
+#endif
+ } while (zstream->avail_in);
- d = kmalloc(PAGE_SIZE, GFP_ATOMIC);
- if (d == NULL)
- goto unwind;
+ return 0;
+}
- local_irq_save(flags);
- if (use_ggtt) {
- void __iomem *s;
+static struct drm_i915_error_object *
+i915_error_object_create(struct drm_i915_private *i915,
+ struct i915_vma *vma)
+{
+ struct i915_ggtt *ggtt = &i915->ggtt;
+ const u64 slot = ggtt->gpu_error.start;
+ struct drm_i915_error_object *dst;
+ struct z_stream_s zstream;
+ unsigned long num_pages;
+ struct sgt_iter iter;
+ dma_addr_t dma;
- /* Simply ignore tiling or any overlapping fence.
- * It's part of the error state, and this hopefully
- * captures what the GPU read.
- */
+ if (!vma)
+ return NULL;
- s = io_mapping_map_atomic_wc(ggtt->mappable,
- reloc_offset);
- memcpy_fromio(d, s, PAGE_SIZE);
- io_mapping_unmap_atomic(s);
- } else {
- struct page *page;
- void *s;
+ num_pages = min_t(u64, vma->size, vma->obj->base.size) >> PAGE_SHIFT;
+ num_pages = DIV_ROUND_UP(10 * num_pages, 8); /* worstcase zlib growth */
+ dst = kmalloc(sizeof(*dst) + num_pages * sizeof(u32 *),
+ GFP_ATOMIC | __GFP_NOWARN);
+ if (!dst)
+ return NULL;
- page = i915_gem_object_get_page(src, i);
+ dst->gtt_offset = vma->node.start;
+ dst->page_count = 0;
+ dst->unused = 0;
+
+ memset(&zstream, 0, sizeof(zstream));
+ zstream.workspace = kmalloc(zlib_deflate_workspacesize(MAX_WBITS,
+ MAX_MEM_LEVEL),
+ GFP_ATOMIC | __GFP_NOWARN);
+ if (!zstream.workspace ||
+ zlib_deflateInit(&zstream, Z_DEFAULT_COMPRESSION) != Z_OK) {
+ kfree(zstream.workspace);
+ kfree(dst);
+ return NULL;
+ }
- drm_clflush_pages(&page, 1);
+ for_each_sgt_dma(dma, iter,
+ vma->ggtt_view.pages ?: vma->obj->mm.pages) {
+ int ret;
+ void *s;
- s = kmap_atomic(page);
- memcpy(d, s, PAGE_SIZE);
- kunmap_atomic(s);
+ ggtt->base.insert_page(&ggtt->base, dma, slot,
+ I915_CACHE_NONE, 0);
- drm_clflush_pages(&page, 1);
- }
- local_irq_restore(flags);
+ s = (void *__force)
+ io_mapping_map_atomic_wc(&ggtt->mappable, slot);
+ ret = compress_page(&zstream, s, dst);
+ io_mapping_unmap_atomic(s);
- dst->pages[i++] = d;
- reloc_offset += PAGE_SIZE;
+ if (ret)
+ goto unwind;
}
+ zlib_deflate(&zstream, Z_FINISH);
+ dst->unused = zstream.avail_out;
+out:
+ zlib_deflateEnd(&zstream);
+ kfree(zstream.workspace);
+ ggtt->base.clear_range(&ggtt->base, slot, PAGE_SIZE, true);
return dst;
unwind:
- while (i--)
- kfree(dst->pages[i]);
+ while (dst->page_count--)
+ free_page((unsigned long)dst->pages[dst->page_count]);
kfree(dst);
- return NULL;
+ dst = NULL;
+ goto out;
}
-#define i915_error_ggtt_object_create(dev_priv, src) \
- i915_error_object_create((dev_priv), (src), &(dev_priv)->ggtt.base)
/* The error capture is special as tries to run underneath the normal
* locking rules - so we use the raw version of the i915_gem_active lookup.
@@ -777,24 +762,25 @@ static void capture_bo(struct drm_i915_error_buffer *err,
err->gtt_offset = vma->node.start;
err->read_domains = obj->base.read_domains;
err->write_domain = obj->base.write_domain;
- err->fence_reg = obj->fence_reg;
- err->pinned = 0;
- if (i915_gem_obj_is_pinned(obj))
- err->pinned = 1;
+ err->fence_reg = vma->fence ? vma->fence->id : -1;
err->tiling = i915_gem_object_get_tiling(obj);
- err->dirty = obj->dirty;
- err->purgeable = obj->madv != I915_MADV_WILLNEED;
+ err->dirty = i915_gem_object_is_dirty(obj);
+ err->purgeable = obj->mm.madv != I915_MADV_WILLNEED;
err->userptr = obj->userptr.mm != NULL;
err->cache_level = obj->cache_level;
}
-static u32 capture_active_bo(struct drm_i915_error_buffer *err,
- int count, struct list_head *head)
+static u32 capture_error_bo(struct drm_i915_error_buffer *err,
+ int count, struct list_head *head,
+ bool pinned_only)
{
struct i915_vma *vma;
int i = 0;
list_for_each_entry(vma, head, vm_link) {
+ if (pinned_only && !i915_vma_is_pinned(vma))
+ continue;
+
capture_bo(err++, vma);
if (++i == count)
break;
@@ -803,28 +789,6 @@ static u32 capture_active_bo(struct drm_i915_error_buffer *err,
return i;
}
-static u32 capture_pinned_bo(struct drm_i915_error_buffer *err,
- int count, struct list_head *head,
- struct i915_address_space *vm)
-{
- struct drm_i915_gem_object *obj;
- struct drm_i915_error_buffer * const first = err;
- struct drm_i915_error_buffer * const last = err + count;
-
- list_for_each_entry(obj, head, global_list) {
- struct i915_vma *vma;
-
- if (err == last)
- break;
-
- list_for_each_entry(vma, &obj->vma_list, obj_link)
- if (vma->vm == vm && i915_vma_is_pinned(vma))
- capture_bo(err++, vma);
- }
-
- return err - first;
-}
-
/* Generate a semi-unique error code. The code is not meant to have meaning, The
* code's only purpose is to try to prevent false duplicated bug reports by
* grossly estimating a GPU error state.
@@ -1007,6 +971,8 @@ static void error_record_engine_registers(struct drm_i915_error_state *error,
ee->head = I915_READ_HEAD(engine);
ee->tail = I915_READ_TAIL(engine);
ee->ctl = I915_READ_CTL(engine);
+ if (INTEL_GEN(dev_priv) > 2)
+ ee->mode = I915_READ_MODE(engine);
if (I915_NEED_GFX_HWS(dev_priv)) {
i915_reg_t mmio;
@@ -1062,26 +1028,58 @@ static void error_record_engine_registers(struct drm_i915_error_state *error,
}
}
-
-static void i915_gem_record_active_context(struct intel_engine_cs *engine,
- struct drm_i915_error_state *error,
- struct drm_i915_error_engine *ee)
+static void engine_record_requests(struct intel_engine_cs *engine,
+ struct drm_i915_gem_request *first,
+ struct drm_i915_error_engine *ee)
{
- struct drm_i915_private *dev_priv = engine->i915;
- struct drm_i915_gem_object *obj;
+ struct drm_i915_gem_request *request;
+ int count;
- /* Currently render ring is the only HW context user */
- if (engine->id != RCS || !error->ccid)
- return;
+ count = 0;
+ request = first;
+ list_for_each_entry_from(request, &engine->request_list, link)
+ count++;
- list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
- if (!i915_gem_obj_ggtt_bound(obj))
- continue;
+ ee->requests = NULL;
+ kcalloc(count, sizeof(*ee->requests),
+ GFP_ATOMIC);
+ if (ee->requests == NULL)
+ return;
+ ee->num_requests = count;
- if ((error->ccid & PAGE_MASK) == i915_gem_obj_ggtt_offset(obj)) {
- ee->ctx = i915_error_ggtt_object_create(dev_priv, obj);
+ count = 0;
+ request = first;
+ list_for_each_entry_from(request, &engine->request_list, link) {
+ struct drm_i915_error_request *erq;
+
+ if (count >= ee->num_requests) {
+ /*
+ * If the ring request list was changed in
+ * between the point where the error request
+ * list was created and dimensioned and this
+ * point then just exit early to avoid crashes.
+ *
+ * We don't need to communicate that the
+ * request list changed state during error
+ * state capture and that the error state is
+ * slightly incorrect as a consequence since we
+ * are typically only interested in the request
+ * list state at the point of error state
+ * capture, not in any changes happening during
+ * the capture.
+ */
break;
}
+
+ erq = &ee->requests[count++];
+ erq->seqno = request->fence.seqno;
+ erq->jiffies = request->emitted_jiffies;
+ erq->head = request->head;
+ erq->tail = request->tail;
+
+ rcu_read_lock();
+ erq->pid = request->ctx ? pid_nr(request->ctx->pid) : 0;
+ rcu_read_unlock();
}
}
@@ -1089,18 +1087,15 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv,
struct drm_i915_error_state *error)
{
struct i915_ggtt *ggtt = &dev_priv->ggtt;
- struct drm_i915_gem_request *request;
- int i, count;
+ int i;
- if (dev_priv->semaphore_obj) {
- error->semaphore_obj =
- i915_error_ggtt_object_create(dev_priv,
- dev_priv->semaphore_obj);
- }
+ error->semaphore_obj =
+ i915_error_object_create(dev_priv, dev_priv->semaphore_vma);
for (i = 0; i < I915_NUM_ENGINES; i++) {
struct intel_engine_cs *engine = &dev_priv->engine[i];
struct drm_i915_error_engine *ee = &error->engine[i];
+ struct drm_i915_gem_request *request;
ee->pid = -1;
ee->engine_id = -1;
@@ -1115,10 +1110,10 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv,
request = i915_gem_find_active_request(engine);
if (request) {
- struct i915_address_space *vm;
struct intel_ring *ring;
+ struct pid *pid;
- vm = request->ctx->ppgtt ?
+ ee->vm = request->ctx->ppgtt ?
&request->ctx->ppgtt->base : &ggtt->base;
/* We need to copy these to an anonymous buffer
@@ -1127,19 +1122,23 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv,
*/
ee->batchbuffer =
i915_error_object_create(dev_priv,
- request->batch_obj,
- vm);
+ request->batch);
if (HAS_BROKEN_CS_TLB(dev_priv))
ee->wa_batchbuffer =
- i915_error_ggtt_object_create(dev_priv,
- engine->scratch.obj);
+ i915_error_object_create(dev_priv,
+ engine->scratch);
+
+ ee->ctx =
+ i915_error_object_create(dev_priv,
+ request->ctx->engine[i].state);
- if (request->pid) {
+ pid = request->ctx->pid;
+ if (pid) {
struct task_struct *task;
rcu_read_lock();
- task = pid_task(request->pid, PIDTYPE_PID);
+ task = pid_task(pid, PIDTYPE_PID);
if (task) {
strcpy(ee->comm, task->comm);
ee->pid = task->pid;
@@ -1154,145 +1153,99 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv,
ee->cpu_ring_head = ring->head;
ee->cpu_ring_tail = ring->tail;
ee->ringbuffer =
- i915_error_ggtt_object_create(dev_priv,
- ring->obj);
- }
-
- ee->hws_page =
- i915_error_ggtt_object_create(dev_priv,
- engine->status_page.obj);
-
- ee->wa_ctx = i915_error_ggtt_object_create(dev_priv,
- engine->wa_ctx.obj);
-
- i915_gem_record_active_context(engine, error, ee);
+ i915_error_object_create(dev_priv, ring->vma);
- count = 0;
- list_for_each_entry(request, &engine->request_list, link)
- count++;
-
- ee->num_requests = count;
- ee->requests =
- kcalloc(count, sizeof(*ee->requests), GFP_ATOMIC);
- if (!ee->requests) {
- ee->num_requests = 0;
- continue;
+ engine_record_requests(engine, request, ee);
}
- count = 0;
- list_for_each_entry(request, &engine->request_list, link) {
- struct drm_i915_error_request *erq;
-
- if (count >= ee->num_requests) {
- /*
- * If the ring request list was changed in
- * between the point where the error request
- * list was created and dimensioned and this
- * point then just exit early to avoid crashes.
- *
- * We don't need to communicate that the
- * request list changed state during error
- * state capture and that the error state is
- * slightly incorrect as a consequence since we
- * are typically only interested in the request
- * list state at the point of error state
- * capture, not in any changes happening during
- * the capture.
- */
- break;
- }
+ ee->hws_page =
+ i915_error_object_create(dev_priv,
+ engine->status_page.vma);
- erq = &ee->requests[count++];
- erq->seqno = request->fence.seqno;
- erq->jiffies = request->emitted_jiffies;
- erq->tail = request->postfix;
- }
+ ee->wa_ctx =
+ i915_error_object_create(dev_priv, engine->wa_ctx.vma);
}
}
-/* FIXME: Since pin count/bound list is global, we duplicate what we capture per
- * VM.
- */
static void i915_gem_capture_vm(struct drm_i915_private *dev_priv,
struct drm_i915_error_state *error,
struct i915_address_space *vm,
const int ndx)
{
- struct drm_i915_error_buffer *active_bo = NULL, *pinned_bo = NULL;
- struct drm_i915_gem_object *obj;
+ struct drm_i915_error_buffer *active_bo;
struct i915_vma *vma;
int i;
i = 0;
list_for_each_entry(vma, &vm->active_list, vm_link)
i++;
- error->active_bo_count[ndx] = i;
-
- list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
- list_for_each_entry(vma, &obj->vma_list, obj_link)
- if (vma->vm == vm && i915_vma_is_pinned(vma))
- i++;
- }
- error->pinned_bo_count[ndx] = i - error->active_bo_count[ndx];
- if (i) {
+ active_bo = NULL;
+ if (i)
active_bo = kcalloc(i, sizeof(*active_bo), GFP_ATOMIC);
- if (active_bo)
- pinned_bo = active_bo + error->active_bo_count[ndx];
- }
-
if (active_bo)
- error->active_bo_count[ndx] =
- capture_active_bo(active_bo,
- error->active_bo_count[ndx],
- &vm->active_list);
-
- if (pinned_bo)
- error->pinned_bo_count[ndx] =
- capture_pinned_bo(pinned_bo,
- error->pinned_bo_count[ndx],
- &dev_priv->mm.bound_list, vm);
+ i = capture_error_bo(active_bo, i, &vm->active_list, false);
+ else
+ i = 0;
+
error->active_bo[ndx] = active_bo;
- error->pinned_bo[ndx] = pinned_bo;
+ error->active_bo_count[ndx] = i;
+ error->active_vm[ndx] = vm;
}
-static void i915_gem_capture_buffers(struct drm_i915_private *dev_priv,
- struct drm_i915_error_state *error)
+static void i915_capture_active_buffers(struct drm_i915_private *dev_priv,
+ struct drm_i915_error_state *error)
{
- struct i915_address_space *vm;
- int cnt = 0, i = 0;
-
- list_for_each_entry(vm, &dev_priv->vm_list, global_link)
- cnt++;
-
- error->active_bo = kcalloc(cnt, sizeof(*error->active_bo), GFP_ATOMIC);
- error->pinned_bo = kcalloc(cnt, sizeof(*error->pinned_bo), GFP_ATOMIC);
- error->active_bo_count = kcalloc(cnt, sizeof(*error->active_bo_count),
- GFP_ATOMIC);
- error->pinned_bo_count = kcalloc(cnt, sizeof(*error->pinned_bo_count),
- GFP_ATOMIC);
-
- if (error->active_bo == NULL ||
- error->pinned_bo == NULL ||
- error->active_bo_count == NULL ||
- error->pinned_bo_count == NULL) {
- kfree(error->active_bo);
- kfree(error->active_bo_count);
- kfree(error->pinned_bo);
- kfree(error->pinned_bo_count);
-
- error->active_bo = NULL;
- error->active_bo_count = NULL;
- error->pinned_bo = NULL;
- error->pinned_bo_count = NULL;
- } else {
- list_for_each_entry(vm, &dev_priv->vm_list, global_link)
- i915_gem_capture_vm(dev_priv, error, vm, i++);
+ int cnt = 0, i, j;
- error->vm_count = cnt;
+ BUILD_BUG_ON(ARRAY_SIZE(error->engine) > ARRAY_SIZE(error->active_bo));
+ BUILD_BUG_ON(ARRAY_SIZE(error->active_bo) != ARRAY_SIZE(error->active_vm));
+ BUILD_BUG_ON(ARRAY_SIZE(error->active_bo) != ARRAY_SIZE(error->active_bo_count));
+
+ for (i = 0; i < I915_NUM_ENGINES; i++) {
+ struct drm_i915_error_engine *ee = &error->engine[i];
+
+ if (!ee->vm)
+ continue;
+
+ for (j = 0; j < i; j++)
+ if (error->engine[j].vm == ee->vm)
+ break;
+ if (j != i)
+ continue;
+
+ i915_gem_capture_vm(dev_priv, error, ee->vm, cnt++);
}
}
+static void i915_capture_pinned_buffers(struct drm_i915_private *dev_priv,
+ struct drm_i915_error_state *error)
+{
+ struct i915_address_space *vm = &dev_priv->ggtt.base;
+ struct drm_i915_error_buffer *bo;
+ struct i915_vma *vma;
+ int i, j;
+
+ i = 0;
+ list_for_each_entry(vma, &vm->active_list, vm_link)
+ i++;
+
+ j = 0;
+ list_for_each_entry(vma, &vm->inactive_list, vm_link)
+ j++;
+
+ bo = NULL;
+ if (i + j)
+ bo = kcalloc(i + j, sizeof(*bo), GFP_ATOMIC);
+ if (!bo)
+ return;
+
+ i = capture_error_bo(bo, i, &vm->active_list, true);
+ j = capture_error_bo(bo + i, j, &vm->inactive_list, true);
+ error->pinned_bo_count = i + j;
+ error->pinned_bo = bo;
+}
+
/* Capture all registers which don't fit into another category. */
static void i915_capture_reg_state(struct drm_i915_private *dev_priv,
struct drm_i915_error_state *error)
@@ -1405,6 +1358,26 @@ static void i915_capture_gen_state(struct drm_i915_private *dev_priv,
error->suspend_count = dev_priv->suspend_count;
}
+static int capture(void *data)
+{
+ struct drm_i915_error_state *error = data;
+
+ i915_capture_gen_state(error->i915, error);
+ i915_capture_reg_state(error->i915, error);
+ i915_gem_record_fences(error->i915, error);
+ i915_gem_record_rings(error->i915, error);
+
+ i915_capture_active_buffers(error->i915, error);
+ i915_capture_pinned_buffers(error->i915, error);
+
+ do_gettimeofday(&error->time);
+
+ error->overlay = intel_overlay_capture_error_state(error->i915);
+ error->display = intel_display_capture_error_state(error->i915);
+
+ return 0;
+}
+
/**
* i915_capture_error_state - capture an error record for later analysis
* @dev: drm device
@@ -1433,17 +1406,9 @@ void i915_capture_error_state(struct drm_i915_private *dev_priv,
}
kref_init(&error->ref);
+ error->i915 = dev_priv;
- i915_capture_gen_state(dev_priv, error);
- i915_capture_reg_state(dev_priv, error);
- i915_gem_capture_buffers(dev_priv, error);
- i915_gem_record_fences(dev_priv, error);
- i915_gem_record_rings(dev_priv, error);
-
- do_gettimeofday(&error->time);
-
- error->overlay = intel_overlay_capture_error_state(dev_priv);
- error->display = intel_display_capture_error_state(dev_priv);
+ stop_machine(capture, error, NULL);
i915_error_capture_msg(dev_priv, error, engine_mask, error_msg);
DRM_INFO("%s\n", error->error_msg);
@@ -1483,7 +1448,6 @@ void i915_error_state_get(struct drm_device *dev,
if (error_priv->error)
kref_get(&error_priv->error->ref);
spin_unlock_irq(&dev_priv->gpu_error.lock);
-
}
void i915_error_state_put(struct i915_error_state_file_priv *error_priv)
diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
index 03a5cef353eb..9302ac9e570d 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -183,7 +183,7 @@ static int guc_update_doorbell_id(struct intel_guc *guc,
struct i915_guc_client *client,
u16 new_id)
{
- struct sg_table *sg = guc->ctx_pool_obj->pages;
+ struct sg_table *sg = guc->ctx_pool->obj->mm.pages;
void *doorbell_bitmap = guc->doorbell_bitmap;
struct guc_doorbell_info *doorbell;
struct guc_context_desc desc;
@@ -325,8 +325,8 @@ static void guc_init_proc_desc(struct intel_guc *guc,
static void guc_init_ctx_desc(struct intel_guc *guc,
struct i915_guc_client *client)
{
- struct drm_i915_gem_object *client_obj = client->client_obj;
struct drm_i915_private *dev_priv = guc_to_i915(guc);
+ struct drm_i915_gem_object *client_obj = client->client->obj;
struct intel_engine_cs *engine;
struct i915_gem_context *ctx = client->owner;
struct guc_context_desc desc;
@@ -343,7 +343,6 @@ static void guc_init_ctx_desc(struct intel_guc *guc,
for_each_engine(engine, dev_priv) {
struct intel_context *ce = &ctx->engine[engine->id];
struct guc_execlist_context *lrc = &desc.lrc[engine->guc_id];
- struct drm_i915_gem_object *obj;
/* TODO: We have a design issue to be solved here. Only when we
* receive the first batch, we know which engine is used by the
@@ -358,17 +357,14 @@ static void guc_init_ctx_desc(struct intel_guc *guc,
lrc->context_desc = lower_32_bits(ce->lrc_desc);
/* The state page is after PPHWSP */
- gfx_addr = i915_gem_obj_ggtt_offset(ce->state);
+ gfx_addr = ce->state->node.start;
lrc->ring_lcra = gfx_addr + LRC_STATE_PN * PAGE_SIZE;
lrc->context_id = (client->ctx_index << GUC_ELC_CTXID_OFFSET) |
(engine->guc_id << GUC_ELC_ENGINE_OFFSET);
- obj = ce->ring->obj;
- gfx_addr = i915_gem_obj_ggtt_offset(obj);
-
- lrc->ring_begin = gfx_addr;
- lrc->ring_end = gfx_addr + obj->base.size - 1;
- lrc->ring_next_free_location = gfx_addr;
+ lrc->ring_begin = ce->ring->vma->node.start;
+ lrc->ring_end = gfx_addr + ce->ring->vma->node.size - 1;
+ lrc->ring_next_free_location = lrc->ring_begin;
lrc->ring_current_tail_pointer_value = 0;
desc.engines_used |= (1 << engine->guc_id);
@@ -380,8 +376,8 @@ static void guc_init_ctx_desc(struct intel_guc *guc,
* The doorbell, process descriptor, and workqueue are all parts
* of the client object, which the GuC will reference via the GGTT
*/
- gfx_addr = i915_gem_obj_ggtt_offset(client_obj);
- desc.db_trigger_phy = sg_dma_address(client_obj->pages->sgl) +
+ gfx_addr = client->client->node.start;
+ desc.db_trigger_phy = sg_dma_address(client_obj->mm.pages->sgl) +
client->doorbell_offset;
desc.db_trigger_cpu = (uintptr_t)client->client_base +
client->doorbell_offset;
@@ -397,7 +393,7 @@ static void guc_init_ctx_desc(struct intel_guc *guc,
desc.desc_private = (uintptr_t)client;
/* Pool context is pinned already */
- sg = guc->ctx_pool_obj->pages;
+ sg = guc->ctx_pool->obj->mm.pages;
sg_pcopy_from_buffer(sg->sgl, sg->nents, &desc, sizeof(desc),
sizeof(desc) * client->ctx_index);
}
@@ -410,7 +406,7 @@ static void guc_fini_ctx_desc(struct intel_guc *guc,
memset(&desc, 0, sizeof(desc));
- sg = guc->ctx_pool_obj->pages;
+ sg = guc->ctx_pool->obj->mm.pages;
sg_pcopy_from_buffer(sg->sgl, sg->nents, &desc, sizeof(desc),
sizeof(desc) * client->ctx_index);
}
@@ -492,7 +488,7 @@ static void guc_add_workqueue_item(struct i915_guc_client *gc,
/* WQ starts from the page after doorbell / process_desc */
wq_page = (wq_off + GUC_DB_SIZE) >> PAGE_SHIFT;
wq_off &= PAGE_SIZE - 1;
- base = kmap_atomic(i915_gem_object_get_page(gc->client_obj, wq_page));
+ base = kmap_atomic(i915_gem_object_get_page(gc->client->obj, wq_page));
wqi = (struct guc_wq_item *)((char *)base + wq_off);
/* Now fill in the 4-word work queue item */
@@ -611,8 +607,8 @@ static void i915_guc_submit(struct drm_i915_gem_request *rq)
*/
/**
- * gem_allocate_guc_obj() - Allocate gem object for GuC usage
- * @dev_priv: driver private data structure
+ * guc_allocate_vma() - Allocate a GGTT VMA for GuC usage
+ * @guc: the guc
* @size: size of object
*
* This is a wrapper to create a gem obj. In order to use it inside GuC, the
@@ -621,45 +617,40 @@ static void i915_guc_submit(struct drm_i915_gem_request *rq)
*
* Return: A drm_i915_gem_object if successful, otherwise NULL.
*/
-static struct drm_i915_gem_object *
-gem_allocate_guc_obj(struct drm_i915_private *dev_priv, u32 size)
+static struct i915_vma *guc_allocate_vma(struct intel_guc *guc, u32 size)
{
+ struct drm_i915_private *dev_priv = guc_to_i915(guc);
struct drm_i915_gem_object *obj;
+ struct i915_vma *vma;
obj = i915_gem_object_create(&dev_priv->drm, size);
if (IS_ERR(obj))
- return NULL;
-
- if (i915_gem_object_get_pages(obj)) {
- i915_gem_object_put(obj);
- return NULL;
- }
+ return ERR_CAST(obj);
- if (i915_gem_object_ggtt_pin(obj, NULL, 0, PAGE_SIZE,
- PIN_OFFSET_BIAS | GUC_WOPCM_TOP)) {
+ vma = i915_gem_object_ggtt_pin(obj, NULL, 0, PAGE_SIZE,
+ PIN_OFFSET_BIAS | GUC_WOPCM_TOP);
+ if (IS_ERR(vma)) {
i915_gem_object_put(obj);
- return NULL;
+ return vma;
}
/* Invalidate GuC TLB to let GuC take the latest updates to GTT. */
I915_WRITE(GEN8_GTCR, GEN8_GTCR_INVALIDATE);
- return obj;
+ return vma;
}
/**
- * gem_release_guc_obj() - Release gem object allocated for GuC usage
- * @obj: gem obj to be released
+ * guc_release_vma() - Release gem object allocated for GuC usage
+ * @vma: gem obj to be released
*/
-static void gem_release_guc_obj(struct drm_i915_gem_object *obj)
+static void guc_release_vma(struct i915_vma *vma)
{
- if (!obj)
+ if (!vma)
return;
- if (i915_gem_obj_is_pinned(obj))
- i915_gem_object_ggtt_unpin(obj);
-
- i915_gem_object_put(obj);
+ i915_vma_unpin(vma);
+ i915_gem_object_put(vma->obj);
}
static void
@@ -686,7 +677,7 @@ guc_client_free(struct drm_i915_private *dev_priv,
kunmap(kmap_to_page(client->client_base));
}
- gem_release_guc_obj(client->client_obj);
+ guc_release_vma(client->client);
if (client->ctx_index != GUC_INVALID_CTX_ID) {
guc_fini_ctx_desc(guc, client);
@@ -757,7 +748,7 @@ guc_client_alloc(struct drm_i915_private *dev_priv,
{
struct i915_guc_client *client;
struct intel_guc *guc = &dev_priv->guc;
- struct drm_i915_gem_object *obj;
+ struct i915_vma *vma;
uint16_t db_id;
client = kzalloc(sizeof(*client), GFP_KERNEL);
@@ -777,13 +768,13 @@ guc_client_alloc(struct drm_i915_private *dev_priv,
}
/* The first page is doorbell/proc_desc. Two followed pages are wq. */
- obj = gem_allocate_guc_obj(dev_priv, GUC_DB_SIZE + GUC_WQ_SIZE);
- if (!obj)
+ vma = guc_allocate_vma(guc, GUC_DB_SIZE + GUC_WQ_SIZE);
+ if (IS_ERR(vma))
goto err;
/* We'll keep just the first (doorbell/proc) page permanently kmap'd. */
- client->client_obj = obj;
- client->client_base = kmap(i915_gem_object_get_page(obj, 0));
+ client->client = vma;
+ client->client_base = kmap(i915_gem_object_get_page(vma->obj, 0));
client->wq_offset = GUC_DB_SIZE;
client->wq_size = GUC_WQ_SIZE;
@@ -825,8 +816,7 @@ err:
static void guc_create_log(struct intel_guc *guc)
{
- struct drm_i915_private *dev_priv = guc_to_i915(guc);
- struct drm_i915_gem_object *obj;
+ struct i915_vma *vma;
unsigned long offset;
uint32_t size, flags;
@@ -842,16 +832,16 @@ static void guc_create_log(struct intel_guc *guc)
GUC_LOG_ISR_PAGES + 1 +
GUC_LOG_CRASH_PAGES + 1) << PAGE_SHIFT;
- obj = guc->log_obj;
- if (!obj) {
- obj = gem_allocate_guc_obj(dev_priv, size);
- if (!obj) {
+ vma = guc->log;
+ if (!vma) {
+ vma = guc_allocate_vma(guc, size);
+ if (IS_ERR(vma)) {
/* logging will be off */
i915.guc_log_level = -1;
return;
}
- guc->log_obj = obj;
+ guc->log = vma;
}
/* each allocated unit is a page */
@@ -860,7 +850,7 @@ static void guc_create_log(struct intel_guc *guc)
(GUC_LOG_ISR_PAGES << GUC_LOG_ISR_SHIFT) |
(GUC_LOG_CRASH_PAGES << GUC_LOG_CRASH_SHIFT);
- offset = i915_gem_obj_ggtt_offset(obj) >> PAGE_SHIFT; /* in pages */
+ offset = vma->node.start >> PAGE_SHIFT; /* in pages */
guc->log_flags = (offset << GUC_LOG_BUF_ADDR_SHIFT) | flags;
}
@@ -889,7 +879,7 @@ static void init_guc_policies(struct guc_policies *policies)
static void guc_create_ads(struct intel_guc *guc)
{
struct drm_i915_private *dev_priv = guc_to_i915(guc);
- struct drm_i915_gem_object *obj;
+ struct i915_vma *vma;
struct guc_ads *ads;
struct guc_policies *policies;
struct guc_mmio_reg_state *reg_state;
@@ -902,16 +892,16 @@ static void guc_create_ads(struct intel_guc *guc)
sizeof(struct guc_mmio_reg_state) +
GUC_S3_SAVE_SPACE_PAGES * PAGE_SIZE;
- obj = guc->ads_obj;
- if (!obj) {
- obj = gem_allocate_guc_obj(dev_priv, PAGE_ALIGN(size));
- if (!obj)
+ vma = guc->ads;
+ if (!vma) {
+ vma = guc_allocate_vma(guc, PAGE_ALIGN(size));
+ if (IS_ERR(vma))
return;
- guc->ads_obj = obj;
+ guc->ads = vma;
}
- page = i915_gem_object_get_page(obj, 0);
+ page = i915_gem_object_get_page(vma->obj, 0);
ads = kmap(page);
/*
@@ -931,8 +921,7 @@ static void guc_create_ads(struct intel_guc *guc)
policies = (void *)ads + sizeof(struct guc_ads);
init_guc_policies(policies);
- ads->scheduler_policies = i915_gem_obj_ggtt_offset(obj) +
- sizeof(struct guc_ads);
+ ads->scheduler_policies = vma->node.start + sizeof(struct guc_ads);
/* MMIO reg state */
reg_state = (void *)policies + sizeof(struct guc_policies);
@@ -960,10 +949,9 @@ static void guc_create_ads(struct intel_guc *guc)
*/
int i915_guc_submission_init(struct drm_i915_private *dev_priv)
{
- const size_t ctxsize = sizeof(struct guc_context_desc);
- const size_t poolsize = GUC_MAX_GPU_CONTEXTS * ctxsize;
- const size_t gemsize = round_up(poolsize, PAGE_SIZE);
struct intel_guc *guc = &dev_priv->guc;
+ struct i915_vma *vma;
+ u32 size;
/* Wipe bitmap & delete client in case of reinitialisation */
bitmap_clear(guc->doorbell_bitmap, 0, GUC_MAX_DOORBELLS);
@@ -972,13 +960,15 @@ int i915_guc_submission_init(struct drm_i915_private *dev_priv)
if (!i915.enable_guc_submission)
return 0; /* not enabled */
- if (guc->ctx_pool_obj)
+ if (guc->ctx_pool)
return 0; /* already allocated */
- guc->ctx_pool_obj = gem_allocate_guc_obj(dev_priv, gemsize);
- if (!guc->ctx_pool_obj)
- return -ENOMEM;
+ size = PAGE_ALIGN(GUC_MAX_GPU_CONTEXTS*sizeof(struct guc_context_desc));
+ vma = guc_allocate_vma(guc, size);
+ if (IS_ERR(vma))
+ return PTR_ERR(vma);
+ guc->ctx_pool = vma;
ida_init(&guc->ctx_ids);
guc_create_log(guc);
guc_create_ads(guc);
@@ -1030,16 +1020,16 @@ void i915_guc_submission_fini(struct drm_i915_private *dev_priv)
{
struct intel_guc *guc = &dev_priv->guc;
- gem_release_guc_obj(dev_priv->guc.ads_obj);
- guc->ads_obj = NULL;
+ guc_release_vma(guc->ads);
+ guc->ads = NULL;
- gem_release_guc_obj(dev_priv->guc.log_obj);
- guc->log_obj = NULL;
+ guc_release_vma(guc->log);
+ guc->log = NULL;
- if (guc->ctx_pool_obj)
+ if (guc->ctx_pool)
ida_destroy(&guc->ctx_ids);
- gem_release_guc_obj(guc->ctx_pool_obj);
- guc->ctx_pool_obj = NULL;
+ guc_release_vma(guc->ctx_pool);
+ guc->ctx_pool = NULL;
}
/**
@@ -1062,7 +1052,7 @@ int intel_guc_suspend(struct drm_device *dev)
/* any value greater than GUC_POWER_D0 */
data[1] = GUC_POWER_D1;
/* first page is shared data with GuC */
- data[2] = i915_gem_obj_ggtt_offset(ctx->engine[RCS].state);
+ data[2] = ctx->engine[RCS].state->node.start;
return host2guc_action(guc, data, ARRAY_SIZE(data));
}
@@ -1087,7 +1077,7 @@ int intel_guc_resume(struct drm_device *dev)
data[0] = HOST2GUC_ACTION_EXIT_S_STATE;
data[1] = GUC_POWER_D0;
/* first page is shared data with GuC */
- data[2] = i915_gem_obj_ggtt_offset(ctx->engine[RCS].state);
+ data[2] = ctx->engine[RCS].state->node.start;
return host2guc_action(guc, data, ARRAY_SIZE(data));
}
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 591f452ece68..7a236dd337fa 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -239,37 +239,19 @@ void ilk_update_display_irq(struct drm_i915_private *dev_priv,
}
}
-/**
- * ilk_update_gt_irq - update GTIMR
- * @dev_priv: driver private
- * @interrupt_mask: mask of interrupt bits to update
- * @enabled_irq_mask: mask of interrupt bits to enable
- */
-static void ilk_update_gt_irq(struct drm_i915_private *dev_priv,
- uint32_t interrupt_mask,
- uint32_t enabled_irq_mask)
+void gen5_enable_gt_irq(struct drm_i915_private *dev_priv, uint32_t mask)
{
assert_spin_locked(&dev_priv->irq_lock);
-
- WARN_ON(enabled_irq_mask & ~interrupt_mask);
-
- if (WARN_ON(!intel_irqs_enabled(dev_priv)))
- return;
-
- dev_priv->gt_irq_mask &= ~interrupt_mask;
- dev_priv->gt_irq_mask |= (~enabled_irq_mask & interrupt_mask);
+ dev_priv->gt_irq_mask &= ~mask;
I915_WRITE(GTIMR, dev_priv->gt_irq_mask);
-}
-
-void gen5_enable_gt_irq(struct drm_i915_private *dev_priv, uint32_t mask)
-{
- ilk_update_gt_irq(dev_priv, mask, mask);
POSTING_READ_FW(GTIMR);
}
void gen5_disable_gt_irq(struct drm_i915_private *dev_priv, uint32_t mask)
{
- ilk_update_gt_irq(dev_priv, mask, 0);
+ assert_spin_locked(&dev_priv->irq_lock);
+ dev_priv->gt_irq_mask |= mask;
+ I915_WRITE(GTIMR, dev_priv->gt_irq_mask);
}
static i915_reg_t gen6_pm_iir(struct drm_i915_private *dev_priv)
@@ -972,10 +954,8 @@ static void ironlake_rps_change_irq_handler(struct drm_i915_private *dev_priv)
static void notify_ring(struct intel_engine_cs *engine)
{
smp_store_mb(engine->breadcrumbs.irq_posted, true);
- if (intel_engine_wakeup(engine)) {
+ if (intel_engine_wakeup(engine))
trace_i915_gem_request_notify(engine);
- engine->breadcrumbs.irq_wakeups++;
- }
}
static void vlv_c0_read(struct drm_i915_private *dev_priv,
@@ -1249,8 +1229,8 @@ static void ivybridge_parity_error_irq_handler(struct drm_i915_private *dev_priv
queue_work(dev_priv->wq, &dev_priv->l3_parity.error_work);
}
-static void ilk_gt_irq_handler(struct drm_i915_private *dev_priv,
- u32 gt_iir)
+static __always_inline void
+ilk_gt_irq_handler(struct drm_i915_private *dev_priv, u32 gt_iir)
{
if (gt_iir & GT_RENDER_USER_INTERRUPT)
notify_ring(&dev_priv->engine[RCS]);
@@ -1258,8 +1238,8 @@ static void ilk_gt_irq_handler(struct drm_i915_private *dev_priv,
notify_ring(&dev_priv->engine[VCS]);
}
-static void snb_gt_irq_handler(struct drm_i915_private *dev_priv,
- u32 gt_iir)
+static __always_inline void
+snb_gt_irq_handler(struct drm_i915_private *dev_priv, u32 gt_iir)
{
if (gt_iir & GT_RENDER_USER_INTERRUPT)
notify_ring(&dev_priv->engine[RCS]);
@@ -2208,8 +2188,9 @@ static irqreturn_t ironlake_irq_handler(int irq, void *arg)
{
struct drm_device *dev = arg;
struct drm_i915_private *dev_priv = to_i915(dev);
- u32 de_iir, gt_iir, de_ier, sde_ier = 0;
+ const int gen = INTEL_INFO(dev_priv)->gen;
irqreturn_t ret = IRQ_NONE;
+ u32 iir;
if (!intel_irqs_enabled(dev_priv))
return IRQ_NONE;
@@ -2218,60 +2199,52 @@ static irqreturn_t ironlake_irq_handler(int irq, void *arg)
disable_rpm_wakeref_asserts(dev_priv);
/* disable master interrupt before clearing iir */
- de_ier = I915_READ(DEIER);
- I915_WRITE(DEIER, de_ier & ~DE_MASTER_IRQ_CONTROL);
- POSTING_READ(DEIER);
-
- /* Disable south interrupts. We'll only write to SDEIIR once, so further
- * interrupts will will be stored on its back queue, and then we'll be
- * able to process them after we restore SDEIER (as soon as we restore
- * it, we'll get an interrupt if SDEIIR still has something to process
- * due to its back queue). */
- if (!HAS_PCH_NOP(dev_priv)) {
- sde_ier = I915_READ(SDEIER);
- I915_WRITE(SDEIER, 0);
- POSTING_READ(SDEIER);
- }
+ I915_WRITE_FW(DEIER, 0);
/* Find, clear, then process each source of interrupt */
- gt_iir = I915_READ(GTIIR);
- if (gt_iir) {
- I915_WRITE(GTIIR, gt_iir);
+ iir = I915_READ_FW(GTIIR);
+ if (iir) {
+ I915_WRITE_FW(GTIIR, iir);
ret = IRQ_HANDLED;
- if (INTEL_GEN(dev_priv) >= 6)
- snb_gt_irq_handler(dev_priv, gt_iir);
+ if (gen >= 6)
+ snb_gt_irq_handler(dev_priv, iir);
else
- ilk_gt_irq_handler(dev_priv, gt_iir);
+ ilk_gt_irq_handler(dev_priv, iir);
}
-
- de_iir = I915_READ(DEIIR);
- if (de_iir) {
- I915_WRITE(DEIIR, de_iir);
- ret = IRQ_HANDLED;
- if (INTEL_GEN(dev_priv) >= 7)
- ivb_display_irq_handler(dev_priv, de_iir);
- else
- ilk_display_irq_handler(dev_priv, de_iir);
- }
-
- if (INTEL_GEN(dev_priv) >= 6) {
- u32 pm_iir = I915_READ(GEN6_PMIIR);
- if (pm_iir) {
- I915_WRITE(GEN6_PMIIR, pm_iir);
+ if (gen >= 6) {
+ iir = I915_READ_FW(GEN6_PMIIR);
+ if (iir) {
+ I915_WRITE_FW(GEN6_PMIIR, iir);
ret = IRQ_HANDLED;
- gen6_rps_irq_handler(dev_priv, pm_iir);
+ gen6_rps_irq_handler(dev_priv, iir);
}
}
- I915_WRITE(DEIER, de_ier);
- POSTING_READ(DEIER);
- if (!HAS_PCH_NOP(dev_priv)) {
- I915_WRITE(SDEIER, sde_ier);
- POSTING_READ(SDEIER);
+ if (ret == IRQ_NONE) {
+ iir = I915_READ_FW(DEIIR);
+ if (iir) {
+ /* Disable south interrupts. We'll only write to SDEIIR once, so further
+ * interrupts will will be stored on its back queue, and then we'll be
+ * able to process them after we restore SDEIER (as soon as we restore
+ * it, we'll get an interrupt if SDEIIR still has something to process
+ * due to its back queue). */
+ if (!HAS_PCH_NOP(dev_priv))
+ I915_WRITE_FW(SDEIER, 0);
+
+ I915_WRITE_FW(DEIIR, iir);
+ ret = IRQ_HANDLED;
+ if (gen >= 7)
+ ivb_display_irq_handler(dev_priv, iir);
+ else
+ ilk_display_irq_handler(dev_priv, iir);
+
+ if (!HAS_PCH_NOP(dev_priv))
+ I915_WRITE_FW(SDEIER, dev_priv->sde_irq_mask);
+ }
}
- /* IRQs are synced during runtime_suspend, we don't require a wakeref */
+ I915_WRITE_FW(DEIER, dev_priv->gt_irq_mask);
enable_rpm_wakeref_asserts(dev_priv);
return ret;
@@ -2525,7 +2498,6 @@ static void i915_reset_and_wakeup(struct drm_i915_private *dev_priv)
* simulated reset via debugs, so get an RPM reference.
*/
intel_runtime_pm_get(dev_priv);
-
intel_prepare_reset(dev_priv);
/*
@@ -2537,7 +2509,6 @@ static void i915_reset_and_wakeup(struct drm_i915_private *dev_priv)
ret = i915_reset(dev_priv);
intel_finish_reset(dev_priv);
-
intel_runtime_pm_put(dev_priv);
if (ret == 0)
@@ -3044,22 +3015,6 @@ engine_stuck(struct intel_engine_cs *engine, u64 acthd)
return HANGCHECK_HUNG;
}
-static unsigned long kick_waiters(struct intel_engine_cs *engine)
-{
- struct drm_i915_private *i915 = engine->i915;
- unsigned long irq_count = READ_ONCE(engine->breadcrumbs.irq_wakeups);
-
- if (engine->hangcheck.user_interrupts == irq_count &&
- !test_and_set_bit(engine->id, &i915->gpu_error.missed_irq_rings)) {
- if (!test_bit(engine->id, &i915->gpu_error.test_irq_rings))
- DRM_ERROR("Hangcheck timer elapsed... %s idle\n",
- engine->name);
-
- intel_engine_enable_fake_irq(engine);
- }
-
- return irq_count;
-}
/*
* This is called when the chip hasn't reported back with completed
* batchbuffers in a long time. We keep track per ring seqno progress and
@@ -3097,7 +3052,6 @@ static void i915_hangcheck_elapsed(struct work_struct *work)
bool busy = intel_engine_has_waiter(engine);
u64 acthd;
u32 seqno;
- unsigned user_interrupts;
semaphore_clear_deadlocks(dev_priv);
@@ -3114,15 +3068,11 @@ static void i915_hangcheck_elapsed(struct work_struct *work)
acthd = intel_engine_get_active_head(engine);
seqno = intel_engine_get_seqno(engine);
- /* Reset stuck interrupts between batch advances */
- user_interrupts = 0;
-
if (engine->hangcheck.seqno == seqno) {
if (!intel_engine_is_active(engine)) {
engine->hangcheck.action = HANGCHECK_IDLE;
if (busy) {
/* Safeguard against driver failure */
- user_interrupts = kick_waiters(engine);
engine->hangcheck.score += BUSY;
}
} else {
@@ -3185,7 +3135,6 @@ static void i915_hangcheck_elapsed(struct work_struct *work)
engine->hangcheck.seqno = seqno;
engine->hangcheck.acthd = acthd;
- engine->hangcheck.user_interrupts = user_interrupts;
busy_count += busy;
}
@@ -3438,6 +3387,7 @@ static void ibx_hpd_irq_setup(struct drm_i915_private *dev_priv)
}
ibx_display_interrupt_update(dev_priv, hotplug_irqs, enabled_irqs);
+ dev_priv->sde_irq_mask = enabled_irqs;
/*
* Enable digital hotplug on the PCH, and configure the DP short pulse
diff --git a/drivers/gpu/drm/i915/i915_memcpy.c b/drivers/gpu/drm/i915/i915_memcpy.c
new file mode 100644
index 000000000000..50fc5799255f
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_memcpy.c
@@ -0,0 +1,101 @@
+/*
+ * Copyright © 2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <asm/fpu/api.h>
+
+#include "i915_drv.h"
+
+DEFINE_STATIC_KEY_FALSE(has_movntdqa);
+
+#ifdef CONFIG_AS_MOVNTDQA
+static void __memcpy_ntdqa(void *dst, const void *src, unsigned long len)
+{
+ kernel_fpu_begin();
+
+ len >>= 4;
+ while (len >= 4) {
+ asm("movntdqa (%0), %%xmm0\n"
+ "movntdqa 16(%0), %%xmm1\n"
+ "movntdqa 32(%0), %%xmm2\n"
+ "movntdqa 48(%0), %%xmm3\n"
+ "movaps %%xmm0, (%1)\n"
+ "movaps %%xmm1, 16(%1)\n"
+ "movaps %%xmm2, 32(%1)\n"
+ "movaps %%xmm3, 48(%1)\n"
+ :: "r" (src), "r" (dst) : "memory");
+ src += 64;
+ dst += 64;
+ len -= 4;
+ }
+ while (len--) {
+ asm("movntdqa (%0), %%xmm0\n"
+ "movaps %%xmm0, (%1)\n"
+ :: "r" (src), "r" (dst) : "memory");
+ src += 16;
+ dst += 16;
+ }
+
+ kernel_fpu_end();
+}
+#endif
+
+/**
+ * i915_memcpy_from_wc: perform an accelerated *aligned* read from WC
+ * @dst: destination pointer
+ * @src: source pointer
+ * @len: how many bytes to copy
+ *
+ * i915_memcpy_from_wc copies @len bytes from @src to @dst using
+ * non-temporal instructions where available. Note that all arguments
+ * (@src, @dst) must be aligned to 16 bytes and @len must be a multiple
+ * of 16.
+ *
+ * To test whether accelerated reads from WC are supported, use
+ * i915_memcpy_from_wc(NULL, NULL, 0);
+ *
+ * Returns true if the copy was successful, false if the preconditions
+ * are not met.
+ */
+bool i915_memcpy_from_wc(void *dst, const void *src, unsigned long len)
+{
+ if (unlikely(((unsigned long)dst | (unsigned long)src | len) & 15))
+ return false;
+
+#ifdef CONFIG_AS_MOVNTDQA
+ if (static_branch_likely(&has_movntdqa)) {
+ if (len)
+ __memcpy_ntdqa(dst, src, len);
+ return true;
+ }
+#endif
+
+ return false;
+}
+
+void i915_memcpy_init_early(struct drm_i915_private *dev_priv)
+{
+ if (static_cpu_has(X86_FEATURE_XMM4_1))
+ static_branch_enable(&has_movntdqa);
+}
diff --git a/drivers/gpu/drm/i915/i915_memory.c b/drivers/gpu/drm/i915/i915_memory.c
new file mode 100644
index 000000000000..fe7465f70e28
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_memory.c
@@ -0,0 +1,128 @@
+#include <linux/mm.h>
+#include <linux/io-mapping.h>
+#include <linux/io.h>
+#include <linux/uaccess.h>
+
+#include <asm/pgalloc.h>
+#include <asm/tlb.h>
+#include <asm/tlbflush.h>
+#include <asm/pgtable.h>
+
+#include "i915_drv.h"
+
+struct remap_pfn {
+ struct mm_struct *mm;
+ unsigned long addr;
+ unsigned long pfn;
+ pgprot_t prot;
+};
+
+static inline void remap_pfn(struct remap_pfn *r, pte_t *pte)
+{
+ set_pte_at(r->mm, r->addr, pte,
+ pte_mkspecial(pfn_pte(r->pfn, r->prot)));
+ r->pfn++;
+ r->addr += PAGE_SIZE;
+}
+
+static inline void remap_pte_range(struct remap_pfn *r,
+ pmd_t *pmd,
+ unsigned long end)
+{
+ pte_t *pte;
+ spinlock_t *ptl;
+
+ pte = pte_alloc_map_lock(r->mm, pmd, r->addr, &ptl);
+ if (unlikely(!pte)) {
+ r->addr = ~0UL;
+ return;
+ }
+
+ arch_enter_lazy_mmu_mode();
+ do
+ remap_pfn(r, pte++);
+ while (r->addr < end);
+ arch_leave_lazy_mmu_mode();
+
+ pte_unmap_unlock(pte - 1, ptl);
+}
+
+static inline void remap_pmd_range(struct remap_pfn *r,
+ pud_t *pud,
+ unsigned long end)
+{
+ pmd_t *pmd;
+
+ pmd = pmd_alloc(r->mm, pud, r->addr);
+ if (unlikely(!pmd)) {
+ r->addr = ~0UL;
+ return;
+ }
+
+ do
+ remap_pte_range(r, pmd++, pmd_addr_end(r->addr, end));
+ while (r->addr < end);
+}
+
+static inline void remap_pud_range(struct remap_pfn *r,
+ pgd_t *pgd,
+ unsigned long end)
+{
+ pud_t *pud;
+
+ pud = pud_alloc(r->mm, pgd, r->addr);
+ if (unlikely(!pud)) {
+ r->addr = ~0UL;
+ return;
+ }
+
+ do
+ remap_pmd_range(r, pud++, pud_addr_end(r->addr, end));
+ while (r->addr < end);
+}
+
+/**
+ * remap_io_mapping - remap an IO mapping to userspace
+ * @vma: user vma to map to
+ * @addr: target user address to start at
+ * @pfn: physical address of kernel memory
+ * @size: size of map area
+ * @iomap: the source io_mapping
+ *
+ * Note: this is only safe if the mm semaphore is held when called.
+ */
+int remap_io_mapping(struct vm_area_struct *vma,
+ unsigned long addr, unsigned long pfn, unsigned long size,
+ struct io_mapping *iomap)
+{
+ unsigned long end = addr + PAGE_ALIGN(size);
+ struct remap_pfn r;
+ pgd_t *pgd;
+
+ if (WARN_ON(addr >= end))
+ return -EINVAL;
+
+#define MUST_SET (VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP)
+ if (WARN_ON((vma->vm_flags & MUST_SET) != MUST_SET))
+ return -EINVAL;
+#undef MUST_SET
+
+ r.mm = vma->vm_mm;
+ r.addr = addr;
+ r.pfn = pfn;
+ r.prot = __pgprot((pgprot_val(iomap->prot) & _PAGE_CACHE_MASK) |
+ (pgprot_val(vma->vm_page_prot) & ~_PAGE_CACHE_MASK));
+
+ pgd = pgd_offset(r.mm, addr);
+ do
+ remap_pud_range(&r, pgd++, pgd_addr_end(r.addr, end));
+ while (r.addr < end);
+
+ if (unlikely(r.addr > end)) {
+ zap_vma_ptes(vma, addr, end - addr);
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(remap_io_mapping);
diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c
index b6e404c91eed..221842fecb0f 100644
--- a/drivers/gpu/drm/i915/i915_params.c
+++ b/drivers/gpu/drm/i915/i915_params.c
@@ -48,7 +48,7 @@ struct i915_params i915 __read_mostly = {
.reset = true,
.invert_brightness = 0,
.disable_display = 0,
- .enable_cmd_parser = 1,
+ .enable_cmd_parser = true,
.use_mmio_flip = 0,
.mmio_debug = 0,
.verbose_state_checks = 1,
@@ -172,9 +172,9 @@ MODULE_PARM_DESC(invert_brightness,
module_param_named(disable_display, i915.disable_display, bool, 0400);
MODULE_PARM_DESC(disable_display, "Disable display (default: false)");
-module_param_named_unsafe(enable_cmd_parser, i915.enable_cmd_parser, int, 0600);
+module_param_named_unsafe(enable_cmd_parser, i915.enable_cmd_parser, bool, 0400);
MODULE_PARM_DESC(enable_cmd_parser,
- "Enable command parsing (1=enabled [default], 0=disabled)");
+ "Enable command parsing (true=enabled [default], false=disabled)");
module_param_named_unsafe(use_mmio_flip, i915.use_mmio_flip, int, 0600);
MODULE_PARM_DESC(use_mmio_flip,
diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h
index 0ad020b4a925..d7284cfbc6de 100644
--- a/drivers/gpu/drm/i915/i915_params.h
+++ b/drivers/gpu/drm/i915/i915_params.h
@@ -44,7 +44,6 @@ struct i915_params {
int disable_power_well;
int enable_ips;
int invert_brightness;
- int enable_cmd_parser;
int enable_guc_loading;
int enable_guc_submission;
int guc_log_level;
@@ -53,6 +52,7 @@ struct i915_params {
int edp_vswing;
unsigned int inject_load_failure;
/* leave bools at the end to not create holes */
+ bool enable_cmd_parser;
bool enable_hangcheck;
bool fastboot;
bool prefault_disable;
diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
index 178798002a73..ed54e99dc3f4 100644
--- a/drivers/gpu/drm/i915/i915_trace.h
+++ b/drivers/gpu/drm/i915/i915_trace.h
@@ -450,6 +450,39 @@ TRACE_EVENT(i915_gem_evict_vm,
TP_printk("dev=%d, vm=%p", __entry->dev, __entry->vm)
);
+TRACE_EVENT(i915_gem_evict_node,
+ TP_PROTO(struct i915_address_space *vm,
+ struct drm_mm_node *node,
+ unsigned int flags),
+ TP_ARGS(vm, node, flags),
+
+ TP_STRUCT__entry(
+ __field(u32, dev)
+ __field(struct i915_address_space *, vm)
+ __field(u64, start)
+ __field(u64, size)
+ __field(unsigned long, color)
+ __field(unsigned int, flags)
+ ),
+
+ TP_fast_assign(
+ __entry->dev = vm->dev->primary->index;
+ __entry->vm = vm;
+ __entry->start = node->start;
+ __entry->size = node->size;
+ __entry->color = node->color;
+ __entry->flags = flags;
+ ),
+
+ TP_printk("dev=%d, vm=%p, start=%llx size=%llx, color=%lx, flags=%x",
+ __entry->dev,
+ __entry->vm,
+ (long long)__entry->start,
+ (long long)__entry->size,
+ __entry->color,
+ __entry->flags)
+);
+
TRACE_EVENT(i915_gem_ring_sync_to,
TP_PROTO(struct drm_i915_gem_request *to,
struct drm_i915_gem_request *from),
diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
index 90867446f1a5..be27ff8ced39 100644
--- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
@@ -26,6 +26,29 @@
#include "i915_drv.h"
+static void intel_breadcrumbs_hangcheck(unsigned long data)
+{
+ struct intel_engine_cs *engine = (struct intel_engine_cs *)data;
+ struct intel_breadcrumbs *b = &engine->breadcrumbs;
+
+ if (!b->irq_enabled)
+ return;
+
+ if (time_before(jiffies, b->timeout)) {
+ mod_timer(&b->hangcheck, b->timeout);
+ return;
+ }
+
+ DRM_DEBUG("Hangcheck timer elapsed... %s idle\n", engine->name);
+ set_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings);
+ mod_timer(&engine->breadcrumbs.fake_irq, jiffies + 1);
+}
+
+static unsigned long wait_timeout(void)
+{
+ return round_jiffies_up(jiffies + DRM_I915_HANGCHECK_JIFFIES);
+}
+
static void intel_breadcrumbs_fake_irq(unsigned long data)
{
struct intel_engine_cs *engine = (struct intel_engine_cs *)data;
@@ -37,10 +60,8 @@ static void intel_breadcrumbs_fake_irq(unsigned long data)
* every jiffie in order to kick the oldest waiter to do the
* coherent seqno check.
*/
- rcu_read_lock();
if (intel_engine_wakeup(engine))
mod_timer(&engine->breadcrumbs.fake_irq, jiffies + 1);
- rcu_read_unlock();
}
static void irq_enable(struct intel_engine_cs *engine)
@@ -51,13 +72,6 @@ static void irq_enable(struct intel_engine_cs *engine)
*/
engine->breadcrumbs.irq_posted = true;
- /* Make sure the current hangcheck doesn't falsely accuse a just
- * started irq handler from missing an interrupt (because the
- * interrupt count still matches the stale value from when
- * the irq handler was disabled, many hangchecks ago).
- */
- engine->breadcrumbs.irq_wakeups++;
-
spin_lock_irq(&engine->i915->irq_lock);
engine->irq_enable(engine);
spin_unlock_irq(&engine->i915->irq_lock);
@@ -98,8 +112,13 @@ static void __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b)
}
if (!b->irq_enabled ||
- test_bit(engine->id, &i915->gpu_error.missed_irq_rings))
+ test_bit(engine->id, &i915->gpu_error.missed_irq_rings)) {
mod_timer(&b->fake_irq, jiffies + 1);
+ } else {
+ /* Ensure we never sleep indefinitely */
+ GEM_BUG_ON(!time_after(b->timeout, jiffies));
+ mod_timer(&b->hangcheck, b->timeout);
+ }
/* Ensure that even if the GPU hangs, we get woken up.
*
@@ -211,7 +230,7 @@ static bool __intel_engine_add_wait(struct intel_engine_cs *engine,
}
rb_link_node(&wait->node, parent, p);
rb_insert_color(&wait->node, &b->waiters);
- GEM_BUG_ON(!first && !b->irq_seqno_bh);
+ GEM_BUG_ON(!first && !rcu_access_pointer(b->irq_seqno_bh));
if (completed) {
struct rb_node *next = rb_next(completed);
@@ -219,8 +238,9 @@ static bool __intel_engine_add_wait(struct intel_engine_cs *engine,
GEM_BUG_ON(!next && !first);
if (next && next != &wait->node) {
GEM_BUG_ON(first);
+ b->timeout = wait_timeout();
b->first_wait = to_wait(next);
- smp_store_mb(b->irq_seqno_bh, b->first_wait->tsk);
+ rcu_assign_pointer(b->irq_seqno_bh, b->first_wait->tsk);
/* As there is a delay between reading the current
* seqno, processing the completed tasks and selecting
* the next waiter, we may have missed the interrupt
@@ -245,8 +265,9 @@ static bool __intel_engine_add_wait(struct intel_engine_cs *engine,
if (first) {
GEM_BUG_ON(rb_first(&b->waiters) != &wait->node);
+ b->timeout = wait_timeout();
b->first_wait = wait;
- smp_store_mb(b->irq_seqno_bh, wait->tsk);
+ rcu_assign_pointer(b->irq_seqno_bh, wait->tsk);
/* After assigning ourselves as the new bottom-half, we must
* perform a cursory check to prevent a missed interrupt.
* Either we miss the interrupt whilst programming the hardware,
@@ -257,7 +278,7 @@ static bool __intel_engine_add_wait(struct intel_engine_cs *engine,
*/
__intel_breadcrumbs_enable_irq(b);
}
- GEM_BUG_ON(!b->irq_seqno_bh);
+ GEM_BUG_ON(!rcu_access_pointer(b->irq_seqno_bh));
GEM_BUG_ON(!b->first_wait);
GEM_BUG_ON(rb_first(&b->waiters) != &b->first_wait->node);
@@ -277,11 +298,6 @@ bool intel_engine_add_wait(struct intel_engine_cs *engine,
return first;
}
-void intel_engine_enable_fake_irq(struct intel_engine_cs *engine)
-{
- mod_timer(&engine->breadcrumbs.fake_irq, jiffies + 1);
-}
-
static inline bool chain_wakeup(struct rb_node *rb, int priority)
{
return rb && to_wait(rb)->tsk->prio <= priority;
@@ -317,7 +333,7 @@ void intel_engine_remove_wait(struct intel_engine_cs *engine,
const int priority = wakeup_priority(b, wait->tsk);
struct rb_node *next;
- GEM_BUG_ON(b->irq_seqno_bh != wait->tsk);
+ GEM_BUG_ON(rcu_access_pointer(b->irq_seqno_bh) != wait->tsk);
/* We are the current bottom-half. Find the next candidate,
* the first waiter in the queue on the remaining oldest
@@ -359,14 +375,15 @@ void intel_engine_remove_wait(struct intel_engine_cs *engine,
* the interrupt, or if we have to handle an
* exception rather than a seqno completion.
*/
+ b->timeout = wait_timeout();
b->first_wait = to_wait(next);
- smp_store_mb(b->irq_seqno_bh, b->first_wait->tsk);
+ rcu_assign_pointer(b->irq_seqno_bh, b->first_wait->tsk);
if (b->first_wait->seqno != wait->seqno)
__intel_breadcrumbs_enable_irq(b);
- wake_up_process(b->irq_seqno_bh);
+ wake_up_process(b->first_wait->tsk);
} else {
b->first_wait = NULL;
- WRITE_ONCE(b->irq_seqno_bh, NULL);
+ rcu_assign_pointer(b->irq_seqno_bh, NULL);
__intel_breadcrumbs_disable_irq(b);
}
} else {
@@ -380,7 +397,7 @@ out_unlock:
GEM_BUG_ON(b->first_wait == wait);
GEM_BUG_ON(rb_first(&b->waiters) !=
(b->first_wait ? &b->first_wait->node : NULL));
- GEM_BUG_ON(!b->irq_seqno_bh ^ RB_EMPTY_ROOT(&b->waiters));
+ GEM_BUG_ON(!rcu_access_pointer(b->irq_seqno_bh) ^ RB_EMPTY_ROOT(&b->waiters));
spin_unlock(&b->lock);
}
@@ -401,6 +418,9 @@ static bool signal_complete(struct drm_i915_gem_request *request)
if (__i915_request_irq_complete(request))
return true;
+ if (i915_spin_request(request, TASK_UNINTERRUPTIBLE, 2))
+ return true;
+
return false;
}
@@ -443,7 +463,10 @@ static int intel_breadcrumbs_signaler(void *arg)
*/
intel_engine_remove_wait(engine,
&request->signaling.wait);
+
+ local_bh_disable();
fence_signal(&request->fence);
+ local_bh_enable();
/* Find the next oldest signal. Note that as we have
* not been holding the lock, another client may
@@ -533,6 +556,9 @@ int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine)
struct task_struct *tsk;
spin_lock_init(&b->lock);
+ setup_timer(&b->hangcheck,
+ intel_breadcrumbs_hangcheck,
+ (unsigned long)engine);
setup_timer(&b->fake_irq,
intel_breadcrumbs_fake_irq,
(unsigned long)engine);
@@ -561,6 +587,7 @@ void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine)
kthread_stop(b->signaler);
del_timer_sync(&b->fake_irq);
+ del_timer_sync(&b->hangcheck);
}
unsigned int intel_kick_waiters(struct drm_i915_private *i915)
@@ -573,11 +600,9 @@ unsigned int intel_kick_waiters(struct drm_i915_private *i915)
* RCU lock, i.e. as we call wake_up_process() we must be holding the
* rcu_read_lock().
*/
- rcu_read_lock();
for_each_engine(engine, i915)
if (unlikely(intel_engine_wakeup(engine)))
mask |= intel_engine_flag(engine);
- rcu_read_unlock();
return mask;
}
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 9cbf5431c1e3..9979fce88ea7 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -97,10 +97,9 @@ static void i9xx_crtc_clock_get(struct intel_crtc *crtc,
static void ironlake_pch_clock_get(struct intel_crtc *crtc,
struct intel_crtc_state *pipe_config);
-static int intel_framebuffer_init(struct drm_device *dev,
- struct intel_framebuffer *ifb,
- struct drm_mode_fb_cmd2 *mode_cmd,
- struct drm_i915_gem_object *obj);
+static int intel_framebuffer_init(struct intel_framebuffer *ifb,
+ struct drm_i915_gem_object *obj,
+ struct drm_mode_fb_cmd2 *mode_cmd);
static void i9xx_set_pipeconf(struct intel_crtc *intel_crtc);
static void intel_set_pipe_timings(struct intel_crtc *intel_crtc);
static void intel_set_pipe_src_size(struct intel_crtc *intel_crtc);
@@ -2114,11 +2113,13 @@ static void intel_tile_dims(const struct drm_i915_private *dev_priv,
}
unsigned int
-intel_fb_align_height(struct drm_device *dev, unsigned int height,
- uint32_t pixel_format, uint64_t fb_modifier)
+intel_fb_align_height(struct drm_i915_private *dev_priv,
+ unsigned int height,
+ uint32_t pixel_format,
+ uint64_t fb_modifier)
{
unsigned int cpp = drm_format_plane_cpp(pixel_format, 0);
- unsigned int tile_height = intel_tile_height(to_i915(dev), fb_modifier, cpp);
+ unsigned int tile_height = intel_tile_height(dev_priv, fb_modifier, cpp);
return ALIGN(height, tile_height);
}
@@ -2206,16 +2207,15 @@ static unsigned int intel_surf_alignment(const struct drm_i915_private *dev_priv
}
}
-int
-intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb,
- unsigned int rotation)
+struct i915_vma *
+intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, unsigned int rotation)
{
struct drm_device *dev = fb->dev;
struct drm_i915_private *dev_priv = to_i915(dev);
struct drm_i915_gem_object *obj = intel_fb_obj(fb);
struct i915_ggtt_view view;
+ struct i915_vma *vma;
u32 alignment;
- int ret;
WARN_ON(!mutex_is_locked(&dev->struct_mutex));
@@ -2240,58 +2240,49 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb,
*/
intel_runtime_pm_get(dev_priv);
- ret = i915_gem_object_pin_to_display_plane(obj, alignment,
- &view);
- if (ret)
- goto err_pm;
-
- /* Install a fence for tiled scan-out. Pre-i965 always needs a
- * fence, whereas 965+ only requires a fence if using
- * framebuffer compression. For simplicity, we always install
- * a fence as the cost is not that onerous.
- */
- if (view.type == I915_GGTT_VIEW_NORMAL) {
- ret = i915_gem_object_get_fence(obj);
- if (ret == -EDEADLK) {
- /*
- * -EDEADLK means there are no free fences
- * no pending flips.
- *
- * This is propagated to atomic, but it uses
- * -EDEADLK to force a locking recovery, so
- * change the returned error to -EBUSY.
- */
- ret = -EBUSY;
- goto err_unpin;
- } else if (ret)
- goto err_unpin;
+ vma = i915_gem_object_pin_to_display_plane(obj, alignment, &view);
+ if (IS_ERR(vma))
+ goto err;
- i915_gem_object_pin_fence(obj);
+ if (i915_vma_is_map_and_fenceable(vma)) {
+ /* Install a fence for tiled scan-out. Pre-i965 always needs a
+ * fence, whereas 965+ only requires a fence if using
+ * framebuffer compression. For simplicity, we always, when
+ * possible, install a fence as the cost is not that onerous.
+ *
+ * If we fail to fence the tiled scanout, then either the
+ * modeset will reject the change (which is highly unlikely as
+ * the affected systems, all but one, do not have unmappable
+ * space) or we will not be able to enable full powersaving
+ * techniques (also likely not to apply due to various limits
+ * FBC and the like impose on the size of the buffer, which
+ * presumably we violated anyway with this unmappable buffer).
+ * Anyway, it is presumably better to stumble onwards with
+ * something and try to run the system in a "less than optimal"
+ * mode that matches the user configuration.
+ */
+ if (i915_vma_get_fence(vma) == 0)
+ i915_vma_pin_fence(vma);
}
+err:
intel_runtime_pm_put(dev_priv);
- return 0;
-
-err_unpin:
- i915_gem_object_unpin_from_display_plane(obj, &view);
-err_pm:
- intel_runtime_pm_put(dev_priv);
- return ret;
+ return vma;
}
void intel_unpin_fb_obj(struct drm_framebuffer *fb, unsigned int rotation)
{
struct drm_i915_gem_object *obj = intel_fb_obj(fb);
struct i915_ggtt_view view;
+ struct i915_vma *vma;
WARN_ON(!mutex_is_locked(&obj->base.dev->struct_mutex));
intel_fill_fb_ggtt_view(&view, fb, rotation);
+ vma = i915_gem_object_to_ggtt(obj, &view);
- if (view.type == I915_GGTT_VIEW_NORMAL)
- i915_gem_object_unpin_fence(obj);
-
- i915_gem_object_unpin_from_display_plane(obj, &view);
+ i915_vma_unpin_fence(vma);
+ i915_gem_object_unpin_from_display_plane(vma);
}
/*
@@ -2456,15 +2447,13 @@ intel_alloc_initial_plane_obj(struct intel_crtc *crtc,
return false;
mutex_lock(&dev->struct_mutex);
-
obj = i915_gem_object_create_stolen_for_preallocated(dev,
base_aligned,
base_aligned,
size_aligned);
- if (!obj) {
- mutex_unlock(&dev->struct_mutex);
+ mutex_unlock(&dev->struct_mutex);
+ if (!obj)
return false;
- }
if (plane_config->tiling == I915_TILING_X)
obj->tiling_and_stride = fb->pitches[0] | I915_TILING_X;
@@ -2476,13 +2465,11 @@ intel_alloc_initial_plane_obj(struct intel_crtc *crtc,
mode_cmd.modifier[0] = fb->modifier[0];
mode_cmd.flags = DRM_MODE_FB_MODIFIERS;
- if (intel_framebuffer_init(dev, to_intel_framebuffer(fb),
- &mode_cmd, obj)) {
+ if (intel_framebuffer_init(to_intel_framebuffer(fb), obj, &mode_cmd)) {
DRM_DEBUG_KMS("intel fb init failed\n");
goto out_unref_obj;
}
- mutex_unlock(&dev->struct_mutex);
DRM_DEBUG_KMS("initial plane fb obj %p\n", obj);
return true;
@@ -2552,7 +2539,7 @@ intel_find_initial_plane_obj(struct intel_crtc *intel_crtc,
continue;
obj = intel_fb_obj(fb);
- if (i915_gem_obj_ggtt_offset(obj) == plane_config->base) {
+ if (i915_gem_object_ggtt_offset(obj, NULL) == plane_config->base) {
drm_framebuffer_reference(fb);
goto valid_fb;
}
@@ -2709,11 +2696,11 @@ static void i9xx_update_primary_plane(struct drm_plane *primary,
I915_WRITE(DSPSTRIDE(plane), fb->pitches[0]);
if (INTEL_INFO(dev)->gen >= 4) {
I915_WRITE(DSPSURF(plane),
- i915_gem_obj_ggtt_offset(obj) + intel_crtc->dspaddr_offset);
+ i915_gem_object_ggtt_offset(obj, NULL) + intel_crtc->dspaddr_offset);
I915_WRITE(DSPTILEOFF(plane), (y << 16) | x);
I915_WRITE(DSPLINOFF(plane), linear_offset);
} else
- I915_WRITE(DSPADDR(plane), i915_gem_obj_ggtt_offset(obj) + linear_offset);
+ I915_WRITE(DSPADDR(plane), i915_gem_object_ggtt_offset(obj, NULL) + linear_offset);
POSTING_READ(reg);
}
@@ -2813,7 +2800,7 @@ static void ironlake_update_primary_plane(struct drm_plane *primary,
I915_WRITE(DSPSTRIDE(plane), fb->pitches[0]);
I915_WRITE(DSPSURF(plane),
- i915_gem_obj_ggtt_offset(obj) + intel_crtc->dspaddr_offset);
+ i915_gem_object_ggtt_offset(obj, NULL) + intel_crtc->dspaddr_offset);
if (IS_HASWELL(dev) || IS_BROADWELL(dev)) {
I915_WRITE(DSPOFFSET(plane), (y << 16) | x);
} else {
@@ -2846,7 +2833,7 @@ u32 intel_plane_obj_offset(struct intel_plane *intel_plane,
intel_fill_fb_ggtt_view(&view, intel_plane->base.state->fb,
intel_plane->base.state->rotation);
- vma = i915_gem_obj_to_ggtt_view(obj, &view);
+ vma = i915_gem_object_to_ggtt(obj, &view);
if (WARN(!vma, "ggtt vma for display object not found! (view=%u)\n",
view.type))
return -1;
@@ -8166,7 +8153,8 @@ i9xx_get_initial_plane_config(struct intel_crtc *crtc,
val = I915_READ(DSPSTRIDE(pipe));
fb->pitches[0] = val & 0xffffffc0;
- aligned_height = intel_fb_align_height(dev, fb->height,
+ aligned_height = intel_fb_align_height(dev_priv,
+ fb->height,
fb->pixel_format,
fb->modifier[0]);
@@ -9190,7 +9178,8 @@ skylake_get_initial_plane_config(struct intel_crtc *crtc,
fb->pixel_format);
fb->pitches[0] = (val & 0x3ff) * stride_mult;
- aligned_height = intel_fb_align_height(dev, fb->height,
+ aligned_height = intel_fb_align_height(dev_priv,
+ fb->height,
fb->pixel_format,
fb->modifier[0]);
@@ -9287,7 +9276,8 @@ ironlake_get_initial_plane_config(struct intel_crtc *crtc,
val = I915_READ(DSPSTRIDE(pipe));
fb->pitches[0] = val & 0xffffffc0;
- aligned_height = intel_fb_align_height(dev, fb->height,
+ aligned_height = intel_fb_align_height(dev_priv,
+ fb->height,
fb->pixel_format,
fb->modifier[0]);
@@ -10358,9 +10348,8 @@ static struct drm_display_mode load_detect_mode = {
};
struct drm_framebuffer *
-__intel_framebuffer_create(struct drm_device *dev,
- struct drm_mode_fb_cmd2 *mode_cmd,
- struct drm_i915_gem_object *obj)
+intel_framebuffer_create(struct drm_i915_gem_object *obj,
+ struct drm_mode_fb_cmd2 *mode_cmd)
{
struct intel_framebuffer *intel_fb;
int ret;
@@ -10369,7 +10358,7 @@ __intel_framebuffer_create(struct drm_device *dev,
if (!intel_fb)
return ERR_PTR(-ENOMEM);
- ret = intel_framebuffer_init(dev, intel_fb, mode_cmd, obj);
+ ret = intel_framebuffer_init(intel_fb, obj, mode_cmd);
if (ret)
goto err;
@@ -10380,23 +10369,6 @@ err:
return ERR_PTR(ret);
}
-static struct drm_framebuffer *
-intel_framebuffer_create(struct drm_device *dev,
- struct drm_mode_fb_cmd2 *mode_cmd,
- struct drm_i915_gem_object *obj)
-{
- struct drm_framebuffer *fb;
- int ret;
-
- ret = i915_mutex_lock_interruptible(dev);
- if (ret)
- return ERR_PTR(ret);
- fb = __intel_framebuffer_create(dev, mode_cmd, obj);
- mutex_unlock(&dev->struct_mutex);
-
- return fb;
-}
-
static u32
intel_framebuffer_pitch_for_width(int width, int bpp)
{
@@ -10431,9 +10403,9 @@ intel_framebuffer_create_for_mode(struct drm_device *dev,
bpp);
mode_cmd.pixel_format = drm_mode_legacy_fb_format(bpp, depth);
- fb = intel_framebuffer_create(dev, &mode_cmd, obj);
+ fb = intel_framebuffer_create(obj, &mode_cmd);
if (IS_ERR(fb))
- i915_gem_object_put_unlocked(obj);
+ i915_gem_object_put(obj);
return fb;
}
@@ -11325,7 +11297,7 @@ static int intel_gen7_queue_flip(struct drm_device *dev,
intel_ring_emit(ring, MI_STORE_REGISTER_MEM |
MI_SRM_LRM_GLOBAL_GTT);
intel_ring_emit_reg(ring, DERRMR);
- intel_ring_emit(ring, req->engine->scratch.gtt_offset + 256);
+ intel_ring_emit(ring, req->engine->scratch->node.start + 256);
if (IS_GEN8(dev)) {
intel_ring_emit(ring, 0);
intel_ring_emit(ring, MI_NOOP);
@@ -11577,6 +11549,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
struct intel_engine_cs *engine;
bool mmio_flip;
struct drm_i915_gem_request *request;
+ struct i915_vma *vma;
int ret;
/*
@@ -11685,9 +11658,11 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
mmio_flip = use_mmio_flip(engine, obj);
- ret = intel_pin_and_fence_fb_obj(fb, primary->state->rotation);
- if (ret)
+ vma = intel_pin_and_fence_fb_obj(fb, primary->state->rotation);
+ if (IS_ERR(vma)) {
+ ret = PTR_ERR(vma);
goto cleanup_pending;
+ }
work->gtt_offset = intel_plane_obj_offset(to_intel_plane(primary),
obj, 0);
@@ -11707,7 +11682,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
goto cleanup_unpin;
}
- ret = i915_gem_object_sync(obj, request);
+ ret = i915_gem_object_sync(obj, request, false);
if (ret)
goto cleanup_request;
@@ -11719,7 +11694,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
intel_mark_page_flip_active(intel_crtc, work);
work->flip_queued_req = i915_gem_request_get(request);
- i915_add_request_no_flush(request);
+ i915_add_request(request);
}
i915_gem_track_fb(intel_fb_obj(old_fb), obj,
@@ -11734,7 +11709,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
return 0;
cleanup_request:
- i915_add_request_no_flush(request);
+ i915_add_request(request);
cleanup_unpin:
intel_unpin_fb_obj(fb, crtc->primary->state->rotation);
cleanup_pending:
@@ -11744,7 +11719,7 @@ cleanup:
crtc->primary->fb = old_fb;
update_state_fb(crtc->primary);
- i915_gem_object_put_unlocked(obj);
+ i915_gem_object_put(obj);
drm_framebuffer_unreference(work->old_fb);
spin_lock_irq(&dev->event_lock);
@@ -14040,7 +14015,11 @@ intel_prepare_plane_fb(struct drm_plane *plane,
if (ret)
DRM_DEBUG_KMS("failed to attach phys object\n");
} else {
- ret = intel_pin_and_fence_fb_obj(fb, new_state->rotation);
+ struct i915_vma *vma;
+
+ vma = intel_pin_and_fence_fb_obj(fb, new_state->rotation);
+ if (IS_ERR(vma))
+ ret = PTR_ERR(vma);
}
if (ret == 0) {
@@ -14396,7 +14375,7 @@ intel_update_cursor_plane(struct drm_plane *plane,
if (!obj)
addr = 0;
else if (!INTEL_INFO(dev)->cursor_needs_physical)
- addr = i915_gem_obj_ggtt_offset(obj);
+ addr = i915_gem_object_ggtt_offset(obj, NULL);
else
addr = obj->phys_handle->busaddr;
@@ -14836,14 +14815,14 @@ static void intel_setup_outputs(struct drm_device *dev)
static void intel_user_framebuffer_destroy(struct drm_framebuffer *fb)
{
- struct drm_device *dev = fb->dev;
struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb);
drm_framebuffer_cleanup(fb);
- mutex_lock(&dev->struct_mutex);
- WARN_ON(!intel_fb->obj->framebuffer_references--);
+
+ WARN_ON(atomic_read(&intel_fb->obj->framebuffer_references) == 0);
+ atomic_dec(&intel_fb->obj->framebuffer_references);
i915_gem_object_put(intel_fb->obj);
- mutex_unlock(&dev->struct_mutex);
+
kfree(intel_fb);
}
@@ -14886,10 +14865,11 @@ static const struct drm_framebuffer_funcs intel_fb_funcs = {
};
static
-u32 intel_fb_pitch_limit(struct drm_device *dev, uint64_t fb_modifier,
+u32 intel_fb_pitch_limit(struct drm_i915_private *dev_priv,
+ uint64_t fb_modifier,
uint32_t pixel_format)
{
- u32 gen = INTEL_INFO(dev)->gen;
+ u32 gen = INTEL_GEN(dev_priv);
if (gen >= 9) {
int cpp = drm_format_plane_cpp(pixel_format, 0);
@@ -14898,7 +14878,7 @@ u32 intel_fb_pitch_limit(struct drm_device *dev, uint64_t fb_modifier,
* pixels and 32K bytes."
*/
return min(8192 * cpp, 32768);
- } else if (gen >= 5 && !IS_VALLEYVIEW(dev) && !IS_CHERRYVIEW(dev)) {
+ } else if (gen >= 5 && !IS_VALLEYVIEW(dev_priv) && !IS_CHERRYVIEW(dev_priv)) {
return 32*1024;
} else if (gen >= 4) {
if (fb_modifier == I915_FORMAT_MOD_X_TILED)
@@ -14916,17 +14896,16 @@ u32 intel_fb_pitch_limit(struct drm_device *dev, uint64_t fb_modifier,
}
}
-static int intel_framebuffer_init(struct drm_device *dev,
- struct intel_framebuffer *intel_fb,
- struct drm_mode_fb_cmd2 *mode_cmd,
- struct drm_i915_gem_object *obj)
+static int intel_framebuffer_init(struct intel_framebuffer *intel_fb,
+ struct drm_i915_gem_object *obj,
+ struct drm_mode_fb_cmd2 *mode_cmd)
{
- struct drm_i915_private *dev_priv = to_i915(dev);
- unsigned int aligned_height;
- int ret;
+ struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
u32 pitch_limit, stride_alignment;
+ unsigned int aligned_height;
+ int ret = -EINVAL;
- WARN_ON(!mutex_is_locked(&dev->struct_mutex));
+ atomic_inc(&obj->framebuffer_references);
if (mode_cmd->flags & DRM_MODE_FB_MODIFIERS) {
/* Enforce that fb modifier and tiling mode match, but only for
@@ -14934,14 +14913,14 @@ static int intel_framebuffer_init(struct drm_device *dev,
if (!!(i915_gem_object_get_tiling(obj) == I915_TILING_X) !=
!!(mode_cmd->modifier[0] == I915_FORMAT_MOD_X_TILED)) {
DRM_DEBUG("tiling_mode doesn't match fb modifier\n");
- return -EINVAL;
+ goto err;
}
} else {
if (i915_gem_object_get_tiling(obj) == I915_TILING_X)
mode_cmd->modifier[0] = I915_FORMAT_MOD_X_TILED;
else if (i915_gem_object_get_tiling(obj) == I915_TILING_Y) {
DRM_DEBUG("No Y tiling for legacy addfb\n");
- return -EINVAL;
+ goto err;
}
}
@@ -14949,10 +14928,10 @@ static int intel_framebuffer_init(struct drm_device *dev,
switch (mode_cmd->modifier[0]) {
case I915_FORMAT_MOD_Y_TILED:
case I915_FORMAT_MOD_Yf_TILED:
- if (INTEL_INFO(dev)->gen < 9) {
+ if (INTEL_GEN(dev_priv) < 9) {
DRM_DEBUG("Unsupported tiling 0x%llx!\n",
mode_cmd->modifier[0]);
- return -EINVAL;
+ goto err;
}
case DRM_FORMAT_MOD_NONE:
case I915_FORMAT_MOD_X_TILED:
@@ -14960,7 +14939,7 @@ static int intel_framebuffer_init(struct drm_device *dev,
default:
DRM_DEBUG("Unsupported fb modifier 0x%llx!\n",
mode_cmd->modifier[0]);
- return -EINVAL;
+ goto err;
}
stride_alignment = intel_fb_stride_alignment(dev_priv,
@@ -14969,17 +14948,18 @@ static int intel_framebuffer_init(struct drm_device *dev,
if (mode_cmd->pitches[0] & (stride_alignment - 1)) {
DRM_DEBUG("pitch (%d) must be at least %u byte aligned\n",
mode_cmd->pitches[0], stride_alignment);
- return -EINVAL;
+ goto err;
}
- pitch_limit = intel_fb_pitch_limit(dev, mode_cmd->modifier[0],
+ pitch_limit = intel_fb_pitch_limit(dev_priv,
+ mode_cmd->modifier[0],
mode_cmd->pixel_format);
if (mode_cmd->pitches[0] > pitch_limit) {
DRM_DEBUG("%s pitch (%u) must be at less than %d\n",
mode_cmd->modifier[0] != DRM_FORMAT_MOD_NONE ?
"tiled" : "linear",
mode_cmd->pitches[0], pitch_limit);
- return -EINVAL;
+ goto err;
}
if (mode_cmd->modifier[0] == I915_FORMAT_MOD_X_TILED &&
@@ -14987,7 +14967,7 @@ static int intel_framebuffer_init(struct drm_device *dev,
DRM_DEBUG("pitch (%d) must match tiling stride (%d)\n",
mode_cmd->pitches[0],
i915_gem_object_get_stride(obj));
- return -EINVAL;
+ goto err;
}
/* Reject formats not supported by any plane early. */
@@ -14998,77 +14978,83 @@ static int intel_framebuffer_init(struct drm_device *dev,
case DRM_FORMAT_ARGB8888:
break;
case DRM_FORMAT_XRGB1555:
- if (INTEL_INFO(dev)->gen > 3) {
+ if (INTEL_GEN(dev_priv) > 3) {
DRM_DEBUG("unsupported pixel format: %s\n",
drm_get_format_name(mode_cmd->pixel_format));
- return -EINVAL;
+ goto err;
}
break;
case DRM_FORMAT_ABGR8888:
- if (!IS_VALLEYVIEW(dev) && !IS_CHERRYVIEW(dev) &&
- INTEL_INFO(dev)->gen < 9) {
+ if (!IS_VALLEYVIEW(dev_priv) &&
+ !IS_CHERRYVIEW(dev_priv) &&
+ INTEL_GEN(dev_priv) < 9) {
DRM_DEBUG("unsupported pixel format: %s\n",
drm_get_format_name(mode_cmd->pixel_format));
- return -EINVAL;
+ goto err;
}
break;
case DRM_FORMAT_XBGR8888:
case DRM_FORMAT_XRGB2101010:
case DRM_FORMAT_XBGR2101010:
- if (INTEL_INFO(dev)->gen < 4) {
+ if (INTEL_GEN(dev_priv) < 4) {
DRM_DEBUG("unsupported pixel format: %s\n",
drm_get_format_name(mode_cmd->pixel_format));
- return -EINVAL;
+ goto err;
}
break;
case DRM_FORMAT_ABGR2101010:
- if (!IS_VALLEYVIEW(dev) && !IS_CHERRYVIEW(dev)) {
+ if (!IS_VALLEYVIEW(dev_priv) && !IS_CHERRYVIEW(dev_priv)) {
DRM_DEBUG("unsupported pixel format: %s\n",
drm_get_format_name(mode_cmd->pixel_format));
- return -EINVAL;
+ goto err;
}
break;
case DRM_FORMAT_YUYV:
case DRM_FORMAT_UYVY:
case DRM_FORMAT_YVYU:
case DRM_FORMAT_VYUY:
- if (INTEL_INFO(dev)->gen < 5) {
+ if (INTEL_GEN(dev_priv) < 5) {
DRM_DEBUG("unsupported pixel format: %s\n",
drm_get_format_name(mode_cmd->pixel_format));
- return -EINVAL;
+ goto err;
}
break;
default:
DRM_DEBUG("unsupported pixel format: %s\n",
drm_get_format_name(mode_cmd->pixel_format));
- return -EINVAL;
+ goto err;
}
/* FIXME need to adjust LINOFF/TILEOFF accordingly. */
if (mode_cmd->offsets[0] != 0)
- return -EINVAL;
+ goto err;
- aligned_height = intel_fb_align_height(dev, mode_cmd->height,
+ aligned_height = intel_fb_align_height(dev_priv,
+ mode_cmd->height,
mode_cmd->pixel_format,
mode_cmd->modifier[0]);
/* FIXME drm helper for size checks (especially planar formats)? */
if (obj->base.size < aligned_height * mode_cmd->pitches[0])
- return -EINVAL;
+ goto err;
drm_helper_mode_fill_fb_struct(&intel_fb->base, mode_cmd);
intel_fb->obj = obj;
intel_fill_fb_info(dev_priv, &intel_fb->base);
- ret = drm_framebuffer_init(dev, &intel_fb->base, &intel_fb_funcs);
+ ret = drm_framebuffer_init(obj->base.dev,
+ &intel_fb->base,
+ &intel_fb_funcs);
if (ret) {
DRM_ERROR("framebuffer init failed %d\n", ret);
- return ret;
+ goto err;
}
- intel_fb->obj->framebuffer_references++;
-
return 0;
+
+err:
+ atomic_dec(&obj->framebuffer_references);
+ return ret;
}
static struct drm_framebuffer *
@@ -15084,9 +15070,9 @@ intel_user_framebuffer_create(struct drm_device *dev,
if (!obj)
return ERR_PTR(-ENOENT);
- fb = intel_framebuffer_create(dev, &mode_cmd, obj);
+ fb = intel_framebuffer_create(obj, &mode_cmd);
if (IS_ERR(fb))
- i915_gem_object_put_unlocked(obj);
+ i915_gem_object_put(obj);
return fb;
}
@@ -16228,7 +16214,6 @@ void intel_modeset_gem_init(struct drm_device *dev)
struct drm_i915_private *dev_priv = to_i915(dev);
struct drm_crtc *c;
struct drm_i915_gem_object *obj;
- int ret;
intel_init_gt_powersave(dev_priv);
@@ -16242,15 +16227,17 @@ void intel_modeset_gem_init(struct drm_device *dev)
* for this.
*/
for_each_crtc(dev, c) {
+ struct i915_vma *vma;
+
obj = intel_fb_obj(c->primary->fb);
if (obj == NULL)
continue;
mutex_lock(&dev->struct_mutex);
- ret = intel_pin_and_fence_fb_obj(c->primary->fb,
+ vma = intel_pin_and_fence_fb_obj(c->primary->fb,
c->primary->state->rotation);
mutex_unlock(&dev->struct_mutex);
- if (ret) {
+ if (IS_ERR(vma)) {
DRM_ERROR("failed to pin boot fb on pipe %d\n",
to_intel_crtc(c)->pipe);
drm_framebuffer_unreference(c->primary->fb);
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index 1ad2e2c5f580..69b2a735deea 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -183,6 +183,7 @@ struct intel_framebuffer {
struct intel_fbdev {
struct drm_fb_helper helper;
struct intel_framebuffer *fb;
+ struct i915_vma *vma;
async_cookie_t cookie;
int preferred_bpp;
};
@@ -1135,7 +1136,7 @@ void intel_ddi_clock_get(struct intel_encoder *encoder,
void intel_ddi_set_vc_payload_alloc(struct drm_crtc *crtc, bool state);
uint32_t ddi_signal_levels(struct intel_dp *intel_dp);
-unsigned int intel_fb_align_height(struct drm_device *dev,
+unsigned int intel_fb_align_height(struct drm_i915_private *dev_priv,
unsigned int height,
uint32_t pixel_format,
uint64_t fb_format_modifier);
@@ -1217,13 +1218,12 @@ bool intel_get_load_detect_pipe(struct drm_connector *connector,
void intel_release_load_detect_pipe(struct drm_connector *connector,
struct intel_load_detect_pipe *old,
struct drm_modeset_acquire_ctx *ctx);
-int intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb,
- unsigned int rotation);
+struct i915_vma *
+intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, unsigned int rotation);
void intel_unpin_fb_obj(struct drm_framebuffer *fb, unsigned int rotation);
struct drm_framebuffer *
-__intel_framebuffer_create(struct drm_device *dev,
- struct drm_mode_fb_cmd2 *mode_cmd,
- struct drm_i915_gem_object *obj);
+intel_framebuffer_create(struct drm_i915_gem_object *obj,
+ struct drm_mode_fb_cmd2 *mode_cmd);
void intel_finish_page_flip_cs(struct drm_i915_private *dev_priv, int pipe);
void intel_finish_page_flip_mmio(struct drm_i915_private *dev_priv, int pipe);
void intel_check_page_flip(struct drm_i915_private *dev_priv, int pipe);
@@ -1600,23 +1600,6 @@ assert_rpm_wakelock_held(struct drm_i915_private *dev_priv)
DRM_DEBUG_DRIVER("RPM wakelock ref not held during HW access");
}
-static inline int
-assert_rpm_atomic_begin(struct drm_i915_private *dev_priv)
-{
- int seq = atomic_read(&dev_priv->pm.atomic_seq);
-
- assert_rpm_wakelock_held(dev_priv);
-
- return seq;
-}
-
-static inline void
-assert_rpm_atomic_end(struct drm_i915_private *dev_priv, int begin_seq)
-{
- WARN_ONCE(atomic_read(&dev_priv->pm.atomic_seq) != begin_seq,
- "HW access outside of RPM atomic section\n");
-}
-
/**
* disable_rpm_wakeref_asserts - disable the RPM assert checks
* @dev_priv: i915 device instance
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index e9b301ae2d0c..394ef2ca2bd2 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -164,6 +164,7 @@ cleanup:
void intel_engine_init_hangcheck(struct intel_engine_cs *engine)
{
memset(&engine->hangcheck, 0, sizeof(engine->hangcheck));
+ clear_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings);
}
static void intel_engine_init_requests(struct intel_engine_cs *engine)
@@ -192,6 +193,8 @@ void intel_engine_setup_common(struct intel_engine_cs *engine)
intel_engine_init_requests(engine);
intel_engine_init_hangcheck(engine);
i915_gem_batch_pool_init(engine, &engine->batch_pool);
+
+ intel_engine_init_cmd_parser(engine);
}
/**
@@ -213,7 +216,11 @@ int intel_engine_init_common(struct intel_engine_cs *engine)
if (ret)
return ret;
- return intel_engine_init_cmd_parser(engine);
+ ret = i915_gem_render_state_init(engine);
+ if (ret)
+ return ret;
+
+ return 0;
}
/**
@@ -225,7 +232,8 @@ int intel_engine_init_common(struct intel_engine_cs *engine)
*/
void intel_engine_cleanup_common(struct intel_engine_cs *engine)
{
- intel_engine_cleanup_cmd_parser(engine);
+ i915_gem_render_state_fini(engine);
intel_engine_fini_breadcrumbs(engine);
+ intel_engine_cleanup_cmd_parser(engine);
i915_gem_batch_pool_fini(&engine->batch_pool);
}
diff --git a/drivers/gpu/drm/i915/intel_fbc.c b/drivers/gpu/drm/i915/intel_fbc.c
index 85adc2b92594..9e713a836fb6 100644
--- a/drivers/gpu/drm/i915/intel_fbc.c
+++ b/drivers/gpu/drm/i915/intel_fbc.c
@@ -244,7 +244,6 @@ static void ilk_fbc_activate(struct drm_i915_private *dev_priv)
dpfc_ctl |= DPFC_CTL_LIMIT_1X;
break;
}
- dpfc_ctl |= DPFC_CTL_FENCE_EN;
if (IS_GEN5(dev_priv))
dpfc_ctl |= params->fb.fence_reg;
@@ -709,6 +708,14 @@ static bool intel_fbc_hw_tracking_covers_screen(struct intel_crtc *crtc)
return effective_w <= max_w && effective_h <= max_h;
}
+/* XXX replace me when we have VMA tracking for intel_plane_state */
+static int get_fence_id(struct drm_framebuffer *fb)
+{
+ struct i915_vma *vma = i915_gem_object_to_ggtt(intel_fb_obj(fb), NULL);
+
+ return vma->fence ? vma->fence->id : I915_FENCE_REG_NONE;
+}
+
static void intel_fbc_update_state_cache(struct intel_crtc *crtc,
struct intel_crtc_state *crtc_state,
struct intel_plane_state *plane_state)
@@ -737,10 +744,10 @@ static void intel_fbc_update_state_cache(struct intel_crtc *crtc,
/* FIXME: We lack the proper locking here, so only run this on the
* platforms that need. */
if (IS_GEN(dev_priv, 5, 6))
- cache->fb.ilk_ggtt_offset = i915_gem_obj_ggtt_offset(obj);
+ cache->fb.ilk_ggtt_offset = i915_gem_object_ggtt_offset(obj, NULL);
cache->fb.pixel_format = fb->pixel_format;
cache->fb.stride = fb->pitches[0];
- cache->fb.fence_reg = obj->fence_reg;
+ cache->fb.fence_reg = get_fence_id(fb);
cache->fb.tiling_mode = i915_gem_object_get_tiling(obj);
}
@@ -768,6 +775,10 @@ static bool intel_fbc_can_activate(struct intel_crtc *crtc)
/* The use of a CPU fence is mandatory in order to detect writes
* by the CPU to the scanout and trigger updates to the FBC.
+ *
+ * Note that is possible for a tiled surface to be unmappable (and
+ * so have no fence associated with it) due to aperture constaints
+ * at the time of pinning.
*/
if (cache->fb.tiling_mode != I915_TILING_X ||
cache->fb.fence_reg == I915_FENCE_REG_NONE) {
diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c
index 0436b4869d57..2893a2d67403 100644
--- a/drivers/gpu/drm/i915/intel_fbdev.c
+++ b/drivers/gpu/drm/i915/intel_fbdev.c
@@ -125,7 +125,7 @@ static int intelfb_alloc(struct drm_fb_helper *helper,
struct drm_i915_private *dev_priv = to_i915(dev);
struct i915_ggtt *ggtt = &dev_priv->ggtt;
struct drm_mode_fb_cmd2 mode_cmd = {};
- struct drm_i915_gem_object *obj = NULL;
+ struct drm_i915_gem_object *obj;
int size, ret;
/* we don't do packed 24bpp */
@@ -140,14 +140,13 @@ static int intelfb_alloc(struct drm_fb_helper *helper,
mode_cmd.pixel_format = drm_mode_legacy_fb_format(sizes->surface_bpp,
sizes->surface_depth);
- mutex_lock(&dev->struct_mutex);
-
size = mode_cmd.pitches[0] * mode_cmd.height;
size = PAGE_ALIGN(size);
/* If the FB is too big, just don't use it since fbdev is not very
* important and we should probably use that space with FBC or other
* features. */
+ obj = NULL;
if (size * 2 < ggtt->stolen_usable_size)
obj = i915_gem_object_create_stolen(dev, size);
if (obj == NULL)
@@ -155,24 +154,22 @@ static int intelfb_alloc(struct drm_fb_helper *helper,
if (IS_ERR(obj)) {
DRM_ERROR("failed to allocate framebuffer\n");
ret = PTR_ERR(obj);
- goto out;
+ goto err;
}
- fb = __intel_framebuffer_create(dev, &mode_cmd, obj);
+ fb = intel_framebuffer_create(obj, &mode_cmd);
if (IS_ERR(fb)) {
- i915_gem_object_put(obj);
ret = PTR_ERR(fb);
- goto out;
+ goto err_obj;
}
- mutex_unlock(&dev->struct_mutex);
-
ifbdev->fb = to_intel_framebuffer(fb);
return 0;
-out:
- mutex_unlock(&dev->struct_mutex);
+err_obj:
+ i915_gem_object_put(obj);
+err:
return ret;
}
@@ -188,7 +185,6 @@ static int intelfb_create(struct drm_fb_helper *helper,
struct fb_info *info;
struct drm_framebuffer *fb;
struct i915_vma *vma;
- struct drm_i915_gem_object *obj;
bool prealloc = false;
void __iomem *vaddr;
int ret;
@@ -216,17 +212,17 @@ static int intelfb_create(struct drm_fb_helper *helper,
sizes->fb_height = intel_fb->base.height;
}
- obj = intel_fb->obj;
-
mutex_lock(&dev->struct_mutex);
/* Pin the GGTT vma for our access via info->screen_base.
* This also validates that any existing fb inherited from the
* BIOS is suitable for own access.
*/
- ret = intel_pin_and_fence_fb_obj(&ifbdev->fb->base, BIT(DRM_ROTATE_0));
- if (ret)
+ vma = intel_pin_and_fence_fb_obj(&ifbdev->fb->base, BIT(DRM_ROTATE_0));
+ if (IS_ERR(vma)) {
+ ret = PTR_ERR(vma);
goto out_unlock;
+ }
info = drm_fb_helper_alloc_fbi(helper);
if (IS_ERR(info)) {
@@ -246,8 +242,6 @@ static int intelfb_create(struct drm_fb_helper *helper,
info->flags = FBINFO_DEFAULT | FBINFO_CAN_FORCE_OUTPUT;
info->fbops = &intelfb_ops;
- vma = i915_gem_obj_to_ggtt(obj);
-
/* setup aperture base/size for vesafb takeover */
info->apertures->ranges[0].base = dev->mode_config.fb_base;
info->apertures->ranges[0].size = ggtt->mappable_end;
@@ -274,14 +268,14 @@ static int intelfb_create(struct drm_fb_helper *helper,
* If the object is stolen however, it will be full of whatever
* garbage was left in there.
*/
- if (ifbdev->fb->obj->stolen && !prealloc)
+ if (intel_fb->obj->stolen && !prealloc)
memset_io(info->screen_base, 0, info->screen_size);
/* Use default scratch pixmap (info->pixmap.flags = FB_PIXMAP_SYSTEM) */
- DRM_DEBUG_KMS("allocated %dx%d fb: 0x%08llx, bo %p\n",
- fb->width, fb->height,
- i915_gem_obj_ggtt_offset(obj), obj);
+ DRM_DEBUG_KMS("allocated %dx%d fb: 0x%08llx\n",
+ fb->width, fb->height, vma->node.start);
+ ifbdev->vma = vma;
mutex_unlock(&dev->struct_mutex);
vga_switcheroo_client_fb_set(dev->pdev, info);
@@ -637,7 +631,7 @@ static bool intel_fbdev_init_bios(struct drm_device *dev,
}
cur_size = intel_crtc->config->base.adjusted_mode.crtc_vdisplay;
- cur_size = intel_fb_align_height(dev, cur_size,
+ cur_size = intel_fb_align_height(to_i915(dev), cur_size,
fb->base.pixel_format,
fb->base.modifier[0]);
cur_size *= fb->base.pitches[0];
diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h
index 623cf26cd784..a8da563cadb7 100644
--- a/drivers/gpu/drm/i915/intel_guc.h
+++ b/drivers/gpu/drm/i915/intel_guc.h
@@ -63,7 +63,7 @@ struct drm_i915_gem_request;
* retcode: errno from last guc_submit()
*/
struct i915_guc_client {
- struct drm_i915_gem_object *client_obj;
+ struct i915_vma *client;
void *client_base; /* first page (only) of above */
struct i915_gem_context *owner;
struct intel_guc *guc;
@@ -125,11 +125,10 @@ struct intel_guc_fw {
struct intel_guc {
struct intel_guc_fw guc_fw;
uint32_t log_flags;
- struct drm_i915_gem_object *log_obj;
+ struct i915_vma *log;
- struct drm_i915_gem_object *ads_obj;
-
- struct drm_i915_gem_object *ctx_pool_obj;
+ struct i915_vma *ads;
+ struct i915_vma *ctx_pool;
struct ida ctx_ids;
struct i915_guc_client *execbuf_client;
diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c
index 3763e30cc165..78288f0af669 100644
--- a/drivers/gpu/drm/i915/intel_guc_loader.c
+++ b/drivers/gpu/drm/i915/intel_guc_loader.c
@@ -181,16 +181,15 @@ static void set_guc_init_params(struct drm_i915_private *dev_priv)
i915.guc_log_level << GUC_LOG_VERBOSITY_SHIFT;
}
- if (guc->ads_obj) {
- u32 ads = (u32)i915_gem_obj_ggtt_offset(guc->ads_obj)
- >> PAGE_SHIFT;
+ if (guc->ads) {
+ u32 ads = (u32)guc->ads->node.start >> PAGE_SHIFT;
params[GUC_CTL_DEBUG] |= ads << GUC_ADS_ADDR_SHIFT;
params[GUC_CTL_DEBUG] |= GUC_ADS_ENABLED;
}
/* If GuC submission is enabled, set up additional parameters here */
if (i915.enable_guc_submission) {
- u32 pgs = i915_gem_obj_ggtt_offset(dev_priv->guc.ctx_pool_obj);
+ u32 pgs = dev_priv->guc.ctx_pool->node.start;
u32 ctx_in_16 = GUC_MAX_GPU_CONTEXTS / 16;
pgs >>= PAGE_SHIFT;
@@ -238,12 +237,12 @@ static inline bool guc_ucode_response(struct drm_i915_private *dev_priv,
* Note that GuC needs the CSS header plus uKernel code to be copied by the
* DMA engine in one operation, whereas the RSA signature is loaded via MMIO.
*/
-static int guc_ucode_xfer_dma(struct drm_i915_private *dev_priv)
+static int guc_ucode_xfer_dma(struct drm_i915_private *dev_priv,
+ struct i915_vma *vma)
{
struct intel_guc_fw *guc_fw = &dev_priv->guc.guc_fw;
- struct drm_i915_gem_object *fw_obj = guc_fw->guc_fw_obj;
unsigned long offset;
- struct sg_table *sg = fw_obj->pages;
+ struct sg_table *sg = vma->obj->mm.pages;
u32 status, rsa[UOS_RSA_SCRATCH_MAX_COUNT];
int i, ret = 0;
@@ -260,7 +259,7 @@ static int guc_ucode_xfer_dma(struct drm_i915_private *dev_priv)
I915_WRITE(DMA_COPY_SIZE, guc_fw->header_size + guc_fw->ucode_size);
/* Set the source address for the new blob */
- offset = i915_gem_obj_ggtt_offset(fw_obj) + guc_fw->header_offset;
+ offset = vma->node.start + guc_fw->header_offset;
I915_WRITE(DMA_ADDR_0_LOW, lower_32_bits(offset));
I915_WRITE(DMA_ADDR_0_HIGH, upper_32_bits(offset) & 0xFFFF);
@@ -315,6 +314,7 @@ static int guc_ucode_xfer(struct drm_i915_private *dev_priv)
{
struct intel_guc_fw *guc_fw = &dev_priv->guc.guc_fw;
struct drm_device *dev = &dev_priv->drm;
+ struct i915_vma *vma;
int ret;
ret = i915_gem_object_set_to_gtt_domain(guc_fw->guc_fw_obj, false);
@@ -323,10 +323,10 @@ static int guc_ucode_xfer(struct drm_i915_private *dev_priv)
return ret;
}
- ret = i915_gem_object_ggtt_pin(guc_fw->guc_fw_obj, NULL, 0, 0, 0);
- if (ret) {
- DRM_DEBUG_DRIVER("pin failed %d\n", ret);
- return ret;
+ vma = i915_gem_object_ggtt_pin(guc_fw->guc_fw_obj, NULL, 0, 0, 0);
+ if (IS_ERR(vma)) {
+ DRM_DEBUG_DRIVER("pin failed %d\n", (int)PTR_ERR(vma));
+ return PTR_ERR(vma);
}
/* Invalidate GuC TLB to let GuC take the latest updates to GTT. */
@@ -369,7 +369,7 @@ static int guc_ucode_xfer(struct drm_i915_private *dev_priv)
set_guc_init_params(dev_priv);
- ret = guc_ucode_xfer_dma(dev_priv);
+ ret = guc_ucode_xfer_dma(dev_priv, vma);
intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
@@ -377,7 +377,7 @@ static int guc_ucode_xfer(struct drm_i915_private *dev_priv)
* We keep the object pages for reuse during resume. But we can unpin it
* now that DMA has completed, so it doesn't continue to take up space.
*/
- i915_gem_object_ggtt_unpin(guc_fw->guc_fw_obj);
+ i915_vma_unpin(vma);
return ret;
}
@@ -661,13 +661,6 @@ fail:
DRM_ERROR("Failed to fetch GuC firmware from %s (error %d)\n",
guc_fw->guc_fw_path, err);
- mutex_lock(&dev->struct_mutex);
- obj = guc_fw->guc_fw_obj;
- if (obj)
- i915_gem_object_put(obj);
- guc_fw->guc_fw_obj = NULL;
- mutex_unlock(&dev->struct_mutex);
-
release_firmware(fw); /* OK even if fw is NULL */
guc_fw->guc_fw_fetch_status = GUC_FIRMWARE_FAIL;
}
@@ -747,7 +740,7 @@ void intel_guc_fini(struct drm_device *dev)
if (guc_fw->guc_fw_obj)
i915_gem_object_put(guc_fw->guc_fw_obj);
guc_fw->guc_fw_obj = NULL;
- mutex_unlock(&dev->struct_mutex);
guc_fw->guc_fw_fetch_status = GUC_FIRMWARE_NONE;
+ mutex_unlock(&dev->struct_mutex);
}
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 309c5d9b1c57..db3e85ca698d 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -156,6 +156,11 @@
#define GEN8_CTX_STATUS_COMPLETE (1 << 4)
#define GEN8_CTX_STATUS_LITE_RESTORE (1 << 15)
+#define GEN8_CTX_STATUS_COMPLETED_MASK \
+ (GEN8_CTX_STATUS_ACTIVE_IDLE | \
+ GEN8_CTX_STATUS_PREEMPTED | \
+ GEN8_CTX_STATUS_ELEMENT_SWITCH)
+
#define CTX_LRI_HEADER_0 0x01
#define CTX_CONTEXT_CONTROL 0x02
#define CTX_RING_HEAD 0x04
@@ -263,12 +268,10 @@ logical_ring_init_platform_invariants(struct intel_engine_cs *engine)
{
struct drm_i915_private *dev_priv = engine->i915;
- if (IS_GEN8(dev_priv) || IS_GEN9(dev_priv))
- engine->idle_lite_restore_wa = ~0;
-
- engine->disable_lite_restore_wa = (IS_SKL_REVID(dev_priv, 0, SKL_REVID_B0) ||
- IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) &&
- (engine->id == VCS || engine->id == VCS2);
+ engine->disable_lite_restore_wa =
+ (IS_SKL_REVID(dev_priv, 0, SKL_REVID_B0) ||
+ IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) &&
+ (engine->id == VCS || engine->id == VCS2);
engine->ctx_desc_template = GEN8_CTX_VALID;
if (IS_GEN8(dev_priv))
@@ -315,7 +318,7 @@ intel_lr_context_descriptor_update(struct i915_gem_context *ctx,
desc = ctx->desc_template; /* bits 3-4 */
desc |= engine->ctx_desc_template; /* bits 0-11 */
- desc |= ce->lrc_vma->node.start + LRC_PPHWSP_PN * PAGE_SIZE;
+ desc |= ce->state->node.start + LRC_PPHWSP_PN * PAGE_SIZE;
/* bits 12-31 */
desc |= (u64)ctx->hw_id << GEN8_CTX_ID_SHIFT; /* bits 32-52 */
@@ -328,36 +331,6 @@ uint64_t intel_lr_context_descriptor(struct i915_gem_context *ctx,
return ctx->engine[engine->id].lrc_desc;
}
-static void execlists_elsp_write(struct drm_i915_gem_request *rq0,
- struct drm_i915_gem_request *rq1)
-{
-
- struct intel_engine_cs *engine = rq0->engine;
- struct drm_i915_private *dev_priv = rq0->i915;
- uint64_t desc[2];
-
- if (rq1) {
- desc[1] = intel_lr_context_descriptor(rq1->ctx, rq1->engine);
- rq1->elsp_submitted++;
- } else {
- desc[1] = 0;
- }
-
- desc[0] = intel_lr_context_descriptor(rq0->ctx, rq0->engine);
- rq0->elsp_submitted++;
-
- /* You must always write both descriptors in the order below. */
- I915_WRITE_FW(RING_ELSP(engine), upper_32_bits(desc[1]));
- I915_WRITE_FW(RING_ELSP(engine), lower_32_bits(desc[1]));
-
- I915_WRITE_FW(RING_ELSP(engine), upper_32_bits(desc[0]));
- /* The context is automatically loaded after the following */
- I915_WRITE_FW(RING_ELSP(engine), lower_32_bits(desc[0]));
-
- /* ELSP is a wo register, use another nearby reg for posting */
- POSTING_READ_FW(RING_EXECLIST_STATUS_LO(engine));
-}
-
static void
execlists_update_context_pdps(struct i915_hw_ppgtt *ppgtt, u32 *reg_state)
{
@@ -367,13 +340,13 @@ execlists_update_context_pdps(struct i915_hw_ppgtt *ppgtt, u32 *reg_state)
ASSIGN_CTX_PDP(ppgtt, reg_state, 0);
}
-static void execlists_update_context(struct drm_i915_gem_request *rq)
+static u64 execlists_update_context(struct drm_i915_gem_request *rq)
{
- struct intel_engine_cs *engine = rq->engine;
+ struct intel_context *ce = &rq->ctx->engine[rq->engine->id];
struct i915_hw_ppgtt *ppgtt = rq->ctx->ppgtt;
- uint32_t *reg_state = rq->ctx->engine[engine->id].lrc_reg_state;
- reg_state[CTX_RING_TAIL+1] = intel_ring_offset(rq->ring, rq->tail);
+ ce->lrc_reg_state[CTX_RING_TAIL+1] =
+ intel_ring_offset(rq->ring, rq->tail);
/* True 32b PPGTT with dynamic page allocation: update PDP
* registers and point the unallocated PDPs to scratch page.
@@ -381,32 +354,14 @@ static void execlists_update_context(struct drm_i915_gem_request *rq)
* in 48-bit mode.
*/
if (ppgtt && !USES_FULL_48BIT_PPGTT(ppgtt->base.dev))
- execlists_update_context_pdps(ppgtt, reg_state);
-}
-
-static void execlists_elsp_submit_contexts(struct drm_i915_gem_request *rq0,
- struct drm_i915_gem_request *rq1)
-{
- struct drm_i915_private *dev_priv = rq0->i915;
- unsigned int fw_domains = rq0->engine->fw_domains;
-
- execlists_update_context(rq0);
-
- if (rq1)
- execlists_update_context(rq1);
-
- spin_lock_irq(&dev_priv->uncore.lock);
- intel_uncore_forcewake_get__locked(dev_priv, fw_domains);
-
- execlists_elsp_write(rq0, rq1);
+ execlists_update_context_pdps(ppgtt, ce->lrc_reg_state);
- intel_uncore_forcewake_put__locked(dev_priv, fw_domains);
- spin_unlock_irq(&dev_priv->uncore.lock);
+ return ce->lrc_desc;
}
-static inline void execlists_context_status_change(
- struct drm_i915_gem_request *rq,
- unsigned long status)
+static inline void
+execlists_context_status_change(struct drm_i915_gem_request *rq,
+ unsigned long status)
{
/*
* Only used when GVT-g is enabled now. When GVT-g is disabled,
@@ -418,122 +373,106 @@ static inline void execlists_context_status_change(
atomic_notifier_call_chain(&rq->ctx->status_notifier, status, rq);
}
-static void execlists_unqueue(struct intel_engine_cs *engine)
+static void execlists_submit_ports(struct intel_engine_cs *engine)
{
- struct drm_i915_gem_request *req0 = NULL, *req1 = NULL;
- struct drm_i915_gem_request *cursor, *tmp;
-
- assert_spin_locked(&engine->execlist_lock);
-
- /*
- * If irqs are not active generate a warning as batches that finish
- * without the irqs may get lost and a GPU Hang may occur.
- */
- WARN_ON(!intel_irqs_enabled(engine->i915));
-
- /* Try to read in pairs */
- list_for_each_entry_safe(cursor, tmp, &engine->execlist_queue,
- execlist_link) {
- if (!req0) {
- req0 = cursor;
- } else if (req0->ctx == cursor->ctx) {
- /* Same ctx: ignore first request, as second request
- * will update tail past first request's workload */
- cursor->elsp_submitted = req0->elsp_submitted;
- list_del(&req0->execlist_link);
- i915_gem_request_put(req0);
- req0 = cursor;
- } else {
- if (IS_ENABLED(CONFIG_DRM_I915_GVT)) {
- /*
- * req0 (after merged) ctx requires single
- * submission, stop picking
- */
- if (req0->ctx->execlists_force_single_submission)
- break;
- /*
- * req0 ctx doesn't require single submission,
- * but next req ctx requires, stop picking
- */
- if (cursor->ctx->execlists_force_single_submission)
- break;
- }
- req1 = cursor;
- WARN_ON(req1->elsp_submitted);
- break;
- }
- }
-
- if (unlikely(!req0))
- return;
+ struct drm_i915_private *dev_priv = engine->i915;
+ u32 *elsp = dev_priv->regs + i915_mmio_reg_offset(RING_ELSP(engine));
+ u64 desc[2];
- execlists_context_status_change(req0, INTEL_CONTEXT_SCHEDULE_IN);
+ if (!engine->execlist_port[0].count)
+ execlists_context_status_change(engine->execlist_port[0].request,
+ INTEL_CONTEXT_SCHEDULE_IN);
+ desc[0] = execlists_update_context(engine->execlist_port[0].request);
+ engine->preempt_wa = engine->execlist_port[0].count++;
- if (req1)
- execlists_context_status_change(req1,
+ if (engine->execlist_port[1].request) {
+ GEM_BUG_ON(engine->execlist_port[1].count);
+ execlists_context_status_change(engine->execlist_port[1].request,
INTEL_CONTEXT_SCHEDULE_IN);
+ desc[1] = execlists_update_context(engine->execlist_port[1].request);
+ engine->execlist_port[1].count = 1;
+ } else
+ desc[1] = 0;
- if (req0->elsp_submitted & engine->idle_lite_restore_wa) {
- /*
- * WaIdleLiteRestore: make sure we never cause a lite restore
- * with HEAD==TAIL.
- *
- * Apply the wa NOOPS to prevent ring:HEAD == req:TAIL as we
- * resubmit the request. See gen8_emit_request() for where we
- * prepare the padding after the end of the request.
- */
- req0->tail += 8;
- req0->tail &= req0->ring->size - 1;
- }
+ /* You must always write both descriptors in the order below. */
+ writel(upper_32_bits(desc[1]), elsp);
+ writel(lower_32_bits(desc[1]), elsp);
- execlists_elsp_submit_contexts(req0, req1);
+ writel(upper_32_bits(desc[0]), elsp);
+ /* The context is automatically loaded after the following */
+ writel(lower_32_bits(desc[0]), elsp);
}
-static unsigned int
-execlists_check_remove_request(struct intel_engine_cs *engine, u32 ctx_id)
+static bool merge_ctx(struct i915_gem_context *prev,
+ struct i915_gem_context *next)
{
- struct drm_i915_gem_request *head_req;
+ if (prev != next)
+ return false;
- assert_spin_locked(&engine->execlist_lock);
+ if (IS_ENABLED(CONFIG_DRM_I915_GVT) &&
+ prev->execlists_force_single_submission)
+ return false;
- head_req = list_first_entry_or_null(&engine->execlist_queue,
- struct drm_i915_gem_request,
- execlist_link);
+ return true;
+}
- if (WARN_ON(!head_req || (head_req->ctx_hw_id != ctx_id)))
- return 0;
+static void execlists_context_unqueue(struct intel_engine_cs *engine)
+{
+ struct drm_i915_gem_request *cursor, *last;
+ struct execlist_port *port = engine->execlist_port;
+ bool submit = false;
+
+ last = port->request;
+ if (last != NULL) {
+ /* WaIdleLiteRestore:bdw,skl
+ * Apply the wa NOOPs to prevent ring:HEAD == req:TAIL
+ * as we resubmit the request. See gen8_emit_request()
+ * for where we prepare the padding after the end of the
+ * request.
+ */
+ last->tail = last->wa_tail;
+ }
- WARN(head_req->elsp_submitted == 0, "Never submitted head request\n");
+ /* Try to read in pairs and fill both submission ports */
+ spin_lock(&engine->execlist_lock);
+ list_for_each_entry(cursor, &engine->execlist_queue, execlist_link) {
+ if (last && !merge_ctx(cursor->ctx, last->ctx)) {
+ if (port != engine->execlist_port)
+ break;
- if (--head_req->elsp_submitted > 0)
- return 0;
+ if (IS_ENABLED(CONFIG_DRM_I915_GVT) &&
+ cursor->ctx->execlists_force_single_submission)
+ break;
- execlists_context_status_change(head_req, INTEL_CONTEXT_SCHEDULE_OUT);
+ i915_gem_request_assign(&port->request, last);
+ port++;
- list_del(&head_req->execlist_link);
- i915_gem_request_put(head_req);
+ }
+ last = cursor;
+ submit = true;
+ }
+ if (submit) {
+ i915_gem_request_assign(&port->request, last);
+ engine->execlist_queue.next = &cursor->execlist_link;
+ cursor->execlist_link.prev = &engine->execlist_queue;
+ }
+ spin_unlock(&engine->execlist_lock);
- return 1;
+ if (submit)
+ execlists_submit_ports(engine);
}
-static u32
-get_context_status(struct intel_engine_cs *engine, unsigned int read_pointer,
- u32 *context_id)
+static bool execlists_elsp_idle(struct intel_engine_cs *engine)
{
- struct drm_i915_private *dev_priv = engine->i915;
- u32 status;
-
- read_pointer %= GEN8_CSB_ENTRIES;
-
- status = I915_READ_FW(RING_CONTEXT_STATUS_BUF_LO(engine, read_pointer));
-
- if (status & GEN8_CTX_STATUS_IDLE_ACTIVE)
- return 0;
-
- *context_id = I915_READ_FW(RING_CONTEXT_STATUS_BUF_HI(engine,
- read_pointer));
+ return engine->execlist_port[0].request == NULL;
+}
- return status;
+static bool execlists_elsp_ready(struct intel_engine_cs *engine)
+{
+ if (engine->disable_lite_restore_wa || engine->preempt_wa)
+ return engine->execlist_port[0].request == NULL;
+ else
+ return engine->execlist_port[1].request == NULL;
}
/*
@@ -544,102 +483,65 @@ static void intel_lrc_irq_handler(unsigned long data)
{
struct intel_engine_cs *engine = (struct intel_engine_cs *)data;
struct drm_i915_private *dev_priv = engine->i915;
- u32 status_pointer;
- unsigned int read_pointer, write_pointer;
- u32 csb[GEN8_CSB_ENTRIES][2];
- unsigned int csb_read = 0, i;
- unsigned int submit_contexts = 0;
intel_uncore_forcewake_get(dev_priv, engine->fw_domains);
- status_pointer = I915_READ_FW(RING_CONTEXT_STATUS_PTR(engine));
-
- read_pointer = engine->next_context_status_buffer;
- write_pointer = GEN8_CSB_WRITE_PTR(status_pointer);
- if (read_pointer > write_pointer)
- write_pointer += GEN8_CSB_ENTRIES;
-
- while (read_pointer < write_pointer) {
- if (WARN_ON_ONCE(csb_read == GEN8_CSB_ENTRIES))
- break;
- csb[csb_read][0] = get_context_status(engine, ++read_pointer,
- &csb[csb_read][1]);
- csb_read++;
- }
-
- engine->next_context_status_buffer = write_pointer % GEN8_CSB_ENTRIES;
-
- /* Update the read pointer to the old write pointer. Manual ringbuffer
- * management ftw </sarcasm> */
- I915_WRITE_FW(RING_CONTEXT_STATUS_PTR(engine),
- _MASKED_FIELD(GEN8_CSB_READ_PTR_MASK,
- engine->next_context_status_buffer << 8));
-
- intel_uncore_forcewake_put(dev_priv, engine->fw_domains);
-
- spin_lock(&engine->execlist_lock);
-
- for (i = 0; i < csb_read; i++) {
- if (unlikely(csb[i][0] & GEN8_CTX_STATUS_PREEMPTED)) {
- if (csb[i][0] & GEN8_CTX_STATUS_LITE_RESTORE) {
- if (execlists_check_remove_request(engine, csb[i][1]))
- WARN(1, "Lite Restored request removed from queue\n");
- } else
- WARN(1, "Preemption without Lite Restore\n");
+ if (!execlists_elsp_idle(engine)) {
+ u32 *ring_mmio =
+ dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine));
+ u32 *csb_mmio =
+ dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0));
+ unsigned ring, head, tail;
+
+ ring = readl(ring_mmio);
+ head = GEN8_CSB_READ_PTR(ring);
+ tail = GEN8_CSB_WRITE_PTR(ring);
+ if (tail < head)
+ tail += GEN8_CSB_ENTRIES;
+ while (head < tail) {
+ unsigned idx = ++head % GEN8_CSB_ENTRIES;
+ unsigned status = readl(&csb_mmio[2*idx]);
+
+ if (status & GEN8_CTX_STATUS_COMPLETED_MASK) {
+ GEM_BUG_ON(engine->execlist_port[0].count == 0);
+ if (--engine->execlist_port[0].count == 0) {
+ GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED);
+ execlists_context_status_change(engine->execlist_port[0].request,
+ INTEL_CONTEXT_SCHEDULE_OUT);
+ i915_gem_request_put(engine->execlist_port[0].request);
+ engine->execlist_port[0] = engine->execlist_port[1];
+ memset(&engine->execlist_port[1], 0,
+ sizeof(engine->execlist_port[1]));
+ engine->preempt_wa = false;
+ }
+ }
+ GEM_BUG_ON(engine->execlist_port[0].count == 0 &&
+ !(status & GEN8_CTX_STATUS_ACTIVE_IDLE));
}
- if (csb[i][0] & (GEN8_CTX_STATUS_ACTIVE_IDLE |
- GEN8_CTX_STATUS_ELEMENT_SWITCH))
- submit_contexts +=
- execlists_check_remove_request(engine, csb[i][1]);
+ writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK,
+ GEN8_CSB_WRITE_PTR(ring) << 8),
+ ring_mmio);
}
- if (submit_contexts) {
- if (!engine->disable_lite_restore_wa ||
- (csb[i][0] & GEN8_CTX_STATUS_ACTIVE_IDLE))
- execlists_unqueue(engine);
- }
-
- spin_unlock(&engine->execlist_lock);
+ if (execlists_elsp_ready(engine))
+ execlists_context_unqueue(engine);
- if (unlikely(submit_contexts > 2))
- DRM_ERROR("More than two context complete events?\n");
+ intel_uncore_forcewake_put(dev_priv, engine->fw_domains);
}
static void execlists_submit_request(struct drm_i915_gem_request *request)
{
struct intel_engine_cs *engine = request->engine;
- struct drm_i915_gem_request *cursor;
- int num_elements = 0;
+ unsigned long flags;
- spin_lock_bh(&engine->execlist_lock);
+ spin_lock_irqsave(&engine->execlist_lock, flags);
- list_for_each_entry(cursor, &engine->execlist_queue, execlist_link)
- if (++num_elements > 2)
- break;
-
- if (num_elements > 2) {
- struct drm_i915_gem_request *tail_req;
-
- tail_req = list_last_entry(&engine->execlist_queue,
- struct drm_i915_gem_request,
- execlist_link);
-
- if (request->ctx == tail_req->ctx) {
- WARN(tail_req->elsp_submitted != 0,
- "More than 2 already-submitted reqs queued\n");
- list_del(&tail_req->execlist_link);
- i915_gem_request_put(tail_req);
- }
- }
-
- i915_gem_request_get(request);
list_add_tail(&request->execlist_link, &engine->execlist_queue);
- request->ctx_hw_id = request->ctx->hw_id;
- if (num_elements == 0)
- execlists_unqueue(engine);
+ if (execlists_elsp_idle(engine))
+ tasklet_hi_schedule(&engine->irq_tasklet);
- spin_unlock_bh(&engine->execlist_lock);
+ spin_unlock_irqrestore(&engine->execlist_lock, flags);
}
int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request)
@@ -731,6 +633,7 @@ intel_logical_ring_advance(struct drm_i915_gem_request *request)
intel_ring_emit(ring, MI_NOOP);
intel_ring_emit(ring, MI_NOOP);
intel_ring_advance(ring);
+ request->wa_tail = request->ring->tail;
/* We keep the previous context alive until we retire the following
* request. This ensures that any the context object is still pinned
@@ -743,27 +646,9 @@ intel_logical_ring_advance(struct drm_i915_gem_request *request)
return 0;
}
-void intel_execlists_cancel_requests(struct intel_engine_cs *engine)
-{
- struct drm_i915_gem_request *req, *tmp;
- LIST_HEAD(cancel_list);
-
- WARN_ON(!mutex_is_locked(&engine->i915->drm.struct_mutex));
-
- spin_lock_bh(&engine->execlist_lock);
- list_replace_init(&engine->execlist_queue, &cancel_list);
- spin_unlock_bh(&engine->execlist_lock);
-
- list_for_each_entry_safe(req, tmp, &cancel_list, execlist_link) {
- list_del(&req->execlist_link);
- i915_gem_request_put(req);
- }
-}
-
static int intel_lr_context_pin(struct i915_gem_context *ctx,
struct intel_engine_cs *engine)
{
- struct drm_i915_private *dev_priv = ctx->i915;
struct intel_context *ce = &ctx->engine[engine->id];
void *vaddr;
u32 *lrc_reg_state;
@@ -774,16 +659,15 @@ static int intel_lr_context_pin(struct i915_gem_context *ctx,
if (ce->pin_count++)
return 0;
- ret = i915_gem_object_ggtt_pin(ce->state, NULL,
- 0, GEN8_LR_CONTEXT_ALIGN,
- PIN_OFFSET_BIAS | GUC_WOPCM_TOP);
+ ret = i915_vma_pin(ce->state, 0, GEN8_LR_CONTEXT_ALIGN,
+ PIN_OFFSET_BIAS | GUC_WOPCM_TOP | PIN_GLOBAL);
if (ret)
goto err;
- vaddr = i915_gem_object_pin_map(ce->state);
+ vaddr = i915_gem_object_pin_map(ce->state->obj, !HAS_LLC(ctx->i915));
if (IS_ERR(vaddr)) {
ret = PTR_ERR(vaddr);
- goto unpin_ctx_obj;
+ goto unpin_vma;
}
lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE;
@@ -792,24 +676,25 @@ static int intel_lr_context_pin(struct i915_gem_context *ctx,
if (ret)
goto unpin_map;
- ce->lrc_vma = i915_gem_obj_to_ggtt(ce->state);
intel_lr_context_descriptor_update(ctx, engine);
lrc_reg_state[CTX_RING_BUFFER_START+1] = ce->ring->vma->node.start;
ce->lrc_reg_state = lrc_reg_state;
- ce->state->dirty = true;
+ i915_gem_object_set_dirty(ce->state->obj);
/* Invalidate GuC TLB. */
- if (i915.enable_guc_submission)
+ if (i915.enable_guc_submission) {
+ struct drm_i915_private *dev_priv = ctx->i915;
I915_WRITE(GEN8_GTCR, GEN8_GTCR_INVALIDATE);
+ }
i915_gem_context_get(ctx);
return 0;
unpin_map:
- i915_gem_object_unpin_map(ce->state);
-unpin_ctx_obj:
- i915_gem_object_ggtt_unpin(ce->state);
+ i915_gem_object_unpin_map(ce->state->obj);
+unpin_vma:
+ __i915_vma_unpin(ce->state);
err:
ce->pin_count = 0;
return ret;
@@ -828,12 +713,8 @@ void intel_lr_context_unpin(struct i915_gem_context *ctx,
intel_ring_unpin(ce->ring);
- i915_gem_object_unpin_map(ce->state);
- i915_gem_object_ggtt_unpin(ce->state);
-
- ce->lrc_vma = NULL;
- ce->lrc_desc = 0;
- ce->lrc_reg_state = NULL;
+ i915_gem_object_unpin_map(ce->state->obj);
+ i915_vma_unpin(ce->state);
i915_gem_context_put(ctx);
}
@@ -919,7 +800,7 @@ static inline int gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine,
wa_ctx_emit(batch, index, (MI_STORE_REGISTER_MEM_GEN8 |
MI_SRM_LRM_GLOBAL_GTT));
wa_ctx_emit_reg(batch, index, GEN8_L3SQCREG4);
- wa_ctx_emit(batch, index, engine->scratch.gtt_offset + 256);
+ wa_ctx_emit(batch, index, engine->scratch->node.start + 256);
wa_ctx_emit(batch, index, 0);
wa_ctx_emit(batch, index, MI_LOAD_REGISTER_IMM(1));
@@ -937,7 +818,7 @@ static inline int gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine,
wa_ctx_emit(batch, index, (MI_LOAD_REGISTER_MEM_GEN8 |
MI_SRM_LRM_GLOBAL_GTT));
wa_ctx_emit_reg(batch, index, GEN8_L3SQCREG4);
- wa_ctx_emit(batch, index, engine->scratch.gtt_offset + 256);
+ wa_ctx_emit(batch, index, engine->scratch->node.start + 256);
wa_ctx_emit(batch, index, 0);
return index;
@@ -998,7 +879,7 @@ static int gen8_init_indirectctx_bb(struct intel_engine_cs *engine,
/* WaClearSlmSpaceAtContextSwitch:bdw,chv */
/* Actual scratch location is at 128 bytes offset */
- scratch_addr = engine->scratch.gtt_offset + 2*CACHELINE_BYTES;
+ scratch_addr = engine->scratch->node.start + 2*CACHELINE_BYTES;
wa_ctx_emit(batch, index, GFX_OP_PIPE_CONTROL(6));
wa_ctx_emit(batch, index, (PIPE_CONTROL_FLUSH_L3 |
@@ -1078,7 +959,7 @@ static int gen9_init_indirectctx_bb(struct intel_engine_cs *engine,
/* Actual scratch location is at 128 bytes offset */
if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_A0)) {
uint32_t scratch_addr
- = engine->scratch.gtt_offset + 2*CACHELINE_BYTES;
+ = engine->scratch->node.start + 2*CACHELINE_BYTES;
wa_ctx_emit(batch, index, GFX_OP_PIPE_CONTROL(6));
wa_ctx_emit(batch, index, (PIPE_CONTROL_FLUSH_L3 |
@@ -1168,47 +1049,41 @@ static int gen9_init_perctx_bb(struct intel_engine_cs *engine,
return wa_ctx_end(wa_ctx, *offset = index, 1);
}
-static int lrc_setup_wa_ctx_obj(struct intel_engine_cs *engine, u32 size)
+static struct i915_vma *
+lrc_setup_wa_ctx_obj(struct intel_engine_cs *engine, u32 size)
{
- int ret;
+ struct drm_i915_gem_object *obj;
+ struct i915_vma *vma;
- engine->wa_ctx.obj = i915_gem_object_create(&engine->i915->drm,
- PAGE_ALIGN(size));
- if (IS_ERR(engine->wa_ctx.obj)) {
- DRM_DEBUG_DRIVER("alloc LRC WA ctx backing obj failed.\n");
- ret = PTR_ERR(engine->wa_ctx.obj);
- engine->wa_ctx.obj = NULL;
- return ret;
- }
+ obj = i915_gem_object_create(&engine->i915->drm, PAGE_ALIGN(size));
+ if (IS_ERR(obj))
+ return ERR_CAST(obj);
- ret = i915_gem_object_ggtt_pin(engine->wa_ctx.obj, NULL,
- 0, PAGE_SIZE, 0);
- if (ret) {
- DRM_DEBUG_DRIVER("pin LRC WA ctx backing obj failed: %d\n",
- ret);
- i915_gem_object_put(engine->wa_ctx.obj);
- return ret;
+ vma = i915_gem_object_ggtt_pin(obj, NULL, 0, PAGE_SIZE, 0);
+ if (IS_ERR(vma)) {
+ i915_gem_object_put(obj);
+ return vma;
}
- return 0;
+ return vma;
}
static void lrc_destroy_wa_ctx_obj(struct intel_engine_cs *engine)
{
- if (engine->wa_ctx.obj) {
- i915_gem_object_ggtt_unpin(engine->wa_ctx.obj);
- i915_gem_object_put(engine->wa_ctx.obj);
- engine->wa_ctx.obj = NULL;
+ if (engine->wa_ctx.vma) {
+ i915_vma_unpin(engine->wa_ctx.vma);
+ i915_gem_object_put(engine->wa_ctx.vma->obj);
+ engine->wa_ctx.vma = NULL;
}
}
static int intel_init_workaround_bb(struct intel_engine_cs *engine)
{
- int ret;
+ struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx;
uint32_t *batch;
uint32_t offset;
struct page *page;
- struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx;
+ int ret;
WARN_ON(engine->id != RCS);
@@ -1220,20 +1095,22 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine)
}
/* some WA perform writes to scratch page, ensure it is valid */
- if (engine->scratch.obj == NULL) {
+ if (!engine->scratch) {
DRM_ERROR("scratch page not allocated for %s\n", engine->name);
return -EINVAL;
}
- ret = lrc_setup_wa_ctx_obj(engine, PAGE_SIZE);
- if (ret) {
+ wa_ctx->vma = lrc_setup_wa_ctx_obj(engine, PAGE_SIZE);
+ if (IS_ERR(wa_ctx->vma)) {
+ ret = PTR_ERR(wa_ctx->vma);
DRM_DEBUG_DRIVER("Failed to setup context WA page: %d\n", ret);
return ret;
}
- page = i915_gem_object_get_dirty_page(wa_ctx->obj, 0);
+ page = i915_gem_object_get_dirty_page(wa_ctx->vma->obj, 0);
batch = kmap_atomic(page);
offset = 0;
+ ret = 0;
if (IS_GEN8(engine->i915)) {
ret = gen8_init_indirectctx_bb(engine,
@@ -1285,7 +1162,6 @@ static void lrc_init_hws(struct intel_engine_cs *engine)
static int gen8_init_common_ring(struct intel_engine_cs *engine)
{
struct drm_i915_private *dev_priv = engine->i915;
- unsigned int next_context_status_buffer_hw;
lrc_init_hws(engine);
@@ -1296,32 +1172,12 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine)
I915_WRITE(RING_MODE_GEN7(engine),
_MASKED_BIT_DISABLE(GFX_REPLAY_MODE) |
_MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE));
- POSTING_READ(RING_MODE_GEN7(engine));
-
- /*
- * Instead of resetting the Context Status Buffer (CSB) read pointer to
- * zero, we need to read the write pointer from hardware and use its
- * value because "this register is power context save restored".
- * Effectively, these states have been observed:
- *
- * | Suspend-to-idle (freeze) | Suspend-to-RAM (mem) |
- * BDW | CSB regs not reset | CSB regs reset |
- * CHT | CSB regs not reset | CSB regs not reset |
- * SKL | ? | ? |
- * BXT | ? | ? |
- */
- next_context_status_buffer_hw =
- GEN8_CSB_WRITE_PTR(I915_READ(RING_CONTEXT_STATUS_PTR(engine)));
- /*
- * When the CSB registers are reset (also after power-up / gpu reset),
- * CSB write pointer is set to all 1's, which is not valid, use '5' in
- * this special case, so the first element read is CSB[0].
- */
- if (next_context_status_buffer_hw == GEN8_CSB_PTR_MASK)
- next_context_status_buffer_hw = (GEN8_CSB_ENTRIES - 1);
+ I915_WRITE(RING_CONTEXT_STATUS_PTR(engine),
+ _MASKED_FIELD(GEN8_CSB_READ_PTR_MASK |
+ GEN8_CSB_WRITE_PTR_MASK,
+ 0));
- engine->next_context_status_buffer = next_context_status_buffer_hw;
DRM_DEBUG_DRIVER("Execlists enabled for %s\n", engine->name);
intel_engine_init_hangcheck(engine);
@@ -1488,7 +1344,7 @@ static int gen8_emit_flush_render(struct drm_i915_gem_request *request,
{
struct intel_ring *ring = request->ring;
struct intel_engine_cs *engine = request->engine;
- u32 scratch_addr = engine->scratch.gtt_offset + 2 * CACHELINE_BYTES;
+ u32 scratch_addr = engine->scratch->node.start + 2 * CACHELINE_BYTES;
bool vf_flush_wa = false, dc_flush_wa = false;
u32 flags = 0;
int ret;
@@ -1668,7 +1524,7 @@ static int gen8_init_rcs_context(struct drm_i915_gem_request *req)
if (ret)
DRM_ERROR("MOCS failed to program: expect performance issues.\n");
- return i915_gem_render_state_init(req);
+ return i915_gem_render_state_emit(req);
}
/**
@@ -1700,13 +1556,12 @@ void intel_logical_ring_cleanup(struct intel_engine_cs *engine)
intel_engine_cleanup_common(engine);
- if (engine->status_page.obj) {
- i915_gem_object_unpin_map(engine->status_page.obj);
- engine->status_page.obj = NULL;
+ if (engine->status_page.vma) {
+ kunmap(kmap_to_page(engine->status_page.page_addr));
+ engine->status_page.vma = NULL;
}
intel_lr_context_unpin(dev_priv->kernel_context, engine);
- engine->idle_lite_restore_wa = 0;
engine->disable_lite_restore_wa = false;
engine->ctx_desc_template = 0;
@@ -1746,22 +1601,15 @@ logical_ring_default_irqs(struct intel_engine_cs *engine)
engine->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT << shift;
}
-static int
-lrc_setup_hws(struct intel_engine_cs *engine,
- struct drm_i915_gem_object *dctx_obj)
+static void
+lrc_setup_hws(struct intel_engine_cs *engine, struct i915_vma *vma)
{
- void *hws;
-
/* The HWSP is part of the default context object in LRC mode. */
- engine->status_page.gfx_addr = i915_gem_obj_ggtt_offset(dctx_obj) +
- LRC_PPHWSP_PN * PAGE_SIZE;
- hws = i915_gem_object_pin_map(dctx_obj);
- if (IS_ERR(hws))
- return PTR_ERR(hws);
- engine->status_page.page_addr = hws + LRC_PPHWSP_PN * PAGE_SIZE;
- engine->status_page.obj = dctx_obj;
-
- return 0;
+ engine->status_page.page_addr =
+ kmap(i915_gem_object_get_page(vma->obj, LRC_PPHWSP_PN));
+ engine->status_page.gfx_addr =
+ vma->node.start + LRC_PPHWSP_PN * PAGE_SIZE;
+ engine->status_page.vma = vma;
}
static void
@@ -1820,11 +1668,7 @@ logical_ring_init(struct intel_engine_cs *engine)
}
/* And setup the hardware status page. */
- ret = lrc_setup_hws(engine, dctx->engine[engine->id].state);
- if (ret) {
- DRM_ERROR("Failed to set up hws %s: %d\n", engine->name, ret);
- goto error;
- }
+ lrc_setup_hws(engine, dctx->engine[engine->id].state);
return 0;
@@ -1968,13 +1812,13 @@ populate_lr_context(struct i915_gem_context *ctx,
return ret;
}
- vaddr = i915_gem_object_pin_map(ctx_obj);
+ vaddr = i915_gem_object_pin_map(ctx_obj, !HAS_LLC(dev_priv));
if (IS_ERR(vaddr)) {
ret = PTR_ERR(vaddr);
DRM_DEBUG_DRIVER("Could not map object pages! (%d)\n", ret);
return ret;
}
- ctx_obj->dirty = true;
+ i915_gem_object_set_dirty(ctx_obj);
/* The second page of the context object contains some fields which must
* be set up prior to the first execution. */
@@ -2025,9 +1869,9 @@ populate_lr_context(struct i915_gem_context *ctx,
RING_INDIRECT_CTX(engine->mmio_base), 0);
ASSIGN_CTX_REG(reg_state, CTX_RCS_INDIRECT_CTX_OFFSET,
RING_INDIRECT_CTX_OFFSET(engine->mmio_base), 0);
- if (engine->wa_ctx.obj) {
+ if (engine->wa_ctx.vma) {
struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx;
- uint32_t ggtt_offset = i915_gem_obj_ggtt_offset(wa_ctx->obj);
+ u32 ggtt_offset = wa_ctx->vma->node.start;
reg_state[CTX_RCS_INDIRECT_CTX+1] =
(ggtt_offset + wa_ctx->indirect_ctx.offset * sizeof(uint32_t)) |
@@ -2131,6 +1975,7 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx,
{
struct drm_i915_gem_object *ctx_obj;
struct intel_context *ce = &ctx->engine[engine->id];
+ struct i915_vma *vma;
uint32_t context_size;
struct intel_ring *ring;
int ret;
@@ -2148,6 +1993,14 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx,
return PTR_ERR(ctx_obj);
}
+ vma = i915_gem_obj_lookup_or_create_vma(ctx_obj,
+ &engine->i915->ggtt.base,
+ NULL);
+ if (IS_ERR(vma)) {
+ ret = PTR_ERR(vma);
+ goto error_deref_obj;
+ }
+
ring = intel_engine_create_ring(engine, ctx->ring_size);
if (IS_ERR(ring)) {
ret = PTR_ERR(ring);
@@ -2161,7 +2014,7 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx,
}
ce->ring = ring;
- ce->state = ctx_obj;
+ ce->state = vma;
ce->initialised = engine->init_context == NULL;
return 0;
@@ -2170,8 +2023,6 @@ error_ring_free:
intel_ring_free(ring);
error_deref_obj:
i915_gem_object_put(ctx_obj);
- ce->ring = NULL;
- ce->state = NULL;
return ret;
}
@@ -2182,24 +2033,24 @@ void intel_lr_context_reset(struct drm_i915_private *dev_priv,
for_each_engine(engine, dev_priv) {
struct intel_context *ce = &ctx->engine[engine->id];
- struct drm_i915_gem_object *ctx_obj = ce->state;
void *vaddr;
uint32_t *reg_state;
- if (!ctx_obj)
+ if (!ce->state)
continue;
- vaddr = i915_gem_object_pin_map(ctx_obj);
+ vaddr = i915_gem_object_pin_map(ce->state->obj,
+ !HAS_LLC(dev_priv));
if (WARN_ON(IS_ERR(vaddr)))
continue;
reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE;
- ctx_obj->dirty = true;
reg_state[CTX_RING_HEAD+1] = 0;
reg_state[CTX_RING_TAIL+1] = 0;
- i915_gem_object_unpin_map(ctx_obj);
+ i915_gem_object_set_dirty(ce->state->obj);
+ i915_gem_object_unpin_map(ce->state->obj);
ce->ring->head = 0;
ce->ring->tail = 0;
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index a52cf57dbd40..4d70346500c2 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -97,6 +97,4 @@ int intel_sanitize_enable_execlists(struct drm_i915_private *dev_priv,
int enable_execlists);
void intel_execlists_enable_submission(struct drm_i915_private *dev_priv);
-void intel_execlists_cancel_requests(struct intel_engine_cs *engine);
-
#endif /* _INTEL_LRC_H_ */
diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c
index 90f3ab424e01..aa8ee7b870bf 100644
--- a/drivers/gpu/drm/i915/intel_overlay.c
+++ b/drivers/gpu/drm/i915/intel_overlay.c
@@ -171,8 +171,7 @@ struct overlay_registers {
struct intel_overlay {
struct drm_i915_private *i915;
struct intel_crtc *crtc;
- struct drm_i915_gem_object *vid_bo;
- struct drm_i915_gem_object *old_vid_bo;
+ struct i915_vma *vma, *old_vma;
bool active;
bool pfit_active;
u32 pfit_vscale_ratio; /* shifted-point number, (1<<12) == 1.0 */
@@ -196,7 +195,7 @@ intel_overlay_map_regs(struct intel_overlay *overlay)
if (OVERLAY_NEEDS_PHYSICAL(dev_priv))
regs = (struct overlay_registers __iomem *)overlay->reg_bo->phys_handle->vaddr;
else
- regs = io_mapping_map_wc(dev_priv->ggtt.mappable,
+ regs = io_mapping_map_wc(&dev_priv->ggtt.mappable,
overlay->flip_addr,
PAGE_SIZE);
@@ -255,7 +254,7 @@ static int intel_overlay_on(struct intel_overlay *overlay)
ret = intel_ring_begin(req, 4);
if (ret) {
- i915_add_request_no_flush(req);
+ i915_add_request(req);
return ret;
}
@@ -298,7 +297,7 @@ static int intel_overlay_continue(struct intel_overlay *overlay,
ret = intel_ring_begin(req, 2);
if (ret) {
- i915_add_request_no_flush(req);
+ i915_add_request(req);
return ret;
}
@@ -317,15 +316,14 @@ static void intel_overlay_release_old_vid_tail(struct i915_gem_active *active,
{
struct intel_overlay *overlay =
container_of(active, typeof(*overlay), last_flip);
- struct drm_i915_gem_object *obj = overlay->old_vid_bo;
- i915_gem_track_fb(obj, NULL,
+ i915_gem_track_fb(overlay->old_vma->obj, NULL,
INTEL_FRONTBUFFER_OVERLAY(overlay->crtc->pipe));
- i915_gem_object_ggtt_unpin(obj);
- i915_gem_object_put(obj);
+ i915_gem_object_unpin_from_display_plane(overlay->old_vma);
+ i915_gem_object_put(overlay->old_vma->obj);
- overlay->old_vid_bo = NULL;
+ overlay->old_vma = NULL;
}
static void intel_overlay_off_tail(struct i915_gem_active *active,
@@ -333,15 +331,14 @@ static void intel_overlay_off_tail(struct i915_gem_active *active,
{
struct intel_overlay *overlay =
container_of(active, typeof(*overlay), last_flip);
- struct drm_i915_gem_object *obj = overlay->vid_bo;
/* never have the overlay hw on without showing a frame */
- if (WARN_ON(!obj))
+ if (WARN_ON(overlay->vma))
return;
- i915_gem_object_ggtt_unpin(obj);
- i915_gem_object_put(obj);
- overlay->vid_bo = NULL;
+ i915_gem_object_unpin_from_display_plane(overlay->vma);
+ i915_gem_object_put(overlay->vma->obj);
+ overlay->vma = NULL;
overlay->crtc->overlay = NULL;
overlay->crtc = NULL;
@@ -371,7 +368,7 @@ static int intel_overlay_off(struct intel_overlay *overlay)
ret = intel_ring_begin(req, 6);
if (ret) {
- i915_add_request_no_flush(req);
+ i915_add_request(req);
return ret;
}
@@ -421,7 +418,7 @@ static int intel_overlay_release_old_vid(struct intel_overlay *overlay)
/* Only wait if there is actually an old frame to release to
* guarantee forward progress.
*/
- if (!overlay->old_vid_bo)
+ if (!overlay->old_vma)
return 0;
if (I915_READ(ISR) & I915_OVERLAY_PLANE_FLIP_PENDING_INTERRUPT) {
@@ -435,7 +432,7 @@ static int intel_overlay_release_old_vid(struct intel_overlay *overlay)
ret = intel_ring_begin(req, 2);
if (ret) {
- i915_add_request_no_flush(req);
+ i915_add_request(req);
return ret;
}
@@ -744,6 +741,7 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay,
struct drm_i915_private *dev_priv = overlay->i915;
u32 swidth, swidthsw, sheight, ostride;
enum pipe pipe = overlay->crtc->pipe;
+ struct i915_vma *vma;
lockdep_assert_held(&dev_priv->drm.struct_mutex);
WARN_ON(!drm_modeset_is_locked(&dev_priv->drm.mode_config.connection_mutex));
@@ -752,12 +750,12 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay,
if (ret != 0)
return ret;
- ret = i915_gem_object_pin_to_display_plane(new_bo, 0,
+ vma = i915_gem_object_pin_to_display_plane(new_bo, 0,
&i915_ggtt_view_normal);
- if (ret != 0)
- return ret;
+ if (IS_ERR(vma))
+ return PTR_ERR(vma);
- ret = i915_gem_object_put_fence(new_bo);
+ ret = i915_vma_put_fence(vma);
if (ret)
goto out_unpin;
@@ -798,7 +796,7 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay,
swidth = params->src_w;
swidthsw = calc_swidthsw(dev_priv, params->offset_Y, tmp_width);
sheight = params->src_h;
- iowrite32(i915_gem_obj_ggtt_offset(new_bo) + params->offset_Y, ®s->OBUF_0Y);
+ iowrite32(vma->node.start + params->offset_Y, ®s->OBUF_0Y);
ostride = params->stride_Y;
if (params->format & I915_OVERLAY_YUV_PLANAR) {
@@ -812,8 +810,8 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay,
params->src_w/uv_hscale);
swidthsw |= max_t(u32, tmp_U, tmp_V) << 16;
sheight |= (params->src_h/uv_vscale) << 16;
- iowrite32(i915_gem_obj_ggtt_offset(new_bo) + params->offset_U, ®s->OBUF_0U);
- iowrite32(i915_gem_obj_ggtt_offset(new_bo) + params->offset_V, ®s->OBUF_0V);
+ iowrite32(vma->node.start + params->offset_U, ®s->OBUF_0U);
+ iowrite32(vma->node.start + params->offset_V, ®s->OBUF_0V);
ostride |= params->stride_UV << 16;
}
@@ -834,18 +832,18 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay,
if (ret)
goto out_unpin;
- i915_gem_track_fb(overlay->vid_bo, new_bo,
+ i915_gem_track_fb(overlay->vma->obj, new_bo,
INTEL_FRONTBUFFER_OVERLAY(pipe));
- overlay->old_vid_bo = overlay->vid_bo;
- overlay->vid_bo = new_bo;
+ overlay->old_vma = overlay->vma;
+ overlay->vma = vma;
intel_frontbuffer_flip(dev_priv, INTEL_FRONTBUFFER_OVERLAY(pipe));
return 0;
out_unpin:
- i915_gem_object_ggtt_unpin(new_bo);
+ i915_gem_object_unpin_from_display_plane(vma);
return ret;
}
@@ -1217,7 +1215,7 @@ int intel_overlay_put_image_ioctl(struct drm_device *dev, void *data,
out_unlock:
mutex_unlock(&dev->struct_mutex);
drm_modeset_unlock_all(dev);
- i915_gem_object_put_unlocked(new_bo);
+ i915_gem_object_put(new_bo);
out_free:
kfree(params);
@@ -1368,6 +1366,7 @@ void intel_setup_overlay(struct drm_i915_private *dev_priv)
struct intel_overlay *overlay;
struct drm_i915_gem_object *reg_bo;
struct overlay_registers __iomem *regs;
+ struct i915_vma *vma = NULL;
int ret;
if (!HAS_OVERLAY(dev_priv))
@@ -1401,13 +1400,14 @@ void intel_setup_overlay(struct drm_i915_private *dev_priv)
}
overlay->flip_addr = reg_bo->phys_handle->busaddr;
} else {
- ret = i915_gem_object_ggtt_pin(reg_bo, NULL,
+ vma = i915_gem_object_ggtt_pin(reg_bo, NULL,
0, PAGE_SIZE, PIN_MAPPABLE);
- if (ret) {
+ if (IS_ERR(vma)) {
DRM_ERROR("failed to pin overlay register bo\n");
+ ret = PTR_ERR(vma);
goto out_free_bo;
}
- overlay->flip_addr = i915_gem_obj_ggtt_offset(reg_bo);
+ overlay->flip_addr = vma->node.start;
ret = i915_gem_object_set_to_gtt_domain(reg_bo, true);
if (ret) {
@@ -1439,8 +1439,8 @@ void intel_setup_overlay(struct drm_i915_private *dev_priv)
return;
out_unpin_bo:
- if (!OVERLAY_NEEDS_PHYSICAL(dev_priv))
- i915_gem_object_ggtt_unpin(reg_bo);
+ if (vma)
+ i915_vma_unpin(vma);
out_free_bo:
i915_gem_object_put(reg_bo);
out_free:
@@ -1459,7 +1459,7 @@ void intel_cleanup_overlay(struct drm_i915_private *dev_priv)
* hardware should be off already */
WARN_ON(dev_priv->overlay->active);
- i915_gem_object_put_unlocked(dev_priv->overlay->reg_bo);
+ i915_gem_object_put(dev_priv->overlay->reg_bo);
kfree(dev_priv->overlay);
}
@@ -1482,7 +1482,7 @@ intel_overlay_map_regs_atomic(struct intel_overlay *overlay)
regs = (struct overlay_registers __iomem *)
overlay->reg_bo->phys_handle->vaddr;
else
- regs = io_mapping_map_atomic_wc(dev_priv->ggtt.mappable,
+ regs = io_mapping_map_atomic_wc(&dev_priv->ggtt.mappable,
overlay->flip_addr);
return regs;
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index aef0b105eb58..073d12502736 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -5720,7 +5720,7 @@ static void valleyview_cleanup_pctx(struct drm_i915_private *dev_priv)
if (WARN_ON(!dev_priv->vlv_pctx))
return;
- i915_gem_object_put_unlocked(dev_priv->vlv_pctx);
+ i915_gem_object_put(dev_priv->vlv_pctx);
dev_priv->vlv_pctx = NULL;
}
@@ -6664,7 +6664,7 @@ static void __intel_autoenable_gt_powersave(struct work_struct *work)
rcs->init_context(req);
/* Mark the device busy, calling intel_enable_gt_powersave() */
- i915_add_request_no_flush(req);
+ i915_add_request(req);
unlock:
mutex_unlock(&dev_priv->drm.struct_mutex);
@@ -7813,5 +7813,4 @@ void intel_pm_setup(struct drm_device *dev)
dev_priv->pm.suspended = false;
atomic_set(&dev_priv->pm.wakeref_count, 0);
- atomic_set(&dev_priv->pm.atomic_seq, 0);
}
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index e08a1e1b04e4..1a0095fe8d51 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -176,7 +176,7 @@ intel_emit_post_sync_nonzero_flush(struct drm_i915_gem_request *req)
{
struct intel_ring *ring = req->ring;
u32 scratch_addr =
- req->engine->scratch.gtt_offset + 2 * CACHELINE_BYTES;
+ req->engine->scratch->node.start + 2 * CACHELINE_BYTES;
int ret;
ret = intel_ring_begin(req, 6);
@@ -212,7 +212,7 @@ gen6_render_ring_flush(struct drm_i915_gem_request *req, u32 mode)
{
struct intel_ring *ring = req->ring;
u32 scratch_addr =
- req->engine->scratch.gtt_offset + 2 * CACHELINE_BYTES;
+ req->engine->scratch->node.start + 2 * CACHELINE_BYTES;
u32 flags = 0;
int ret;
@@ -286,7 +286,7 @@ gen7_render_ring_flush(struct drm_i915_gem_request *req, u32 mode)
{
struct intel_ring *ring = req->ring;
u32 scratch_addr =
- req->engine->scratch.gtt_offset + 2 * CACHELINE_BYTES;
+ req->engine->scratch->node.start + 2 * CACHELINE_BYTES;
u32 flags = 0;
int ret;
@@ -370,7 +370,8 @@ gen8_emit_pipe_control(struct drm_i915_gem_request *req,
static int
gen8_render_ring_flush(struct drm_i915_gem_request *req, u32 mode)
{
- u32 scratch_addr = req->engine->scratch.gtt_offset + 2 * CACHELINE_BYTES;
+ u32 scratch_addr =
+ req->engine->scratch->node.start + 2 * CACHELINE_BYTES;
u32 flags = 0;
int ret;
@@ -531,7 +532,7 @@ static int init_ring_common(struct intel_engine_cs *engine)
{
struct drm_i915_private *dev_priv = engine->i915;
struct intel_ring *ring = engine->buffer;
- struct drm_i915_gem_object *obj = ring->obj;
+ struct i915_vma *vma = ring->vma;
int ret = 0;
intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
@@ -571,7 +572,7 @@ static int init_ring_common(struct intel_engine_cs *engine)
* registers with the above sequence (the readback of the HEAD registers
* also enforces ordering), otherwise the hw might lose the new ring
* register values. */
- I915_WRITE_START(engine, i915_gem_obj_ggtt_offset(obj));
+ I915_WRITE_START(engine, vma->node.start);
/* WaClearRingBufHeadRegAtInit:ctg,elk */
if (I915_READ_HEAD(engine))
@@ -586,16 +587,15 @@ static int init_ring_common(struct intel_engine_cs *engine)
/* If the head is still not zero, the ring is dead */
if (wait_for((I915_READ_CTL(engine) & RING_VALID) != 0 &&
- I915_READ_START(engine) == i915_gem_obj_ggtt_offset(obj) &&
+ I915_READ_START(engine) == vma->node.start &&
(I915_READ_HEAD(engine) & HEAD_ADDR) == 0, 50)) {
DRM_ERROR("%s initialization failed "
- "ctl %08x (valid? %d) head %08x tail %08x start %08x [expected %08lx]\n",
+ "ctl %08x (valid? %d) head %08x tail %08x start %08x [expected %08x]\n",
engine->name,
I915_READ_CTL(engine),
I915_READ_CTL(engine) & RING_VALID,
I915_READ_HEAD(engine), I915_READ_TAIL(engine),
- I915_READ_START(engine),
- (unsigned long)i915_gem_obj_ggtt_offset(obj));
+ I915_READ_START(engine), (u32)vma->node.start);
ret = -EIO;
goto out;
}
@@ -615,42 +615,44 @@ out:
void intel_fini_pipe_control(struct intel_engine_cs *engine)
{
- if (engine->scratch.obj == NULL)
+ if (!engine->scratch)
return;
- i915_gem_object_ggtt_unpin(engine->scratch.obj);
- i915_gem_object_put(engine->scratch.obj);
- engine->scratch.obj = NULL;
+ i915_vma_unpin(engine->scratch);
+ i915_gem_object_put(engine->scratch->obj);
+ engine->scratch = NULL;
}
int intel_init_pipe_control(struct intel_engine_cs *engine, int size)
{
struct drm_i915_gem_object *obj;
+ struct i915_vma *vma;
int ret;
- WARN_ON(engine->scratch.obj);
+ WARN_ON(engine->scratch);
obj = i915_gem_object_create_stolen(&engine->i915->drm, size);
if (!obj)
- obj = i915_gem_object_create(&engine->i915->drm, size);
+ obj = i915_gem_object_create_internal(&engine->i915->drm, size);
if (IS_ERR(obj)) {
DRM_ERROR("Failed to allocate scratch page\n");
ret = PTR_ERR(obj);
goto err;
}
- ret = i915_gem_object_ggtt_pin(obj, NULL, 0, 4096, PIN_HIGH);
- if (ret)
+ vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 4096, PIN_HIGH);
+ if (IS_ERR(vma)) {
+ ret = PTR_ERR(vma);
goto err_unref;
+ }
- engine->scratch.obj = obj;
- engine->scratch.gtt_offset = i915_gem_obj_ggtt_offset(obj);
- DRM_DEBUG_DRIVER("%s pipe control offset: 0x%08x\n",
- engine->name, engine->scratch.gtt_offset);
+ engine->scratch = vma;
+ DRM_DEBUG_DRIVER("%s pipe control offset: 0x%08llx\n",
+ engine->name, (long long)vma->node.start);
return 0;
err_unref:
- i915_gem_object_put(engine->scratch.obj);
+ i915_gem_object_put(obj);
err:
return ret;
}
@@ -698,7 +700,7 @@ static int intel_rcs_ctx_init(struct drm_i915_gem_request *req)
if (ret != 0)
return ret;
- ret = i915_gem_render_state_init(req);
+ ret = i915_gem_render_state_emit(req);
if (ret)
return ret;
@@ -1300,10 +1302,13 @@ static void render_ring_cleanup(struct intel_engine_cs *engine)
{
struct drm_i915_private *dev_priv = engine->i915;
- if (dev_priv->semaphore_obj) {
- i915_gem_object_ggtt_unpin(dev_priv->semaphore_obj);
- i915_gem_object_put(dev_priv->semaphore_obj);
- dev_priv->semaphore_obj = NULL;
+ if (dev_priv->semaphore_vma) {
+ struct drm_i915_gem_object *obj = dev_priv->semaphore_vma->obj;
+
+ i915_vma_unpin(dev_priv->semaphore_vma);
+ dev_priv->semaphore_vma = NULL;
+
+ i915_gem_object_put(obj);
}
intel_fini_pipe_control(engine);
@@ -1764,7 +1769,7 @@ i830_emit_bb_start(struct drm_i915_gem_request *req,
unsigned int dispatch_flags)
{
struct intel_ring *ring = req->ring;
- u32 cs_offset = req->engine->scratch.gtt_offset;
+ u32 cs_offset = req->engine->scratch->node.start;
int ret;
ret = intel_ring_begin(req, 6);
@@ -1853,67 +1858,68 @@ static void cleanup_phys_status_page(struct intel_engine_cs *engine)
static void cleanup_status_page(struct intel_engine_cs *engine)
{
- struct drm_i915_gem_object *obj;
+ struct i915_vma *vma;
- obj = engine->status_page.obj;
- if (obj == NULL)
+ vma = engine->status_page.vma;
+ if (!vma)
return;
+ engine->status_page.vma = NULL;
- kunmap(sg_page(obj->pages->sgl));
- i915_gem_object_ggtt_unpin(obj);
- i915_gem_object_put(obj);
- engine->status_page.obj = NULL;
+ kunmap(kmap_to_page(engine->status_page.page_addr));
+ i915_vma_unpin(vma);
}
static int init_status_page(struct intel_engine_cs *engine)
{
- struct drm_i915_gem_object *obj = engine->status_page.obj;
+ struct drm_i915_gem_object *obj;
+ struct i915_vma *vma;
+ unsigned int flags;
+ int ret;
- if (obj == NULL) {
- unsigned flags;
- int ret;
+ if (engine->status_page.vma)
+ return 0;
- obj = i915_gem_object_create(&engine->i915->drm, 4096);
- if (IS_ERR(obj)) {
- DRM_ERROR("Failed to allocate status page\n");
- return PTR_ERR(obj);
- }
+ obj = i915_gem_object_create_internal(&engine->i915->drm, 4096);
+ if (IS_ERR(obj)) {
+ DRM_ERROR("Failed to allocate status page\n");
+ return PTR_ERR(obj);
+ }
- ret = i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
- if (ret)
- goto err_unref;
-
- flags = 0;
- if (!HAS_LLC(engine->i915))
- /* On g33, we cannot place HWS above 256MiB, so
- * restrict its pinning to the low mappable arena.
- * Though this restriction is not documented for
- * gen4, gen5, or byt, they also behave similarly
- * and hang if the HWS is placed at the top of the
- * GTT. To generalise, it appears that all !llc
- * platforms have issues with us placing the HWS
- * above the mappable region (even though we never
- * actualy map it).
- */
- flags |= PIN_MAPPABLE;
- ret = i915_gem_object_ggtt_pin(obj, NULL, 0, 4096, flags);
- if (ret) {
-err_unref:
- i915_gem_object_put(obj);
- return ret;
- }
+ ret = i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
+ if (ret)
+ goto err_unref;
- engine->status_page.obj = obj;
+ flags = 0;
+ if (!HAS_LLC(engine->i915))
+ /* On g33, we cannot place HWS above 256MiB, so
+ * restrict its pinning to the low mappable arena.
+ * Though this restriction is not documented for
+ * gen4, gen5, or byt, they also behave similarly
+ * and hang if the HWS is placed at the top of the
+ * GTT. To generalise, it appears that all !llc
+ * platforms have issues with us placing the HWS
+ * above the mappable region (even though we never
+ * actualy map it).
+ */
+ flags |= PIN_MAPPABLE;
+ vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 4096, flags);
+ if (IS_ERR(vma)) {
+ ret = PTR_ERR(vma);
+ goto err_unref;
}
- engine->status_page.gfx_addr = i915_gem_obj_ggtt_offset(obj);
- engine->status_page.page_addr = kmap(sg_page(obj->pages->sgl));
- memset(engine->status_page.page_addr, 0, PAGE_SIZE);
+ engine->status_page.vma = vma;
+ engine->status_page.gfx_addr = vma->node.start;
+ engine->status_page.page_addr = kmap(sg_page(obj->mm.pages->sgl));
DRM_DEBUG_DRIVER("%s hws offset: 0x%08x\n",
engine->name, engine->status_page.gfx_addr);
return 0;
+
+err_unref:
+ i915_gem_object_put(obj);
+ return ret;
}
static int init_phys_status_page(struct intel_engine_cs *engine)
@@ -1935,55 +1941,32 @@ static int init_phys_status_page(struct intel_engine_cs *engine)
int intel_ring_pin(struct intel_ring *ring)
{
- struct drm_i915_private *dev_priv = ring->engine->i915;
- struct drm_i915_gem_object *obj = ring->obj;
/* Ring wraparound at offset 0 sometimes hangs. No idea why. */
- unsigned flags = PIN_OFFSET_BIAS | 4096;
+ unsigned int flags = PIN_GLOBAL | PIN_OFFSET_BIAS | 4096;
void *addr;
int ret;
- if (HAS_LLC(dev_priv) && !obj->stolen) {
- ret = i915_gem_object_ggtt_pin(obj, NULL, 0, PAGE_SIZE, flags);
- if (ret)
- return ret;
+ GEM_BUG_ON(ring->vaddr);
- ret = i915_gem_object_set_to_cpu_domain(obj, true);
- if (ret)
- goto err_unpin;
-
- addr = i915_gem_object_pin_map(obj);
- if (IS_ERR(addr)) {
- ret = PTR_ERR(addr);
- goto err_unpin;
- }
- } else {
- ret = i915_gem_object_ggtt_pin(obj, NULL, 0, PAGE_SIZE,
- flags | PIN_MAPPABLE);
- if (ret)
- return ret;
-
- ret = i915_gem_object_set_to_gtt_domain(obj, true);
- if (ret)
- goto err_unpin;
+ if (ring->vma->obj->stolen)
+ flags |= PIN_MAPPABLE;
- /* Access through the GTT requires the device to be awake. */
- assert_rpm_wakelock_held(dev_priv);
+ ret = i915_vma_pin(ring->vma, 0, PAGE_SIZE, flags);
+ if (unlikely(ret))
+ return ret;
- addr = (void __force *)
- i915_vma_pin_iomap(i915_gem_obj_to_ggtt(obj));
- if (IS_ERR(addr)) {
- ret = PTR_ERR(addr);
- goto err_unpin;
- }
+ if (i915_vma_is_map_and_fenceable(ring->vma))
+ addr = (void __force *)i915_vma_pin_iomap(ring->vma);
+ else
+ addr = i915_gem_object_pin_map(ring->vma->obj,
+ !HAS_LLC(ring->engine->i915));
+ if (IS_ERR(addr)) {
+ i915_vma_unpin(ring->vma);
+ return PTR_ERR(addr);
}
ring->vaddr = addr;
- ring->vma = i915_gem_obj_to_ggtt(obj);
return 0;
-
-err_unpin:
- i915_gem_object_ggtt_unpin(obj);
- return ret;
}
void intel_ring_unpin(struct intel_ring *ring)
@@ -1991,60 +1974,63 @@ void intel_ring_unpin(struct intel_ring *ring)
GEM_BUG_ON(!ring->vma);
GEM_BUG_ON(!ring->vaddr);
- if (HAS_LLC(ring->engine->i915) && !ring->obj->stolen)
- i915_gem_object_unpin_map(ring->obj);
- else
+ if (i915_vma_is_map_and_fenceable(ring->vma))
i915_vma_unpin_iomap(ring->vma);
+ else
+ i915_gem_object_unpin_map(ring->vma->obj);
ring->vaddr = NULL;
- i915_gem_object_ggtt_unpin(ring->obj);
- ring->vma = NULL;
+ i915_vma_unpin(ring->vma);
}
-static void intel_destroy_ringbuffer_obj(struct intel_ring *ring)
-{
- i915_gem_object_put(ring->obj);
- ring->obj = NULL;
-}
-
-static int intel_alloc_ringbuffer_obj(struct drm_device *dev,
- struct intel_ring *ring)
+static struct i915_vma *
+intel_ring_create_vma(struct drm_device *dev, int size)
{
struct drm_i915_gem_object *obj;
+ struct i915_vma *vma;
+ int ret;
- obj = NULL;
- if (!HAS_LLC(dev))
- obj = i915_gem_object_create_stolen(dev, ring->size);
+ obj = i915_gem_object_create_stolen(dev, size);
if (obj == NULL)
- obj = i915_gem_object_create(dev, ring->size);
+ obj = i915_gem_object_create(dev, size);
if (IS_ERR(obj))
- return PTR_ERR(obj);
+ return ERR_CAST(obj);
/* mark ring buffers as read-only from GPU side by default */
obj->gt_ro = 1;
- ring->obj = obj;
+ ret = i915_gem_object_set_to_gtt_domain(obj, false);
+ if (ret) {
+ vma = ERR_PTR(ret);
+ goto err;
+ }
- return 0;
+ vma = i915_gem_obj_lookup_or_create_vma(obj,
+ &to_i915(dev)->ggtt.base,
+ NULL);
+ if (IS_ERR(vma))
+ goto err;
+
+ return vma;
+
+err:
+ i915_gem_object_put(obj);
+ return vma;
}
struct intel_ring *
intel_engine_create_ring(struct intel_engine_cs *engine, int size)
{
struct intel_ring *ring;
- int ret;
+ struct i915_vma *vma;
GEM_BUG_ON(!is_power_of_2(size));
ring = kzalloc(sizeof(*ring), GFP_KERNEL);
- if (ring == NULL) {
- DRM_DEBUG_DRIVER("Failed to allocate ringbuffer %s\n",
- engine->name);
+ if (!ring)
return ERR_PTR(-ENOMEM);
- }
ring->engine = engine;
- list_add(&ring->link, &engine->buffers);
INIT_LIST_HEAD(&ring->request_list);
@@ -2060,22 +2046,21 @@ intel_engine_create_ring(struct intel_engine_cs *engine, int size)
ring->last_retired_head = -1;
intel_ring_update_space(ring);
- ret = intel_alloc_ringbuffer_obj(&engine->i915->drm, ring);
- if (ret) {
- DRM_DEBUG_DRIVER("Failed to allocate ringbuffer %s: %d\n",
- engine->name, ret);
- list_del(&ring->link);
+ vma = intel_ring_create_vma(&engine->i915->drm, size);
+ if (IS_ERR(vma)) {
kfree(ring);
- return ERR_PTR(ret);
+ return ERR_CAST(vma);
}
+ ring->vma = vma;
+ list_add(&ring->link, &engine->buffers);
return ring;
}
void
intel_ring_free(struct intel_ring *ring)
{
- intel_destroy_ringbuffer_obj(ring);
+ __i915_gem_object_release_unless_active(ring->vma->obj);
list_del(&ring->link);
kfree(ring);
}
@@ -2092,8 +2077,7 @@ static int intel_ring_context_pin(struct i915_gem_context *ctx,
return 0;
if (ce->state) {
- ret = i915_gem_object_ggtt_pin(ce->state, NULL, 0,
- ctx->ggtt_alignment, 0);
+ ret = i915_vma_pin(ce->state, 0, 0, PIN_GLOBAL | PIN_HIGH);
if (ret)
goto error;
}
@@ -2127,7 +2111,7 @@ static void intel_ring_context_unpin(struct i915_gem_context *ctx,
return;
if (ce->state)
- i915_gem_object_ggtt_unpin(ce->state);
+ i915_vma_unpin(ce->state);
i915_gem_context_put(ctx);
}
@@ -2159,13 +2143,11 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine)
ret = intel_ring_context_pin(dev_priv->kernel_context, engine);
if (ret)
goto error;
-
ring = intel_engine_create_ring(engine, 32 * PAGE_SIZE);
if (IS_ERR(ring)) {
ret = PTR_ERR(ring);
goto error;
}
- engine->buffer = ring;
if (I915_NEED_GFX_HWS(dev_priv)) {
ret = init_status_page(engine);
@@ -2180,11 +2162,10 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine)
ret = intel_ring_pin(ring);
if (ret) {
- DRM_ERROR("Failed to pin and map ringbuffer %s: %d\n",
- engine->name, ret);
- intel_destroy_ringbuffer_obj(ring);
+ intel_ring_free(ring);
goto error;
}
+ engine->buffer = ring;
return 0;
@@ -2389,8 +2370,8 @@ void intel_engine_init_seqno(struct intel_engine_cs *engine, u32 seqno)
if (HAS_VEBOX(dev_priv))
I915_WRITE(RING_SYNC_2(engine->mmio_base), 0);
}
- if (dev_priv->semaphore_obj) {
- struct drm_i915_gem_object *obj = dev_priv->semaphore_obj;
+ if (dev_priv->semaphore_vma) {
+ struct drm_i915_gem_object *obj = dev_priv->semaphore_vma->obj;
struct page *page = i915_gem_object_get_dirty_page(obj, 0);
void *semaphores = kmap(page);
memset(semaphores + GEN8_SEMAPHORE_OFFSET(engine->id, 0),
@@ -2410,9 +2391,7 @@ void intel_engine_init_seqno(struct intel_engine_cs *engine, u32 seqno)
/* After manually advancing the seqno, fake the interrupt in case
* there are any waiters for that seqno.
*/
- rcu_read_lock();
intel_engine_wakeup(engine);
- rcu_read_unlock();
}
static void gen6_bsd_submit_request(struct drm_i915_gem_request *request)
@@ -2618,35 +2597,36 @@ static int gen6_ring_flush(struct drm_i915_gem_request *req, u32 mode)
static void intel_ring_init_semaphores(struct drm_i915_private *dev_priv,
struct intel_engine_cs *engine)
{
- struct drm_i915_gem_object *obj;
- int ret, i;
+ int i;
if (!i915.semaphores)
return;
- if (INTEL_GEN(dev_priv) >= 8 && !dev_priv->semaphore_obj) {
+ if (INTEL_GEN(dev_priv) >= 8 && !dev_priv->semaphore_vma) {
+ struct drm_i915_gem_object *obj;
+ struct i915_vma *vma = ERR_PTR(-ENODEV);
+
obj = i915_gem_object_create(&dev_priv->drm, 4096);
- if (IS_ERR(obj)) {
- DRM_ERROR("Failed to allocate semaphore bo. Disabling semaphores\n");
- i915.semaphores = 0;
- } else {
+ if (!IS_ERR(obj)) {
i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
- ret = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 0);
- if (ret != 0) {
+ vma = i915_gem_object_ggtt_pin(obj, NULL,
+ 0, 0, PIN_HIGH);
+ }
+
+ if (IS_ERR(vma)) {
+ if (!IS_ERR(obj))
i915_gem_object_put(obj);
- DRM_ERROR("Failed to pin semaphore bo. Disabling semaphores\n");
- i915.semaphores = 0;
- } else {
- dev_priv->semaphore_obj = obj;
- }
+
+ DRM_ERROR("Failed to pin semaphore bo. Disabling semaphores\n");
+ i915.semaphores = 0;
+ return;
}
- }
- if (!i915.semaphores)
- return;
+ dev_priv->semaphore_vma = vma;
+ }
if (INTEL_GEN(dev_priv) >= 8) {
- u64 offset = i915_gem_obj_ggtt_offset(dev_priv->semaphore_obj);
+ u64 offset = dev_priv->semaphore_vma->node.start;
engine->semaphore.sync_to = gen8_ring_sync_to;
engine->semaphore.signal = gen8_xcs_signal;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 43e545e44352..5cdd3c960903 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -26,10 +26,10 @@
*/
#define I915_RING_FREE_SPACE 64
-struct intel_hw_status_page {
+struct intel_hw_status_page {
u32 *page_addr;
unsigned int gfx_addr;
- struct drm_i915_gem_object *obj;
+ struct i915_vma *vma;
};
#define I915_READ_TAIL(engine) I915_READ(RING_TAIL((engine)->mmio_base))
@@ -57,10 +57,10 @@ struct intel_hw_status_page {
#define GEN8_SEMAPHORE_OFFSET(__from, __to) \
(((__from) * I915_NUM_ENGINES + (__to)) * gen8_semaphore_seqno_size)
#define GEN8_SIGNAL_OFFSET(__ring, to) \
- (i915_gem_obj_ggtt_offset(dev_priv->semaphore_obj) + \
+ (dev_priv->semaphore_vma->node.start + \
GEN8_SEMAPHORE_OFFSET((__ring)->id, (to)))
#define GEN8_WAIT_OFFSET(__ring, from) \
- (i915_gem_obj_ggtt_offset(dev_priv->semaphore_obj) + \
+ (dev_priv->semaphore_vma->node.start + \
GEN8_SEMAPHORE_OFFSET(from, (__ring)->id))
enum intel_engine_hangcheck_action {
@@ -75,7 +75,6 @@ enum intel_engine_hangcheck_action {
struct intel_engine_hangcheck {
u64 acthd;
- unsigned long user_interrupts;
u32 seqno;
int score;
enum intel_engine_hangcheck_action action;
@@ -84,9 +83,8 @@ struct intel_engine_hangcheck {
};
struct intel_ring {
- struct drm_i915_gem_object *obj;
- void *vaddr;
struct i915_vma *vma;
+ void *vaddr;
struct intel_engine_cs *engine;
struct list_head link;
@@ -98,6 +96,7 @@ struct intel_ring {
int space;
int size;
int effective_size;
+ bool vmap;
/** We track the position of the requests in the ring buffer, and
* when each is retired we increment last_retired_head as the GPU
@@ -124,15 +123,16 @@ struct drm_i915_reg_table;
* an option for future use.
* size: size of the batch in DWORDS
*/
-struct i915_ctx_workarounds {
+struct i915_ctx_workarounds {
struct i915_wa_ctx_bb {
u32 offset;
u32 size;
} indirect_ctx, per_ctx;
- struct drm_i915_gem_object *obj;
+ struct i915_vma *vma;
};
struct drm_i915_gem_request;
+struct intel_render_state;
struct intel_engine_cs {
struct drm_i915_private *i915;
@@ -155,6 +155,8 @@ struct intel_engine_cs {
struct intel_ring *buffer;
struct list_head buffers;
+ struct intel_render_state *render_state;
+
/* Rather than have every client wait upon all user interrupts,
* with the herd waking after every interrupt and each doing the
* heavyweight seqno dance, we delegate the task (of being the
@@ -172,8 +174,7 @@ struct intel_engine_cs {
* the overhead of waking that client is much preferred.
*/
struct intel_breadcrumbs {
- struct task_struct *irq_seqno_bh; /* bh for user interrupts */
- unsigned long irq_wakeups;
+ struct task_struct __rcu *irq_seqno_bh; /* bh for interrupts */
bool irq_posted;
spinlock_t lock; /* protects the lists of requests */
@@ -183,6 +184,9 @@ struct intel_engine_cs {
struct task_struct *signaler; /* used for fence signalling */
struct drm_i915_gem_request *first_signal;
struct timer_list fake_irq; /* used after a missed interrupt */
+ struct timer_list hangcheck; /* detect missed interrupts */
+
+ unsigned long timeout;
bool irq_enabled : 1;
bool rpm_wakelock : 1;
@@ -288,11 +292,14 @@ struct intel_engine_cs {
/* Execlists */
struct tasklet_struct irq_tasklet;
spinlock_t execlist_lock; /* used inside tasklet, use spin_lock_bh */
+ struct execlist_port {
+ struct drm_i915_gem_request *request;
+ unsigned count;
+ } execlist_port[2];
struct list_head execlist_queue;
unsigned int fw_domains;
- unsigned int next_context_status_buffer;
- unsigned int idle_lite_restore_wa;
bool disable_lite_restore_wa;
+ bool preempt_wa;
u32 ctx_desc_template;
/**
@@ -319,10 +326,7 @@ struct intel_engine_cs {
struct intel_engine_hangcheck hangcheck;
- struct {
- struct drm_i915_gem_object *obj;
- u32 gtt_offset;
- } scratch;
+ struct i915_vma *scratch;
bool needs_cmd_parser;
@@ -363,6 +367,21 @@ intel_engine_flag(const struct intel_engine_cs *engine)
return 1 << engine->id;
}
+/**
+ * intel_engine_needs_cmd_parser() - should a given engine use software
+ * command parsing?
+ * @engine: the engine in question
+ *
+ * Only certain platforms require software batch buffer command parsing, and
+ * only when enabled via module parameter.
+ *
+ * Return: true if the engine requires software command parsing
+ */
+static inline bool intel_engine_needs_cmd_parser(struct intel_engine_cs *engine)
+{
+ return engine->needs_cmd_parser;
+}
+
static inline u32
intel_engine_sync_index(struct intel_engine_cs *engine,
struct intel_engine_cs *other)
@@ -540,27 +559,33 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request);
static inline bool intel_engine_has_waiter(struct intel_engine_cs *engine)
{
- return READ_ONCE(engine->breadcrumbs.irq_seqno_bh);
+ return rcu_access_pointer(engine->breadcrumbs.irq_seqno_bh);
}
static inline bool intel_engine_wakeup(struct intel_engine_cs *engine)
{
bool wakeup = false;
- struct task_struct *tsk = READ_ONCE(engine->breadcrumbs.irq_seqno_bh);
+
/* Note that for this not to dangerously chase a dangling pointer,
- * the caller is responsible for ensure that the task remain valid for
- * wake_up_process() i.e. that the RCU grace period cannot expire.
+ * we must hold the rcu_read_lock here.
*
* Also note that tsk is likely to be in !TASK_RUNNING state so an
* early test for tsk->state != TASK_RUNNING before wake_up_process()
* is unlikely to be beneficial.
*/
- if (tsk)
- wakeup = wake_up_process(tsk);
+ if (rcu_access_pointer(engine->breadcrumbs.irq_seqno_bh)) {
+ struct task_struct *tsk;
+
+ rcu_read_lock();
+ tsk = rcu_dereference(engine->breadcrumbs.irq_seqno_bh);
+ if (tsk)
+ wakeup = wake_up_process(tsk);
+ rcu_read_unlock();
+ }
+
return wakeup;
}
-void intel_engine_enable_fake_irq(struct intel_engine_cs *engine);
void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine);
unsigned int intel_kick_waiters(struct drm_i915_private *i915);
unsigned int intel_kick_signalers(struct drm_i915_private *i915);
diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c
index 1c603bbe5784..ef8078bc15b1 100644
--- a/drivers/gpu/drm/i915/intel_runtime_pm.c
+++ b/drivers/gpu/drm/i915/intel_runtime_pm.c
@@ -2733,8 +2733,7 @@ void intel_runtime_pm_put(struct drm_i915_private *dev_priv)
struct device *device = &dev->pdev->dev;
assert_rpm_wakelock_held(dev_priv);
- if (atomic_dec_and_test(&dev_priv->pm.wakeref_count))
- atomic_inc(&dev_priv->pm.atomic_seq);
+ atomic_dec(&dev_priv->pm.wakeref_count);
pm_runtime_mark_last_busy(device);
pm_runtime_put_autosuspend(device);
diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c
index 5beafd4bc1c1..010d69810657 100644
--- a/drivers/gpu/drm/i915/intel_sprite.c
+++ b/drivers/gpu/drm/i915/intel_sprite.c
@@ -477,8 +477,8 @@ vlv_update_plane(struct drm_plane *dplane,
I915_WRITE(SPSIZE(pipe, plane), (crtc_h << 16) | crtc_w);
I915_WRITE(SPCNTR(pipe, plane), sprctl);
- I915_WRITE(SPSURF(pipe, plane), i915_gem_obj_ggtt_offset(obj) +
- sprsurf_offset);
+ I915_WRITE(SPSURF(pipe, plane),
+ i915_gem_object_ggtt_offset(obj, NULL) + sprsurf_offset);
POSTING_READ(SPSURF(pipe, plane));
}
@@ -617,7 +617,7 @@ ivb_update_plane(struct drm_plane *plane,
I915_WRITE(SPRSCALE(pipe), sprscale);
I915_WRITE(SPRCTL(pipe), sprctl);
I915_WRITE(SPRSURF(pipe),
- i915_gem_obj_ggtt_offset(obj) + sprsurf_offset);
+ i915_gem_object_ggtt_offset(obj, NULL) + sprsurf_offset);
POSTING_READ(SPRSURF(pipe));
}
@@ -746,7 +746,7 @@ ilk_update_plane(struct drm_plane *plane,
I915_WRITE(DVSSCALE(pipe), dvsscale);
I915_WRITE(DVSCNTR(pipe), dvscntr);
I915_WRITE(DVSSURF(pipe),
- i915_gem_obj_ggtt_offset(obj) + dvssurf_offset);
+ i915_gem_object_ggtt_offset(obj, NULL) + dvssurf_offset);
POSTING_READ(DVSSURF(pipe));
}
diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
index 43f833901b8e..a6b04da4bf21 100644
--- a/drivers/gpu/drm/i915/intel_uncore.c
+++ b/drivers/gpu/drm/i915/intel_uncore.c
@@ -1414,7 +1414,7 @@ int i915_reg_read_ioctl(struct drm_device *dev,
struct register_whitelist const *entry = whitelist;
unsigned size;
i915_reg_t offset_ldw, offset_udw;
- int i, ret = 0;
+ int i, ret;
for (i = 0; i < ARRAY_SIZE(whitelist); i++, entry++) {
if (i915_mmio_reg_offset(entry->offset_ldw) == (reg->offset & -entry->size) &&
@@ -1436,6 +1436,7 @@ int i915_reg_read_ioctl(struct drm_device *dev,
intel_runtime_pm_get(dev_priv);
+ ret = 0;
switch (size) {
case 8 | 1:
reg->val = I915_READ64_2x32(offset_ldw, offset_udw);
@@ -1454,10 +1455,9 @@ int i915_reg_read_ioctl(struct drm_device *dev,
break;
default:
ret = -EINVAL;
- goto out;
+ break;
}
-out:
intel_runtime_pm_put(dev_priv);
return ret;
}
diff --git a/drivers/gpu/drm/sis/sis_mm.c b/drivers/gpu/drm/sis/sis_mm.c
index 03defda77766..1622db24cd39 100644
--- a/drivers/gpu/drm/sis/sis_mm.c
+++ b/drivers/gpu/drm/sis/sis_mm.c
@@ -109,8 +109,7 @@ static int sis_drm_alloc(struct drm_device *dev, struct drm_file *file,
if (pool == AGP_TYPE) {
retval = drm_mm_insert_node(&dev_priv->agp_mm,
&item->mm_node,
- mem->size, 0,
- DRM_MM_SEARCH_DEFAULT);
+ mem->size);
offset = item->mm_node.start;
} else {
#if defined(CONFIG_FB_SIS) || defined(CONFIG_FB_SIS_MODULE)
@@ -122,8 +121,7 @@ static int sis_drm_alloc(struct drm_device *dev, struct drm_file *file,
#else
retval = drm_mm_insert_node(&dev_priv->vram_mm,
&item->mm_node,
- mem->size, 0,
- DRM_MM_SEARCH_DEFAULT);
+ mem->size);
offset = item->mm_node.start;
#endif
}
diff --git a/drivers/gpu/drm/ttm/ttm_bo_manager.c b/drivers/gpu/drm/ttm/ttm_bo_manager.c
index aa0bd054d3e9..a3ddc95825f7 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_manager.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_manager.c
@@ -54,9 +54,8 @@ static int ttm_bo_man_get_node(struct ttm_mem_type_manager *man,
{
struct ttm_range_manager *rman = (struct ttm_range_manager *) man->priv;
struct drm_mm *mm = &rman->mm;
- struct drm_mm_node *node = NULL;
- enum drm_mm_search_flags sflags = DRM_MM_SEARCH_BEST;
- enum drm_mm_allocator_flags aflags = DRM_MM_CREATE_DEFAULT;
+ struct drm_mm_node *node;
+ enum drm_mm_flags flags;
unsigned long lpfn;
int ret;
@@ -68,16 +67,14 @@ static int ttm_bo_man_get_node(struct ttm_mem_type_manager *man,
if (!node)
return -ENOMEM;
- if (place->flags & TTM_PL_FLAG_TOPDOWN) {
- sflags = DRM_MM_SEARCH_BELOW;
- aflags = DRM_MM_CREATE_TOP;
- }
+ flags = 0;
+ if (place->flags & TTM_PL_FLAG_TOPDOWN)
+ flags = DRM_MM_INSERT_HIGH;
spin_lock(&rman->lock);
ret = drm_mm_insert_node_in_range_generic(mm, node, mem->num_pages,
mem->page_alignment, 0,
- place->fpfn, lpfn,
- sflags, aflags);
+ place->fpfn, lpfn, flags);
spin_unlock(&rman->lock);
if (unlikely(ret)) {
diff --git a/drivers/gpu/drm/via/via_mm.c b/drivers/gpu/drm/via/via_mm.c
index a04ef1c992d9..4217d66a5cc6 100644
--- a/drivers/gpu/drm/via/via_mm.c
+++ b/drivers/gpu/drm/via/via_mm.c
@@ -140,11 +140,11 @@ int via_mem_alloc(struct drm_device *dev, void *data,
if (mem->type == VIA_MEM_AGP)
retval = drm_mm_insert_node(&dev_priv->agp_mm,
&item->mm_node,
- tmpSize, 0, DRM_MM_SEARCH_DEFAULT);
+ tmpSize);
else
retval = drm_mm_insert_node(&dev_priv->vram_mm,
&item->mm_node,
- tmpSize, 0, DRM_MM_SEARCH_DEFAULT);
+ tmpSize);
if (retval)
goto fail_alloc;
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf.c
index aa04fb0159a7..77cb7c627e09 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf.c
@@ -673,16 +673,10 @@ static bool vmw_cmdbuf_try_alloc(struct vmw_cmdbuf_man *man,
memset(info->node, 0, sizeof(*info->node));
spin_lock_bh(&man->lock);
- ret = drm_mm_insert_node_generic(&man->mm, info->node, info->page_size,
- 0, 0,
- DRM_MM_SEARCH_DEFAULT,
- DRM_MM_CREATE_DEFAULT);
+ ret = drm_mm_insert_node(&man->mm, info->node, info->page_size);
if (ret) {
vmw_cmdbuf_man_process(man);
- ret = drm_mm_insert_node_generic(&man->mm, info->node,
- info->page_size, 0, 0,
- DRM_MM_SEARCH_DEFAULT,
- DRM_MM_CREATE_DEFAULT);
+ ret = drm_mm_insert_node(&man->mm, info->node, info->page_size);
}
spin_unlock_bh(&man->lock);
diff --git a/drivers/staging/android/sync_debug.c b/drivers/staging/android/sync_debug.c
index 4c5a85595a85..961a94bf156c 100644
--- a/drivers/staging/android/sync_debug.c
+++ b/drivers/staging/android/sync_debug.c
@@ -135,10 +135,16 @@ static void sync_print_sync_file(struct seq_file *s,
int i;
seq_printf(s, "[%p] %s: %s\n", sync_file, sync_file->name,
- sync_status_str(atomic_read(&sync_file->status)));
+ sync_status_str(!fence_is_signaled(sync_file->fence)));
- for (i = 0; i < sync_file->num_fences; ++i)
- sync_print_fence(s, sync_file->cbs[i].fence, true);
+ if (fence_is_array(sync_file->fence)) {
+ struct fence_array *array = to_fence_array(sync_file->fence);
+
+ for (i = 0; i < array->num_fences; ++i)
+ sync_print_fence(s, array->fences[i], true);
+ } else {
+ sync_print_fence(s, sync_file->fence, true);
+ }
}
static int sync_debugfs_show(struct seq_file *s, void *unused)
diff --git a/include/drm/drm_gem.h b/include/drm/drm_gem.h
index fca1cd1b9c26..f06f6c53f476 100644
--- a/include/drm/drm_gem.h
+++ b/include/drm/drm_gem.h
@@ -127,21 +127,6 @@ struct drm_gem_object {
uint32_t write_domain;
/**
- * @pending_read_domains:
- *
- * While validating an exec operation, the
- * new read/write domain values are computed here.
- * They will be transferred to the above values
- * at the point that any cache flushing occurs
- */
- uint32_t pending_read_domains;
-
- /**
- * @pending_write_domain: Write domain similar to @pending_read_domains.
- */
- uint32_t pending_write_domain;
-
- /**
* @dma_buf:
*
* dma-buf associated with this GEM object.
diff --git a/include/drm/drm_mm.h b/include/drm/drm_mm.h
index fc65118e5077..6b4025833e6f 100644
--- a/include/drm/drm_mm.h
+++ b/include/drm/drm_mm.h
@@ -37,6 +37,7 @@
* Generic range manager structs
*/
#include <linux/bug.h>
+#include <linux/rbtree.h>
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/spinlock.h>
@@ -44,33 +45,27 @@
#include <linux/seq_file.h>
#endif
-enum drm_mm_search_flags {
- DRM_MM_SEARCH_DEFAULT = 0,
- DRM_MM_SEARCH_BEST = 1 << 0,
- DRM_MM_SEARCH_BELOW = 1 << 1,
+enum drm_mm_flags {
+ DRM_MM_INSERT_BEST = 0,
+ DRM_MM_INSERT_LOW,
+ DRM_MM_INSERT_HIGH,
+ DRM_MM_INSERT_EVICT,
};
-enum drm_mm_allocator_flags {
- DRM_MM_CREATE_DEFAULT = 0,
- DRM_MM_CREATE_TOP = 1 << 0,
-};
-
-#define DRM_MM_BOTTOMUP DRM_MM_SEARCH_DEFAULT, DRM_MM_CREATE_DEFAULT
-#define DRM_MM_TOPDOWN DRM_MM_SEARCH_BELOW, DRM_MM_CREATE_TOP
-
struct drm_mm_node {
+ struct drm_mm *mm;
struct list_head node_list;
struct list_head hole_stack;
- unsigned hole_follows : 1;
- unsigned scanned_block : 1;
- unsigned scanned_prev_free : 1;
- unsigned scanned_next_free : 1;
- unsigned scanned_preceeds_hole : 1;
- unsigned allocated : 1;
- unsigned long color;
+ struct rb_node rb;
+ struct rb_node rb_hole_size;
+ struct rb_node rb_hole_addr;
u64 start;
u64 size;
- struct drm_mm *mm;
+ u64 __subtree_last;
+ u64 hole_size;
+ unsigned long color;
+ unsigned allocated : 1;
+ unsigned scanned_block : 1;
};
struct drm_mm {
@@ -79,21 +74,30 @@ struct drm_mm {
/* head_node.node_list is the list of all memory nodes, ordered
* according to the (increasing) start address of the memory node. */
struct drm_mm_node head_node;
- unsigned int scan_check_range : 1;
- unsigned scan_alignment;
- unsigned long scan_color;
- u64 scan_size;
- u64 scan_hit_start;
- u64 scan_hit_end;
- unsigned scanned_blocks;
- u64 scan_start;
- u64 scan_end;
- struct drm_mm_node *prev_scanned_node;
+ /* Keep an interval_tree for fast lookup of drm_mm_nodes by address. */
+ struct rb_root interval_tree;
+ struct rb_root holes_size;
+ struct rb_root holes_addr;
void (*color_adjust)(struct drm_mm_node *node, unsigned long color,
u64 *start, u64 *end);
};
+struct drm_mm_scan {
+ struct drm_mm_node *prev_node;
+
+ u64 size;
+ u64 alignment;
+ u64 alignment_mask;
+ u64 start;
+ u64 end;
+ u64 hit_start;
+ u64 hit_end;
+
+ unsigned long color;
+ unsigned int flags;
+};
+
/**
* drm_mm_node_allocated - checks whether a node is allocated
* @node: drm_mm_node to check
@@ -142,7 +146,7 @@ static inline u64 __drm_mm_hole_node_start(struct drm_mm_node *hole_node)
*/
static inline u64 drm_mm_hole_node_start(struct drm_mm_node *hole_node)
{
- BUG_ON(!hole_node->hole_follows);
+ BUG_ON(!hole_node->hole_size);
return __drm_mm_hole_node_start(hole_node);
}
@@ -213,14 +217,39 @@ static inline u64 drm_mm_hole_node_end(struct drm_mm_node *hole_node)
* Basic range manager support (drm_mm.c)
*/
int drm_mm_reserve_node(struct drm_mm *mm, struct drm_mm_node *node);
+int drm_mm_insert_node_in_range_generic(struct drm_mm *mm,
+ struct drm_mm_node *node,
+ u64 size,
+ u64 alignment,
+ unsigned long color,
+ u64 start,
+ u64 end,
+ unsigned flags);
+
+/**
+ * drm_mm_insert_node_generic - search for space and insert @node
+ * @mm: drm_mm to allocate from
+ * @node: preallocate node to insert
+ * @size: size of the allocation
+ * @alignment: alignment of the allocation
+ * @color: opaque tag value to use for this node
+ * @flags: flags to fine-tune the allocation search and creation
+ *
+ * The preallocated node must be cleared to 0.
+ *
+ * Returns:
+ * 0 on success, -ENOSPC if there's no suitable hole.
+ */
+static inline int
+drm_mm_insert_node_generic(struct drm_mm *mm, struct drm_mm_node *node,
+ u64 size, u64 alignment,
+ unsigned long color,
+ unsigned flags)
+{
+ return drm_mm_insert_node_in_range_generic(mm, node, size, alignment,
+ color, 0, ~0ull, flags);
+}
-int drm_mm_insert_node_generic(struct drm_mm *mm,
- struct drm_mm_node *node,
- u64 size,
- unsigned alignment,
- unsigned long color,
- enum drm_mm_search_flags sflags,
- enum drm_mm_allocator_flags aflags);
/**
* drm_mm_insert_node - search for space and insert @node
* @mm: drm_mm to allocate from
@@ -239,23 +268,11 @@ int drm_mm_insert_node_generic(struct drm_mm *mm,
*/
static inline int drm_mm_insert_node(struct drm_mm *mm,
struct drm_mm_node *node,
- u64 size,
- unsigned alignment,
- enum drm_mm_search_flags flags)
+ u64 size)
{
- return drm_mm_insert_node_generic(mm, node, size, alignment, 0, flags,
- DRM_MM_CREATE_DEFAULT);
+ return drm_mm_insert_node_generic(mm, node, size, 0, 0, 0);
}
-int drm_mm_insert_node_in_range_generic(struct drm_mm *mm,
- struct drm_mm_node *node,
- u64 size,
- unsigned alignment,
- unsigned long color,
- u64 start,
- u64 end,
- enum drm_mm_search_flags sflags,
- enum drm_mm_allocator_flags aflags);
/**
* drm_mm_insert_node_in_range - ranged search for space and insert @node
* @mm: drm_mm to allocate from
@@ -277,14 +294,13 @@ int drm_mm_insert_node_in_range_generic(struct drm_mm *mm,
static inline int drm_mm_insert_node_in_range(struct drm_mm *mm,
struct drm_mm_node *node,
u64 size,
- unsigned alignment,
+ u64 alignment,
u64 start,
u64 end,
- enum drm_mm_search_flags flags)
+ unsigned flags)
{
return drm_mm_insert_node_in_range_generic(mm, node, size, alignment,
- 0, start, end, flags,
- DRM_MM_CREATE_DEFAULT);
+ 0, start, end, flags);
}
void drm_mm_remove_node(struct drm_mm_node *node);
@@ -293,20 +309,59 @@ void drm_mm_init(struct drm_mm *mm,
u64 start,
u64 size);
void drm_mm_takedown(struct drm_mm *mm);
-bool drm_mm_clean(struct drm_mm *mm);
-void drm_mm_init_scan(struct drm_mm *mm,
- u64 size,
- unsigned alignment,
- unsigned long color);
-void drm_mm_init_scan_with_range(struct drm_mm *mm,
- u64 size,
- unsigned alignment,
- unsigned long color,
- u64 start,
- u64 end);
-bool drm_mm_scan_add_block(struct drm_mm_node *node);
-bool drm_mm_scan_remove_block(struct drm_mm_node *node);
+/**
+ * drm_mm_clean - checks whether an allocator is clean
+ * @mm: drm_mm allocator to check
+ *
+ * Returns:
+ * True if the allocator is completely free, false if there's still a node
+ * allocated in it.
+ */
+static inline bool drm_mm_clean(struct drm_mm *mm)
+{
+ return list_empty(&mm->head_node.node_list);
+}
+
+struct drm_mm_node *
+drm_mm_interval_first(struct drm_mm *mm, u64 start, u64 last);
+
+struct drm_mm_node *
+drm_mm_interval_next(struct drm_mm_node *node, u64 start, u64 last);
+
+void drm_mm_init_scan_with_range(struct drm_mm_scan *scan,
+ u64 size, u64 alignment, unsigned long color,
+ u64 start, u64 end,
+ unsigned flags);
+
+/**
+ * drm_mm_init_scan - initialize lru scanning
+ * @mm: drm_mm to scan
+ * @size: size of the allocation
+ * @alignment: alignment of the allocation
+ * @color: opaque tag value to use for the allocation
+ * @flags: flags to specify how the allocation will be performed afterwards
+ *
+ * This simply sets up the scanning routines with the parameters for the desired
+ * hole.
+ *
+ * Warning:
+ * As long as the scan list is non-empty, no other operations than
+ * adding/removing nodes to/from the scan list are allowed.
+ */
+static inline void drm_mm_init_scan(struct drm_mm_scan *scan,
+ u64 size,
+ u64 alignment,
+ unsigned long color,
+ unsigned flags)
+{
+ return drm_mm_init_scan_with_range(scan, size, alignment, color,
+ 0, ~0ull, flags);
+}
+bool drm_mm_scan_add_block(struct drm_mm_scan *scan,
+ struct drm_mm_node *node);
+bool drm_mm_scan_remove_block(struct drm_mm_scan *scan,
+ struct drm_mm_node *node);
void drm_mm_debug_table(struct drm_mm *mm, const char *prefix);
#ifdef CONFIG_DEBUG_FS
diff --git a/include/drm/drm_vma_manager.h b/include/drm/drm_vma_manager.h
index 06ea8e077ec2..afba6fcac853 100644
--- a/include/drm/drm_vma_manager.h
+++ b/include/drm/drm_vma_manager.h
@@ -40,13 +40,11 @@ struct drm_vma_offset_file {
struct drm_vma_offset_node {
rwlock_t vm_lock;
struct drm_mm_node vm_node;
- struct rb_node vm_rb;
struct rb_root vm_files;
};
struct drm_vma_offset_manager {
rwlock_t vm_lock;
- struct rb_root vm_addr_space_rb;
struct drm_mm vm_addr_space_mm;
};
diff --git a/include/linux/fence-array.h b/include/linux/fence-array.h
index 86baaa45567c..a44794e508df 100644
--- a/include/linux/fence-array.h
+++ b/include/linux/fence-array.h
@@ -52,6 +52,16 @@ struct fence_array {
extern const struct fence_ops fence_array_ops;
/**
+ * fence_is_array - check if a fence is from the array subsclass
+ *
+ * Return true if it is a fence_array and false otherwise.
+ */
+static inline bool fence_is_array(struct fence *fence)
+{
+ return fence->ops == &fence_array_ops;
+}
+
+/**
* to_fence_array - cast a fence to a fence_array
* @fence: fence to cast to a fence_array
*
diff --git a/include/linux/fence.h b/include/linux/fence.h
index 523ea3fbbddd..798bde6fde74 100644
--- a/include/linux/fence.h
+++ b/include/linux/fence.h
@@ -34,6 +34,8 @@ struct fence;
struct fence_ops;
struct fence_cb;
+struct kfence;
+
/**
* struct fence - software synchronization primitive
* @refcount: refcount for this fence
@@ -377,4 +379,8 @@ u64 fence_context_alloc(unsigned num);
##args); \
} while (0)
+int kfence_await_dma_fence(struct kfence *fence,
+ struct fence *dma,
+ gfp_t gfp);
+
#endif /* __LINUX_FENCE_H */
diff --git a/include/linux/io-mapping.h b/include/linux/io-mapping.h
index 645ad06b5d52..cdb8870ecf90 100644
--- a/include/linux/io-mapping.h
+++ b/include/linux/io-mapping.h
@@ -31,16 +31,16 @@
* See Documentation/io-mapping.txt
*/
-#ifdef CONFIG_HAVE_ATOMIC_IOMAP
-
-#include <asm/iomap.h>
-
struct io_mapping {
resource_size_t base;
unsigned long size;
pgprot_t prot;
+ void __iomem *iomem;
};
+#ifdef CONFIG_HAVE_ATOMIC_IOMAP
+
+#include <asm/iomap.h>
/*
* For small address space machines, mapping large objects
* into the kernel virtual space isn't practical. Where
@@ -49,34 +49,25 @@ struct io_mapping {
*/
static inline struct io_mapping *
-io_mapping_create_wc(resource_size_t base, unsigned long size)
+io_mapping_init_wc(struct io_mapping *iomap,
+ resource_size_t base,
+ unsigned long size)
{
- struct io_mapping *iomap;
pgprot_t prot;
- iomap = kmalloc(sizeof(*iomap), GFP_KERNEL);
- if (!iomap)
- goto out_err;
-
if (iomap_create_wc(base, size, &prot))
- goto out_free;
+ return NULL;
iomap->base = base;
iomap->size = size;
iomap->prot = prot;
return iomap;
-
-out_free:
- kfree(iomap);
-out_err:
- return NULL;
}
static inline void
-io_mapping_free(struct io_mapping *mapping)
+io_mapping_fini(struct io_mapping *mapping)
{
iomap_free(mapping->base, mapping->size);
- kfree(mapping);
}
/* Atomic map/unmap */
@@ -121,21 +112,40 @@ io_mapping_unmap(void __iomem *vaddr)
#else
#include <linux/uaccess.h>
-
-/* this struct isn't actually defined anywhere */
-struct io_mapping;
+#include <asm/pgtable_types.h>
/* Create the io_mapping object*/
static inline struct io_mapping *
-io_mapping_create_wc(resource_size_t base, unsigned long size)
+io_mapping_init_wc(struct io_mapping *iomap,
+ resource_size_t base,
+ unsigned long size)
+{
+ iomap->base = base;
+ iomap->size = size;
+ iomap->iomem = ioremap_wc(base, size);
+ iomap->prot = pgprot_writecombine(PAGE_KERNEL_IO);
+
+ return iomap;
+}
+
+static inline void
+io_mapping_fini(struct io_mapping *mapping)
+{
+ iounmap(mapping->iomem);
+}
+
+/* Non-atomic map/unmap */
+static inline void __iomem *
+io_mapping_map_wc(struct io_mapping *mapping,
+ unsigned long offset,
+ unsigned long size)
{
- return (struct io_mapping __force *) ioremap_wc(base, size);
+ return mapping->iomem + offset;
}
static inline void
-io_mapping_free(struct io_mapping *mapping)
+io_mapping_unmap(void __iomem *vaddr)
{
- iounmap((void __force __iomem *) mapping);
}
/* Atomic map/unmap */
@@ -145,30 +155,37 @@ io_mapping_map_atomic_wc(struct io_mapping *mapping,
{
preempt_disable();
pagefault_disable();
- return ((char __force __iomem *) mapping) + offset;
+ return io_mapping_map_wc(mapping, offset, PAGE_SIZE);
}
static inline void
io_mapping_unmap_atomic(void __iomem *vaddr)
{
+ io_mapping_unmap(vaddr);
pagefault_enable();
preempt_enable();
}
-/* Non-atomic map/unmap */
-static inline void __iomem *
-io_mapping_map_wc(struct io_mapping *mapping,
- unsigned long offset,
- unsigned long size)
+#endif /* HAVE_ATOMIC_IOMAP */
+
+static inline struct io_mapping *
+io_mapping_create_wc(resource_size_t base,
+ unsigned long size)
{
- return ((char __force __iomem *) mapping) + offset;
+ struct io_mapping *iomap;
+
+ iomap = kmalloc(sizeof(*iomap), GFP_KERNEL);
+ if (!iomap)
+ return NULL;
+
+ return io_mapping_init_wc(iomap, base, size);
}
static inline void
-io_mapping_unmap(void __iomem *vaddr)
+io_mapping_free(struct io_mapping *iomap)
{
+ io_mapping_fini(iomap);
+ kfree(iomap);
}
-#endif /* HAVE_ATOMIC_IOMAP */
-
#endif /* _LINUX_IO_MAPPING_H */
diff --git a/include/linux/kfence.h b/include/linux/kfence.h
new file mode 100644
index 000000000000..9b7dbd892218
--- /dev/null
+++ b/include/linux/kfence.h
@@ -0,0 +1,86 @@
+/*
+ * kfence.h - library routines for N:M synchronisation points
+ *
+ * Copyright (C) 2016 Intel Corporation
+ *
+ * This file is released under the GPLv2.
+ *
+ */
+
+#ifndef _KFENCE_H_
+#define _KFENCE_H_
+
+#include <linux/gfp.h>
+#include <linux/kref.h>
+#include <linux/notifier.h> /* for NOTIFY_DONE */
+#include <linux/wait.h>
+
+struct completion;
+struct fence;
+struct reservation_object;
+enum hrtimer_mode;
+
+/**
+ * struct kfence - used for tracking pending events and a set of listeners.
+ * @wait: a waitqueue of listeners (includes both kfences and tasks)
+ * @flags: a bitmask of interesting bits, mixed in with a notification function
+ * @kref: a reference counter
+ * @pending: an atomic counter of pending events
+ */
+struct kfence {
+ wait_queue_head_t wait;
+ unsigned long flags;
+ struct kref kref;
+ atomic_t pending;
+};
+
+#define KFENCE_CHECKED_BIT 0 /* used internally for DAG checking */
+#define KFENCE_PRIVATE_BIT 1 /* available for use by owner */
+#define KFENCE_MASK (~3)
+
+/**
+ * typedef kfence_notify_t - callback function type
+ *
+ * An optional callback to a fence may be provided that is called first
+ * when the fence is complete, and later when the fence is released. The
+ * callback must of &kfence_notify_t function type, and be aligned using
+ * __kfence_call function attribute.
+ */
+typedef int (*kfence_notify_t)(struct kfence *);
+#define __kfence_call __aligned(4)
+
+void kfence_init(struct kfence *fence, kfence_notify_t fn);
+
+struct kfence *kfence_get(struct kfence *fence);
+void kfence_put(struct kfence *fence);
+
+void kfence_await(struct kfence *fence);
+int kfence_await_kfence(struct kfence *fence,
+ struct kfence *after,
+ gfp_t gfp);
+int kfence_await_completion(struct kfence *fence,
+ struct completion *x,
+ gfp_t gfp);
+int kfence_await_hrtimer(struct kfence *fence,
+ clockid_t clock, enum hrtimer_mode mode,
+ ktime_t delay, u64 slack,
+ gfp_t gfp);
+void kfence_complete(struct kfence *fence);
+void kfence_wake_up_all(struct kfence *fence);
+void kfence_wait(struct kfence *fence);
+
+/**
+ * kfence_done - report when the fence has been passed
+ * @fence: the kfence to query
+ *
+ * kfence_done() reports true when the fence is no longer waiting for any
+ * events and has completed its fence-complete notification.
+ *
+ * Returns true when the fence has been passed, false otherwise.
+ */
+static inline bool kfence_done(const struct kfence *fence)
+{
+ return atomic_read(&fence->pending) < 0;
+}
+
+#endif /* _KFENCE_H_ */
diff --git a/include/linux/reservation.h b/include/linux/reservation.h
index b0f305e77b7f..1954bab95db9 100644
--- a/include/linux/reservation.h
+++ b/include/linux/reservation.h
@@ -49,6 +49,8 @@ extern struct ww_class reservation_ww_class;
extern struct lock_class_key reservation_seqcount_class;
extern const char reservation_seqcount_string[];
+struct kfence;
+
/**
* struct reservation_object_list - a list of shared fences
* @rcu: for internal use
@@ -210,4 +212,9 @@ long reservation_object_wait_timeout_rcu(struct reservation_object *obj,
bool reservation_object_test_signaled_rcu(struct reservation_object *obj,
bool test_all);
+int kfence_await_reservation(struct kfence *fence,
+ struct reservation_object *resv,
+ bool write,
+ gfp_t gfp);
+
#endif /* _LINUX_RESERVATION_H */
diff --git a/include/linux/sync_file.h b/include/linux/sync_file.h
index c6ffe8b0725c..4ced13b503f8 100644
--- a/include/linux/sync_file.h
+++ b/include/linux/sync_file.h
@@ -19,12 +19,7 @@
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/fence.h>
-
-struct sync_file_cb {
- struct fence_cb cb;
- struct fence *fence;
- struct sync_file *sync_file;
-};
+#include <linux/fence-array.h>
/**
* struct sync_file - sync file to export to the userspace
@@ -32,10 +27,10 @@ struct sync_file_cb {
* @kref: reference count on fence.
* @name: name of sync_file. Useful for debugging
* @sync_file_list: membership in global file list
- * @num_fences: number of sync_pts in the fence
* @wq: wait queue for fence signaling
- * @status: 0: signaled, >0:active, <0: error
- * @cbs: sync_pts callback information
+ * @enabled: wether fence signal is enabled or not
+ * @fence: fence with the fences in the sync_file
+ * @cb: fence callback information
*/
struct sync_file {
struct file *file;
@@ -44,14 +39,15 @@ struct sync_file {
#ifdef CONFIG_DEBUG_FS
struct list_head sync_file_list;
#endif
- int num_fences;
wait_queue_head_t wq;
- atomic_t status;
+ atomic_t enabled;
- struct sync_file_cb cbs[];
+ struct fence *fence;
+ struct fence_cb cb;
};
struct sync_file *sync_file_create(struct fence *fence);
+struct fence *sync_file_get_fence(int fd);
#endif /* _LINUX_SYNC_H */
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 452629de7a57..d985abe01c19 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -246,6 +246,7 @@ typedef struct _drm_i915_sarea {
#define DRM_I915_OVERLAY_PUT_IMAGE 0x27
#define DRM_I915_OVERLAY_ATTRS 0x28
#define DRM_I915_GEM_EXECBUFFER2 0x29
+#define DRM_I915_GEM_EXECBUFFER2_WR DRM_I915_GEM_EXECBUFFER2
#define DRM_I915_GET_SPRITE_COLORKEY 0x2a
#define DRM_I915_SET_SPRITE_COLORKEY 0x2b
#define DRM_I915_GEM_WAIT 0x2c
@@ -279,6 +280,7 @@ typedef struct _drm_i915_sarea {
#define DRM_IOCTL_I915_GEM_INIT DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_INIT, struct drm_i915_gem_init)
#define DRM_IOCTL_I915_GEM_EXECBUFFER DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER, struct drm_i915_gem_execbuffer)
#define DRM_IOCTL_I915_GEM_EXECBUFFER2 DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER2, struct drm_i915_gem_execbuffer2)
+#define DRM_IOCTL_I915_GEM_EXECBUFFER2_WR DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER2_WR, struct drm_i915_gem_execbuffer2)
#define DRM_IOCTL_I915_GEM_PIN DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_PIN, struct drm_i915_gem_pin)
#define DRM_IOCTL_I915_GEM_UNPIN DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_UNPIN, struct drm_i915_gem_unpin)
#define DRM_IOCTL_I915_GEM_BUSY DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_BUSY, struct drm_i915_gem_busy)
@@ -387,6 +389,9 @@ typedef struct drm_i915_irq_wait {
#define I915_PARAM_HAS_EXEC_SOFTPIN 37
#define I915_PARAM_HAS_POOLED_EU 38
#define I915_PARAM_MIN_EU_IN_POOL 39
+#define I915_PARAM_HAS_EXEC_BATCH_FIRST 40
+#define I915_PARAM_HAS_EXEC_ASYNC 41
+#define I915_PARAM_HAS_EXEC_FENCE 42
typedef struct drm_i915_getparam {
__s32 param;
@@ -728,8 +733,9 @@ struct drm_i915_gem_exec_object2 {
#define EXEC_OBJECT_SUPPORTS_48B_ADDRESS (1<<3)
#define EXEC_OBJECT_PINNED (1<<4)
#define EXEC_OBJECT_PAD_TO_SIZE (1<<5)
+#define EXEC_OBJECT_ASYNC (1<<6)
/* All remaining bits are MBZ and RESERVED FOR FUTURE USE */
-#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_PAD_TO_SIZE<<1)
+#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_ASYNC<<1)
__u64 flags;
union {
@@ -819,7 +825,12 @@ struct drm_i915_gem_execbuffer2 {
*/
#define I915_EXEC_RESOURCE_STREAMER (1<<15)
-#define __I915_EXEC_UNKNOWN_FLAGS -(I915_EXEC_RESOURCE_STREAMER<<1)
+#define I915_EXEC_BATCH_FIRST (1<<16)
+
+#define I915_EXEC_FENCE_IN (1<<17)
+#define I915_EXEC_FENCE_OUT (1<<18)
+
+#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_OUT<<1))
#define I915_EXEC_CONTEXT_ID_MASK (0xffffffff)
#define i915_execbuffer2_set_context_id(eb2, context) \
diff --git a/kernel/Makefile b/kernel/Makefile
index e2ec54e2b952..ff11f31b7ec9 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -9,7 +9,7 @@ obj-y = fork.o exec_domain.o panic.o \
extable.o params.o \
kthread.o sys_ni.o nsproxy.o \
notifier.o ksysfs.o cred.o reboot.o \
- async.o range.o smpboot.o
+ async.o kfence.o range.o smpboot.o
obj-$(CONFIG_MULTIUSER) += groups.o
diff --git a/kernel/kfence.c b/kernel/kfence.c
new file mode 100644
index 000000000000..4605eabc2c1b
--- /dev/null
+++ b/kernel/kfence.c
@@ -0,0 +1,497 @@
+/*
+ * (C) Copyright 2016 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+#include <linux/fence.h>
+#include <linux/kfence.h>
+#include <linux/reservation.h>
+#include <linux/slab.h>
+
+/**
+ * DOC: kfence overview
+ *
+ * kfences provide synchronisation barriers between multiple tasks. They are
+ * very similar to completions, or the OpenGL fence synchronisation object.
+ * Where kfences differ from completions is their ability to track multiple
+ * event sources rather than being a singular "completion event". Similar to
+ * completions multiple processes can wait upon a kfence. However, unlike
+ * completions, a kfence can wait upon other kfences allowing for a graph
+ * of interdependent events.
+ *
+ * Each kfence is a one-shot flag, signaling that work has progressed past
+ * a certain point (as measured by completion of all events the kfence is
+ * listening for) and the waiters upon that kfence may proceed.
+ *
+ * kfences provide both signaling and waiting routines:
+ *
+ * - kfence_await(): indicates that the kfence is asynchronously waiting for
+ * another event.
+ *
+ * - kfence_complete(): undoes the earlier await and marks the fence as done
+ * if all of its pending events have been completed.
+ *
+ * - kfence_done(): reports whether or not the kfence has been passed.
+ *
+ * - kfence_wait(): allows the caller to sleep (uninterruptibly) until the
+ * fence is passed.
+ *
+ * This interface is very similar to completions, with the exception of
+ * allowing the fence to await multiple events. kfences can wait upon other
+ * fences or other hardware events, building an ordered dependency graph:
+ *
+ * - kfence_await_kfence(): the kfence asynchronously waits upon completion
+ * of another kfence
+ *
+ * - kfence_await_completion(): the kfence asynchronously waits upon a
+ * completion
+ *
+ * - kfence_await_hrtimer(): the kfence asynchronously wait for an expiration
+ * of a timer
+ *
+ * - kfence_await_dma_fence(): the kfence asynchronously waits for a DMA
+ * (hardware signaled) fence
+ *
+ * - kfence_await_reservation(): the kfence asynchronously waits for a DMA
+ * reservation object
+ *
+ * A kfence is initialised using kfence_init(), and starts off awaiting an
+ * event. Once you have finished setting up the fence, including adding
+ * all of its asynchronous waits, call kfence_complete().
+ *
+ * Unlike completions, kfences are expected to live inside more complex graphs
+ * and form the basis for parallel execution of interdependent tasks and so are
+ * reference counted. Use kfence_get() and kfence_put() to acquire or release
+ * a reference to the kfence respectively.
+ *
+ * The kfence can be embedded inside a larger structure and be used as part
+ * of its event driven mechanism. As such kfence_init() can be passed a
+ * callback function that will be called first when the kfence is completed,
+ * and again when the kfence is to be freed. If no callback is provided, the
+ * kfence will be freed using kfree() when its reference count hits zero -
+ * if it is embedded inside another structure and no callback is provided,
+ * it must be the first member of its parent struct.
+ *
+ * The fence-completed notification is called before any listeners upon the
+ * fence are signaled, or any waiters woken. You can defer their wake up by
+ * returning NOTIFY_OK from the fence-completed notification and calling
+ * kfence_wake_up_all() later when ready.
+ */
+
+static DEFINE_SPINLOCK(kfence_lock);
+
+static int __kfence_notify(struct kfence *fence)
+{
+ kfence_notify_t fn;
+
+ fn = (kfence_notify_t)(fence->flags & KFENCE_MASK);
+ return fn(fence);
+}
+
+static void kfence_free(struct kref *kref)
+{
+ struct kfence *fence = container_of(kref, typeof(*fence), kref);
+
+ WARN_ON(atomic_read(&fence->pending) > 0);
+
+ if (fence->flags & KFENCE_MASK)
+ WARN_ON(__kfence_notify(fence) != NOTIFY_DONE);
+ else
+ kfree(fence);
+}
+
+/**
+ * kfence_put - release a reference to a kfence
+ * @fence: the kfence being disposed of
+ *
+ * kfence_put() decrements the reference count on the @fence, and when
+ * it hits zero the fence will be freed.
+ */
+void kfence_put(struct kfence *fence)
+{
+ kref_put(&fence->kref, kfence_free);
+}
+EXPORT_SYMBOL_GPL(kfence_put);
+
+/**
+ * kfence_get - acquire a reference to a kfence
+ * @fence: the kfence being used
+ *
+ * Returns the pointer to the kfence, with its reference count incremented.
+ */
+struct kfence *kfence_get(struct kfence *fence)
+{
+ kref_get(&fence->kref);
+ return fence;
+}
+EXPORT_SYMBOL_GPL(kfence_get);
+
+static void __kfence_wake_up_all(struct kfence *fence,
+ struct list_head *continuation)
+{
+ wait_queue_head_t *x = &fence->wait;
+ unsigned long flags;
+
+ atomic_dec(&fence->pending);
+
+ /*
+ * To prevent unbounded recursion as we traverse the graph of kfences,
+ * we move the task_list from this the next ready fence to the tail of
+ * the original fence's task_list (and so added to the list to be
+ * woken).
+ */
+ smp_mb__before_spinlock();
+ spin_lock_irqsave_nested(&x->lock, flags, 1 + !!continuation);
+ if (continuation) {
+ list_splice_tail_init(&x->task_list, continuation);
+ } else {
+ while (!list_empty(&x->task_list))
+ __wake_up_locked_key(x, TASK_NORMAL, &x->task_list);
+ }
+ spin_unlock_irqrestore(&x->lock, flags);
+}
+
+/**
+ * kfence_wake_up_all - wake all waiters upon a fence
+ * @fence: the kfence to signal
+ *
+ * If the fence-complete notification is deferred, when the callback is
+ * complete it should call kfence_wake_up_all() to wake up all waiters
+ * upon the fence.
+ *
+ * It is invalid to call kfence_wake_up_all() at any time other than from
+ * inside a deferred fence-complete notification.
+ */
+void kfence_wake_up_all(struct kfence *fence)
+{
+ WARN_ON(atomic_read(&fence->pending) != 0);
+ __kfence_wake_up_all(fence, NULL);
+}
+
+static void __kfence_complete(struct kfence *fence,
+ struct list_head *continuation)
+{
+ if (!atomic_dec_and_test(&fence->pending))
+ return;
+
+ if (fence->flags & KFENCE_MASK && __kfence_notify(fence) != NOTIFY_DONE)
+ return;
+
+ __kfence_wake_up_all(fence, continuation);
+}
+
+/**
+ * kfence_await - increment the count of events being asynchronously waited upon
+ * @fence: the kfence
+ *
+ * kfence_await() indicates that the @fence is waiting upon the completion
+ * of an event. The @fence may wait upon multiple events concurrently.
+ * When that event is complete, a corresponding call to kfence_complete()
+ * should be made.
+ */
+void kfence_await(struct kfence *fence)
+{
+ WARN_ON(atomic_inc_return(&fence->pending) <= 1);
+}
+EXPORT_SYMBOL_GPL(kfence_await);
+
+/**
+ * kfence_complete - decrement the count of events waited upon
+ * @fence: the kfence
+ *
+ * When all event sources for the @fence are completed, i.e. the event count
+ * hits zero, all waiters upon the @fence are woken up.
+ */
+void kfence_complete(struct kfence *fence)
+{
+ if (WARN_ON(kfence_done(fence)))
+ return;
+
+ __kfence_complete(fence, NULL);
+}
+EXPORT_SYMBOL_GPL(kfence_complete);
+
+/**
+ * kfence_wait - wait upon a fence to be completed
+ * @fence: the kfence to wait upon
+ *
+ * Blocks (uninterruptibly waits) until the @fence event counter reaches zero
+ * and then also waits for the fence-completed notification to finish.
+ */
+void kfence_wait(struct kfence *fence)
+{
+ wait_event(fence->wait, kfence_done(fence));
+}
+EXPORT_SYMBOL_GPL(kfence_wait);
+
+/**
+ * kfence_init - initialize a fence for embedded use within a struct
+ * @fence: this kfence
+ * @fn: a callback function for when the fence is complete, and when the
+ * fence is released
+ *
+ * This function initialises the @fence for use embedded within a parent
+ * structure. The optional @fn hook is first called when the fence is completed
+ * (when all its pending event count hits 0) and again when the fence is
+ * to be freed. Note that the @fn will be called from atomic context. The @fn
+ * is stored inside the fence mixed with some flags, and so the @fn must
+ * be aligned using the __kfence_call function attribute.
+ *
+ * If the @fn is not provided, the kfence must be the first member in its
+ * parent struct as it will be freed using kfree().
+ *
+ * fence-complete notification: @fn will be called when the pending event
+ * count hits 0, however the fence is not completed unless the callback
+ * returns NOTIFY_DONE. During this notification callback fence_done() reports
+ * false. You can suspend completion of the fence by returning
+ * NOTIFY_OK instead and then later calling kfence_wake_up_all().
+ *
+ * fence-release notification: @fn will be called when the reference count
+ * hits 0, fence_done() will report true.
+ */
+void kfence_init(struct kfence *fence, kfence_notify_t fn)
+{
+ BUG_ON((unsigned long)fn & ~KFENCE_MASK);
+
+ init_waitqueue_head(&fence->wait);
+ kref_init(&fence->kref);
+ atomic_set(&fence->pending, 1);
+ fence->flags = (unsigned long)fn;
+}
+EXPORT_SYMBOL_GPL(kfence_init);
+
+static int kfence_wake(wait_queue_t *wq, unsigned mode, int flags, void *key)
+{
+ list_del(&wq->task_list);
+ __kfence_complete(wq->private, key);
+ kfence_put(wq->private);
+ kfree(wq);
+ return 0;
+}
+
+static bool __kfence_check_if_after(struct kfence *fence,
+ const struct kfence * const signaler)
+{
+ wait_queue_t *wq;
+
+ if (__test_and_set_bit(KFENCE_CHECKED_BIT, &fence->flags))
+ return false;
+
+ if (fence == signaler)
+ return true;
+
+ list_for_each_entry(wq, &fence->wait.task_list, task_list) {
+ if (wq->func != kfence_wake)
+ continue;
+
+ if (__kfence_check_if_after(wq->private, signaler))
+ return true;
+ }
+
+ return false;
+}
+
+static void __kfence_clear_checked_bit(struct kfence *fence)
+{
+ wait_queue_t *wq;
+
+ if (!__test_and_clear_bit(KFENCE_CHECKED_BIT, &fence->flags))
+ return;
+
+ list_for_each_entry(wq, &fence->wait.task_list, task_list) {
+ if (wq->func != kfence_wake)
+ continue;
+
+ __kfence_clear_checked_bit(wq->private);
+ }
+}
+
+static bool kfence_check_if_after(struct kfence *fence,
+ const struct kfence * const signaler)
+{
+ unsigned long flags;
+ bool err;
+
+ if (!config_enabled(CONFIG_KFENCE_CHECK_DAG))
+ return false;
+
+ spin_lock_irqsave(&kfence_lock, flags);
+ err = __kfence_check_if_after(fence, signaler);
+ __kfence_clear_checked_bit(fence);
+ spin_unlock_irqrestore(&kfence_lock, flags);
+
+ return err;
+}
+
+static wait_queue_t *__kfence_create_wq(struct kfence *fence, gfp_t gfp)
+{
+ wait_queue_t *wq;
+
+ wq = kmalloc(sizeof(*wq), gfp);
+ if (unlikely(!wq))
+ return NULL;
+
+ INIT_LIST_HEAD(&wq->task_list);
+ wq->flags = 0;
+ wq->func = kfence_wake;
+ wq->private = kfence_get(fence);
+
+ kfence_await(fence);
+
+ return wq;
+}
+
+/**
+ * kfence_await_kfence - set one fence to wait upon another
+ * @fence: this kfence
+ * @signaler: target kfence to wait upon
+ * @gfp: the allowed allocation mask
+ *
+ * kfence_await_kfence() causes the @fence to asynchronously wait upon the
+ * completion of @signaler.
+ *
+ * Returns 1 if the @fence was added to the waitqueue of @signaler, 0
+ * if @signaler was already complete, or a negative error code.
+ */
+int kfence_await_kfence(struct kfence *fence,
+ struct kfence *signaler,
+ gfp_t gfp)
+{
+ wait_queue_t *wq;
+ unsigned long flags;
+ int pending;
+
+ if (kfence_done(signaler))
+ return 0;
+
+ /* The dependency graph must be acyclic. */
+ if (unlikely(kfence_check_if_after(fence, signaler)))
+ return -EINVAL;
+
+ wq = __kfence_create_wq(fence, gfp);
+ if (unlikely(!wq)) {
+ if (!gfpflags_allow_blocking(gfp))
+ return -ENOMEM;
+
+ kfence_wait(signaler);
+ return 0;
+ }
+
+ spin_lock_irqsave(&signaler->wait.lock, flags);
+ if (likely(!kfence_done(signaler))) {
+ __add_wait_queue_tail(&signaler->wait, wq);
+ pending = 1;
+ } else {
+ kfence_wake(wq, 0, 0, NULL);
+ pending = 0;
+ }
+ spin_unlock_irqrestore(&signaler->wait.lock, flags);
+
+ return pending;
+}
+EXPORT_SYMBOL_GPL(kfence_await_kfence);
+
+/**
+ * kfence_await_completion - set the fence to wait upon a completion
+ * @fence: this kfence
+ * @x: target completion to wait upon
+ * @gfp: the allowed allocation mask
+ *
+ * kfence_await_completion() causes the @fence to asynchronously wait upon
+ * the completion.
+ *
+ * Returns 1 if the @fence was added to the waitqueue of @x, 0
+ * if @x was already complete, or a negative error code.
+ */
+int kfence_await_completion(struct kfence *fence,
+ struct completion *x,
+ gfp_t gfp)
+{
+ wait_queue_t *wq;
+ unsigned long flags;
+ int pending;
+
+ if (completion_done(x))
+ return 0;
+
+ wq = __kfence_create_wq(fence, gfp);
+ if (unlikely(!wq)) {
+ if (!gfpflags_allow_blocking(gfp))
+ return -ENOMEM;
+
+ wait_for_completion(x);
+ return 0;
+ }
+
+ spin_lock_irqsave(&x->wait.lock, flags);
+ if (likely(!READ_ONCE(x->done))) {
+ __add_wait_queue_tail(&x->wait, wq);
+ pending = 1;
+ } else {
+ kfence_wake(wq, 0, 0, NULL);
+ pending = 0;
+ }
+ spin_unlock_irqrestore(&x->wait.lock, flags);
+
+ return pending;
+}
+EXPORT_SYMBOL_GPL(kfence_await_completion);
+
+struct timer_cb {
+ struct hrtimer timer;
+ struct kfence *fence;
+};
+
+static enum hrtimer_restart
+timer_kfence_wake(struct hrtimer *timer)
+{
+ struct timer_cb *cb = container_of(timer, typeof(*cb), timer);
+
+ kfence_complete(cb->fence);
+ kfence_put(cb->fence);
+ kfree(cb);
+
+ return HRTIMER_NORESTART;
+}
+
+/**
+ * kfence_await_hrtimer - set the fence to wait for a period of time
+ * @fence: this kfence
+ * @clock: which clock to program
+ * @mode: delay given as relative or absolute
+ * @delay: how long or until what time to wait
+ * @slack: how much slack that may be applied to the delay
+ *
+ * kfence_await_hrtimer() causes the @fence to wait for a a period of time, or
+ * until a certain point in time. It is a convenience wrapper around
+ * hrtimer_start_range_ns(). For more details on @clock, @mode, @delay and
+ * @slack please consult the hrtimer documentation.
+ *
+ * Returns 1 if the delay was sucessfuly added to the @fence, or a negative
+ * error code on failure.
+ */
+int kfence_await_hrtimer(struct kfence *fence,
+ clockid_t clock, enum hrtimer_mode mode,
+ ktime_t delay, u64 slack,
+ gfp_t gfp)
+{
+ struct timer_cb *cb;
+
+ cb = kmalloc(sizeof(*cb), gfp);
+ if (!cb)
+ return -ENOMEM;
+
+ cb->fence = kfence_get(fence);
+ kfence_await(fence);
+
+ hrtimer_init(&cb->timer, clock, mode);
+ cb->timer.function = timer_kfence_wake;
+
+ hrtimer_start_range_ns(&cb->timer, delay, slack, mode);
+ return 1;
+}
+EXPORT_SYMBOL_GPL(kfence_await_hrtimer);
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index eb8917a71489..d8297d313a2c 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1733,6 +1733,29 @@ config KPROBES_SANITY_TEST
Say N if you are unsure.
+config KFENCE_SELFTEST
+ tristate "Kfence self tests"
+ depends on DEBUG_KERNEL
+ default n
+ help
+ This option provides a kernel modules that can be used to test
+ the kfence handling. This option is not useful for distributions
+ or general kernels, but only for kernel developers working on the
+ kfence and async_domain facility.
+
+ Say N if you are unsure.
+
+config KFENCE_CHECK_DAG
+ bool "Check that kfence are only used with directed acyclic graphs"
+ depends on DEBUG_KERNEL
+ default n
+ help
+ This option enforces that kfences are only used with directed acyclic
+ graphs (DAG), as otherwise the cycles in the graph means that they
+ will never be signaled (or the corresponding task executed).
+
+ Say N if you are unsure.
+
config BACKTRACE_SELF_TEST
tristate "Self test for the backtrace code"
depends on DEBUG_KERNEL
diff --git a/lib/Makefile b/lib/Makefile
index cfa68eb269e4..ca9ce8e700eb 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -25,6 +25,7 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \
earlycpio.o seq_buf.o nmi_backtrace.o nodemask.o
obj-$(CONFIG_ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS) += usercopy.o
+obj-$(CONFIG_KFENCE_SELFTEST) += test-kfence.o
lib-$(CONFIG_MMU) += ioremap.o
lib-$(CONFIG_SMP) += cpumask.o
lib-$(CONFIG_HAS_DMA) += dma-noop.o
diff --git a/lib/test-kfence.c b/lib/test-kfence.c
new file mode 100644
index 000000000000..1b0853fda7c3
--- /dev/null
+++ b/lib/test-kfence.c
@@ -0,0 +1,580 @@
+/*
+ * Test cases for kfence facility.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/delay.h>
+#include <linux/kfence.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+
+static struct kfence *alloc_kfence(void)
+{
+ struct kfence *fence;
+
+ fence = kmalloc(sizeof(*fence), GFP_KERNEL);
+ if (!fence)
+ return NULL;
+
+ kfence_init(fence, NULL);
+ return fence;
+}
+
+static int __init __test_self(struct kfence *fence)
+{
+ if (kfence_done(fence))
+ return -EINVAL;
+
+ kfence_complete(fence);
+ if (!kfence_done(fence))
+ return -EINVAL;
+
+ kfence_wait(fence);
+ if (!kfence_done(fence))
+ return -EINVAL;
+
+ return 0;
+}
+
+static int __init test_self(void)
+{
+ struct kfence *fence;
+ int ret;
+
+ /* Test kfence signaling and completion testing */
+ pr_debug("%s\n", __func__);
+
+ fence = alloc_kfence();
+ if (!fence)
+ return -ENOMEM;
+
+ ret = __test_self(fence);
+
+ kfence_put(fence);
+ return ret;
+}
+
+struct test_stack {
+ struct kfence fence;
+ bool seen;
+};
+
+static int __init __kfence_call fence_callback(struct kfence *fence)
+{
+ container_of(fence, typeof(struct test_stack), fence)->seen = true;
+ return NOTIFY_DONE;
+}
+
+static int __init test_stack(void)
+{
+ struct test_stack ts;
+ int ret;
+
+ /* Test kfence signaling and completion testing (on stack) */
+ pr_debug("%s\n", __func__);
+
+ ts.seen = false;
+ kfence_init(&ts.fence, fence_callback);
+
+ ret = __test_self(&ts.fence);
+ if (ret < 0)
+ return ret;
+
+ if (!ts.seen) {
+ pr_err("fence callback not executed\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int __init test_dag(void)
+{
+ struct kfence *A, *B, *C;
+
+ /* Test detection of cycles within the kfence graphs */
+ pr_debug("%s\n", __func__);
+
+ if (!config_enabled(CONFIG_KFENCE_CHECK_DAG))
+ return 0;
+
+ A = alloc_kfence();
+ if (kfence_await_kfence(A, A, GFP_KERNEL) != -EINVAL) {
+ pr_err("recursive cycle not detected (AA)\n");
+ return -EINVAL;
+ }
+
+ B = alloc_kfence();
+
+ kfence_await_kfence(A, B, GFP_KERNEL);
+ if (kfence_await_kfence(B, A, GFP_KERNEL) != -EINVAL) {
+ pr_err("single depth cycle not detected (BAB)\n");
+ return -EINVAL;
+ }
+
+ C = alloc_kfence();
+ kfence_await_kfence(B, C, GFP_KERNEL);
+ if (kfence_await_kfence(C, A, GFP_KERNEL) != -EINVAL) {
+ pr_err("cycle not detected (BA, CB, AC)\n");
+ return -EINVAL;
+ }
+
+ kfence_complete(A);
+ kfence_put(A);
+
+ kfence_complete(B);
+ kfence_put(B);
+
+ kfence_complete(C);
+ kfence_put(C);
+
+ return 0;
+}
+
+static int __init test_AB(void)
+{
+ struct kfence *A, *B;
+ int ret;
+
+ /* Test kfence (A) waiting on an event source (B) */
+ pr_debug("%s\n", __func__);
+
+ A = alloc_kfence();
+ B = alloc_kfence();
+ if (!A || !B)
+ return -ENOMEM;
+
+ ret = kfence_await_kfence(A, B, GFP_KERNEL);
+ if (ret < 0)
+ return ret;
+ if (ret == 0)
+ return -EINVAL;
+
+ kfence_complete(A);
+ if (kfence_done(A))
+ return -EINVAL;
+
+ kfence_complete(B);
+ if (!kfence_done(B))
+ return -EINVAL;
+
+ if (!kfence_done(A))
+ return -EINVAL;
+
+ kfence_put(B);
+ kfence_put(A);
+ return 0;
+}
+
+static int __init test_ABC(void)
+{
+ struct kfence *A, *B, *C;
+ int ret;
+
+ /* Test a chain of fences, A waits on B who waits on C */
+ pr_debug("%s\n", __func__);
+
+ A = alloc_kfence();
+ B = alloc_kfence();
+ C = alloc_kfence();
+ if (!A || !B || !C)
+ return -ENOMEM;
+
+ ret = kfence_await_kfence(A, B, GFP_KERNEL);
+ if (ret < 0)
+ return ret;
+ if (ret == 0)
+ return -EINVAL;
+
+ ret = kfence_await_kfence(B, C, GFP_KERNEL);
+ if (ret < 0)
+ return ret;
+ if (ret == 0)
+ return -EINVAL;
+
+ kfence_complete(A);
+ if (kfence_done(A))
+ return -EINVAL;
+
+ kfence_complete(B);
+ if (kfence_done(B))
+ return -EINVAL;
+
+ if (kfence_done(A))
+ return -EINVAL;
+
+ kfence_complete(C);
+ if (!kfence_done(C))
+ return -EINVAL;
+
+ if (!kfence_done(B))
+ return -EINVAL;
+
+ if (!kfence_done(A))
+ return -EINVAL;
+
+ kfence_put(C);
+ kfence_put(B);
+ kfence_put(A);
+ return 0;
+}
+
+static int __init test_AB_C(void)
+{
+ struct kfence *A, *B, *C;
+ int ret;
+
+ /* Test multiple fences (AB) waiting on a single event (C) */
+ pr_debug("%s\n", __func__);
+
+ A = alloc_kfence();
+ B = alloc_kfence();
+ C = alloc_kfence();
+ if (!A || !B || !C)
+ return -ENOMEM;
+
+ ret = kfence_await_kfence(A, C, GFP_KERNEL);
+ if (ret < 0)
+ return ret;
+ if (ret == 0)
+ return -EINVAL;
+
+ ret = kfence_await_kfence(B, C, GFP_KERNEL);
+ if (ret < 0)
+ return ret;
+ if (ret == 0)
+ return -EINVAL;
+
+ kfence_complete(A);
+ kfence_complete(B);
+
+ if (kfence_done(A))
+ return -EINVAL;
+
+ if (kfence_done(B))
+ return -EINVAL;
+
+ kfence_complete(C);
+ if (!kfence_done(C))
+ return -EINVAL;
+
+ if (!kfence_done(B))
+ return -EINVAL;
+
+ if (!kfence_done(A))
+ return -EINVAL;
+
+ kfence_put(C);
+ kfence_put(B);
+ kfence_put(A);
+ return 0;
+}
+
+static int __init test_C_AB(void)
+{
+ struct kfence *A, *B, *C;
+ int ret;
+
+ /* Test multiple event sources (A,B) for a single fence (C) */
+ pr_debug("%s\n", __func__);
+
+ A = alloc_kfence();
+ B = alloc_kfence();
+ C = alloc_kfence();
+ if (!A || !B || !C)
+ return -ENOMEM;
+
+ ret = kfence_await_kfence(C, A, GFP_KERNEL);
+ if (ret < 0)
+ return ret;
+ if (ret == 0)
+ return -EINVAL;
+
+ ret = kfence_await_kfence(C, B, GFP_KERNEL);
+ if (ret < 0)
+ return ret;
+ if (ret == 0)
+ return -EINVAL;
+
+ kfence_complete(C);
+ if (kfence_done(C))
+ return -EINVAL;
+
+ kfence_complete(A);
+ kfence_complete(B);
+
+ if (!kfence_done(A))
+ return -EINVAL;
+
+ if (!kfence_done(B))
+ return -EINVAL;
+
+ if (!kfence_done(C))
+ return -EINVAL;
+
+ kfence_put(C);
+ kfence_put(B);
+ kfence_put(A);
+ return 0;
+}
+
+static int __init test_completion(void)
+{
+ struct kfence *fence;
+ struct completion x;
+ int ret;
+
+ /* Test use of a completion as an event source for kfences */
+ pr_debug("%s\n", __func__);
+
+ init_completion(&x);
+
+ fence = alloc_kfence();
+ if (!fence)
+ return -ENOMEM;
+
+ ret = kfence_await_completion(fence, &x, GFP_KERNEL);
+ if (ret < 0)
+ return ret;
+ if (ret == 0)
+ return -EINVAL;
+
+ kfence_complete(fence);
+ if (kfence_done(fence))
+ return -EINVAL;
+
+ complete_all(&x);
+ if (!kfence_done(fence))
+ return -EINVAL;
+
+ kfence_put(fence);
+ return 0;
+}
+
+static int __init test_delay(void)
+{
+ struct kfence *fence;
+ ktime_t delay;
+ int ret;
+
+ /* Test use of a hrtimer as an event source for kfences */
+ pr_debug("%s\n", __func__);
+
+ fence = alloc_kfence();
+ if (!fence)
+ return -ENOMEM;
+
+ delay = ktime_get();
+
+ ret = kfence_await_hrtimer(fence, CLOCK_MONOTONIC, HRTIMER_MODE_REL,
+ ms_to_ktime(1), 1 << 10,
+ GFP_KERNEL);
+ if (ret < 0)
+ return ret;
+ if (ret == 0)
+ return -EINVAL;
+
+ kfence_complete(fence);
+ kfence_wait(fence);
+
+ delay = ktime_sub(ktime_get(), delay);
+ kfence_put(fence);
+
+ if (!ktime_to_ms(delay)) {
+ pr_err("kfence woke too early, delay was only %lldns\n",
+ (long long)ktime_to_ns(delay));
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+struct task_ipc {
+ struct work_struct work;
+ struct completion started;
+ struct kfence *in, *out;
+ int value;
+};
+
+static void __init task_ipc(struct work_struct *work)
+{
+ struct task_ipc *ipc = container_of(work, typeof(*ipc), work);
+
+ complete(&ipc->started);
+
+ kfence_wait(ipc->in);
+ smp_store_mb(ipc->value, 1);
+ kfence_complete(ipc->out);
+}
+
+static int __init test_chain(void)
+{
+ const int nfences = 4096;
+ struct kfence **fences;
+ int ret, i;
+
+ /* Test a long chain of fences */
+ pr_debug("%s\n", __func__);
+
+ fences = kmalloc_array(nfences, sizeof(*fences), GFP_KERNEL);
+ if (!fences)
+ return -ENOMEM;
+
+ for (i = 0; i < nfences; i++) {
+ fences[i] = alloc_kfence();
+ if (!fences[i])
+ return -ENOMEM;
+
+ if (i > 0) {
+ ret = kfence_await_kfence(fences[i],
+ fences[i - 1],
+ GFP_KERNEL);
+ if (ret < 0)
+ return ret;
+ }
+ }
+
+ for (i = nfences; --i; ) {
+ kfence_complete(fences[i]);
+ if (kfence_done(fences[i]))
+ return -EINVAL;
+ }
+
+ kfence_complete(fences[0]);
+ for (i = 0; i < nfences; i++) {
+ if (!kfence_done(fences[i]))
+ return -EINVAL;
+
+ kfence_put(fences[i]);
+ }
+ kfree(fences);
+ return 0;
+}
+
+static int __init test_ipc(void)
+{
+ struct task_ipc ipc;
+ int ret = 0;
+
+ /* Test use of kfence as an interprocess signaling mechanism */
+ pr_debug("%s\n", __func__);
+
+ ipc.in = alloc_kfence();
+ ipc.out = alloc_kfence();
+ if (!ipc.in || !ipc.out)
+ return -ENOMEM;
+
+ /* use a completion to avoid chicken-and-egg testing for kfence */
+ init_completion(&ipc.started);
+
+ ipc.value = 0;
+ INIT_WORK(&ipc.work, task_ipc);
+ schedule_work(&ipc.work);
+
+ wait_for_completion(&ipc.started);
+
+ usleep_range(1000, 2000);
+ if (READ_ONCE(ipc.value)) {
+ pr_err("worker updated value before kfence was signaled\n");
+ ret = -EINVAL;
+ }
+
+ kfence_complete(ipc.in);
+ kfence_wait(ipc.out);
+
+ if (!READ_ONCE(ipc.value)) {
+ pr_err("worker signaled kfence before value was posted\n");
+ ret = -EINVAL;
+ }
+
+ flush_work(&ipc.work);
+ kfence_put(ipc.in);
+ kfence_put(ipc.out);
+ return ret;
+}
+
+static int __init test_kfence_init(void)
+{
+ int ret;
+
+ pr_info("Testing kfences\n");
+
+ ret = test_self();
+ if (ret < 0) {
+ pr_err("self failed\n");
+ return ret;
+ }
+
+ ret = test_stack();
+ if (ret < 0) {
+ pr_err("stack failed\n");
+ return ret;
+ }
+
+ ret = test_dag();
+ if (ret < 0) {
+ pr_err("DAG checker failed\n");
+ return ret;
+ }
+
+ ret = test_AB();
+ if (ret < 0) {
+ pr_err("AB failed\n");
+ return ret;
+ }
+
+ ret = test_ABC();
+ if (ret < 0) {
+ pr_err("ABC failed\n");
+ return ret;
+ }
+
+ ret = test_AB_C();
+ if (ret < 0) {
+ pr_err("AB_C failed\n");
+ return ret;
+ }
+
+ ret = test_C_AB();
+ if (ret < 0) {
+ pr_err("C_AB failed\n");
+ return ret;
+ }
+
+ ret = test_chain();
+ if (ret < 0) {
+ pr_err("chain failed\n");
+ return ret;
+ }
+
+ ret = test_ipc();
+ if (ret < 0) {
+ pr_err("ipc failed\n");
+ return ret;
+ }
+
+ ret = test_completion();
+ if (ret < 0) {
+ pr_err("completion failed\n");
+ return ret;
+ }
+
+ ret = test_delay();
+ if (ret < 0) {
+ pr_err("delay failed\n");
+ return ret;
+ }
+
+ return 0;
+}
+
+static void __exit test_kfence_cleanup(void)
+{
+}
+
+module_init(test_kfence_init);
+module_exit(test_kfence_cleanup);
+
+MODULE_AUTHOR("Intel Corporation");
+MODULE_LICENSE("GPL");
diff --git a/tools/testing/selftests/lib/kfence.sh b/tools/testing/selftests/lib/kfence.sh
new file mode 100755
index 000000000000..487320c70ed1
--- /dev/null
+++ b/tools/testing/selftests/lib/kfence.sh
@@ -0,0 +1,10 @@
+#!/bin/sh
+# Runs infrastructure tests using test-kfence kernel module
+
+if /sbin/modprobe -q test-kfence; then
+ /sbin/modprobe -q -r test-kfence
+ echo "kfence: ok"
+else
+ echo "kfence: [FAIL]"
+ exit 1
+fi
--
2.8.1
More information about the Intel-gfx-trybot
mailing list