<div dir="ltr">Both of these patches need I915_PARAM_HAS_USERPTR_WHATEVER queries.<br></div><div class="gmail_extra"><br><div class="gmail_quote">On Wed, Nov 8, 2017 at 10:51 AM, Chris Wilson <span dir="ltr"><<a href="mailto:chris@chris-wilson.co.uk" target="_blank">chris@chris-wilson.co.uk</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">Acquiring the backing struct pages for the userptr range is not free;<br>
the first client for userptr would insist on frequently creating userptr<br>
objects ahead of time and not use them. For that first client, deferring<br>
the cost of populating the userptr (calling get_user_pages()) to the<br>
actual execbuf was a substantial improvement. However, not all clients<br>
are the same, and most would like to validate that the userptr is valid<br>
and backed by struct pages upon creation, so offer a<br>
I915_USERPTR_POPULATE flag to do just that.<br>
<br>
Note that big difference between I915_USERPTR_POPULATE and the deferred<br>
scheme is that POPULATE is guaranteed to be synchronous, the result is<br>
known before the ioctl returns (and the handle exposed). However, due to<br>
system memory pressure, the object may be paged out before use,<br>
requiring them to be paged back in on execbuf (as may always happen).<br>
<br>
Testcase: igt/gem_userptr_blits/populate<br>
Signed-off-by: Chris Wilson <<a href="mailto:chris@chris-wilson.co.uk">chris@chris-wilson.co.uk</a>><br>
Cc: Tvrtko Ursulin <<a href="mailto:tvrtko.ursulin@intel.com">tvrtko.ursulin@intel.com</a>><br>
Cc: Michał Winiarski <<a href="mailto:michal.winiarski@intel.com">michal.winiarski@intel.com</a>><br>
Cc: Jason Ekstrand <<a href="mailto:jason@jlekstrand.net">jason@jlekstrand.net</a>><br>
---<br>
drivers/gpu/drm/i915/i915_gem_<wbr>userptr.c | 63 ++++++++++++++++++++++++++++++<wbr>++-<br>
include/uapi/drm/i915_drm.h | 1 +<br>
2 files changed, 63 insertions(+), 1 deletion(-)<br>
<br>
diff --git a/drivers/gpu/drm/i915/i915_<wbr>gem_userptr.c b/drivers/gpu/drm/i915/i915_<wbr>gem_userptr.c<br>
index dbc5818dd28b..f05f1322ec27 100644<br>
--- a/drivers/gpu/drm/i915/i915_<wbr>gem_userptr.c<br>
+++ b/drivers/gpu/drm/i915/i915_<wbr>gem_userptr.c<br>
@@ -746,6 +746,64 @@ probe_range(struct mm_struct *mm, unsigned long addr, unsigned long len)<br>
return ret;<br>
}<br>
<br>
+static int populate(struct drm_i915_gem_object *obj)<br>
+{<br>
+ const unsigned long num_pages = obj->base.size >> PAGE_SHIFT;<br>
+ const unsigned long addr = obj->userptr.ptr;<br>
+ struct sg_table *pages;<br>
+ struct page **pvec;<br>
+ int pinned;<br>
+ int ret = 0;<br>
+<br>
+ pvec = kvmalloc_array(num_pages, sizeof(struct page *), GFP_KERNEL);<br>
+ if (!pvec)<br>
+ return -ENOMEM;<br>
+<br>
+ pinned = __get_user_pages_fast(addr, num_pages,<br>
+ !obj->userptr.read_only,<br>
+ pvec);<br>
+ if (pinned < 0) {<br>
+ ret = pinned;<br>
+ goto err;<br>
+ }<br>
+<br>
+ if (pinned < num_pages) {<br>
+ unsigned int flags;<br>
+<br>
+ flags = 0;<br>
+ if (!obj->userptr.read_only)<br>
+ flags |= FOLL_WRITE;<br>
+<br>
+ down_read(¤t->mm->mmap_<wbr>sem);<br>
+ do {<br>
+ ret = get_user_pages(addr + pinned * PAGE_SIZE,<br>
+ num_pages - pinned,<br>
+ flags,<br>
+ pvec + pinned, NULL);<br>
+ if (ret < 0)<br>
+ break;<br>
+<br>
+ pinned += ret;<br>
+ } while (pinned < num_pages);<br>
+ up_read(¤t->mm->mmap_<wbr>sem);<br>
+ if (ret < 0)<br>
+ goto err;<br>
+ }<br>
+<br>
+ mutex_lock(&obj->mm.lock);<br>
+ pages = __i915_gem_userptr_alloc_<wbr>pages(obj, pvec, num_pages);<br>
+ mutex_unlock(&obj->mm.lock);<br>
+ if (!IS_ERR(pages))<br>
+ pinned = 0;<br>
+ else<br>
+ ret = PTR_ERR(pages);<br>
+<br>
+err:<br>
+ release_pages(pvec, pinned, 0);<br>
+ kvfree(pvec);<br>
+ return ret;<br>
+}<br>
+<br>
/**<br>
* Creates a new mm object that wraps some normal memory from the process<br>
* context - user memory.<br>
@@ -799,6 +857,7 @@ i915_gem_userptr_ioctl(struct drm_device *dev, void *data, struct drm_file *file<br>
<br>
if (args->flags & ~(I915_USERPTR_READ_ONLY |<br>
I915_USERPTR_PROBE |<br>
+ I915_USERPTR_POPULATE |<br>
I915_USERPTR_UNSYNCHRONIZED))<br>
return -EINVAL;<br>
<br>
@@ -816,7 +875,7 @@ i915_gem_userptr_ioctl(struct drm_device *dev, void *data, struct drm_file *file<br>
return -ENODEV;<br>
}<br>
<br>
- if (args->flags & I915_USERPTR_PROBE) {<br>
+ if (args->flags & (I915_USERPTR_PROBE | I915_USERPTR_POPULATE)) {<br>
/*<br>
* Check that the range pointed to represents real struct<br>
* pages and not iomappings (at this moment in time!)<br>
@@ -846,6 +905,8 @@ i915_gem_userptr_ioctl(struct drm_device *dev, void *data, struct drm_file *file<br>
ret = i915_gem_userptr_init__mm_<wbr>struct(obj);<br>
if (ret == 0)<br>
ret = i915_gem_userptr_init__mmu_<wbr>notifier(obj, args->flags);<br>
+ if (ret == 0 && args->flags & I915_USERPTR_POPULATE)<br>
+ ret = populate(obj);<br>
if (ret == 0)<br>
ret = drm_gem_handle_create(file, &obj->base, &handle);<br>
<br>
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h<br>
index 392ede2ca63e..500c408c912a 100644<br>
--- a/include/uapi/drm/i915_drm.h<br>
+++ b/include/uapi/drm/i915_drm.h<br>
@@ -1354,6 +1354,7 @@ struct drm_i915_gem_userptr {<br>
__u32 flags;<br>
#define I915_USERPTR_READ_ONLY 0x1<br>
#define I915_USERPTR_PROBE 0x2<br>
+#define I915_USERPTR_POPULATE 0x4<br>
#define I915_USERPTR_UNSYNCHRONIZED 0x80000000<br>
/**<br>
* Returned handle for the object.<br>
<span class="HOEnZb"><font color="#888888">--<br>
2.15.0<br>
<br>
</font></span></blockquote></div><br></div>