[Intel-gfx] [PATCH 08/11] drm/i915: Support for pread/pwrite from/to non shmem backed objects
ankitprasad.r.sharma at intel.com
Wed Jan 13 22:16:44 PST 2016
From: Ankitprasad Sharma <ankitprasad.r.sharma at intel.com>
This patch adds support for extending the pread/pwrite functionality
to objects not backed by shmem. The access is made through the GTT
interface. This covers objects backed by stolen memory as well as
other non-shmem-backed objects.
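For illustration only (not part of the patch), a minimal userspace
sketch of exercising the extended functionality is shown below. It
uses only the existing pread uAPI from i915_drm.h; the `handle` is
assumed to name a non-shmem-backed (e.g. stolen) object created via
the earlier patches in this series:

#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

static int gem_pread(int fd, uint32_t handle, uint64_t offset,
		     void *dst, uint64_t size)
{
	struct drm_i915_gem_pread pread;

	memset(&pread, 0, sizeof(pread));
	pread.handle = handle;		/* GEM handle (may be stolen-backed) */
	pread.offset = offset;		/* byte offset into the object */
	pread.size = size;		/* number of bytes to read */
	pread.data_ptr = (uintptr_t)dst;	/* destination in user memory */

	/* Before this patch, non-shmem objects failed here with -EINVAL */
	return ioctl(fd, DRM_IOCTL_I915_GEM_PREAD, &pread);
}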
v2: Drop locks around slow_user_access, prefault the pages before
access (Chris)
v3: Rebased to the latest drm-intel-nightly (Ankit)
v4: Moved page base & offset calculations outside the copy loop,
corrected data types for size and offset variables, corrected if-else
braces format (Tvrtko/kerneldocs)
v5: Enabled pread/pwrite for all non-shmem backed objects, with no
tiling restrictions (Ankit)
v6: Using pwrite_fast for non-shmem backed objects as well (Chris)
v7: Updated commit message, Renamed i915_gem_gtt_read to i915_gem_gtt_copy,
added pwrite slow path for non-shmem backed objects (Chris/Tvrtko)
v8: Updated v7 commit message, mutex unlock around pwrite slow path for
non-shmem backed objects (Tvrtko)
v9: Corrected check during pread_ioctl, to avoid shmem_pread being
called for non-shmem backed objects (Tvrtko)
v10: Moved the write_domain check to needs_clflush and tiling mode check
to pwrite_fast (Chris)
v11: Use pwrite_fast fallback for all objects (shmem and non-shmem backed);
call fast_user_write regardless of a pagefault in the previous iteration
(see the control-flow sketch below)
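A condensed sketch of the v11 fallback in the pwrite fast path (not
literal patch code; declarations and cleanup labels trimmed, mirrors
the hunks below): fast_user_write() is attempted for every page, and
only a faulting page drops struct_mutex and takes the sleeping
slow_user_access() path, which may fault on user memory because it
uses a non-atomic WC mapping:

	while (remain > 0) {
		/* bytes to copy for this page */
		page_length = remain;
		if ((page_offset + remain) > PAGE_SIZE)
			page_length = PAGE_SIZE - page_offset;

		/* atomic WC copy; fails if the user page would fault */
		if (fast_user_write(i915->gtt.mappable, page_base,
				    page_offset, user_data, page_length)) {
			hit_slow_path = true;
			/* drop the lock: the slow copy may sleep */
			mutex_unlock(&dev->struct_mutex);
			if (slow_user_access(i915->gtt.mappable, page_base,
					     page_offset, user_data,
					     page_length, true)) {
				mutex_lock(&dev->struct_mutex);
				ret = -EFAULT;
				goto out_flush;
			}
			mutex_lock(&dev->struct_mutex);
		}

		remain -= page_length;
		user_data += page_length;
		page_base += page_length;
		page_offset = 0;
	}

	/* the slow path ran unlocked; revalidate the GTT domain */
	if (hit_slow_path)
		ret = i915_gem_object_set_to_gtt_domain(obj, true);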
Testcase: igt/gem_stolen
Signed-off-by: Ankitprasad Sharma <ankitprasad.r.sharma at intel.com>
---
drivers/gpu/drm/i915/i915_gem.c | 155 +++++++++++++++++++++++++++++++++-------
1 file changed, 129 insertions(+), 26 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 7642b1b..ab1d043 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -55,6 +55,9 @@ static bool cpu_cache_is_coherent(struct drm_device *dev,
static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
{
+ if (obj->base.write_domain == I915_GEM_DOMAIN_CPU)
+ return false;
+
if (!cpu_cache_is_coherent(obj->base.dev, obj->cache_level))
return true;
@@ -646,6 +649,99 @@ shmem_pread_slow(struct page *page, int shmem_page_offset, int page_length,
return ret ? - EFAULT : 0;
}
+static inline uint64_t
+slow_user_access(struct io_mapping *mapping,
+ uint64_t page_base, int page_offset,
+ char __user *user_data,
+ int length, bool pwrite)
+{
+ void __iomem *vaddr_inatomic;
+ void *vaddr;
+ uint64_t unwritten;
+
+ vaddr_inatomic = io_mapping_map_wc(mapping, page_base);
+ /* We can use the cpu mem copy function because this is X86. */
+ vaddr = (void __force *)vaddr_inatomic + page_offset;
+ if (pwrite)
+ unwritten = __copy_from_user(vaddr, user_data, length);
+ else
+ unwritten = __copy_to_user(user_data, vaddr, length);
+
+ io_mapping_unmap(vaddr_inatomic);
+ return unwritten;
+}
+
+static int
+i915_gem_gtt_copy(struct drm_device *dev,
+ struct drm_i915_gem_object *obj, uint64_t size,
+ uint64_t data_offset, uint64_t data_ptr)
+{
+ struct drm_i915_private *dev_priv = dev->dev_private;
+ char __user *user_data;
+ uint64_t remain;
+ uint64_t offset, page_base;
+ int page_offset, page_length, ret = 0;
+
+ ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE);
+ if (ret)
+ goto out;
+
+ ret = i915_gem_object_set_to_gtt_domain(obj, false);
+ if (ret)
+ goto out_unpin;
+
+ ret = i915_gem_object_put_fence(obj);
+ if (ret)
+ goto out_unpin;
+
+ user_data = to_user_ptr(data_ptr);
+ remain = size;
+ offset = i915_gem_obj_ggtt_offset(obj) + data_offset;
+
+ mutex_unlock(&dev->struct_mutex);
+ if (likely(!i915.prefault_disable))
+ ret = fault_in_multipages_writeable(user_data, remain);
+
+ /*
+ * page_offset = offset within page
+ * page_base = page offset within aperture
+ */
+ page_offset = offset_in_page(offset);
+ page_base = offset & PAGE_MASK;
+
+ while (remain > 0) {
+ /* page_length = bytes to copy for this page */
+ page_length = remain;
+ if ((page_offset + remain) > PAGE_SIZE)
+ page_length = PAGE_SIZE - page_offset;
+
+ /* This is a slow read/write as it tries to read from
+ * and write to user memory, which may result in page
+ * faults
+ */
+ ret = slow_user_access(dev_priv->gtt.mappable, page_base,
+ page_offset, user_data,
+ page_length, false);
+
+ if (ret) {
+ ret = -EFAULT;
+ break;
+ }
+
+ remain -= page_length;
+ user_data += page_length;
+ page_base += page_length;
+ page_offset = 0;
+ }
+
+ mutex_lock(&dev->struct_mutex);
+
+out_unpin:
+ i915_gem_object_ggtt_unpin(obj);
+out:
+ return ret;
+}
+
static int
i915_gem_shmem_pread(struct drm_device *dev,
struct drm_i915_gem_object *obj,
@@ -769,17 +865,14 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data,
goto out;
}
- /* prime objects have no backing filp to GEM pread/pwrite
- * pages from.
- */
- if (!obj->base.filp) {
- ret = -EINVAL;
- goto out;
- }
-
trace_i915_gem_object_pread(obj, args->offset, args->size);
- ret = i915_gem_shmem_pread(dev, obj, args, file);
+ /* pread for non shmem backed objects */
+ if (!obj->base.filp && obj->tiling_mode == I915_TILING_NONE)
+ ret = i915_gem_gtt_copy(dev, obj, args->size,
+ args->offset, args->data_ptr);
+ else if (obj->base.filp)
+ ret = i915_gem_shmem_pread(dev, obj, args, file);
out:
drm_gem_object_unreference(&obj->base);
@@ -821,10 +914,15 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915,
struct drm_i915_gem_pwrite *args,
struct drm_file *file)
{
+ struct drm_device *dev = obj->base.dev;
struct drm_mm_node node;
uint64_t remain, offset;
char __user *user_data;
int ret;
+ bool hit_slow_path = false;
+
+ if (obj->tiling_mode != I915_TILING_NONE)
+ return -EFAULT;
ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE | PIN_NONBLOCK);
if (ret) {
@@ -884,11 +982,23 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915,
/* If we get a fault while copying data, then (presumably) our
* source page isn't available. Return the error and we'll
* retry in the slow path.
+ * If the object is non-shmem backed, we retry via the
+ * path that handles page faults.
*/
if (fast_user_write(i915->gtt.mappable, page_base,
page_offset, user_data, page_length)) {
- ret = -EFAULT;
- goto out_flush;
+ hit_slow_path = true;
+ mutex_unlock(&dev->struct_mutex);
+ if (slow_user_access(i915->gtt.mappable,
+ page_base,
+ page_offset, user_data,
+ page_length, true)) {
+ ret = -EFAULT;
+ mutex_lock(&dev->struct_mutex);
+ goto out_flush;
+ }
+
+ mutex_lock(&dev->struct_mutex);
}
remain -= page_length;
@@ -897,6 +1007,9 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915,
}
out_flush:
+ if (hit_slow_path)
+ ret = i915_gem_object_set_to_gtt_domain(obj, true);
+
intel_fb_obj_flush(obj, false, ORIGIN_GTT);
out_unpin:
if (node.allocated) {
@@ -1155,14 +1268,6 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
goto out;
}
- /* prime objects have no backing filp to GEM pread/pwrite
- * pages from.
- */
- if (!obj->base.filp) {
- ret = -EINVAL;
- goto out;
- }
-
trace_i915_gem_object_pwrite(obj, args->offset, args->size);
ret = -EFAULT;
@@ -1172,9 +1277,7 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
* pread/pwrite currently are reading and writing from the CPU
* perspective, requiring manual detiling by the client.
*/
- if (obj->tiling_mode == I915_TILING_NONE &&
- obj->base.write_domain != I915_GEM_DOMAIN_CPU &&
- cpu_write_needs_clflush(obj)) {
+ if (!obj->base.filp || cpu_write_needs_clflush(obj)) {
ret = i915_gem_gtt_pwrite_fast(dev_priv, obj, args, file);
/* Note that the gtt paths might fail with non-page-backed user
* pointers (e.g. gtt mappings when moving data between
@@ -1184,8 +1287,10 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
if (ret == -EFAULT || ret == -ENOSPC) {
if (obj->phys_handle)
ret = i915_gem_phys_pwrite(obj, args, file);
- else
+ else if (obj->base.filp)
ret = i915_gem_shmem_pwrite(dev, obj, args, file);
+ else
+ ret = -ENODEV;
}
out:
@@ -3952,9 +4057,7 @@ out:
* object is now coherent at its new cache level (with respect
* to the access domain).
*/
- if (obj->cache_dirty &&
- obj->base.write_domain != I915_GEM_DOMAIN_CPU &&
- cpu_write_needs_clflush(obj)) {
+ if (obj->cache_dirty && cpu_write_needs_clflush(obj)) {
if (i915_gem_clflush_object(obj, true))
i915_gem_chipset_flush(obj->base.dev);
}
--
1.9.1
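For reference, after the first hunk the consolidated helper reads
roughly as follows (a sketch: the trailing return is outside the diff
context and is assumed from the i915_gem.c of that period):

static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
{
	/* CPU-domain writes are CPU-coherent by definition */
	if (obj->base.write_domain == I915_GEM_DOMAIN_CPU)
		return false;

	if (!cpu_cache_is_coherent(obj->base.dev, obj->cache_level))
		return true;

	/* assumed tail: pinned-display (scanout) objects still flush */
	return obj->pin_display;
}

This lets both i915_gem_pwrite_ioctl() and the cache-level path drop
their open-coded write_domain checks, as seen in the last two hunks.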