[Intel-gfx] [PATCH 02/21] drm/i915: Refactor pwrite/pread to use single copy of get_user_pages
Chris Wilson
chris at chris-wilson.co.uk
Sat Apr 16 11:17:26 CEST 2011
Replace the three nearly identical copies of the code with a single
function. And take advantage of the opportunity to do some
micro-optimisation: avoid the vmalloc if at all possible and also avoid
dropping the lock unless we are forced to acquire the mm semaphore.
Measuring the impact of the optimisations shows that they are not so
micro after all. Running x11perf -aa10text on PineView:

  before: 1.28 Mglyph/sec
  after:  1.45 Mglyph/sec
(Glyphs, in general and on PineView in particular, are one of the very
few pwrite rate-limited workloads.)
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
Reviewed-by: Daniel Vetter <daniel.vetter at ffwll.ch>
Reviewed-by: Ben Widawsky <ben at bwidawsk.net>
Reviewed-by: Paul Menzel <paulepanter at users.sourceforge.net>
---
drivers/gpu/drm/i915/i915_gem.c | 149 ++++++++++++++++++++++++---------------
1 files changed, 92 insertions(+), 57 deletions(-)
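
A note on the "avoid the vmalloc" point above: the new helper tries a
plain kmalloc() for the page array first, and only falls back to
drm_malloc_ab(), which may vmalloc, when that fails. A minimal sketch
of the pattern (the helper name alloc_page_array is made up for
illustration; drm_malloc_ab() and drm_free_large() are the existing
DRM helpers):

	#include <linux/slab.h>
	#include "drmP.h"	/* pulls in drm_malloc_ab()/drm_free_large() */

	static struct page **alloc_page_array(int n)
	{
		struct page **pages;

		/* Prefer a cheap, physically contiguous kmalloc:
		 * __GFP_NORETRY keeps the allocator from stalling in
		 * reclaim, __GFP_NOWARN silences the failures we
		 * expect for large n. */
		pages = kmalloc(n * sizeof(*pages),
				GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
		if (pages == NULL)
			/* Too large, or memory too fragmented, for
			 * kmalloc; drm_malloc_ab() falls back to
			 * vmalloc for big arrays. */
			pages = drm_malloc_ab(n, sizeof(*pages));

		return pages;
	}

Either allocation is released with drm_free_large(), which checks
is_vmalloc_addr() and picks kfree() or vfree() accordingly; that is
why the callers can free the array without knowing which allocator
satisfied it.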
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 57bc775..140d324 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -257,6 +257,73 @@ static int i915_gem_object_needs_bit17_swizzle(struct drm_i915_gem_object *obj)
obj->tiling_mode != I915_TILING_NONE;
}
+/**
+ * Magically retrieves the pages for the user addr whilst holding the
+ * dev->struct_mutex.
+ *
+ * Since we cannot take the mm semaphore whilst holding our dev->struct_mutex,
+ * due to the pre-existing lock dependency established by i915_gem_fault(),
+ * we have to perform some sleight-of-hand.
+ *
+ * First, we try the lockless variant of get_user_pages(),
+ * __get_user_pages_fast(), whilst continuing to hold the mutex. If that
+ * fails to get all the user pages, then we have no choice but to acquire
+ * the mm semaphore, thus dropping the lock on dev->struct_mutex to do so.
+ * The dev->struct_mutex is then re-acquired before we return.
+ *
+ * Returns: an error code *and* the number of user pages acquired. Even
+ * on an error, you must iterate over the returned pages and release them.
+ */
+static int
+i915_gem_get_user_pages(struct drm_device *dev,
+ unsigned long addr,
+ bool write,
+ int *num_pages,
+ struct page ***pages_out)
+{
+ struct page **pages;
+ int pinned, ret;
+ int n = *num_pages;
+
+ pages = kmalloc(n*sizeof(struct page *),
+ GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
+ if (pages == NULL) {
+ pages = drm_malloc_ab(n, sizeof(struct page *));
+ if (pages == NULL) {
+ *pages_out = NULL;
+ *num_pages = 0;
+ return -ENOMEM;
+ }
+ }
+
+ pinned = __get_user_pages_fast(addr, n, write, pages);
+ if (pinned < n) {
+ struct mm_struct *mm = current->mm;
+
+ mutex_unlock(&dev->struct_mutex);
+ down_read(&mm->mmap_sem);
+ ret = get_user_pages(current, mm,
+ addr + (pinned << PAGE_SHIFT),
+ n - pinned,
+ write, 0,
+ pages + pinned,
+ NULL);
+ up_read(&mm->mmap_sem);
+ mutex_lock(&dev->struct_mutex);
+ if (ret > 0)
+ pinned += ret;
+ }
+
+ ret = 0;
+ if (pinned < n)
+ ret = -EFAULT;
+
+ *num_pages = pinned;
+ *pages_out = pages;
+ return ret;
+}
+
+
static inline void
slow_shmem_copy(struct page *dst_page,
int dst_offset,
@@ -398,11 +465,11 @@ i915_gem_shmem_pread_slow(struct drm_device *dev,
struct drm_file *file)
{
struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
- struct mm_struct *mm = current->mm;
struct page **user_pages;
ssize_t remain;
- loff_t offset, pinned_pages, i;
- loff_t first_data_page, last_data_page, num_pages;
+ loff_t offset;
+ loff_t first_data_page, last_data_page;
+ int num_pages, i;
int shmem_page_offset;
int data_page_index, data_page_offset;
int page_length;
@@ -420,20 +487,10 @@ i915_gem_shmem_pread_slow(struct drm_device *dev,
last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
num_pages = last_data_page - first_data_page + 1;
- user_pages = drm_malloc_ab(num_pages, sizeof(struct page *));
- if (user_pages == NULL)
- return -ENOMEM;
-
- mutex_unlock(&dev->struct_mutex);
- down_read(&mm->mmap_sem);
- pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
- num_pages, 1, 0, user_pages, NULL);
- up_read(&mm->mmap_sem);
- mutex_lock(&dev->struct_mutex);
- if (pinned_pages < num_pages) {
- ret = -EFAULT;
+ ret = i915_gem_get_user_pages(dev, data_ptr, true,
+ &num_pages, &user_pages);
+ if (ret)
goto out;
- }
ret = i915_gem_object_set_cpu_read_domain_range(obj,
args->offset,
@@ -494,7 +551,7 @@ i915_gem_shmem_pread_slow(struct drm_device *dev,
}
out:
- for (i = 0; i < pinned_pages; i++) {
+ for (i = 0; i < num_pages; i++) {
SetPageDirty(user_pages[i]);
mark_page_accessed(user_pages[i]);
page_cache_release(user_pages[i]);
@@ -679,10 +736,9 @@ i915_gem_gtt_pwrite_slow(struct drm_device *dev,
drm_i915_private_t *dev_priv = dev->dev_private;
ssize_t remain;
loff_t gtt_page_base, offset;
- loff_t first_data_page, last_data_page, num_pages;
- loff_t pinned_pages, i;
+ loff_t first_data_page, last_data_page;
+ int num_pages, i;
struct page **user_pages;
- struct mm_struct *mm = current->mm;
int gtt_page_offset, data_page_offset, data_page_index, page_length;
int ret;
uint64_t data_ptr = args->data_ptr;
@@ -697,28 +753,18 @@ i915_gem_gtt_pwrite_slow(struct drm_device *dev,
last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
num_pages = last_data_page - first_data_page + 1;
- user_pages = drm_malloc_ab(num_pages, sizeof(struct page *));
- if (user_pages == NULL)
- return -ENOMEM;
-
- mutex_unlock(&dev->struct_mutex);
- down_read(&mm->mmap_sem);
- pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
- num_pages, 0, 0, user_pages, NULL);
- up_read(&mm->mmap_sem);
- mutex_lock(&dev->struct_mutex);
- if (pinned_pages < num_pages) {
- ret = -EFAULT;
- goto out_unpin_pages;
- }
+ ret = i915_gem_get_user_pages(dev, data_ptr, false,
+ &num_pages, &user_pages);
+ if (ret)
+ goto out;
ret = i915_gem_object_set_to_gtt_domain(obj, true);
if (ret)
- goto out_unpin_pages;
+ goto out;
ret = i915_gem_object_put_fence(obj);
if (ret)
- goto out_unpin_pages;
+ goto out;
offset = obj->gtt_offset + args->offset;
@@ -753,8 +799,8 @@ i915_gem_gtt_pwrite_slow(struct drm_device *dev,
data_ptr += page_length;
}
-out_unpin_pages:
- for (i = 0; i < pinned_pages; i++)
+out:
+ for (i = 0; i < num_pages; i++)
page_cache_release(user_pages[i]);
drm_free_large(user_pages);
@@ -803,11 +849,11 @@ i915_gem_shmem_pwrite_fast(struct drm_device *dev,
if (IS_ERR(page))
return PTR_ERR(page);
- vaddr = kmap_atomic(page, KM_USER0);
+ vaddr = kmap_atomic(page);
ret = __copy_from_user_inatomic(vaddr + page_offset,
user_data,
page_length);
- kunmap_atomic(vaddr, KM_USER0);
+ kunmap_atomic(vaddr);
set_page_dirty(page);
mark_page_accessed(page);
@@ -842,11 +888,10 @@ i915_gem_shmem_pwrite_slow(struct drm_device *dev,
struct drm_file *file)
{
struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
- struct mm_struct *mm = current->mm;
struct page **user_pages;
ssize_t remain;
- loff_t offset, pinned_pages, i;
- loff_t first_data_page, last_data_page, num_pages;
+ loff_t first_data_page, last_data_page, offset;
+ int num_pages, i;
int shmem_page_offset;
int data_page_index, data_page_offset;
int page_length;
@@ -864,20 +909,10 @@ i915_gem_shmem_pwrite_slow(struct drm_device *dev,
last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
num_pages = last_data_page - first_data_page + 1;
- user_pages = drm_malloc_ab(num_pages, sizeof(struct page *));
- if (user_pages == NULL)
- return -ENOMEM;
-
- mutex_unlock(&dev->struct_mutex);
- down_read(&mm->mmap_sem);
- pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
- num_pages, 0, 0, user_pages, NULL);
- up_read(&mm->mmap_sem);
- mutex_lock(&dev->struct_mutex);
- if (pinned_pages < num_pages) {
- ret = -EFAULT;
+ ret = i915_gem_get_user_pages(dev, data_ptr, false,
+ &num_pages, &user_pages);
+ if (ret)
goto out;
- }
ret = i915_gem_object_set_to_cpu_domain(obj, 1);
if (ret)
@@ -940,7 +975,7 @@ i915_gem_shmem_pwrite_slow(struct drm_device *dev,
}
out:
- for (i = 0; i < pinned_pages; i++)
+ for (i = 0; i < num_pages; i++)
page_cache_release(user_pages[i]);
drm_free_large(user_pages);
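
A final note on the helper's contract, since all three converted
callers rely on it: i915_gem_get_user_pages() can return -EFAULT and
still hand back pinned pages, with *num_pages updated to the count
actually pinned. A sketch of the expected unwind, condensed from the
callers above (the copy step is elided):

	struct page **user_pages;
	int num_pages = last_data_page - first_data_page + 1;
	int i, ret;

	ret = i915_gem_get_user_pages(dev, data_ptr, false,
				      &num_pages, &user_pages);
	if (ret)
		goto out;	/* num_pages and user_pages stay valid */

	/* ... copy between the object and the pinned pages ... */

out:
	/* Safe on every path: on -ENOMEM the helper sets num_pages to
	 * 0 and user_pages to NULL, and drm_free_large(NULL) is a
	 * no-op. */
	for (i = 0; i < num_pages; i++)
		page_cache_release(user_pages[i]);
	drm_free_large(user_pages);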
--
1.7.4.1