[Intel-gfx] [PATCH v2] drm/i915: Pre-allocation of shmem pages of a GEM object
akash.goel at intel.com
akash.goel at intel.com
Mon May 5 06:25:29 CEST 2014
From: Akash Goel <akash.goel at intel.com>
This patch could help to reduce the time, 'struct_mutex' is kept
locked during either the exec-buffer path or Page fault
handling path as now the backing pages are requested from shmem layer
without holding the 'struct_mutex'.
v2: Fixed the merge issue, due to which 'exec_lock' mutex was not released.
Signed-off-by: Akash Goel <akash.goel at intel.com>
---
drivers/gpu/drm/i915/i915_debugfs.c | 9 +++-
drivers/gpu/drm/i915/i915_dma.c | 1 +
drivers/gpu/drm/i915/i915_drv.h | 5 ++
drivers/gpu/drm/i915/i915_gem.c | 75 ++++++++++++++++++++++++++
drivers/gpu/drm/i915/i915_gem_execbuffer.c | 87 ++++++++++++++++++++++++++----
drivers/gpu/drm/i915/i915_trace.h | 35 ++++++++++++
6 files changed, 200 insertions(+), 12 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 18b3565..70c752b 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -247,10 +247,16 @@ static int i915_gem_stolen_list_info(struct seq_file *m, void *data)
LIST_HEAD(stolen);
int count, ret;
- ret = mutex_lock_interruptible(&dev->struct_mutex);
+ ret = mutex_lock_interruptible(&dev_priv->exec_lock);
if (ret)
return ret;
+ ret = mutex_lock_interruptible(&dev->struct_mutex);
+ if (ret) {
+ mutex_unlock(&dev_priv->exec_lock);
+ return ret;
+ }
+
total_obj_size = total_gtt_size = count = 0;
list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
if (obj->stolen == NULL)
@@ -281,6 +287,7 @@ static int i915_gem_stolen_list_info(struct seq_file *m, void *data)
list_del_init(&obj->obj_exec_link);
}
mutex_unlock(&dev->struct_mutex);
+ mutex_unlock(&dev_priv->exec_lock);
seq_printf(m, "Total %d objects, %zu bytes, %zu GTT size\n",
count, total_obj_size, total_gtt_size);
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index e393a14..e4d1cb0 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -1572,6 +1572,7 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags)
dev_priv->ring_index = 0;
mutex_init(&dev_priv->dpio_lock);
mutex_init(&dev_priv->modeset_restore_lock);
+ mutex_init(&dev_priv->exec_lock);
intel_pm_setup(dev);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 5f4f631..6dc579a 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1475,6 +1475,8 @@ struct drm_i915_private {
struct i915_ums_state ums;
/* the indicator for dispatch video commands on two BSD rings */
int ring_index;
+ /* for concurrent execbuffer protection */
+ struct mutex exec_lock;
};
static inline struct drm_i915_private *to_i915(const struct drm_device *dev)
@@ -2116,6 +2118,9 @@ int i915_gem_dumb_create(struct drm_file *file_priv,
struct drm_mode_create_dumb *args);
int i915_gem_mmap_gtt(struct drm_file *file_priv, struct drm_device *dev,
uint32_t handle, uint64_t *offset);
+void
+i915_gem_object_shmem_preallocate(struct drm_i915_gem_object *obj);
+
/**
* Returns true if seq1 is later than seq2.
*/
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index dae51c3..b19ccb8 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1408,6 +1408,8 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >>
PAGE_SHIFT;
+ i915_gem_object_shmem_preallocate(obj);
+
ret = i915_mutex_lock_interruptible(dev);
if (ret)
goto out;
@@ -1873,6 +1875,79 @@ i915_gem_shrink_all(struct drm_i915_private *dev_priv)
return __i915_gem_shrink(dev_priv, LONG_MAX, false);
}
+void
+i915_gem_object_shmem_preallocate(struct drm_i915_gem_object *obj)
+{
+ int page_count, i;
+ struct address_space *mapping;
+ struct page *page;
+ gfp_t gfp;
+
+ if (obj->pages)
+ return;
+
+ if (obj->madv != I915_MADV_WILLNEED) {
+ DRM_ERROR("Attempting to preallocate a purgeable object\n");
+ return;
+ }
+
+ if (obj->base.filp) {
+ int ret;
+ struct inode *inode = file_inode(obj->base.filp);
+ struct shmem_inode_info *info = SHMEM_I(inode);
+ if (!inode)
+ return;
+ /* The alloced field stores how many data pages are allocated
+ * to the file. If already shmem space has been allocated for
+ * the object, then we can simply return */
+ spin_lock(&info->lock);
+ ret = info->alloced;
+ spin_unlock(&info->lock);
+ if (ret > 0) {
+ DRM_DEBUG_DRIVER("Already shmem space alloced for obj %p, %d pages\n",
+ obj, ret);
+ return;
+ }
+ } else
+ return;
+
+ BUG_ON(obj->pages_pin_count);
+
+ /* Assert that the object is not currently in any GPU domain. As it
+ * wasn't in the GTT, there shouldn't be any way it could have been in
+ * a GPU cache
+ */
+ BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS);
+ BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS);
+
+ trace_i915_gem_obj_prealloc_start(obj, obj->base.size);
+
+ page_count = obj->base.size / PAGE_SIZE;
+
+ /* Get the list of pages out of our struct file
+ * Fail silently without starting the shrinker
+ */
+ mapping = file_inode(obj->base.filp)->i_mapping;
+ gfp = mapping_gfp_mask(mapping);
+ gfp |= __GFP_NORETRY | __GFP_NOWARN | __GFP_NO_KSWAPD;
+ gfp &= ~(__GFP_IO | __GFP_WAIT);
+ for (i = 0; i < page_count; i++) {
+ page = shmem_read_mapping_page_gfp(mapping, i, gfp);
+ if (IS_ERR(page)) {
+ DRM_DEBUG_DRIVER("Failure for obj(%p), size(%x) at page(%d)\n",
+ obj, obj->base.size, i);
+ break;
+ }
+ /* Decrement the extra ref count on the returned page,
+ otherwise when 'get_pages_gtt' will be called later on
+ in the regular path, it will also increment the ref count,
+ which will disturb the ref count management */
+ page_cache_release(page);
+ }
+
+ trace_i915_gem_obj_prealloc_end(obj);
+}
+
static int
i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
{
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 6cc004f..da3cbdc 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -38,6 +38,7 @@
struct eb_vmas {
struct list_head vmas;
+ struct list_head objects;
int and;
union {
struct i915_vma *lut[0];
@@ -93,10 +94,9 @@ eb_lookup_vmas(struct eb_vmas *eb,
{
struct drm_i915_private *dev_priv = vm->dev->dev_private;
struct drm_i915_gem_object *obj;
- struct list_head objects;
int i, ret;
- INIT_LIST_HEAD(&objects);
+ INIT_LIST_HEAD(&eb->objects);
spin_lock(&file->table_lock);
/* Grab a reference to the object and release the lock so we can lookup
* or create the VMA without using GFP_ATOMIC */
@@ -119,12 +119,12 @@ eb_lookup_vmas(struct eb_vmas *eb,
}
drm_gem_object_reference(&obj->base);
- list_add_tail(&obj->obj_exec_link, &objects);
+ list_add_tail(&obj->obj_exec_link, &eb->objects);
}
spin_unlock(&file->table_lock);
i = 0;
- while (!list_empty(&objects)) {
+ while (!list_empty(&eb->objects)) {
struct i915_vma *vma;
struct i915_address_space *bind_vm = vm;
@@ -141,7 +141,7 @@ eb_lookup_vmas(struct eb_vmas *eb,
(i == (args->buffer_count - 1))))
bind_vm = &dev_priv->gtt.base;
- obj = list_first_entry(&objects,
+ obj = list_first_entry(&eb->objects,
struct drm_i915_gem_object,
obj_exec_link);
@@ -162,7 +162,6 @@ eb_lookup_vmas(struct eb_vmas *eb,
/* Transfer ownership from the objects list to the vmas list. */
list_add_tail(&vma->exec_list, &eb->vmas);
- list_del_init(&obj->obj_exec_link);
vma->exec_entry = &exec[i];
if (eb->and < 0) {
@@ -180,8 +179,8 @@ eb_lookup_vmas(struct eb_vmas *eb,
err:
- while (!list_empty(&objects)) {
- obj = list_first_entry(&objects,
+ while (!list_empty(&eb->objects)) {
+ obj = list_first_entry(&eb->objects,
struct drm_i915_gem_object,
obj_exec_link);
list_del_init(&obj->obj_exec_link);
@@ -249,6 +248,15 @@ static void eb_destroy(struct eb_vmas *eb)
i915_gem_execbuffer_unreserve_vma(vma);
drm_gem_object_unreference(&vma->obj->base);
}
+
+ while (!list_empty(&eb->objects)) {
+ struct drm_i915_gem_object *obj;
+ obj = list_first_entry(&eb->objects,
+ struct drm_i915_gem_object,
+ obj_exec_link);
+ list_del_init(&obj->obj_exec_link);
+ }
+
kfree(eb);
}
@@ -712,6 +720,7 @@ i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
struct eb_vmas *eb,
struct drm_i915_gem_exec_object2 *exec)
{
+ struct drm_i915_private *dev_priv = dev->dev_private;
struct drm_i915_gem_relocation_entry *reloc;
struct i915_address_space *vm;
struct i915_vma *vma;
@@ -786,12 +795,24 @@ i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
total += exec[i].relocation_count;
}
+ /* First acquire the 'exec_lock' mutex to prevent the concurrent
+ * access to the 'obj_exec_link' field of the objects, by the
+ * preallocation routine from the context of a new execbuffer ioctl */
+ ret = mutex_lock_interruptible(&dev_priv->exec_lock);
+ if (ret) {
+ mutex_lock(&dev->struct_mutex);
+ goto err;
+ }
+
ret = i915_mutex_lock_interruptible(dev);
if (ret) {
mutex_lock(&dev->struct_mutex);
goto err;
}
+ /* Now release the 'exec_lock' after acquiring the 'struct mutex' */
+ mutex_unlock(&dev_priv->exec_lock);
+
/* reacquire the objects */
eb_reset(eb);
ret = eb_lookup_vmas(eb, exec, args, vm, file);
@@ -856,6 +877,21 @@ i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring,
return intel_ring_invalidate_all_caches(ring);
}
+static void
+i915_gem_execbuffer_preallocate_objs(struct list_head *objects)
+{
+ struct drm_i915_gem_object *obj;
+
+ /* Try to get the obj pages atomically */
+ while (!list_empty(objects)) {
+ obj = list_first_entry(objects,
+ struct drm_i915_gem_object,
+ obj_exec_link);
+ i915_gem_object_shmem_preallocate(obj);
+ list_del_init(&obj->obj_exec_link);
+ }
+}
+
static bool
i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
{
@@ -1173,12 +1209,19 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
intel_runtime_pm_get(dev_priv);
- ret = i915_mutex_lock_interruptible(dev);
+ ret = mutex_lock_interruptible(&dev_priv->exec_lock);
if (ret)
goto pre_mutex_err;
+ ret = i915_mutex_lock_interruptible(dev);
+ if (ret) {
+ mutex_unlock(&dev_priv->exec_lock);
+ goto pre_mutex_err;
+ }
+
if (dev_priv->ums.mm_suspended) {
mutex_unlock(&dev->struct_mutex);
+ mutex_unlock(&dev_priv->exec_lock);
ret = -EBUSY;
goto pre_mutex_err;
}
@@ -1200,14 +1243,36 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
if (eb == NULL) {
i915_gem_context_unreference(ctx);
mutex_unlock(&dev->struct_mutex);
+ mutex_unlock(&dev_priv->exec_lock);
ret = -ENOMEM;
goto pre_mutex_err;
}
/* Look up object handles */
ret = eb_lookup_vmas(eb, exec, args, vm, file);
- if (ret)
- goto err;
+ if (ret) {
+ eb_destroy(eb);
+ i915_gem_context_unreference(ctx);
+ mutex_unlock(&dev->struct_mutex);
+ mutex_unlock(&dev_priv->exec_lock);
+ goto pre_mutex_err;
+ }
+
+ /*
+ * Release the 'struct_mutex' before extracting the backing
+ * pages of the objects, so as to allow other ioctls to get serviced,
+ * while backing pages are being allocated (which is generally
+ * the most time consuming phase). The 'exec_lock' mutex will provide
+ * the protection meanwhile.
+ */
+ mutex_unlock(&dev->struct_mutex);
+
+ i915_gem_execbuffer_preallocate_objs(&eb->objects);
+
+ /* Reacquire the 'struct_mutex' post preallocation */
+ ret = i915_mutex_lock_interruptible(dev);
+
+ mutex_unlock(&dev_priv->exec_lock);
/* take note of the batch buffer before we might reorder the lists */
batch_obj = list_entry(eb->vmas.prev, struct i915_vma, exec_list)->obj;
diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
index b29d7b1..21bf10d 100644
--- a/drivers/gpu/drm/i915/i915_trace.h
+++ b/drivers/gpu/drm/i915/i915_trace.h
@@ -245,6 +245,41 @@ TRACE_EVENT(i915_gem_object_fault,
__entry->write ? ", writable" : "")
);
+TRACE_EVENT(i915_gem_obj_prealloc_start,
+ TP_PROTO(struct drm_i915_gem_object *obj, u32 size),
+ TP_ARGS(obj, size),
+
+ TP_STRUCT__entry(
+ __field(struct drm_i915_gem_object *, obj)
+ __field(u32, size)
+ ),
+
+ TP_fast_assign(
+ __entry->obj = obj;
+ __entry->size = size;
+ ),
+
+ TP_printk("obj=%p, size=%x",
+ __entry->obj,
+ __entry->size)
+);
+
+TRACE_EVENT(i915_gem_obj_prealloc_end,
+ TP_PROTO(struct drm_i915_gem_object *obj),
+ TP_ARGS(obj),
+
+ TP_STRUCT__entry(
+ __field(struct drm_i915_gem_object *, obj)
+ ),
+
+ TP_fast_assign(
+ __entry->obj = obj;
+ ),
+
+ TP_printk("obj=%p",
+ __entry->obj)
+);
+
DECLARE_EVENT_CLASS(i915_gem_object,
TP_PROTO(struct drm_i915_gem_object *obj),
TP_ARGS(obj),
--
1.9.2
More information about the Intel-gfx
mailing list