[Intel-gfx] [PATCH 67/70] drm/i915: Start passing around i915_vma from execbuffer
Chris Wilson
chris at chris-wilson.co.uk
Tue Apr 7 09:28:25 PDT 2015
During execbuffer we look up the i915_vmas in order to reserve them in
the VM. However, we then do a second lookup of each vma in order to pin
it, all because we lack the necessary interfaces to operate on the
i915_vma directly.
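As a minimal sketch of the calling convention this patch moves towards (the
helper name example_use_vma is purely illustrative; only i915_vma_pin() and
i915_vma_unpin() come from this patch), a caller that already holds the vma
from the reservation step can now pin and unpin it directly instead of going
back through i915_gem_object_pin() and its object/vm lookup:

static int example_use_vma(struct i915_vma *vma, uint32_t size,
			   uint32_t alignment, uint64_t flags)
{
	int ret;

	/* Single call: binds the vma into its VM if needed and raises
	 * pin_count, without re-looking-up the vma we already hold.
	 */
	ret = i915_vma_pin(vma, size, alignment, flags);
	if (ret)
		return ret;

	/* ... use vma->node.start, submit work, etc. ... */

	/* Drop the pin; WARNs if the vma was not actually pinned/bound. */
	i915_vma_unpin(vma);
	return 0;
}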
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
---
drivers/gpu/drm/i915/i915_drv.h | 13 +++
drivers/gpu/drm/i915/i915_gem.c | 170 ++++++++++++++---------------
drivers/gpu/drm/i915/i915_gem_execbuffer.c | 114 ++++++++++---------
3 files changed, 154 insertions(+), 143 deletions(-)
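[Note, not part of the diff below: on the execbuffer side the batch is now
tracked as eb->batch_vma rather than as a bare object, so its GTT address
falls out of the vma itself. A rough sketch of the resulting address
computation, with the hypothetical helper name example_batch_start used only
for illustration:]

static uint64_t example_batch_start(struct eb_vmas *eb,
				    uint32_t batch_start_offset)
{
	/* node.start is valid here because reservation pinned the vma. */
	return eb->batch_vma->node.start + batch_start_offset;
}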
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 2c72ee0214b5..ba593ee78863 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2637,6 +2637,19 @@ void i915_gem_close_object(struct drm_gem_object *obj,
void i915_gem_free_object(struct drm_gem_object *obj);
void i915_gem_vma_destroy(struct i915_vma *vma);
+int __must_check
+i915_vma_pin(struct i915_vma *vma,
+ uint32_t size,
+ uint32_t alignment,
+ uint64_t flags);
+
+static inline void i915_vma_unpin(struct i915_vma *vma)
+{
+ WARN_ON(vma->pin_count == 0);
+ WARN_ON(!drm_mm_node_allocated(&vma->node));
+ vma->pin_count--;
+}
+
#define PIN_MAPPABLE 0x1
#define PIN_NONBLOCK 0x2
#define PIN_GLOBAL 0x4
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 3d4463930267..7b27236f2c29 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3644,10 +3644,9 @@ static bool i915_gem_valid_gtt_space(struct i915_vma *vma,
/**
* Finds free space in the GTT aperture and binds the object there.
*/
-static struct i915_vma *
+static int
i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
- struct i915_address_space *vm,
- const struct i915_ggtt_view *ggtt_view,
+ struct i915_vma *vma,
uint32_t size,
unsigned alignment,
uint64_t flags)
@@ -3658,13 +3657,9 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
unsigned long start =
flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0;
unsigned long end =
- flags & PIN_MAPPABLE ? dev_priv->gtt.mappable_end : vm->total;
- struct i915_vma *vma;
+ flags & PIN_MAPPABLE ? dev_priv->gtt.mappable_end : vma->vm->total;
int ret;
- if (WARN_ON(vm->is_ggtt != !!ggtt_view))
- return ERR_PTR(-EINVAL);
-
fence_size = i915_gem_get_gtt_size(dev,
obj->base.size,
obj->tiling_mode);
@@ -3681,7 +3676,7 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
unfenced_alignment;
if (flags & PIN_MAPPABLE && alignment & (fence_alignment - 1)) {
DRM_DEBUG("Invalid object alignment requested %u\n", alignment);
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
}
size = max_t(u32, size, obj->base.size);
@@ -3696,57 +3691,51 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
size, obj->base.size,
flags & PIN_MAPPABLE ? "mappable" : "total",
end);
- return ERR_PTR(-E2BIG);
+ return -E2BIG;
}
ret = i915_gem_object_get_pages(obj);
if (ret)
- return ERR_PTR(ret);
+ return ret;
i915_gem_object_pin_pages(obj);
- vma = ggtt_view ? i915_gem_obj_lookup_or_create_ggtt_vma(obj, ggtt_view) :
- i915_gem_obj_lookup_or_create_vma(obj, vm);
-
- if (IS_ERR(vma))
- goto err_unpin;
-
if (flags & PIN_OFFSET_FIXED) {
uint64_t offset = flags & PIN_OFFSET_MASK;
if (offset & (alignment - 1)) {
- vma = ERR_PTR(-EINVAL);
- goto err_free_vma;
+ ret = -EINVAL;
+ goto err_unpin;
}
vma->node.start = offset;
vma->node.size = size;
vma->node.color = obj->cache_level;
- ret = drm_mm_reserve_node(&vm->mm, &vma->node);
+ ret = drm_mm_reserve_node(&vma->vm->mm, &vma->node);
if (ret) {
- ret = i915_gem_evict_range(dev, vm, start, end);
+ ret = i915_gem_evict_range(dev, vma->vm, start, end);
if (ret == 0)
- ret = drm_mm_reserve_node(&vm->mm, &vma->node);
- }
- if (ret) {
- vma = ERR_PTR(ret);
- goto err_free_vma;
+ ret = drm_mm_reserve_node(&vma->vm->mm, &vma->node);
+ if (ret)
+ goto err_unpin;
}
} else {
search_free:
- ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node,
+ ret = drm_mm_insert_node_in_range_generic(&vma->vm->mm,
+ &vma->node,
size, alignment,
obj->cache_level,
start, end,
DRM_MM_SEARCH_DEFAULT,
DRM_MM_CREATE_DEFAULT);
if (ret) {
- ret = i915_gem_evict_something(dev, vm, size, alignment,
+ ret = i915_gem_evict_something(dev, vma->vm,
+ size, alignment,
obj->cache_level,
start, end,
flags);
if (ret == 0)
goto search_free;
- goto err_free_vma;
+ goto err_unpin;
}
}
if (WARN_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level))) {
@@ -3775,20 +3764,17 @@ search_free:
goto err_finish_gtt;
list_move_tail(&obj->global_list, &dev_priv->mm.bound_list);
- list_add_tail(&vma->mm_list, &vm->inactive_list);
+ list_add_tail(&vma->mm_list, &vma->vm->inactive_list);
- return vma;
+ return 0;
err_finish_gtt:
i915_gem_gtt_finish_object(obj);
err_remove_node:
drm_mm_remove_node(&vma->node);
-err_free_vma:
- i915_gem_vma_destroy(vma);
- vma = ERR_PTR(ret);
err_unpin:
i915_gem_object_unpin_pages(obj);
- return vma;
+ return ret;
}
bool
@@ -4347,6 +4333,65 @@ i915_vma_misplaced(struct i915_vma *vma,
return false;
}
+int
+i915_vma_pin(struct i915_vma *vma,
+ uint32_t size,
+ uint32_t alignment,
+ uint64_t flags)
+{
+ struct drm_i915_gem_object *obj = vma->obj;
+ struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
+ unsigned bound = vma->bound;
+ int ret;
+
+ if (WARN_ON(vma->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT))
+ return -EBUSY;
+
+
+ if (!drm_mm_node_allocated(&vma->node)) {
+ /* In true PPGTT, bind has possibly changed PDEs, which
+ * means we must do a context switch before the GPU can
+ * accurately read some of the VMAs.
+ */
+ ret = i915_gem_object_bind_to_vm(obj, vma,
+ size, alignment, flags);
+ if (ret)
+ return ret;
+ }
+
+ if (flags & PIN_GLOBAL && !(vma->bound & GLOBAL_BIND)) {
+ ret = i915_vma_bind(vma, obj->cache_level, GLOBAL_BIND);
+ if (ret)
+ return ret;
+ }
+
+ if ((bound ^ vma->bound) & GLOBAL_BIND) {
+ bool mappable, fenceable;
+ u32 fence_size, fence_alignment;
+
+ fence_size = i915_gem_get_gtt_size(obj->base.dev,
+ obj->base.size,
+ obj->tiling_mode);
+ fence_alignment = i915_gem_get_gtt_alignment(obj->base.dev,
+ obj->base.size,
+ obj->tiling_mode,
+ true);
+
+ fenceable = (vma->node.size >= fence_size &&
+ (vma->node.start & (fence_alignment - 1)) == 0);
+
+ mappable = (vma->node.start + fence_size <=
+ dev_priv->gtt.mappable_end);
+
+ obj->map_and_fenceable = mappable && fenceable;
+ }
+
+ WARN_ON(flags & PIN_MAPPABLE && !obj->map_and_fenceable);
+
+ vma->pin_count++;
+ return 0;
+}
+
static int
i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
struct i915_address_space *vm,
@@ -4357,7 +4402,6 @@ i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
{
struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
struct i915_vma *vma;
- unsigned bound;
int ret;
if (WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base))
@@ -4379,9 +4423,6 @@ i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
return PTR_ERR(vma);
if (vma) {
- if (WARN_ON(vma->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT))
- return -EBUSY;
-
if (i915_vma_misplaced(vma, size, alignment, flags)) {
unsigned long offset;
offset = ggtt_view ? i915_gem_obj_ggtt_offset_view(obj, ggtt_view) :
@@ -4403,49 +4444,14 @@ i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
}
}
- bound = vma ? vma->bound : 0;
- if (vma == NULL || !drm_mm_node_allocated(&vma->node)) {
- /* In true PPGTT, bind has possibly changed PDEs, which
- * means we must do a context switch before the GPU can
- * accurately read some of the VMAs.
- */
- vma = i915_gem_object_bind_to_vm(obj, vm, ggtt_view,
- size, alignment, flags);
+ if (vma == NULL) {
+ vma = ggtt_view ? i915_gem_obj_lookup_or_create_ggtt_vma(obj, ggtt_view) :
+ i915_gem_obj_lookup_or_create_vma(obj, vm);
if (IS_ERR(vma))
return PTR_ERR(vma);
}
- if (flags & PIN_GLOBAL && !(vma->bound & GLOBAL_BIND)) {
- ret = i915_vma_bind(vma, obj->cache_level, GLOBAL_BIND);
- if (ret)
- return ret;
- }
-
- if ((bound ^ vma->bound) & GLOBAL_BIND) {
- bool mappable, fenceable;
- u32 fence_size, fence_alignment;
-
- fence_size = i915_gem_get_gtt_size(obj->base.dev,
- obj->base.size,
- obj->tiling_mode);
- fence_alignment = i915_gem_get_gtt_alignment(obj->base.dev,
- obj->base.size,
- obj->tiling_mode,
- true);
-
- fenceable = (vma->node.size >= fence_size &&
- (vma->node.start & (fence_alignment - 1)) == 0);
-
- mappable = (vma->node.start + fence_size <=
- dev_priv->gtt.mappable_end);
-
- obj->map_and_fenceable = mappable && fenceable;
- }
-
- WARN_ON(flags & PIN_MAPPABLE && !obj->map_and_fenceable);
-
- vma->pin_count++;
- return 0;
+ return i915_vma_pin(vma, size, alignment, flags);
}
int
@@ -4478,13 +4484,7 @@ void
i915_gem_object_ggtt_unpin_view(struct drm_i915_gem_object *obj,
const struct i915_ggtt_view *view)
{
- struct i915_vma *vma = i915_gem_obj_to_ggtt_view(obj, view);
-
- BUG_ON(!vma);
- WARN_ON(vma->pin_count == 0);
- WARN_ON(!i915_gem_obj_ggtt_bound_view(obj, view));
-
- --vma->pin_count;
+ i915_vma_unpin(i915_gem_obj_to_ggtt_view(obj, view));
}
bool
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index bd48393fb91f..734a7ef56a93 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -42,6 +42,7 @@
struct eb_vmas {
struct list_head vmas;
+ struct i915_vma *batch_vma;
int and;
union {
struct i915_vma *lut[0];
@@ -88,6 +89,26 @@ eb_reset(struct eb_vmas *eb)
memset(eb->buckets, 0, (eb->and+1)*sizeof(struct hlist_head));
}
+static struct i915_vma *
+eb_get_batch(struct eb_vmas *eb)
+{
+ struct i915_vma *vma = list_entry(eb->vmas.prev, typeof(*vma), exec_list);
+
+ /*
+ * SNA is doing fancy tricks with compressing batch buffers, which leads
+ * to negative relocation deltas. Usually that works out ok since the
+ * relocate address is still positive, except when the batch is placed
+ * very low in the GTT. Ensure this doesn't happen.
+ *
+ * Note that actual hangs have only been observed on gen7, but for
+ * paranoia do it everywhere.
+ */
+ if ((vma->exec_entry->flags & EXEC_OBJECT_PINNED) == 0)
+ vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS;
+
+ return vma;
+}
+
static int
eb_lookup_vmas(struct eb_vmas *eb,
struct drm_i915_gem_exec_object2 *exec,
@@ -165,6 +186,9 @@ eb_lookup_vmas(struct eb_vmas *eb,
++i;
}
+ /* take note of the batch buffer before we might reorder the lists */
+ eb->batch_vma = eb_get_batch(eb);
+
return 0;
@@ -644,16 +668,16 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
flags |= entry->offset | PIN_OFFSET_FIXED;
}
- ret = i915_gem_object_pin(obj, vma->vm,
- entry->pad_to_size,
- entry->alignment,
- flags);
- if ((ret == -ENOSPC || ret == -E2BIG) &&
+ ret = i915_vma_pin(vma,
+ entry->pad_to_size,
+ entry->alignment,
+ flags);
+ if ((ret == -ENOSPC || ret == -E2BIG) &&
only_mappable_for_reloc(entry->flags))
- ret = i915_gem_object_pin(obj, vma->vm,
- entry->pad_to_size,
- entry->alignment,
- flags & ~(PIN_GLOBAL | PIN_MAPPABLE));
+ ret = i915_vma_pin(vma,
+ entry->pad_to_size,
+ entry->alignment,
+ flags & ~(PIN_GLOBAL | PIN_MAPPABLE));
if (ret)
return ret;
@@ -1194,11 +1218,10 @@ i915_emit_box(struct intel_engine_cs *ring,
return 0;
}
-static struct drm_i915_gem_object*
+static struct i915_vma*
i915_gem_execbuffer_parse(struct intel_engine_cs *ring,
struct drm_i915_gem_exec_object2 *shadow_exec_entry,
struct eb_vmas *eb,
- struct drm_i915_gem_object *batch_obj,
u32 batch_start_offset,
u32 batch_len,
bool is_master)
@@ -1210,10 +1233,10 @@ i915_gem_execbuffer_parse(struct intel_engine_cs *ring,
shadow_batch_obj = i915_gem_batch_pool_get(&ring->batch_pool,
PAGE_ALIGN(batch_len));
if (IS_ERR(shadow_batch_obj))
- return shadow_batch_obj;
+ return ERR_CAST(shadow_batch_obj);
ret = i915_parse_cmds(ring,
- batch_obj,
+ eb->batch_vma->obj,
shadow_batch_obj,
batch_start_offset,
batch_len,
@@ -1235,14 +1258,12 @@ i915_gem_execbuffer_parse(struct intel_engine_cs *ring,
drm_gem_object_reference(&shadow_batch_obj->base);
list_add_tail(&vma->exec_list, &eb->vmas);
- shadow_batch_obj->base.pending_read_domains = I915_GEM_DOMAIN_COMMAND;
-
- return shadow_batch_obj;
+ return vma;
err:
i915_gem_object_unpin_pages(shadow_batch_obj);
if (ret == -EACCES) /* unhandled chained batch */
- return batch_obj;
+ return NULL;
else
return ERR_PTR(ret);
}
@@ -1442,26 +1463,6 @@ static int gen8_dispatch_bsd_ring(struct drm_device *dev,
}
}
-static struct drm_i915_gem_object *
-eb_get_batch(struct eb_vmas *eb)
-{
- struct i915_vma *vma = list_entry(eb->vmas.prev, typeof(*vma), exec_list);
-
- /*
- * SNA is doing fancy tricks with compressing batch buffers, which leads
- * to negative relocation deltas. Usually that works out ok since the
- * relocate address is still positive, except when the batch is placed
- * very low in the GTT. Ensure this doesn't happen.
- *
- * Note that actual hangs have only been observed on gen7, but for
- * paranoia do it everywhere.
- */
- if ((vma->exec_entry->flags & EXEC_OBJECT_PINNED) == 0)
- vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS;
-
- return vma->obj;
-}
-
static int
i915_gem_do_execbuffer(struct drm_device *dev, void *data,
struct drm_file *file,
@@ -1470,7 +1471,6 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
{
struct drm_i915_private *dev_priv = dev->dev_private;
struct eb_vmas *eb;
- struct drm_i915_gem_object *batch_obj;
struct drm_i915_gem_exec_object2 shadow_exec_entry;
struct intel_engine_cs *ring;
struct intel_context *ctx;
@@ -1582,9 +1582,6 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
if (ret)
goto err;
- /* take note of the batch buffer before we might reorder the lists */
- batch_obj = eb_get_batch(eb);
-
/* Move the objects en-masse into the GTT, evicting if necessary. */
need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0;
ret = i915_gem_execbuffer_reserve(ring, &eb->vmas, &need_relocs);
@@ -1605,24 +1602,25 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
}
/* Set the pending read domains for the batch buffer to COMMAND */
- if (batch_obj->base.pending_write_domain) {
+ if (eb->batch_vma->obj->base.pending_write_domain) {
DRM_DEBUG("Attempting to use self-modifying batch buffer\n");
ret = -EINVAL;
goto err;
}
if (i915_needs_cmd_parser(ring) && args->batch_len) {
- batch_obj = i915_gem_execbuffer_parse(ring,
- &shadow_exec_entry,
- eb,
- batch_obj,
- args->batch_start_offset,
- args->batch_len,
- file->is_master);
- if (IS_ERR(batch_obj)) {
- ret = PTR_ERR(batch_obj);
+ struct i915_vma *vma;
+
+ vma = i915_gem_execbuffer_parse(ring, &shadow_exec_entry, eb,
+ args->batch_start_offset,
+ args->batch_len,
+ file->is_master);
+ if (IS_ERR(vma)) {
+ ret = PTR_ERR(vma);
goto err;
}
+ if (vma)
+ eb->batch_vma = vma;
/*
* Set the DISPATCH_SECURE bit to remove the NON_SECURE
@@ -1641,7 +1639,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
exec_start = 0;
}
- batch_obj->base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND;
+ eb->batch_vma->obj->base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND;
/* snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
* batch" bit. Hence we need to pin secure batches into the global gtt.
@@ -1657,17 +1655,17 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
* fitting due to fragmentation.
* So this is actually safe.
*/
- ret = i915_gem_obj_ggtt_pin(batch_obj, 0, 0);
+ ret = i915_gem_obj_ggtt_pin(eb->batch_vma->obj, 0, 0);
if (ret)
goto err;
- exec_start += i915_gem_obj_ggtt_offset(batch_obj);
+ exec_start += i915_gem_obj_ggtt_offset(eb->batch_vma->obj);
} else
- exec_start += i915_gem_obj_offset(batch_obj, vm);
+ exec_start += eb->batch_vma->node.start;
ret = dev_priv->gt.execbuf_submit(dev, file, ring, ctx, args,
- &eb->vmas, batch_obj, exec_start,
- dispatch_flags);
+ &eb->vmas, eb->batch_vma->obj,
+ exec_start, dispatch_flags);
/*
* FIXME: We crucially rely upon the active tracking for the (ppgtt)
@@ -1676,7 +1674,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
* active.
*/
if (dispatch_flags & I915_DISPATCH_SECURE)
- i915_gem_object_ggtt_unpin(batch_obj);
+ i915_vma_unpin(eb->batch_vma);
err:
/* the request owns the ref now */
i915_gem_context_unreference(ctx);
--
2.1.4