[Intel-gfx] [PATCH 108/190] drm/i915: Start passing around i915_vma from execbuffer
Chris Wilson
chris at chris-wilson.co.uk
Mon Jan 11 02:44:52 PST 2016
During execbuffer we look up the i915_vma in order to reserve them in
the VM. However, we then do a second lookup of the vma in order to pin
them, all because we lack the necessary interfaces to operate on
i915_vma.
v2: Tidy parameter lists to remove one level of redirection in the hot
path.
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala at intel.com>
---
drivers/gpu/drm/i915/i915_drv.h | 28 +++--
drivers/gpu/drm/i915/i915_gem.c | 159 ++++++++++++-----------------
drivers/gpu/drm/i915/i915_gem_execbuffer.c | 127 +++++++++++------------
drivers/gpu/drm/i915/i915_gem_gtt.c | 3 -
4 files changed, 144 insertions(+), 173 deletions(-)
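[Note for reviewers, not part of the patch: a minimal caller-side sketch
of the interface change, with illustrative error handling. Previously a
caller pinned through the object, forcing a second vma lookup; with this
patch the vma obtained during reservation is pinned directly.]

	/* before: pin via the object; the vma is looked up again internally */
	ret = i915_gem_object_pin(obj, vma->vm, size, alignment, flags);

	/* after: operate on the i915_vma we already hold */
	ret = i915_vma_pin(vma, size, alignment, flags);
	if (ret)
		return ret;
	/* ... use the binding ... */
	i915_vma_unpin(vma);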
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 7df6cfabe7fa..f6e508e5aa5b 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2680,6 +2680,11 @@ struct drm_i915_gem_object *i915_gem_object_create_from_data(
void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file);
void i915_gem_free_object(struct drm_gem_object *obj);
+int __must_check
+i915_vma_pin(struct i915_vma *vma,
+ uint64_t size,
+ uint64_t alignment,
+ uint64_t flags);
/* Flags used by pin/bind&friends. */
#define PIN_MAPPABLE (1<<0)
#define PIN_NONBLOCK (1<<1)
@@ -2691,12 +2696,19 @@ void i915_gem_free_object(struct drm_gem_object *obj);
#define PIN_HIGH (1<<7)
#define PIN_OFFSET_FIXED (1<<8)
#define PIN_OFFSET_MASK (~4095)
-int __must_check
-i915_gem_object_pin(struct drm_i915_gem_object *obj,
- struct i915_address_space *vm,
- uint64_t size,
- uint32_t alignment,
- uint64_t flags);
+
+static inline void __i915_vma_unpin(struct i915_vma *vma)
+{
+ vma->pin_count--;
+}
+
+static inline void i915_vma_unpin(struct i915_vma *vma)
+{
+ GEM_BUG_ON(vma->pin_count == 0);
+ GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
+ __i915_vma_unpin(vma);
+}
+
int __must_check
i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
const struct i915_ggtt_view *view,
@@ -2933,8 +2945,8 @@ i915_gem_obj_ggtt_pin(struct drm_i915_gem_object *obj,
uint32_t alignment,
unsigned flags)
{
- return i915_gem_object_pin(obj, i915_obj_to_ggtt(obj), 0, alignment,
- flags | PIN_GLOBAL);
+ return i915_gem_object_ggtt_pin(obj, &i915_ggtt_view_normal,
+ 0, alignment, flags);
}
static inline int
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 0d4f358f4067..c6d7a78ab605 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2746,26 +2746,21 @@ static bool i915_gem_valid_gtt_space(struct i915_vma *vma,
* Finds free space in the GTT aperture and binds the object or a view of it
* there.
*/
-static struct i915_vma *
-i915_gem_object_insert_into_vm(struct drm_i915_gem_object *obj,
- struct i915_address_space *vm,
- const struct i915_ggtt_view *ggtt_view,
- uint64_t size,
- uint64_t alignment,
- uint64_t flags)
+static int
+i915_vma_insert(struct i915_vma *vma,
+ uint64_t size,
+ uint64_t alignment,
+ uint64_t flags)
{
+ struct drm_i915_gem_object *obj = vma->obj;
struct drm_device *dev = obj->base.dev;
struct drm_i915_private *dev_priv = dev->dev_private;
- struct i915_vma *vma;
u64 start, end;
u64 min_alignment;
int ret;
- vma = ggtt_view ?
- i915_gem_obj_lookup_or_create_ggtt_vma(obj, ggtt_view) :
- i915_gem_obj_lookup_or_create_vma(obj, vm);
- if (IS_ERR(vma))
- return vma;
+ GEM_BUG_ON(vma->bound);
+ GEM_BUG_ON(drm_mm_node_allocated(&vma->node));
size = max(size, vma->size);
if (flags & PIN_MAPPABLE)
@@ -2779,7 +2774,7 @@ i915_gem_object_insert_into_vm(struct drm_i915_gem_object *obj,
if (alignment & (min_alignment - 1)) {
DRM_DEBUG("Invalid object alignment requested %llu, minimum %llu\n",
alignment, min_alignment);
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
}
start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0;
@@ -2799,12 +2794,12 @@ i915_gem_object_insert_into_vm(struct drm_i915_gem_object *obj,
size, obj->base.size,
flags & PIN_MAPPABLE ? "mappable" : "total",
end);
- return ERR_PTR(-E2BIG);
+ return -E2BIG;
}
ret = i915_gem_object_get_pages(obj);
if (ret)
- return ERR_PTR(ret);
+ return ret;
i915_gem_object_pin_pages(obj);
@@ -2866,13 +2861,13 @@ search_free:
list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
obj->bind_count++;
- return vma;
+ return 0;
err_remove_node:
drm_mm_remove_node(&vma->node);
err_unpin:
i915_gem_object_unpin_pages(obj);
- return ERR_PTR(ret);
+ return ret;
}
bool
@@ -3435,6 +3430,9 @@ i915_vma_misplaced(struct i915_vma *vma,
{
struct drm_i915_gem_object *obj = vma->obj;
+ if (!drm_mm_node_allocated(&vma->node))
+ return false;
+
if (vma->node.size < size)
return true;
@@ -3478,94 +3476,45 @@ void __i915_vma_set_map_and_fenceable(struct i915_vma *vma)
obj->map_and_fenceable = mappable && fenceable;
}
-static int
-i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
- struct i915_address_space *vm,
- const struct i915_ggtt_view *ggtt_view,
- uint64_t size,
- uint32_t alignment,
- uint64_t flags)
+int
+i915_vma_pin(struct i915_vma *vma,
+ uint64_t size,
+ uint64_t alignment,
+ uint64_t flags)
{
- struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
- struct i915_vma *vma;
- unsigned bound;
+ unsigned bound = vma->bound;
int ret;
- if (WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base))
- return -ENODEV;
-
- if (WARN_ON(flags & (PIN_GLOBAL | PIN_MAPPABLE) && !i915_is_ggtt(vm)))
- return -EINVAL;
-
- if (WARN_ON((flags & (PIN_MAPPABLE | PIN_GLOBAL)) == PIN_MAPPABLE))
- return -EINVAL;
+ GEM_BUG_ON((flags & (PIN_GLOBAL | PIN_USER)) == 0);
+ GEM_BUG_ON((flags & PIN_GLOBAL) && !vma->is_ggtt);
- if (WARN_ON(i915_is_ggtt(vm) != !!ggtt_view))
- return -EINVAL;
-
- vma = ggtt_view ? i915_gem_obj_to_ggtt_view(obj, ggtt_view) :
- i915_gem_obj_to_vma(obj, vm);
-
- if (IS_ERR(vma))
- return PTR_ERR(vma);
-
- if (vma) {
- if (WARN_ON(vma->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT))
- return -EBUSY;
-
- if (i915_vma_misplaced(vma, size, alignment, flags)) {
- WARN(vma->pin_count,
- "bo is already pinned in %s with incorrect alignment:"
- " offset=%08x %08x, req.alignment=%x, req.map_and_fenceable=%d,"
- " obj->map_and_fenceable=%d\n",
- ggtt_view ? "ggtt" : "ppgtt",
- upper_32_bits(vma->node.start),
- lower_32_bits(vma->node.start),
- alignment,
- !!(flags & PIN_MAPPABLE),
- obj->map_and_fenceable);
- ret = i915_vma_unbind(vma);
- if (ret)
- return ret;
+ if (WARN_ON(vma->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT))
+ return -EBUSY;
- vma = NULL;
- }
- }
+ /* Pin early to prevent the shrinker/eviction logic from destroying
+ * our vma as we insert and bind.
+ */
+ vma->pin_count++;
- if (vma == NULL || !drm_mm_node_allocated(&vma->node)) {
- vma = i915_gem_object_insert_into_vm(obj, vm, ggtt_view,
- size, alignment, flags);
- if (IS_ERR(vma))
- return PTR_ERR(vma);
+ if (!bound) {
+ ret = i915_vma_insert(vma, size, alignment, flags);
+ if (ret)
+ goto err;
}
- bound = vma->bound;
- ret = i915_vma_bind(vma, obj->cache_level, flags);
+ ret = i915_vma_bind(vma, vma->obj->cache_level, flags);
if (ret)
- return ret;
+ goto err;
- if (ggtt_view && ggtt_view->type == I915_GGTT_VIEW_NORMAL &&
- (bound ^ vma->bound) & GLOBAL_BIND) {
+ if ((bound ^ vma->bound) & GLOBAL_BIND)
__i915_vma_set_map_and_fenceable(vma);
- WARN_ON(flags & PIN_MAPPABLE && !obj->map_and_fenceable);
- }
GEM_BUG_ON(i915_vma_misplaced(vma, size, alignment, flags));
-
- vma->pin_count++;
return 0;
-}
-int
-i915_gem_object_pin(struct drm_i915_gem_object *obj,
- struct i915_address_space *vm,
- uint64_t size,
- uint32_t alignment,
- uint64_t flags)
-{
- return i915_gem_object_do_pin(obj, vm,
- i915_is_ggtt(vm) ? &i915_ggtt_view_normal : NULL,
- size, alignment, flags);
+err:
+ vma->pin_count--;
+ return ret;
}
int
@@ -3575,11 +3524,35 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
uint32_t alignment,
uint64_t flags)
{
+ struct i915_vma *vma;
+ int ret;
+
if (WARN_ONCE(!view, "no view specified"))
return -EINVAL;
- return i915_gem_object_do_pin(obj, i915_obj_to_ggtt(obj), view,
- size, alignment, flags | PIN_GLOBAL);
+ vma = i915_gem_obj_lookup_or_create_ggtt_vma(obj, view);
+ if (IS_ERR(vma))
+ return PTR_ERR(vma);
+
+ if (i915_vma_misplaced(vma, size, alignment, flags)) {
+ if (flags & PIN_NONBLOCK && (vma->pin_count | vma->active))
+ return -ENOSPC;
+
+ WARN(vma->pin_count,
+ "bo is already pinned in ggtt with incorrect alignment:"
+ " offset=%08x %08x, req.alignment=%x, req.map_and_fenceable=%d,"
+ " obj->map_and_fenceable=%d\n",
+ upper_32_bits(vma->node.start),
+ lower_32_bits(vma->node.start),
+ alignment,
+ !!(flags & PIN_MAPPABLE),
+ obj->map_and_fenceable);
+ ret = i915_vma_unbind(vma);
+ if (ret)
+ return ret;
+ }
+
+ return i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL);
}
void
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 899220139a8a..d4dcc3e5d080 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -44,11 +44,10 @@
struct i915_execbuffer_params {
struct drm_device *dev;
struct drm_file *file;
+ struct i915_vma *batch_vma;
uint32_t dispatch_flags;
uint32_t args_batch_start_offset;
- uint64_t batch_obj_vm_offset;
struct intel_engine_cs *ring;
- struct drm_i915_gem_object *batch_obj;
struct intel_context *ctx;
struct drm_i915_gem_request *request;
};
@@ -101,6 +100,26 @@ eb_reset(struct eb_vmas *eb)
memset(eb->buckets, 0, (eb->and+1)*sizeof(struct hlist_head));
}
+static struct i915_vma *
+eb_get_batch(struct eb_vmas *eb)
+{
+ struct i915_vma *vma = list_entry(eb->vmas.prev, typeof(*vma), exec_list);
+
+ /*
+ * SNA is doing fancy tricks with compressing batch buffers, which leads
+ * to negative relocation deltas. Usually that works out ok since the
+ * relocate address is still positive, except when the batch is placed
+ * very low in the GTT. Ensure this doesn't happen.
+ *
+ * Note that actual hangs have only been observed on gen7, but for
+ * paranoia do it everywhere.
+ */
+ if ((vma->exec_entry->flags & EXEC_OBJECT_PINNED) == 0)
+ vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS;
+
+ return vma;
+}
+
static int
eb_lookup_vmas(struct eb_vmas *eb,
struct drm_i915_gem_exec_object2 *exec,
@@ -642,16 +661,16 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
flags |= PIN_HIGH;
}
- ret = i915_gem_object_pin(obj, vma->vm,
- entry->pad_to_size,
- entry->alignment,
- flags);
- if ((ret == -ENOSPC || ret == -E2BIG) &&
+ ret = i915_vma_pin(vma,
+ entry->pad_to_size,
+ entry->alignment,
+ flags);
+ if ((ret == -ENOSPC || ret == -E2BIG) &&
only_mappable_for_reloc(entry->flags))
- ret = i915_gem_object_pin(obj, vma->vm,
- entry->pad_to_size,
- entry->alignment,
- flags & ~PIN_MAPPABLE);
+ ret = i915_vma_pin(vma,
+ entry->pad_to_size,
+ entry->alignment,
+ flags & ~PIN_MAPPABLE);
if (ret)
return ret;
@@ -1203,11 +1222,11 @@ i915_reset_gen7_sol_offsets(struct drm_i915_gem_request *req)
return 0;
}
-static struct drm_i915_gem_object*
+static struct i915_vma*
i915_gem_execbuffer_parse(struct intel_engine_cs *ring,
struct drm_i915_gem_exec_object2 *shadow_exec_entry,
- struct eb_vmas *eb,
struct drm_i915_gem_object *batch_obj,
+ struct eb_vmas *eb,
u32 batch_start_offset,
u32 batch_len,
bool is_master)
@@ -1219,7 +1238,7 @@ i915_gem_execbuffer_parse(struct intel_engine_cs *ring,
shadow_batch_obj = i915_gem_batch_pool_get(&ring->batch_pool,
PAGE_ALIGN(batch_len));
if (IS_ERR(shadow_batch_obj))
- return shadow_batch_obj;
+ return ERR_CAST(shadow_batch_obj);
ret = i915_parse_cmds(ring,
batch_obj,
@@ -1244,14 +1263,12 @@ i915_gem_execbuffer_parse(struct intel_engine_cs *ring,
drm_gem_object_reference(&shadow_batch_obj->base);
list_add_tail(&vma->exec_list, &eb->vmas);
- shadow_batch_obj->base.pending_read_domains = I915_GEM_DOMAIN_COMMAND;
-
- return shadow_batch_obj;
+ return vma;
err:
i915_gem_object_unpin_pages(shadow_batch_obj);
if (ret == -EACCES) /* unhandled chained batch */
- return batch_obj;
+ return NULL;
else
return ERR_PTR(ret);
}
@@ -1331,7 +1348,7 @@ execbuf_submit(struct i915_execbuffer_params *params,
}
exec_len = args->batch_len;
- exec_start = params->batch_obj_vm_offset +
+ exec_start = params->batch_vma->node.start +
params->args_batch_start_offset;
ret = params->ring->emit_bb_start(params->request,
@@ -1378,26 +1395,6 @@ static int gen8_dispatch_bsd_ring(struct drm_device *dev,
}
}
-static struct drm_i915_gem_object *
-eb_get_batch(struct eb_vmas *eb)
-{
- struct i915_vma *vma = list_entry(eb->vmas.prev, typeof(*vma), exec_list);
-
- /*
- * SNA is doing fancy tricks with compressing batch buffers, which leads
- * to negative relocation deltas. Usually that works out ok since the
- * relocate address is still positive, except when the batch is placed
- * very low in the GTT. Ensure this doesn't happen.
- *
- * Note that actual hangs have only been observed on gen7, but for
- * paranoia do it everywhere.
- */
- if ((vma->exec_entry->flags & EXEC_OBJECT_PINNED) == 0)
- vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS;
-
- return vma->obj;
-}
-
static int
i915_gem_do_execbuffer(struct drm_device *dev, void *data,
struct drm_file *file,
@@ -1406,7 +1403,6 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
{
struct drm_i915_private *dev_priv = dev->dev_private;
struct eb_vmas *eb;
- struct drm_i915_gem_object *batch_obj;
struct drm_i915_gem_exec_object2 shadow_exec_entry;
struct intel_engine_cs *ring;
struct intel_context *ctx;
@@ -1542,7 +1538,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
goto err;
/* take note of the batch buffer before we might reorder the lists */
- batch_obj = eb_get_batch(eb);
+ params->batch_vma = eb_get_batch(eb);
/* Move the objects en-masse into the GTT, evicting if necessary. */
need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0;
@@ -1564,7 +1560,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
}
/* Set the pending read domains for the batch buffer to COMMAND */
- if (batch_obj->base.pending_write_domain) {
+ if (params->batch_vma->obj->base.pending_write_domain) {
DRM_DEBUG("Attempting to use self-modifying batch buffer\n");
ret = -EINVAL;
goto err;
@@ -1572,26 +1568,20 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
params->args_batch_start_offset = args->batch_start_offset;
if (i915_needs_cmd_parser(ring) && args->batch_len) {
- struct drm_i915_gem_object *parsed_batch_obj;
-
- parsed_batch_obj = i915_gem_execbuffer_parse(ring,
- &shadow_exec_entry,
- eb,
- batch_obj,
- args->batch_start_offset,
- args->batch_len,
- file->is_master);
- if (IS_ERR(parsed_batch_obj)) {
- ret = PTR_ERR(parsed_batch_obj);
+ struct i915_vma *vma;
+
+ vma = i915_gem_execbuffer_parse(ring, &shadow_exec_entry,
+ params->batch_vma->obj,
+ eb,
+ args->batch_start_offset,
+ args->batch_len,
+ file->is_master);
+ if (IS_ERR(vma)) {
+ ret = PTR_ERR(vma);
goto err;
}
- /*
- * parsed_batch_obj == batch_obj means batch not fully parsed:
- * Accept, but don't promote to secure.
- */
-
- if (parsed_batch_obj != batch_obj) {
+ if (vma) {
/*
* Batch parsed and accepted:
*
@@ -1603,16 +1593,18 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
*/
dispatch_flags |= I915_DISPATCH_SECURE;
params->args_batch_start_offset = 0;
- batch_obj = parsed_batch_obj;
+ params->batch_vma = vma;
}
}
- batch_obj->base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND;
+ params->batch_vma->obj->base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND;
/* snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
* batch" bit. Hence we need to pin secure batches into the global gtt.
* hsw should have this fixed, but bdw mucks it up again. */
if (dispatch_flags & I915_DISPATCH_SECURE) {
+ struct drm_i915_gem_object *obj = params->batch_vma->obj;
+
/*
* So on first glance it looks freaky that we pin the batch here
* outside of the reservation loop. But:
@@ -1623,13 +1615,12 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
* fitting due to fragmentation.
* So this is actually safe.
*/
- ret = i915_gem_obj_ggtt_pin(batch_obj, 0, 0);
+ ret = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 0);
if (ret)
goto err;
- params->batch_obj_vm_offset = i915_gem_obj_ggtt_offset(batch_obj);
- } else
- params->batch_obj_vm_offset = i915_gem_obj_offset(batch_obj, vm);
+ params->batch_vma = i915_gem_obj_to_ggtt(obj);
+ }
/* Allocate a request for this batch buffer nice and early. */
params->request = i915_gem_request_alloc(ring, ctx);
@@ -1654,11 +1645,10 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
params->file = file;
params->ring = ring;
params->dispatch_flags = dispatch_flags;
- params->batch_obj = batch_obj;
params->ctx = ctx;
ret = execbuf_submit(params, args, &eb->vmas);
- __i915_add_request(params->request, params->batch_obj, ret == 0);
+ __i915_add_request(params->request, params->batch_vma->obj, ret == 0);
err_batch_unpin:
/*
@@ -1668,8 +1658,7 @@ err_batch_unpin:
* active.
*/
if (dispatch_flags & I915_DISPATCH_SECURE)
- i915_gem_object_ggtt_unpin(batch_obj);
-
+ i915_vma_unpin(params->batch_vma);
err:
/* the request owns the ref now */
i915_gem_context_unreference(ctx);
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 98b9730f4066..8f3b2f051918 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -3549,13 +3549,10 @@ int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level,
return 0;
if (vma->bound == 0 && vma->vm->allocate_va_range) {
- /* XXX: i915_vma_pin() will fix this +- hack */
- vma->pin_count++;
trace_i915_va_alloc(vma);
ret = vma->vm->allocate_va_range(vma->vm,
vma->node.start,
vma->node.size);
- vma->pin_count--;
if (ret)
return ret;
}
--
2.7.0.rc3