[Intel-gfx] [PATCH 11/11] drm/i915: Allow userspace to request an object at a specific offset
Imre Deak
imre.deak at intel.com
Wed Jan 16 11:26:28 CET 2013
On Tue, 2013-01-08 at 10:53 +0000, Chris Wilson wrote:
> Certain workarounds and workloads require objects at specific or at
> least known offsets. Privileged users could pin an object into the GTT,
> but that has obvious limitations for the general case. Instead, the user
> can construct a batch assuming a particular layout for an object and
> request that the kernel try its utmost to provide the object at that
> location. This has the advantage that not only can it fail, but also
> such allocations are transitory - although contention should be rare and
> the object should persist at the same location between batches. The benefit for
> userspace is that it can then avoid all relocations referencing this
> object as it resides at a known location - this becomes even more useful
> with per-process GTT spaces where there will be virtually no contention
> between applications.
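
Just to double check I understand the intended userspace usage (the
i915_drm.h hunk isn't quoted below, so the flag name here is only my
guess at it): the batch would be built against a fixed address and the
execbuffer entry would carry that address plus the new flag, roughly

	struct drm_i915_gem_exec_object2 exec_entry = {
		.handle = bo_handle,          /* GEM handle of the object */
		.offset = 0x10000000,         /* requested GTT offset */
		.flags  = EXEC_OBJECT_PINNED, /* guessed flag name */
	};

with no relocation entries needed for anything referencing this object,
as described above.
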
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> ---
> drivers/gpu/drm/i915/i915_drv.h | 8 ++
> drivers/gpu/drm/i915/i915_gem.c | 10 +-
> drivers/gpu/drm/i915/i915_gem_execbuffer.c | 139 +++++++++++++++++++++++++++-
> include/uapi/drm/i915_drm.h | 3 +-
> 4 files changed, 151 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 97e2049..7da4953 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1429,6 +1429,14 @@ struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
> size_t size);
> void i915_gem_free_object(struct drm_gem_object *obj);
>
> +uint32_t i915_gem_get_gtt_alignment(struct drm_device *dev,
> + uint32_t size, int tiling_mode);
> +uint32_t i915_gem_get_gtt_size(struct drm_device *dev,
> + uint32_t size, int tiling_mode);
> +bool i915_gem_valid_gtt_space(struct drm_device *dev,
> + struct drm_mm_node *gtt_space,
> + unsigned long cache_level);
> +
> int __must_check i915_gem_object_pin(struct drm_i915_gem_object *obj,
> uint32_t alignment,
> bool map_and_fenceable,
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index b7661e1..f1a23bb 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -1435,7 +1435,7 @@ i915_gem_release_mmap(struct drm_i915_gem_object *obj)
> obj->fault_mappable = false;
> }
>
> -static uint32_t
> +uint32_t
> i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode)
> {
> uint32_t gtt_size;
> @@ -1463,7 +1463,7 @@ i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode)
> * Return the required GTT alignment for an object, taking into account
> * potential fence register mapping.
> */
> -static uint32_t
> +uint32_t
> i915_gem_get_gtt_alignment(struct drm_device *dev,
> uint32_t size,
> int tiling_mode)
> @@ -2833,9 +2833,9 @@ i915_gem_object_get_fence(struct drm_i915_gem_object *obj)
> return 0;
> }
>
> -static bool i915_gem_valid_gtt_space(struct drm_device *dev,
> - struct drm_mm_node *gtt_space,
> - unsigned long cache_level)
> +bool i915_gem_valid_gtt_space(struct drm_device *dev,
> + struct drm_mm_node *gtt_space,
> + unsigned long cache_level)
> {
> struct drm_mm_node *other;
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index f6bd92c..bb8b0d6 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -403,6 +403,126 @@ i915_gem_execbuffer_relocate(struct drm_device *dev,
> return ret;
> }
>
> +static struct drm_mm_node *
> +get_pinned_block(struct drm_i915_gem_object *obj, u32 size)
> +{
> + struct drm_device *dev = obj->base.dev;
> + struct drm_i915_private *dev_priv = dev->dev_private;
> + struct drm_mm_node *gtt;
> +
> + gtt = drm_mm_create_block(&dev_priv->mm.gtt_space,
> + obj->exec_entry->offset,
> + size,
> + false);
> + if (gtt == NULL)
> + return NULL;
> +
> + if (!i915_gem_valid_gtt_space(dev, gtt, obj->cache_level)) {
> + drm_mm_put_block(gtt);
> + return NULL;
> + }
> +
> + gtt->color = obj->cache_level;
> + return gtt;
> +}
> +
> +static int
> +__i915_gem_evict_range(struct drm_device *dev, u32 start, u32 end, u32 color)
> +{
> + struct drm_i915_private *dev_priv = dev->dev_private;
> + struct drm_i915_gem_object *obj, *next;
> +
> + list_for_each_entry_safe(obj, next, &dev_priv->mm.bound_list, gtt_list) {
> + u32 node_start = obj->gtt_space->start;
> + u32 node_end = obj->gtt_space->start + obj->gtt_space->size;
> +
> + if (!HAS_LLC(dev)) {
> + if (node_end <= start && obj->tiling_mode != color)
> + node_end += 4096;
> + if (node_start >= end && obj->tiling_mode != color)
> + node_start -= 4096;
> + }
> +
> + if (node_end > start && node_start < end) {
> + int ret = i915_gem_object_unbind(obj);
> + if (ret)
> + return ret;
> + }
> + }
> +
> + return 0;
> +}
> +
> +static int
> +i915_gem_execbuffer_pinned_object(struct drm_i915_gem_object *obj)
> +{
> + struct drm_device *dev = obj->base.dev;
> + struct drm_i915_private *dev_priv = dev->dev_private;
> + struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
> + struct drm_mm_node *gtt;
> + bool fenceable;
> + u32 size;
> + int ret;
> +
> + if (entry->alignment && entry->offset & (entry->alignment - 1))
> + return -EINVAL;
> +
> + if (obj->gtt_offset == entry->offset)
> + return 0;
> +
> + if (entry->offset & (i915_gem_get_gtt_alignment(dev, obj->base.size, obj->tiling_mode) - 1)) {
> + fenceable = false;
> + if (entry->offset & (i915_gem_get_unfenced_gtt_alignment(dev, obj->base.size, obj->tiling_mode) - 1))
> + return -EINVAL;
> + }
> +
> + i915_gem_object_pin_pages(obj);
> +
> + ret = i915_gem_object_unbind(obj);
> + if (ret)
> + goto unpin_pages;
> +
> + size = i915_gem_get_gtt_size(dev, obj->base.size, obj->tiling_mode);
> + gtt = get_pinned_block(obj, size);
> + if (gtt == NULL) {
> + ret = __i915_gem_evict_range(dev,
> + entry->offset,
> + entry->offset + size,
> + obj->tiling_mode);
A typo, as discussed on IRC: tiling_mode should be cache_level in the
last argument passed here. The same goes for the obj->tiling_mode
checks inside __i915_gem_evict_range().
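To spell out the fix, an untested sketch of what I mean, using the
names from the quoted patch:

	ret = __i915_gem_evict_range(dev,
				     entry->offset,
				     entry->offset + size,
				     obj->cache_level);

and correspondingly inside __i915_gem_evict_range():

	if (!HAS_LLC(dev)) {
		if (node_end <= start && obj->cache_level != color)
			node_end += 4096;
		if (node_start >= end && obj->cache_level != color)
			node_start -= 4096;
	}
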
Otherwise on the series:
Reviewed-by: Imre Deak <imre.deak at intel.com>
--Imre