[Intel-gfx] [PATCH] drm/i915: execbuf2 support
Chris Wilson
chris at chris-wilson.co.uk
Wed Jul 15 18:18:30 CEST 2009
On Tue, 2009-07-14 at 13:49 -0700, Jesse Barnes wrote:
...
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 876b65c..52c7fb3 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -2910,7 +2910,7 @@ i915_gem_object_set_cpu_read_domain_range(struct drm_gem_object *obj,
> static int
> i915_gem_object_pin_and_relocate(struct drm_gem_object *obj,
> struct drm_file *file_priv,
> - struct drm_i915_gem_exec_object *entry,
> + struct drm_i915_gem_exec_object2 *entry,
> struct drm_i915_gem_relocation_entry *relocs)
> {
> struct drm_device *dev = obj->dev;
> @@ -2918,12 +2918,37 @@ i915_gem_object_pin_and_relocate(struct drm_gem_object *obj,
> struct drm_i915_gem_object *obj_priv = obj->driver_private;
> int i, ret;
> void __iomem *reloc_page;
> + bool need_fence;
> +
> + need_fence = entry->flags & EXEC_OBJECT_NEEDS_FENCE;
> +
> + /* Check fence reg constraints and unpin if necessary */
> + if (need_fence && obj_priv->pin_count && !i915_obj_fenceable(dev, obj))
> + i915_gem_object_unpin(obj);
>
> /* Choose the GTT offset for our buffer and put it there. */
> ret = i915_gem_object_pin(obj, (uint32_t) entry->alignment);
> if (ret)
> return ret;
>
> + /*
> + * Pre-965 chips need a fence register set up in order to
> + * properly handle blits to/from tiled surfaces.
> + */
> + if (need_fence && obj_priv->fence_reg == I915_FENCE_REG_NONE &&
> + obj_priv->tiling_mode != I915_TILING_NONE) {
> + ret = i915_gem_object_get_fence_reg(obj);
> + if (ret != 0) {
> + if (ret != -EBUSY && ret != -ERESTARTSYS)
> + DRM_ERROR("Failure to install fence: %d\n",
> + ret);
> + i915_gem_object_unpin(obj);
> + return ret;
> + }
> + } else if (!need_fence && obj_priv->fence_reg != I915_FENCE_REG_NONE) {
> + i915_gem_clear_fence_reg(obj);
> + }
> +
I don't think the clear_fence_reg() here is desirable, as this will
cause a stall if the user is mixing tiled and untiled GPU commands. The
best solution would seem to be ensuring that we have enough information
to make the best guess in the search-and-evict code within pin() - so I
think performing an LRU search over last_tiled_rendering_seqno instead of
last_rendering_seqno would offer a modicum of improvement. And similar
logic could be added to clear_fence_reg() to limit the duration of any
stall there.
-ickle
More information about the Intel-gfx
mailing list