[Intel-gfx] [PATCH 2/2] drm/i915: allow sync points within batches
Chris Wilson
chris at chris-wilson.co.uk
Wed Sep 3 09:01:55 CEST 2014
On Tue, Sep 02, 2014 at 02:32:41PM -0700, Jesse Barnes wrote:
> Use a new reloc type to allow userspace to insert sync points within
> batches before they're submitted. The corresponding fence fds are
> returned in the offset field of the returned reloc tree, and can be
> operated on with the sync fence APIs.
>
> Signed-off-by: Jesse Barnes <jbarnes at virtuousgeek.org>
> ---
> drivers/gpu/drm/i915/i915_drv.h | 4 +
> drivers/gpu/drm/i915/i915_gem_execbuffer.c | 125 ++++++++++++++++++++++++-----
> drivers/gpu/drm/i915/i915_sync.c | 58 ++++++++++---
> include/uapi/drm/i915_drm.h | 11 ++-
> 4 files changed, 167 insertions(+), 31 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 6eb119e..410eedf 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2284,6 +2284,10 @@ int i915_sync_init(struct drm_i915_private *dev_priv);
> void i915_sync_fini(struct drm_i915_private *dev_priv);
> int i915_sync_create_fence_ioctl(struct drm_device *dev, void *data,
> struct drm_file *file);
> +int i915_sync_fence_create(struct intel_engine_cs *ring,
> + struct intel_context *ctx,
> + u32 seqno);
> +
>
> #define PIN_MAPPABLE 0x1
> #define PIN_NONBLOCK 0x2
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 60998fc..32ec599 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -32,6 +32,7 @@
> #include "i915_trace.h"
> #include "intel_drv.h"
> #include <linux/dma_remapping.h>
> +#include "../../../staging/android/sync.h"
>
> #define __EXEC_OBJECT_HAS_PIN (1<<31)
> #define __EXEC_OBJECT_HAS_FENCE (1<<30)
> @@ -262,6 +263,67 @@ static inline int use_cpu_reloc(struct drm_i915_gem_object *obj)
> !obj->map_and_fenceable ||
> obj->cache_level != I915_CACHE_NONE);
> }
> +static int
> +emit_sync_obj_cpu(struct drm_i915_gem_object *obj,
> + struct drm_i915_gem_relocation_entry *reloc)
> +{
> + uint32_t page_offset = offset_in_page(reloc->offset);
> + char *vaddr;
> + int ret;
> +
> + ret = i915_gem_object_set_to_cpu_domain(obj, true);
> + if (ret)
> + return ret;
> +
> + vaddr = kmap_atomic(i915_gem_object_get_page(obj,
> + reloc->offset >> PAGE_SHIFT));
> + *(uint32_t *)(vaddr + page_offset) = MI_STORE_DWORD_INDEX;
> + *(uint32_t *)(vaddr + page_offset + 4) =
> + I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT;
> + *(uint32_t *)(vaddr + page_offset + 8) =
> + obj->ring->outstanding_lazy_seqno;
> + *(uint32_t *)(vaddr + page_offset + 12) = MI_USER_INTERRUPT;
> +
> + kunmap_atomic(vaddr);
> +
> + return 0;
> +}
> +
> +static int
> +emit_sync_obj_gtt(struct drm_i915_gem_object *obj,
> + struct drm_i915_gem_relocation_entry *reloc)
> +{
> + struct drm_device *dev = obj->base.dev;
> + struct drm_i915_private *dev_priv = dev->dev_private;
> + uint32_t __iomem *reloc_entry;
> + void __iomem *reloc_page;
> + int ret;
> +
> + ret = i915_gem_object_set_to_gtt_domain(obj, true);
> + if (ret)
> + return ret;
> +
> + ret = i915_gem_object_put_fence(obj);
> + if (ret)
> + return ret;
> +
> + /* Map the page containing the relocation we're going to perform. */
> + reloc->offset += i915_gem_obj_ggtt_offset(obj);
> + reloc_page = io_mapping_map_atomic_wc(dev_priv->gtt.mappable,
> + reloc->offset & PAGE_MASK);
> +
> + reloc_entry = (uint32_t __iomem *)
> + (reloc_page + offset_in_page(reloc->offset));
> + iowrite32(MI_STORE_DWORD_INDEX, reloc_entry);
> + iowrite32(I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT,
> + reloc_entry);
> + iowrite32(obj->ring->outstanding_lazy_seqno, reloc_entry);
> + iowrite32(MI_USER_INTERRUPT, reloc_entry);
> +
> + io_mapping_unmap_atomic(reloc_page);
These commands are illegal/invalid inside the batch object —
MI_STORE_DWORD_INDEX and MI_USER_INTERRUPT are only valid when emitted
into the ring itself. (Note also that, unlike the CPU path which writes
at offsets +0/+4/+8/+12, all four iowrite32() calls here write to the
same reloc_entry address, so the later dwords overwrite the earlier
ones.)
> + return 0;
> +}
>
> static int
> relocate_entry_cpu(struct drm_i915_gem_object *obj,
> @@ -349,7 +411,8 @@ relocate_entry_gtt(struct drm_i915_gem_object *obj,
> static int
> i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
> struct eb_vmas *eb,
> - struct drm_i915_gem_relocation_entry *reloc)
> + struct drm_i915_gem_relocation_entry *reloc,
> + struct intel_context *ctx)
Hmm. That's a nuisance. But no — you only use it to automatically create
a fence, not to patch the batch, so you can just use an object flag instead.
This fits neatly into requests.
-Chris
--
Chris Wilson, Intel Open Source Technology Centre
More information about the Intel-gfx
mailing list