[Intel-gfx] [PATCH 2/2] drm/i915: allow sync points within batches
Jesse Barnes
jbarnes at virtuousgeek.org
Wed Sep 3 17:41:06 CEST 2014
On Wed, 3 Sep 2014 08:01:55 +0100
Chris Wilson <chris at chris-wilson.co.uk> wrote:
> On Tue, Sep 02, 2014 at 02:32:41PM -0700, Jesse Barnes wrote:
> > Use a new reloc type to allow userspace to insert sync points within
> > batches before they're submitted. The corresponding fence fds are
> > returned in the offset field of the returned reloc tree, and can be
> > operated on with the sync fence APIs.
> >
> > Signed-off-by: Jesse Barnes <jbarnes at virtuousgeek.org>
> > ---
> > drivers/gpu/drm/i915/i915_drv.h | 4 +
> > drivers/gpu/drm/i915/i915_gem_execbuffer.c | 125 ++++++++++++++++++++++++-----
> > drivers/gpu/drm/i915/i915_sync.c | 58 ++++++++++---
> > include/uapi/drm/i915_drm.h | 11 ++-
> > 4 files changed, 167 insertions(+), 31 deletions(-)
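For context, userspace usage would look roughly like the sketch below.
How the new reloc type is selected is an assumption on my part, since
the i915_drm.h hunk isn't quoted in this reply; the fence fd coming
back in reloc.offset and the sync fence wait are from the commit
message above.

    /* Sketch only: the mechanism for requesting the new sync-point
     * reloc type is assumed, not taken from the uapi hunk.
     */
    struct drm_i915_gem_relocation_entry reloc = {
            .target_handle = batch_handle,
            .offset = sync_point_offset, /* batch offset for the sync point */
            .read_domains = I915_GEM_DOMAIN_COMMAND,
            /* new reloc type selected here, per the uapi change */
    };

    /* ... attach reloc to the exec object and call
     * DRM_IOCTL_I915_GEM_EXECBUFFER2 as usual ... */

    int fence_fd = (int)reloc.offset; /* kernel writes the fd back here */
    if (sync_wait(fence_fd, 1000))    /* libsync, wait up to 1000ms */
            perror("sync point wait");
    close(fence_fd);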
> >
> > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> > index 6eb119e..410eedf 100644
> > --- a/drivers/gpu/drm/i915/i915_drv.h
> > +++ b/drivers/gpu/drm/i915/i915_drv.h
> > @@ -2284,6 +2284,10 @@ int i915_sync_init(struct drm_i915_private *dev_priv);
> > void i915_sync_fini(struct drm_i915_private *dev_priv);
> > int i915_sync_create_fence_ioctl(struct drm_device *dev, void *data,
> > struct drm_file *file);
> > +int i915_sync_fence_create(struct intel_engine_cs *ring,
> > + struct intel_context *ctx,
> > + u32 seqno);
> > +
> >
> > #define PIN_MAPPABLE 0x1
> > #define PIN_NONBLOCK 0x2
> > diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> > index 60998fc..32ec599 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> > +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> > @@ -32,6 +32,7 @@
> > #include "i915_trace.h"
> > #include "intel_drv.h"
> > #include <linux/dma_remapping.h>
> > +#include "../../../staging/android/sync.h"
> >
> > #define __EXEC_OBJECT_HAS_PIN (1<<31)
> > #define __EXEC_OBJECT_HAS_FENCE (1<<30)
> > @@ -262,6 +263,67 @@ static inline int use_cpu_reloc(struct drm_i915_gem_object *obj)
> > !obj->map_and_fenceable ||
> > obj->cache_level != I915_CACHE_NONE);
> > }
> > +
> > +static int
> > +emit_sync_obj_cpu(struct drm_i915_gem_object *obj,
> > + struct drm_i915_gem_relocation_entry *reloc)
> > +{
> > + uint32_t page_offset = offset_in_page(reloc->offset);
> > + char *vaddr;
> > + int ret;
> > +
> > + ret = i915_gem_object_set_to_cpu_domain(obj, true);
> > + if (ret)
> > + return ret;
> > +
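> > + /* Write the sync point commands (store seqno, then user interrupt) */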
> > + vaddr = kmap_atomic(i915_gem_object_get_page(obj,
> > + reloc->offset >> PAGE_SHIFT));
> > + *(uint32_t *)(vaddr + page_offset) = MI_STORE_DWORD_INDEX;
> > + *(uint32_t *)(vaddr + page_offset + 4) =
> > + I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT;
> > + *(uint32_t *)(vaddr + page_offset + 8) =
> > + obj->ring->outstanding_lazy_seqno;
> > + *(uint32_t *)(vaddr + page_offset + 12) = MI_USER_INTERRUPT;
> > +
> > + kunmap_atomic(vaddr);
> > +
> > + return 0;
> > +}
> > +
> > +static int
> > +emit_sync_obj_gtt(struct drm_i915_gem_object *obj,
> > + struct drm_i915_gem_relocation_entry *reloc)
> > +{
> > + struct drm_device *dev = obj->base.dev;
> > + struct drm_i915_private *dev_priv = dev->dev_private;
> > + uint32_t __iomem *reloc_entry;
> > + void __iomem *reloc_page;
> > + int ret;
> > +
> > + ret = i915_gem_object_set_to_gtt_domain(obj, true);
> > + if (ret)
> > + return ret;
> > +
> > + ret = i915_gem_object_put_fence(obj);
> > + if (ret)
> > + return ret;
> > +
> > + /* Map the page containing the relocation we're going to perform. */
> > + reloc->offset += i915_gem_obj_ggtt_offset(obj);
> > + reloc_page = io_mapping_map_atomic_wc(dev_priv->gtt.mappable,
> > + reloc->offset & PAGE_MASK);
> > +
> > + reloc_entry = (uint32_t __iomem *)
> > + (reloc_page + offset_in_page(reloc->offset));
> > + /* Same four dwords as the CPU path: cmd, index, seqno, interrupt */
> > + iowrite32(MI_STORE_DWORD_INDEX, reloc_entry);
> > + iowrite32(I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT,
> > + reloc_entry + 1);
> > + iowrite32(obj->ring->outstanding_lazy_seqno, reloc_entry + 2);
> > + iowrite32(MI_USER_INTERRUPT, reloc_entry + 3);
> > +
> > + io_mapping_unmap_atomic(reloc_page);
>
> These commands are illegal/invalid inside the object, only valid inside
> the ring.
Hm, we ought to be able to write to non-privileged space with
STORE_DWORD, but that does mean moving to context-specific pages in
process space, or at least adding them to our existing scheme.

I haven't tried MI_USER_INTERRUPT from a batch; if we can't issue it
from a non-privileged batch, that nixes one of the other neat features
we could have (fine-grained intra-batch userspace synchronization).
> > + return 0;
> > +}
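Assuming the non-privileged STORE_DWORD and MI_USER_INTERRUPT
questions work out, the intra-batch case would let a CPU consumer
start on partial results before the batch completes, roughly like
this (the fd plumbing is assumed as in the sketch above; sync_wait()
is libsync's wrapper around the sync fence wait ioctl):

    /* Two sync points in one batch: fd0 signals at the midpoint, fd1
     * at the end.  process_first_half() is a hypothetical consumer.
     */
    int fd0 = (int)relocs[0].offset; /* written back by the kernel */
    int fd1 = (int)relocs[1].offset;

    if (sync_wait(fd0, -1) == 0)     /* block until the midpoint */
            process_first_half();
    sync_wait(fd1, -1);              /* wait for the whole batch */
    close(fd0);
    close(fd1);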
> >
> > static int
> > relocate_entry_cpu(struct drm_i915_gem_object *obj,
> > @@ -349,7 +411,8 @@ relocate_entry_gtt(struct drm_i915_gem_object *obj,
> > static int
> > i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
> > struct eb_vmas *eb,
> > - struct drm_i915_gem_relocation_entry *reloc)
> > + struct drm_i915_gem_relocation_entry *reloc,
> > + struct intel_context *ctx)
>
> Hmm. That's a nuisance. But no, you only use it to automatically create
> a fence not to patch the batch, so you can just use an object-flag.
>
> This fits neatly into requests.
Most definitely. What do you think of the potential upside in the DDX
for this, assuming we get dword writes from batches working?
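On the uapi side the object-flag approach would look something like
the following (flag name and bit invented purely for illustration),
and it avoids threading ctx through the relocation path entirely:

    /* Hypothetical flag, name/bit invented for illustration */
    #define EXEC_OBJECT_SYNC_POINT (1<<2)

    struct drm_i915_gem_exec_object2 exec_obj = {
            .handle = batch_handle,
            .flags  = EXEC_OBJECT_SYNC_POINT,
    };
    /* The kernel could then create the fence when it creates the
     * request for this batch and write the fd back into, say,
     * exec_obj.rsvd2.
     */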
--
Jesse Barnes, Intel Open Source Technology Center