[Intel-gfx] [PATCH 2/2] drm/i915: allow sync points within batches

Chris Wilson chris at chris-wilson.co.uk
Wed Sep 3 09:01:55 CEST 2014


On Tue, Sep 02, 2014 at 02:32:41PM -0700, Jesse Barnes wrote:
> Use a new reloc type to allow userspace to insert sync points within
> batches before they're submitted.  The corresponding fence fds are
> returned in the offset field of the returned reloc tree, and can be
> operated on with the sync fence APIs.
> 
> Signed-off-by: Jesse Barnes <jbarnes at virtuousgeek.org>
> ---
>  drivers/gpu/drm/i915/i915_drv.h            |   4 +
>  drivers/gpu/drm/i915/i915_gem_execbuffer.c | 125 ++++++++++++++++++++++++-----
>  drivers/gpu/drm/i915/i915_sync.c           |  58 ++++++++++---
>  include/uapi/drm/i915_drm.h                |  11 ++-
>  4 files changed, 167 insertions(+), 31 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 6eb119e..410eedf 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2284,6 +2284,10 @@ int i915_sync_init(struct drm_i915_private *dev_priv);
>  void i915_sync_fini(struct drm_i915_private *dev_priv);
>  int i915_sync_create_fence_ioctl(struct drm_device *dev, void *data,
>  				 struct drm_file *file);
> +int i915_sync_fence_create(struct intel_engine_cs *ring,
> +			   struct intel_context *ctx,
> +			   u32 seqno);
> +
>  
>  #define PIN_MAPPABLE 0x1
>  #define PIN_NONBLOCK 0x2
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 60998fc..32ec599 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -32,6 +32,7 @@
>  #include "i915_trace.h"
>  #include "intel_drv.h"
>  #include <linux/dma_remapping.h>
> +#include "../../../staging/android/sync.h"
>  
>  #define  __EXEC_OBJECT_HAS_PIN (1<<31)
>  #define  __EXEC_OBJECT_HAS_FENCE (1<<30)
> @@ -262,6 +263,67 @@ static inline int use_cpu_reloc(struct drm_i915_gem_object *obj)
>  		!obj->map_and_fenceable ||
>  		obj->cache_level != I915_CACHE_NONE);
>  }
> +static int
> +emit_sync_obj_cpu(struct drm_i915_gem_object *obj,
> +		  struct drm_i915_gem_relocation_entry *reloc)
> +{
> +	uint32_t page_offset = offset_in_page(reloc->offset);
> +	char *vaddr;
> +	int ret;
> +
> +	ret = i915_gem_object_set_to_cpu_domain(obj, true);
> +	if (ret)
> +		return ret;
> +
> +	vaddr = kmap_atomic(i915_gem_object_get_page(obj,
> +				reloc->offset >> PAGE_SHIFT));
> +	*(uint32_t *)(vaddr + page_offset) = MI_STORE_DWORD_INDEX;
> +	*(uint32_t *)(vaddr + page_offset + 4) =
> +		I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT;
> +	*(uint32_t *)(vaddr + page_offset + 8) =
> +		obj->ring->outstanding_lazy_seqno;
> +	*(uint32_t *)(vaddr + page_offset + 12) = MI_USER_INTERRUPT;
> +
> +	kunmap_atomic(vaddr);
> +
> +	return 0;
> +}
> +
> +static int
> +emit_sync_obj_gtt(struct drm_i915_gem_object *obj,
> +		  struct drm_i915_gem_relocation_entry *reloc)
> +{
> +	struct drm_device *dev = obj->base.dev;
> +	struct drm_i915_private *dev_priv = dev->dev_private;
> +	uint32_t __iomem *reloc_entry;
> +	void __iomem *reloc_page;
> +	int ret;
> +
> +	ret = i915_gem_object_set_to_gtt_domain(obj, true);
> +	if (ret)
> +		return ret;
> +
> +	ret = i915_gem_object_put_fence(obj);
> +	if (ret)
> +		return ret;
> +
> +	/* Map the page containing the relocation we're going to perform.  */
> +	reloc->offset += i915_gem_obj_ggtt_offset(obj);
> +	reloc_page = io_mapping_map_atomic_wc(dev_priv->gtt.mappable,
> +			reloc->offset & PAGE_MASK);
> +
> +	reloc_entry = (uint32_t __iomem *)
> +		(reloc_page + offset_in_page(reloc->offset));
> +	iowrite32(MI_STORE_DWORD_INDEX, reloc_entry);
> +	iowrite32(I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT,
> +		  reloc_entry);
> +	iowrite32(obj->ring->outstanding_lazy_seqno, reloc_entry);
> +	iowrite32(MI_USER_INTERRUPT, reloc_entry);
> +
> +	io_mapping_unmap_atomic(reloc_page);

These commands are illegal/invalid inside the object; they are only valid
inside the ring.

> +	return 0;
> +}
>  
>  static int
>  relocate_entry_cpu(struct drm_i915_gem_object *obj,
> @@ -349,7 +411,8 @@ relocate_entry_gtt(struct drm_i915_gem_object *obj,
>  static int
>  i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
>  				   struct eb_vmas *eb,
> -				   struct drm_i915_gem_relocation_entry *reloc)
> +				   struct drm_i915_gem_relocation_entry *reloc,
> +				   struct intel_context *ctx)

Hmm. That's a nuisance. But no — you only use it to automatically create
a fence, not to patch the batch, so you can just use an object flag.

This fits neatly into requests.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre



More information about the Intel-gfx mailing list