[Intel-gfx] [PATCH] drm/i915/gt: Split intel_ring_submission

Thu Oct 24 07:33:26 UTC 2019

Chris Wilson <chris at chris-wilson.co.uk> writes:

> Split the legacy submission backend from the common ring buffer.

Aye.

Didn't spot anything out of the ordinary.

Reviewed-by: Mika Kuoppala <mika.kuoppala at linux.intel.com>

>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/Makefile                 |   5 +-
>  drivers/gpu/drm/i915/display/intel_overlay.c  |   1 +
>  drivers/gpu/drm/i915/gem/i915_gem_context.c   |   3 +-
>  .../gpu/drm/i915/gem/i915_gem_execbuffer.c    |   1 +
>  .../gpu/drm/i915/gem/i915_gem_object_blt.c    |   1 +
>  .../i915/gem/selftests/i915_gem_coherency.c   |   1 +
>  drivers/gpu/drm/i915/gt/intel_context.c       |   1 +
>  drivers/gpu/drm/i915/gt/intel_context.h       |   1 +
>  drivers/gpu/drm/i915/gt/intel_engine.h        | 114 -------
>  drivers/gpu/drm/i915/gt/intel_engine_cs.c     |   1 +
>  drivers/gpu/drm/i915/gt/intel_engine_pm.c     |   1 +
>  drivers/gpu/drm/i915/gt/intel_engine_types.h  |  27 +-
>  drivers/gpu/drm/i915/gt/intel_lrc.c           |   1 +
>  drivers/gpu/drm/i915/gt/intel_mocs.c          |   1 +
>  drivers/gpu/drm/i915/gt/intel_renderstate.c   |   1 +
>  drivers/gpu/drm/i915/gt/intel_ring.c          | 314 ++++++++++++++++++
>  drivers/gpu/drm/i915/gt/intel_ring.h          | 131 ++++++++
>  drivers/gpu/drm/i915/gt/intel_timeline.c      |   6 +-
>  drivers/gpu/drm/i915/gt/intel_workarounds.c   |   1 +
>  drivers/gpu/drm/i915/gt/mock_engine.c         |   1 +
>  drivers/gpu/drm/i915/gt/selftest_timeline.c   |   1 +
>  .../gpu/drm/i915/gt/uc/intel_guc_submission.c |   3 +-
>  drivers/gpu/drm/i915/gvt/cmd_parser.c         |   2 +
>  drivers/gpu/drm/i915/gvt/mmio_context.c       |   1 +
>  drivers/gpu/drm/i915/gvt/scheduler.c          |   1 +
>  drivers/gpu/drm/i915/i915_active.c            |   1 +
>  drivers/gpu/drm/i915/i915_perf.c              |   1 +
>  drivers/gpu/drm/i915/i915_request.c           |   1 +
>  28 files changed, 477 insertions(+), 147 deletions(-)
>  create mode 100644 drivers/gpu/drm/i915/gt/intel_ring.c
>  create mode 100644 drivers/gpu/drm/i915/gt/intel_ring.h
>
> diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
> index a91e0a487a79..5021aa7fa187 100644
> --- a/drivers/gpu/drm/i915/Makefile
> +++ b/drivers/gpu/drm/i915/Makefile
> @@ -90,11 +90,12 @@ gt-y += \
>  	gt/intel_gt_requests.o \
>  	gt/intel_llc.o \
>  	gt/intel_lrc.o \
> +	gt/intel_mocs.o \
>  	gt/intel_rc6.o \
>  	gt/intel_renderstate.o \
>  	gt/intel_reset.o \
> -	gt/intel_ringbuffer.o \
> -	gt/intel_mocs.o \
> +	gt/intel_ring.o \
> +	gt/intel_ring_submission.o \
>  	gt/intel_sseu.o \
>  	gt/intel_timeline.o \
>  	gt/intel_workarounds.o
> diff --git a/drivers/gpu/drm/i915/display/intel_overlay.c b/drivers/gpu/drm/i915/display/intel_overlay.c
> index 2360f19f9694..848ce07a8ec2 100644
> --- a/drivers/gpu/drm/i915/display/intel_overlay.c
> +++ b/drivers/gpu/drm/i915/display/intel_overlay.c
> @@ -30,6 +30,7 @@
>  #include <drm/i915_drm.h>
>  
>  #include "gem/i915_gem_pm.h"
> +#include "gt/intel_ring.h"
>  
>  #include "i915_drv.h"
>  #include "i915_reg.h"
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> index df528e48e566..a03bee30fac1 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> @@ -69,9 +69,10 @@
>  
>  #include <drm/i915_drm.h>
>  
> -#include "gt/intel_lrc_reg.h"
>  #include "gt/intel_engine_heartbeat.h"
>  #include "gt/intel_engine_user.h"
> +#include "gt/intel_lrc_reg.h"
> +#include "gt/intel_ring.h"
>  
>  #include "i915_gem_context.h"
>  #include "i915_globals.h"
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> index 99d79f94e641..c88948e4094b 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> @@ -19,6 +19,7 @@
>  #include "gt/intel_engine_pool.h"
>  #include "gt/intel_gt.h"
>  #include "gt/intel_gt_pm.h"
> +#include "gt/intel_ring.h"
>  
>  #include "i915_drv.h"
>  #include "i915_gem_clflush.h"
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c
> index 5bd8de124d74..516e61e99212 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c
> @@ -8,6 +8,7 @@
>  #include "gt/intel_engine_pm.h"
>  #include "gt/intel_engine_pool.h"
>  #include "gt/intel_gt.h"
> +#include "gt/intel_ring.h"
>  #include "i915_gem_clflush.h"
>  #include "i915_gem_object_blt.h"
>  
> diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
> index 549810f70aeb..0877ef4dff63 100644
> --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
> +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
> @@ -8,6 +8,7 @@
>  
>  #include "gt/intel_gt.h"
>  #include "gt/intel_gt_pm.h"
> +#include "gt/intel_ring.h"
>  
>  #include "i915_selftest.h"
>  #include "selftests/i915_random.h"
> diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
> index 59c3083c1ec1..ee9d2bcd2c13 100644
> --- a/drivers/gpu/drm/i915/gt/intel_context.c
> +++ b/drivers/gpu/drm/i915/gt/intel_context.c
> @@ -13,6 +13,7 @@
>  #include "intel_context.h"
>  #include "intel_engine.h"
>  #include "intel_engine_pm.h"
> +#include "intel_ring.h"
>  
>  static struct i915_global_context {
>  	struct i915_global base;
> diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
> index dd742ac2fbdb..68b3d317d959 100644
> --- a/drivers/gpu/drm/i915/gt/intel_context.h
> +++ b/drivers/gpu/drm/i915/gt/intel_context.h
> @@ -12,6 +12,7 @@
>  #include "i915_active.h"
>  #include "intel_context_types.h"
>  #include "intel_engine_types.h"
> +#include "intel_ring_types.h"
>  #include "intel_timeline_types.h"
>  
>  void intel_context_init(struct intel_context *ce,
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
> index c7f93d05c8e0..d77b9f9f096c 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine.h
> +++ b/drivers/gpu/drm/i915/gt/intel_engine.h
> @@ -19,7 +19,6 @@
>  #include "intel_workarounds.h"
>  
>  struct drm_printer;
> -
>  struct intel_gt;
>  
>  /* Early gen2 devices have a cacheline of just 32 bytes, using 64 is overkill,
> @@ -176,122 +175,9 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value)
>  #define I915_HWS_CSB_WRITE_INDEX	0x1f
>  #define CNL_HWS_CSB_WRITE_INDEX		0x2f
>  
> -struct intel_ring *
> -intel_engine_create_ring(struct intel_engine_cs *engine, int size);
> -int intel_ring_pin(struct intel_ring *ring);
> -void intel_ring_reset(struct intel_ring *ring, u32 tail);
> -unsigned int intel_ring_update_space(struct intel_ring *ring);
> -void intel_ring_unpin(struct intel_ring *ring);
> -void intel_ring_free(struct kref *ref);
> -
> -static inline struct intel_ring *intel_ring_get(struct intel_ring *ring)
> -{
> -	kref_get(&ring->ref);
> -	return ring;
> -}
> -
> -static inline void intel_ring_put(struct intel_ring *ring)
> -{
> -	kref_put(&ring->ref, intel_ring_free);
> -}
> -
>  void intel_engine_stop(struct intel_engine_cs *engine);
>  void intel_engine_cleanup(struct intel_engine_cs *engine);
>  
> -int __must_check intel_ring_cacheline_align(struct i915_request *rq);
> -
> -u32 __must_check *intel_ring_begin(struct i915_request *rq, unsigned int n);
> -
> -static inline void intel_ring_advance(struct i915_request *rq, u32 *cs)
> -{
> -	/* Dummy function.
> -	 *
> -	 * This serves as a placeholder in the code so that the reader
> -	 * can compare against the preceding intel_ring_begin() and
> -	 * check that the number of dwords emitted matches the space
> -	 * reserved for the command packet (i.e. the value passed to
> -	 * intel_ring_begin()).
> -	 */
> -	GEM_BUG_ON((rq->ring->vaddr + rq->ring->emit) != cs);
> -}
> -
> -static inline u32 intel_ring_wrap(const struct intel_ring *ring, u32 pos)
> -{
> -	return pos & (ring->size - 1);
> -}
> -
> -static inline bool
> -intel_ring_offset_valid(const struct intel_ring *ring,
> -			unsigned int pos)
> -{
> -	if (pos & -ring->size) /* must be strictly within the ring */
> -		return false;
> -
> -	if (!IS_ALIGNED(pos, 8)) /* must be qword aligned */
> -		return false;
> -
> -	return true;
> -}
> -
> -static inline u32 intel_ring_offset(const struct i915_request *rq, void *addr)
> -{
> -	/* Don't write ring->size (equivalent to 0) as that hangs some GPUs. */
> -	u32 offset = addr - rq->ring->vaddr;
> -	GEM_BUG_ON(offset > rq->ring->size);
> -	return intel_ring_wrap(rq->ring, offset);
> -}
> -
> -static inline void
> -assert_ring_tail_valid(const struct intel_ring *ring, unsigned int tail)
> -{
> -	GEM_BUG_ON(!intel_ring_offset_valid(ring, tail));
> -
> -	/*
> -	 * "Ring Buffer Use"
> -	 *	Gen2 BSpec "1. Programming Environment" / 1.4.4.6
> -	 *	Gen3 BSpec "1c Memory Interface Functions" / 2.3.4.5
> -	 *	Gen4+ BSpec "1c Memory Interface and Command Stream" / 5.3.4.5
> -	 * "If the Ring Buffer Head Pointer and the Tail Pointer are on the
> -	 * same cacheline, the Head Pointer must not be greater than the Tail
> -	 * Pointer."
> -	 *
> -	 * We use ring->head as the last known location of the actual RING_HEAD,
> -	 * it may have advanced but in the worst case it is equally the same
> -	 * as ring->head and so we should never program RING_TAIL to advance
> -	 * into the same cacheline as ring->head.
> -	 */
> -#define cacheline(a) round_down(a, CACHELINE_BYTES)
> -	GEM_BUG_ON(cacheline(tail) == cacheline(ring->head) &&
> -		   tail < ring->head);
> -#undef cacheline
> -}
> -
> -static inline unsigned int
> -intel_ring_set_tail(struct intel_ring *ring, unsigned int tail)
> -{
> -	/* Whilst writes to the tail are strictly order, there is no
> -	 * serialisation between readers and the writers. The tail may be
> -	 * read by i915_request_retire() just as it is being updated
> -	 * by execlists, as although the breadcrumb is complete, the context
> -	 * switch hasn't been seen.
> -	 */
> -	assert_ring_tail_valid(ring, tail);
> -	ring->tail = tail;
> -	return tail;
> -}
> -
> -static inline unsigned int
> -__intel_ring_space(unsigned int head, unsigned int tail, unsigned int size)
> -{
> -	/*
> -	 * "If the Ring Buffer Head Pointer and the Tail Pointer are on the
> -	 * same cacheline, the Head Pointer must not be greater than the Tail
> -	 * Pointer."
> -	 */
> -	GEM_BUG_ON(!is_power_of_2(size));
> -	return (head - tail - CACHELINE_BYTES) & (size - 1);
> -}
> -
>  int intel_engines_init_mmio(struct intel_gt *gt);
>  int intel_engines_setup(struct intel_gt *gt);
>  int intel_engines_init(struct intel_gt *gt);
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> index cd4caf54c59c..2afa2ef90482 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> @@ -37,6 +37,7 @@
>  #include "intel_context.h"
>  #include "intel_lrc.h"
>  #include "intel_reset.h"
> +#include "intel_ring.h"
>  
>  /* Haswell does have the CXT_SIZE register however it does not appear to be
>   * valid. Now, docs explain in dwords what is in the context object. The full
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
> index 6fbfa2162e54..3c0f490ff2c7 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
> @@ -13,6 +13,7 @@
>  #include "intel_gt.h"
>  #include "intel_gt_pm.h"
>  #include "intel_rc6.h"
> +#include "intel_ring.h"
>  
>  static int __engine_unpark(struct intel_wakeref *wf)
>  {
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> index fbe89bfd3d4f..c5d1047a4bc5 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> @@ -59,6 +59,7 @@ struct i915_gem_context;
>  struct i915_request;
>  struct i915_sched_attr;
>  struct intel_gt;
> +struct intel_ring;
>  struct intel_uncore;
>  
>  typedef u8 intel_engine_mask_t;
> @@ -77,32 +78,6 @@ struct intel_instdone {
>  	u32 row[I915_MAX_SLICES][I915_MAX_SUBSLICES];
>  };
>  
> -struct intel_ring {
> -	struct kref ref;
> -	struct i915_vma *vma;
> -	void *vaddr;
> -
> -	/*
> -	 * As we have two types of rings, one global to the engine used
> -	 * by ringbuffer submission and those that are exclusive to a
> -	 * context used by execlists, we have to play safe and allow
> -	 * atomic updates to the pin_count. However, the actual pinning
> -	 * of the context is either done during initialisation for
> -	 * ringbuffer submission or serialised as part of the context
> -	 * pinning for execlists, and so we do not need a mutex ourselves
> -	 * to serialise intel_ring_pin/intel_ring_unpin.
> -	 */
> -	atomic_t pin_count;
> -
> -	u32 head;
> -	u32 tail;
> -	u32 emit;
> -
> -	u32 space;
> -	u32 size;
> -	u32 effective_size;
> -};
> -
>  /*
>   * we use a single page to load ctx workarounds so all of these
>   * values are referred in terms of dwords
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index 8d8fecc69809..9d59debfd168 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -145,6 +145,7 @@
>  #include "intel_lrc_reg.h"
>  #include "intel_mocs.h"
>  #include "intel_reset.h"
> +#include "intel_ring.h"
>  #include "intel_workarounds.h"
>  
>  #define RING_EXECLIST_QFULL		(1 << 0x2)
> diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c
> index 06dba7ff294e..6d4c665a997d 100644
> --- a/drivers/gpu/drm/i915/gt/intel_mocs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
> @@ -26,6 +26,7 @@
>  #include "intel_gt.h"
>  #include "intel_mocs.h"
>  #include "intel_lrc.h"
> +#include "intel_ring.h"
>  
>  /* structures required */
>  struct drm_i915_mocs_entry {
> diff --git a/drivers/gpu/drm/i915/gt/intel_renderstate.c b/drivers/gpu/drm/i915/gt/intel_renderstate.c
> index 6d05f9c64178..c4edc35e7d89 100644
> --- a/drivers/gpu/drm/i915/gt/intel_renderstate.c
> +++ b/drivers/gpu/drm/i915/gt/intel_renderstate.c
> @@ -27,6 +27,7 @@
>  
>  #include "i915_drv.h"
>  #include "intel_renderstate.h"
> +#include "intel_ring.h"
>  
>  struct intel_renderstate {
>  	const struct intel_renderstate_rodata *rodata;
> diff --git a/drivers/gpu/drm/i915/gt/intel_ring.c b/drivers/gpu/drm/i915/gt/intel_ring.c
> new file mode 100644
> index 000000000000..98876b55f851
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/gt/intel_ring.c
> @@ -0,0 +1,314 @@
> +/*
> + * SPDX-License-Identifier: MIT
> + *
> + * Copyright © 2019 Intel Corporation
> + */
> +
> +#include "gem/i915_gem_object.h"
> +#include "i915_drv.h"
> +#include "i915_vma.h"
> +#include "intel_engine.h"
> +#include "intel_ring.h"
> +#include "intel_timeline.h"
> +
> +unsigned int intel_ring_update_space(struct intel_ring *ring)
> +{
> +	unsigned int space;
> +
> +	space = __intel_ring_space(ring->head, ring->emit, ring->size);
> +
> +	ring->space = space;
> +	return space;
> +}
> +
> +int intel_ring_pin(struct intel_ring *ring)
> +{
> +	struct i915_vma *vma = ring->vma;
> +	unsigned int flags;
> +	void *addr;
> +	int ret;
> +
> +	if (atomic_fetch_inc(&ring->pin_count))
> +		return 0;
> +
> +	flags = PIN_GLOBAL;
> +
> +	/* Ring wraparound at offset 0 sometimes hangs. No idea why. */
> +	flags |= PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma);
> +
> +	if (vma->obj->stolen)
> +		flags |= PIN_MAPPABLE;
> +	else
> +		flags |= PIN_HIGH;
> +
> +	ret = i915_vma_pin(vma, 0, 0, flags);
> +	if (unlikely(ret))
> +		goto err_unpin;
> +
> +	if (i915_vma_is_map_and_fenceable(vma))
> +		addr = (void __force *)i915_vma_pin_iomap(vma);
> +	else
> +		addr = i915_gem_object_pin_map(vma->obj,
> +					       i915_coherent_map_type(vma->vm->i915));
> +	if (IS_ERR(addr)) {
> +		ret = PTR_ERR(addr);
> +		goto err_ring;
> +	}
> +
> +	i915_vma_make_unshrinkable(vma);
> +
> +	GEM_BUG_ON(ring->vaddr);
> +	ring->vaddr = addr;
> +
> +	return 0;
> +
> +err_ring:
> +	i915_vma_unpin(vma);
> +err_unpin:
> +	atomic_dec(&ring->pin_count);
> +	return ret;
> +}
> +
> +void intel_ring_reset(struct intel_ring *ring, u32 tail)
> +{
> +	tail = intel_ring_wrap(ring, tail);
> +	ring->tail = tail;
> +	ring->head = tail;
> +	ring->emit = tail;
> +	intel_ring_update_space(ring);
> +}
> +
> +void intel_ring_unpin(struct intel_ring *ring)
> +{
> +	if (!atomic_dec_and_test(&ring->pin_count))
> +		return;
> +
> +	/* Discard any unused bytes beyond that submitted to hw. */
> +	intel_ring_reset(ring, ring->emit);
> +
> +	GEM_BUG_ON(!ring->vma);
> +	i915_vma_unset_ggtt_write(ring->vma);
> +	if (i915_vma_is_map_and_fenceable(ring->vma))
> +		i915_vma_unpin_iomap(ring->vma);
> +	else
> +		i915_gem_object_unpin_map(ring->vma->obj);
> +
> +	GEM_BUG_ON(!ring->vaddr);
> +	ring->vaddr = NULL;
> +
> +	i915_vma_unpin(ring->vma);
> +	i915_vma_make_purgeable(ring->vma);
> +}
> +
> +static struct i915_vma *create_ring_vma(struct i915_ggtt *ggtt, int size)
> +{
> +	struct i915_address_space *vm = &ggtt->vm;
> +	struct drm_i915_private *i915 = vm->i915;
> +	struct drm_i915_gem_object *obj;
> +	struct i915_vma *vma;
> +
> +	obj = i915_gem_object_create_stolen(i915, size);
> +	if (!obj)
> +		obj = i915_gem_object_create_internal(i915, size);
> +	if (IS_ERR(obj))
> +		return ERR_CAST(obj);
> +
> +	/*
> +	 * Mark ring buffers as read-only from GPU side (so no stray overwrites)
> +	 * if supported by the platform's GGTT.
> +	 */
> +	if (vm->has_read_only)
> +		i915_gem_object_set_readonly(obj);
> +
> +	vma = i915_vma_instance(obj, vm, NULL);
> +	i915_gem_object_put(obj);
> +
> +	return vma;
> +}
> +
> +struct intel_ring *
> +intel_engine_create_ring(struct intel_engine_cs *engine, int size)
> +{
> +	struct drm_i915_private *i915 = engine->i915;
> +	struct intel_ring *ring;
> +	struct i915_vma *vma;
> +
> +	GEM_BUG_ON(!is_power_of_2(size));
> +	GEM_BUG_ON(RING_CTL_SIZE(size) & ~RING_NR_PAGES);
> +
> +	ring = kzalloc(sizeof(*ring), GFP_KERNEL);
> +	if (!ring)
> +		return ERR_PTR(-ENOMEM);
> +
> +	kref_init(&ring->ref);
> +
> +	ring->size = size;
> +	/* Workaround an erratum on the i830 which causes a hang if
> +	 * the TAIL pointer points to within the last 2 cachelines
> +	 * of the buffer.
> +	 */
> +	ring->effective_size = size;
> +	if (IS_I830(i915) || IS_I845G(i915))
> +		ring->effective_size -= 2 * CACHELINE_BYTES;
> +
> +	intel_ring_update_space(ring);
> +
> +	vma = create_ring_vma(engine->gt->ggtt, size);
> +	if (IS_ERR(vma)) {
> +		kfree(ring);
> +		return ERR_CAST(vma);
> +	}
> +	ring->vma = vma;
> +
> +	return ring;
> +}
> +
> +void intel_ring_free(struct kref *ref)
> +{
> +	struct intel_ring *ring = container_of(ref, typeof(*ring), ref);
> +
> +	i915_vma_put(ring->vma);
> +
> +	kfree(ring);
> +}
> +
> +static noinline int
> +wait_for_space(struct intel_ring *ring,
> +	       struct intel_timeline *tl,
> +	       unsigned int bytes)
> +{
> +	struct i915_request *target;
> +	long timeout;
> +
> +	if (intel_ring_update_space(ring) >= bytes)
> +		return 0;
> +
> +	GEM_BUG_ON(list_empty(&tl->requests));
> +	list_for_each_entry(target, &tl->requests, link) {
> +		if (target->ring != ring)
> +			continue;
> +
> +		/* Would completion of this request free enough space? */
> +		if (bytes <= __intel_ring_space(target->postfix,
> +						ring->emit, ring->size))
> +			break;
> +	}
> +
> +	if (GEM_WARN_ON(&target->link == &tl->requests))
> +		return -ENOSPC;
> +
> +	timeout = i915_request_wait(target,
> +				    I915_WAIT_INTERRUPTIBLE,
> +				    MAX_SCHEDULE_TIMEOUT);
> +	if (timeout < 0)
> +		return timeout;
> +
> +	i915_request_retire_upto(target);
> +
> +	intel_ring_update_space(ring);
> +	GEM_BUG_ON(ring->space < bytes);
> +	return 0;
> +}
> +
> +u32 *intel_ring_begin(struct i915_request *rq, unsigned int num_dwords)
> +{
> +	struct intel_ring *ring = rq->ring;
> +	const unsigned int remain_usable = ring->effective_size - ring->emit;
> +	const unsigned int bytes = num_dwords * sizeof(u32);
> +	unsigned int need_wrap = 0;
> +	unsigned int total_bytes;
> +	u32 *cs;
> +
> +	/* Packets must be qword aligned. */
> +	GEM_BUG_ON(num_dwords & 1);
> +
> +	total_bytes = bytes + rq->reserved_space;
> +	GEM_BUG_ON(total_bytes > ring->effective_size);
> +
> +	if (unlikely(total_bytes > remain_usable)) {
> +		const int remain_actual = ring->size - ring->emit;
> +
> +		if (bytes > remain_usable) {
> +			/*
> +			 * Not enough space for the basic request. So need to
> +			 * flush out the remainder and then wait for
> +			 * base + reserved.
> +			 */
> +			total_bytes += remain_actual;
> +			need_wrap = remain_actual | 1;
> +		} else  {
> +			/*
> +			 * The base request will fit but the reserved space
> +			 * falls off the end. So we don't need an immediate
> +			 * wrap and only need to effectively wait for the
> +			 * reserved size from the start of ringbuffer.
> +			 */
> +			total_bytes = rq->reserved_space + remain_actual;
> +		}
> +	}
> +
> +	if (unlikely(total_bytes > ring->space)) {
> +		int ret;
> +
> +		/*
> +		 * Space is reserved in the ringbuffer for finalising the
> +		 * request, as that cannot be allowed to fail. During request
> +		 * finalisation, reserved_space is set to 0 to stop the
> +		 * overallocation and the assumption is that then we never need
> +		 * to wait (which has the risk of failing with EINTR).
> +		 *
> +		 * See also i915_request_alloc() and i915_request_add().
> +		 */
> +		GEM_BUG_ON(!rq->reserved_space);
> +
> +		ret = wait_for_space(ring, rq->timeline, total_bytes);
> +		if (unlikely(ret))
> +			return ERR_PTR(ret);
> +	}
> +
> +	if (unlikely(need_wrap)) {
> +		need_wrap &= ~1;
> +		GEM_BUG_ON(need_wrap > ring->space);
> +		GEM_BUG_ON(ring->emit + need_wrap > ring->size);
> +		GEM_BUG_ON(!IS_ALIGNED(need_wrap, sizeof(u64)));
> +
> +		/* Fill the tail with MI_NOOP */
> +		memset64(ring->vaddr + ring->emit, 0, need_wrap / sizeof(u64));
> +		ring->space -= need_wrap;
> +		ring->emit = 0;
> +	}
> +
> +	GEM_BUG_ON(ring->emit > ring->size - bytes);
> +	GEM_BUG_ON(ring->space < bytes);
> +	cs = ring->vaddr + ring->emit;
> +	GEM_DEBUG_EXEC(memset32(cs, POISON_INUSE, bytes / sizeof(*cs)));
> +	ring->emit += bytes;
> +	ring->space -= bytes;
> +
> +	return cs;
> +}
> +
> +/* Align the ring tail to a cacheline boundary */
> +int intel_ring_cacheline_align(struct i915_request *rq)
> +{
> +	int num_dwords;
> +	void *cs;
> +
> +	num_dwords = (rq->ring->emit & (CACHELINE_BYTES - 1)) / sizeof(u32);
> +	if (num_dwords == 0)
> +		return 0;
> +
> +	num_dwords = CACHELINE_DWORDS - num_dwords;
> +	GEM_BUG_ON(num_dwords & 1);
> +
> +	cs = intel_ring_begin(rq, num_dwords);
> +	if (IS_ERR(cs))
> +		return PTR_ERR(cs);
> +
> +	memset64(cs, (u64)MI_NOOP << 32 | MI_NOOP, num_dwords / 2);
> +	intel_ring_advance(rq, cs + num_dwords);
> +
> +	GEM_BUG_ON(rq->ring->emit & (CACHELINE_BYTES - 1));
> +	return 0;
> +}
> +
> diff --git a/drivers/gpu/drm/i915/gt/intel_ring.h b/drivers/gpu/drm/i915/gt/intel_ring.h
> new file mode 100644
> index 000000000000..ea2839d9e044
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/gt/intel_ring.h
> @@ -0,0 +1,131 @@
> +/*
> + * SPDX-License-Identifier: MIT
> + *
> + * Copyright © 2019 Intel Corporation
> + */
> +
> +#ifndef INTEL_RING_H
> +#define INTEL_RING_H
> +
> +#include "i915_gem.h" /* GEM_BUG_ON */
> +#include "i915_request.h"
> +#include "intel_ring_types.h"
> +
> +struct intel_engine_cs;
> +
> +struct intel_ring *
> +intel_engine_create_ring(struct intel_engine_cs *engine, int size);
> +
> +u32 *intel_ring_begin(struct i915_request *rq, unsigned int num_dwords);
> +int intel_ring_cacheline_align(struct i915_request *rq);
> +
> +unsigned int intel_ring_update_space(struct intel_ring *ring);
> +
> +int intel_ring_pin(struct intel_ring *ring);
> +void intel_ring_unpin(struct intel_ring *ring);
> +void intel_ring_reset(struct intel_ring *ring, u32 tail);
> +
> +void intel_ring_free(struct kref *ref);
> +
> +static inline struct intel_ring *intel_ring_get(struct intel_ring *ring)
> +{
> +	kref_get(&ring->ref);
> +	return ring;
> +}
> +
> +static inline void intel_ring_put(struct intel_ring *ring)
> +{
> +	kref_put(&ring->ref, intel_ring_free);
> +}
> +
> +static inline void intel_ring_advance(struct i915_request *rq, u32 *cs)
> +{
> +	/* Dummy function.
> +	 *
> +	 * This serves as a placeholder in the code so that the reader
> +	 * can compare against the preceding intel_ring_begin() and
> +	 * check that the number of dwords emitted matches the space
> +	 * reserved for the command packet (i.e. the value passed to
> +	 * intel_ring_begin()).
> +	 */
> +	GEM_BUG_ON((rq->ring->vaddr + rq->ring->emit) != cs);
> +}
> +
> +static inline u32 intel_ring_wrap(const struct intel_ring *ring, u32 pos)
> +{
> +	return pos & (ring->size - 1);
> +}
> +
> +static inline bool
> +intel_ring_offset_valid(const struct intel_ring *ring,
> +			unsigned int pos)
> +{
> +	if (pos & -ring->size) /* must be strictly within the ring */
> +		return false;
> +
> +	if (!IS_ALIGNED(pos, 8)) /* must be qword aligned */
> +		return false;
> +
> +	return true;
> +}
> +
> +static inline u32 intel_ring_offset(const struct i915_request *rq, void *addr)
> +{
> +	/* Don't write ring->size (equivalent to 0) as that hangs some GPUs. */
> +	u32 offset = addr - rq->ring->vaddr;
> +	GEM_BUG_ON(offset > rq->ring->size);
> +	return intel_ring_wrap(rq->ring, offset);
> +}
> +
> +static inline void
> +assert_ring_tail_valid(const struct intel_ring *ring, unsigned int tail)
> +{
> +	GEM_BUG_ON(!intel_ring_offset_valid(ring, tail));
> +
> +	/*
> +	 * "Ring Buffer Use"
> +	 *	Gen2 BSpec "1. Programming Environment" / 1.4.4.6
> +	 *	Gen3 BSpec "1c Memory Interface Functions" / 2.3.4.5
> +	 *	Gen4+ BSpec "1c Memory Interface and Command Stream" / 5.3.4.5
> +	 * "If the Ring Buffer Head Pointer and the Tail Pointer are on the
> +	 * same cacheline, the Head Pointer must not be greater than the Tail
> +	 * Pointer."
> +	 *
> +	 * We use ring->head as the last known location of the actual RING_HEAD,
> +	 * it may have advanced but in the worst case it is equally the same
> +	 * as ring->head and so we should never program RING_TAIL to advance
> +	 * into the same cacheline as ring->head.
> +	 */
> +#define cacheline(a) round_down(a, CACHELINE_BYTES)
> +	GEM_BUG_ON(cacheline(tail) == cacheline(ring->head) &&
> +		   tail < ring->head);
> +#undef cacheline
> +}
> +
> +static inline unsigned int
> +intel_ring_set_tail(struct intel_ring *ring, unsigned int tail)
> +{
> +	/* Whilst writes to the tail are strictly order, there is no
> +	 * serialisation between readers and the writers. The tail may be
> +	 * read by i915_request_retire() just as it is being updated
> +	 * by execlists, as although the breadcrumb is complete, the context
> +	 * switch hasn't been seen.
> +	 */
> +	assert_ring_tail_valid(ring, tail);
> +	ring->tail = tail;
> +	return tail;
> +}
> +
> +static inline unsigned int
> +__intel_ring_space(unsigned int head, unsigned int tail, unsigned int size)
> +{
> +	/*
> +	 * "If the Ring Buffer Head Pointer and the Tail Pointer are on the
> +	 * same cacheline, the Head Pointer must not be greater than the Tail
> +	 * Pointer."
> +	 */
> +	GEM_BUG_ON(!is_power_of_2(size));
> +	return (head - tail - CACHELINE_BYTES) & (size - 1);
> +}
> +
> +#endif /* INTEL_RING_H */
> diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c
> index 0f959694303c..14ad10acd548 100644
> --- a/drivers/gpu/drm/i915/gt/intel_timeline.c
> +++ b/drivers/gpu/drm/i915/gt/intel_timeline.c
> @@ -4,13 +4,13 @@
>   * Copyright © 2016-2018 Intel Corporation
>   */
>  
> -#include "gt/intel_gt_types.h"
> -
>  #include "i915_drv.h"
>  
>  #include "i915_active.h"
>  #include "i915_syncmap.h"
> -#include "gt/intel_timeline.h"
> +#include "intel_gt.h"
> +#include "intel_ring.h"
> +#include "intel_timeline.h"
>  
>  #define ptr_set_bit(ptr, bit) ((typeof(ptr))((unsigned long)(ptr) | BIT(bit)))
>  #define ptr_test_bit(ptr, bit) ((unsigned long)(ptr) & BIT(bit))
> diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c
> index af8a8183154a..7cb6dab4399d 100644
> --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
> +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
> @@ -7,6 +7,7 @@
>  #include "i915_drv.h"
>  #include "intel_context.h"
>  #include "intel_gt.h"
> +#include "intel_ring.h"
>  #include "intel_workarounds.h"
>  
>  /**
> diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c
> index 123db2c3f956..83f549d203a0 100644
> --- a/drivers/gpu/drm/i915/gt/mock_engine.c
> +++ b/drivers/gpu/drm/i915/gt/mock_engine.c
> @@ -23,6 +23,7 @@
>   */
>  
>  #include "gem/i915_gem_context.h"
> +#include "gt/intel_ring.h"
>  
>  #include "i915_drv.h"
>  #include "intel_context.h"
> diff --git a/drivers/gpu/drm/i915/gt/selftest_timeline.c b/drivers/gpu/drm/i915/gt/selftest_timeline.c
> index dac86f699a4c..f04a59fe5d2c 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_timeline.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_timeline.c
> @@ -9,6 +9,7 @@
>  #include "intel_engine_pm.h"
>  #include "intel_gt.h"
>  #include "intel_gt_requests.h"
> +#include "intel_ring.h"
>  
>  #include "../selftests/i915_random.h"
>  #include "../i915_selftest.h"
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> index 009e54a3764f..1b1691aaed28 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> @@ -6,12 +6,13 @@
>  #include <linux/circ_buf.h>
>  
>  #include "gem/i915_gem_context.h"
> -
>  #include "gt/intel_context.h"
>  #include "gt/intel_engine_pm.h"
>  #include "gt/intel_gt.h"
>  #include "gt/intel_gt_pm.h"
>  #include "gt/intel_lrc_reg.h"
> +#include "gt/intel_ring.h"
> +
>  #include "intel_guc_submission.h"
>  
>  #include "i915_drv.h"
> diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c
> index e753b1e706e2..6a3ac8cde95d 100644
> --- a/drivers/gpu/drm/i915/gvt/cmd_parser.c
> +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c
> @@ -35,7 +35,9 @@
>   */
>  
>  #include <linux/slab.h>
> +
>  #include "i915_drv.h"
> +#include "gt/intel_ring.h"
>  #include "gvt.h"
>  #include "i915_pvinfo.h"
>  #include "trace.h"
> diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.c b/drivers/gpu/drm/i915/gvt/mmio_context.c
> index 4208e40445b1..aaf15916d29a 100644
> --- a/drivers/gpu/drm/i915/gvt/mmio_context.c
> +++ b/drivers/gpu/drm/i915/gvt/mmio_context.c
> @@ -35,6 +35,7 @@
>  
>  #include "i915_drv.h"
>  #include "gt/intel_context.h"
> +#include "gt/intel_ring.h"
>  #include "gvt.h"
>  #include "trace.h"
>  
> diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
> index a5b942ee3ceb..377811f8853f 100644
> --- a/drivers/gpu/drm/i915/gvt/scheduler.c
> +++ b/drivers/gpu/drm/i915/gvt/scheduler.c
> @@ -38,6 +38,7 @@
>  #include "gem/i915_gem_context.h"
>  #include "gem/i915_gem_pm.h"
>  #include "gt/intel_context.h"
> +#include "gt/intel_ring.h"
>  
>  #include "i915_drv.h"
>  #include "gvt.h"
> diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
> index 4c190a548ba7..3e3495838a93 100644
> --- a/drivers/gpu/drm/i915/i915_active.c
> +++ b/drivers/gpu/drm/i915/i915_active.c
> @@ -8,6 +8,7 @@
>  
>  #include "gt/intel_engine_heartbeat.h"
>  #include "gt/intel_engine_pm.h"
> +#include "gt/intel_ring.h"
>  
>  #include "i915_drv.h"
>  #include "i915_active.h"
> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
> index 3130b0c7ed83..38d3de2dfaa6 100644
> --- a/drivers/gpu/drm/i915/i915_perf.c
> +++ b/drivers/gpu/drm/i915/i915_perf.c
> @@ -200,6 +200,7 @@
>  #include "gt/intel_engine_user.h"
>  #include "gt/intel_gt.h"
>  #include "gt/intel_lrc_reg.h"
> +#include "gt/intel_ring.h"
>  
>  #include "i915_drv.h"
>  #include "i915_perf.h"
> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> index 4575f368455d..932c5cf190b5 100644
> --- a/drivers/gpu/drm/i915/i915_request.c
> +++ b/drivers/gpu/drm/i915/i915_request.c
> @@ -31,6 +31,7 @@
>  
>  #include "gem/i915_gem_context.h"
>  #include "gt/intel_context.h"
> +#include "gt/intel_ring.h"
>  
>  #include "i915_active.h"
>  #include "i915_drv.h"
> -- 
> 2.24.0.rc0
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx