[Intel-gfx] [PATCH] drm/i915/gt: Split intel_ring_submission
Mika Kuoppala
mika.kuoppala at linux.intel.com
Thu Oct 24 07:33:26 UTC 2019
Chris Wilson <chris at chris-wilson.co.uk> writes:
> Split the legacy submission backend from the common ring buffer.
Aye.
Didn't spot anything out of the ordinary.
Reviewed-by: Mika Kuoppala <mika.kuoppala at linux.intel.com>
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> ---
> drivers/gpu/drm/i915/Makefile | 5 +-
> drivers/gpu/drm/i915/display/intel_overlay.c | 1 +
> drivers/gpu/drm/i915/gem/i915_gem_context.c | 3 +-
> .../gpu/drm/i915/gem/i915_gem_execbuffer.c | 1 +
> .../gpu/drm/i915/gem/i915_gem_object_blt.c | 1 +
> .../i915/gem/selftests/i915_gem_coherency.c | 1 +
> drivers/gpu/drm/i915/gt/intel_context.c | 1 +
> drivers/gpu/drm/i915/gt/intel_context.h | 1 +
> drivers/gpu/drm/i915/gt/intel_engine.h | 114 -------
> drivers/gpu/drm/i915/gt/intel_engine_cs.c | 1 +
> drivers/gpu/drm/i915/gt/intel_engine_pm.c | 1 +
> drivers/gpu/drm/i915/gt/intel_engine_types.h | 27 +-
> drivers/gpu/drm/i915/gt/intel_lrc.c | 1 +
> drivers/gpu/drm/i915/gt/intel_mocs.c | 1 +
> drivers/gpu/drm/i915/gt/intel_renderstate.c | 1 +
> drivers/gpu/drm/i915/gt/intel_ring.c | 314 ++++++++++++++++++
> drivers/gpu/drm/i915/gt/intel_ring.h | 131 ++++++++
> drivers/gpu/drm/i915/gt/intel_timeline.c | 6 +-
> drivers/gpu/drm/i915/gt/intel_workarounds.c | 1 +
> drivers/gpu/drm/i915/gt/mock_engine.c | 1 +
> drivers/gpu/drm/i915/gt/selftest_timeline.c | 1 +
> .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 3 +-
> drivers/gpu/drm/i915/gvt/cmd_parser.c | 2 +
> drivers/gpu/drm/i915/gvt/mmio_context.c | 1 +
> drivers/gpu/drm/i915/gvt/scheduler.c | 1 +
> drivers/gpu/drm/i915/i915_active.c | 1 +
> drivers/gpu/drm/i915/i915_perf.c | 1 +
> drivers/gpu/drm/i915/i915_request.c | 1 +
> 28 files changed, 477 insertions(+), 147 deletions(-)
> create mode 100644 drivers/gpu/drm/i915/gt/intel_ring.c
> create mode 100644 drivers/gpu/drm/i915/gt/intel_ring.h
>
> diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
> index a91e0a487a79..5021aa7fa187 100644
> --- a/drivers/gpu/drm/i915/Makefile
> +++ b/drivers/gpu/drm/i915/Makefile
> @@ -90,11 +90,12 @@ gt-y += \
> gt/intel_gt_requests.o \
> gt/intel_llc.o \
> gt/intel_lrc.o \
> + gt/intel_mocs.o \
> gt/intel_rc6.o \
> gt/intel_renderstate.o \
> gt/intel_reset.o \
> - gt/intel_ringbuffer.o \
> - gt/intel_mocs.o \
> + gt/intel_ring.o \
> + gt/intel_ring_submission.o \
> gt/intel_sseu.o \
> gt/intel_timeline.o \
> gt/intel_workarounds.o
> diff --git a/drivers/gpu/drm/i915/display/intel_overlay.c b/drivers/gpu/drm/i915/display/intel_overlay.c
> index 2360f19f9694..848ce07a8ec2 100644
> --- a/drivers/gpu/drm/i915/display/intel_overlay.c
> +++ b/drivers/gpu/drm/i915/display/intel_overlay.c
> @@ -30,6 +30,7 @@
> #include <drm/i915_drm.h>
>
> #include "gem/i915_gem_pm.h"
> +#include "gt/intel_ring.h"
>
> #include "i915_drv.h"
> #include "i915_reg.h"
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> index df528e48e566..a03bee30fac1 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> @@ -69,9 +69,10 @@
>
> #include <drm/i915_drm.h>
>
> -#include "gt/intel_lrc_reg.h"
> #include "gt/intel_engine_heartbeat.h"
> #include "gt/intel_engine_user.h"
> +#include "gt/intel_lrc_reg.h"
> +#include "gt/intel_ring.h"
>
> #include "i915_gem_context.h"
> #include "i915_globals.h"
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> index 99d79f94e641..c88948e4094b 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> @@ -19,6 +19,7 @@
> #include "gt/intel_engine_pool.h"
> #include "gt/intel_gt.h"
> #include "gt/intel_gt_pm.h"
> +#include "gt/intel_ring.h"
>
> #include "i915_drv.h"
> #include "i915_gem_clflush.h"
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c
> index 5bd8de124d74..516e61e99212 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c
> @@ -8,6 +8,7 @@
> #include "gt/intel_engine_pm.h"
> #include "gt/intel_engine_pool.h"
> #include "gt/intel_gt.h"
> +#include "gt/intel_ring.h"
> #include "i915_gem_clflush.h"
> #include "i915_gem_object_blt.h"
>
> diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
> index 549810f70aeb..0877ef4dff63 100644
> --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
> +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
> @@ -8,6 +8,7 @@
>
> #include "gt/intel_gt.h"
> #include "gt/intel_gt_pm.h"
> +#include "gt/intel_ring.h"
>
> #include "i915_selftest.h"
> #include "selftests/i915_random.h"
> diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
> index 59c3083c1ec1..ee9d2bcd2c13 100644
> --- a/drivers/gpu/drm/i915/gt/intel_context.c
> +++ b/drivers/gpu/drm/i915/gt/intel_context.c
> @@ -13,6 +13,7 @@
> #include "intel_context.h"
> #include "intel_engine.h"
> #include "intel_engine_pm.h"
> +#include "intel_ring.h"
>
> static struct i915_global_context {
> struct i915_global base;
> diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
> index dd742ac2fbdb..68b3d317d959 100644
> --- a/drivers/gpu/drm/i915/gt/intel_context.h
> +++ b/drivers/gpu/drm/i915/gt/intel_context.h
> @@ -12,6 +12,7 @@
> #include "i915_active.h"
> #include "intel_context_types.h"
> #include "intel_engine_types.h"
> +#include "intel_ring_types.h"
> #include "intel_timeline_types.h"
>
> void intel_context_init(struct intel_context *ce,
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
> index c7f93d05c8e0..d77b9f9f096c 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine.h
> +++ b/drivers/gpu/drm/i915/gt/intel_engine.h
> @@ -19,7 +19,6 @@
> #include "intel_workarounds.h"
>
> struct drm_printer;
> -
> struct intel_gt;
>
> /* Early gen2 devices have a cacheline of just 32 bytes, using 64 is overkill,
> @@ -176,122 +175,9 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value)
> #define I915_HWS_CSB_WRITE_INDEX 0x1f
> #define CNL_HWS_CSB_WRITE_INDEX 0x2f
>
> -struct intel_ring *
> -intel_engine_create_ring(struct intel_engine_cs *engine, int size);
> -int intel_ring_pin(struct intel_ring *ring);
> -void intel_ring_reset(struct intel_ring *ring, u32 tail);
> -unsigned int intel_ring_update_space(struct intel_ring *ring);
> -void intel_ring_unpin(struct intel_ring *ring);
> -void intel_ring_free(struct kref *ref);
> -
> -static inline struct intel_ring *intel_ring_get(struct intel_ring *ring)
> -{
> - kref_get(&ring->ref);
> - return ring;
> -}
> -
> -static inline void intel_ring_put(struct intel_ring *ring)
> -{
> - kref_put(&ring->ref, intel_ring_free);
> -}
> -
> void intel_engine_stop(struct intel_engine_cs *engine);
> void intel_engine_cleanup(struct intel_engine_cs *engine);
>
> -int __must_check intel_ring_cacheline_align(struct i915_request *rq);
> -
> -u32 __must_check *intel_ring_begin(struct i915_request *rq, unsigned int n);
> -
> -static inline void intel_ring_advance(struct i915_request *rq, u32 *cs)
> -{
> - /* Dummy function.
> - *
> - * This serves as a placeholder in the code so that the reader
> - * can compare against the preceding intel_ring_begin() and
> - * check that the number of dwords emitted matches the space
> - * reserved for the command packet (i.e. the value passed to
> - * intel_ring_begin()).
> - */
> - GEM_BUG_ON((rq->ring->vaddr + rq->ring->emit) != cs);
> -}
> -
> -static inline u32 intel_ring_wrap(const struct intel_ring *ring, u32 pos)
> -{
> - return pos & (ring->size - 1);
> -}
> -
> -static inline bool
> -intel_ring_offset_valid(const struct intel_ring *ring,
> - unsigned int pos)
> -{
> - if (pos & -ring->size) /* must be strictly within the ring */
> - return false;
> -
> - if (!IS_ALIGNED(pos, 8)) /* must be qword aligned */
> - return false;
> -
> - return true;
> -}
> -
> -static inline u32 intel_ring_offset(const struct i915_request *rq, void *addr)
> -{
> - /* Don't write ring->size (equivalent to 0) as that hangs some GPUs. */
> - u32 offset = addr - rq->ring->vaddr;
> - GEM_BUG_ON(offset > rq->ring->size);
> - return intel_ring_wrap(rq->ring, offset);
> -}
> -
> -static inline void
> -assert_ring_tail_valid(const struct intel_ring *ring, unsigned int tail)
> -{
> - GEM_BUG_ON(!intel_ring_offset_valid(ring, tail));
> -
> - /*
> - * "Ring Buffer Use"
> - * Gen2 BSpec "1. Programming Environment" / 1.4.4.6
> - * Gen3 BSpec "1c Memory Interface Functions" / 2.3.4.5
> - * Gen4+ BSpec "1c Memory Interface and Command Stream" / 5.3.4.5
> - * "If the Ring Buffer Head Pointer and the Tail Pointer are on the
> - * same cacheline, the Head Pointer must not be greater than the Tail
> - * Pointer."
> - *
> - * We use ring->head as the last known location of the actual RING_HEAD,
> - * it may have advanced but in the worst case it is equally the same
> - * as ring->head and so we should never program RING_TAIL to advance
> - * into the same cacheline as ring->head.
> - */
> -#define cacheline(a) round_down(a, CACHELINE_BYTES)
> - GEM_BUG_ON(cacheline(tail) == cacheline(ring->head) &&
> - tail < ring->head);
> -#undef cacheline
> -}
> -
> -static inline unsigned int
> -intel_ring_set_tail(struct intel_ring *ring, unsigned int tail)
> -{
> - /* Whilst writes to the tail are strictly order, there is no
> - * serialisation between readers and the writers. The tail may be
> - * read by i915_request_retire() just as it is being updated
> - * by execlists, as although the breadcrumb is complete, the context
> - * switch hasn't been seen.
> - */
> - assert_ring_tail_valid(ring, tail);
> - ring->tail = tail;
> - return tail;
> -}
> -
> -static inline unsigned int
> -__intel_ring_space(unsigned int head, unsigned int tail, unsigned int size)
> -{
> - /*
> - * "If the Ring Buffer Head Pointer and the Tail Pointer are on the
> - * same cacheline, the Head Pointer must not be greater than the Tail
> - * Pointer."
> - */
> - GEM_BUG_ON(!is_power_of_2(size));
> - return (head - tail - CACHELINE_BYTES) & (size - 1);
> -}
> -
> int intel_engines_init_mmio(struct intel_gt *gt);
> int intel_engines_setup(struct intel_gt *gt);
> int intel_engines_init(struct intel_gt *gt);
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> index cd4caf54c59c..2afa2ef90482 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> @@ -37,6 +37,7 @@
> #include "intel_context.h"
> #include "intel_lrc.h"
> #include "intel_reset.h"
> +#include "intel_ring.h"
>
> /* Haswell does have the CXT_SIZE register however it does not appear to be
> * valid. Now, docs explain in dwords what is in the context object. The full
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
> index 6fbfa2162e54..3c0f490ff2c7 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
> @@ -13,6 +13,7 @@
> #include "intel_gt.h"
> #include "intel_gt_pm.h"
> #include "intel_rc6.h"
> +#include "intel_ring.h"
>
> static int __engine_unpark(struct intel_wakeref *wf)
> {
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> index fbe89bfd3d4f..c5d1047a4bc5 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> @@ -59,6 +59,7 @@ struct i915_gem_context;
> struct i915_request;
> struct i915_sched_attr;
> struct intel_gt;
> +struct intel_ring;
> struct intel_uncore;
>
> typedef u8 intel_engine_mask_t;
> @@ -77,32 +78,6 @@ struct intel_instdone {
> u32 row[I915_MAX_SLICES][I915_MAX_SUBSLICES];
> };
>
> -struct intel_ring {
> - struct kref ref;
> - struct i915_vma *vma;
> - void *vaddr;
> -
> - /*
> - * As we have two types of rings, one global to the engine used
> - * by ringbuffer submission and those that are exclusive to a
> - * context used by execlists, we have to play safe and allow
> - * atomic updates to the pin_count. However, the actual pinning
> - * of the context is either done during initialisation for
> - * ringbuffer submission or serialised as part of the context
> - * pinning for execlists, and so we do not need a mutex ourselves
> - * to serialise intel_ring_pin/intel_ring_unpin.
> - */
> - atomic_t pin_count;
> -
> - u32 head;
> - u32 tail;
> - u32 emit;
> -
> - u32 space;
> - u32 size;
> - u32 effective_size;
> -};
> -
> /*
> * we use a single page to load ctx workarounds so all of these
> * values are referred in terms of dwords
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index 8d8fecc69809..9d59debfd168 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -145,6 +145,7 @@
> #include "intel_lrc_reg.h"
> #include "intel_mocs.h"
> #include "intel_reset.h"
> +#include "intel_ring.h"
> #include "intel_workarounds.h"
>
> #define RING_EXECLIST_QFULL (1 << 0x2)
> diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c
> index 06dba7ff294e..6d4c665a997d 100644
> --- a/drivers/gpu/drm/i915/gt/intel_mocs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
> @@ -26,6 +26,7 @@
> #include "intel_gt.h"
> #include "intel_mocs.h"
> #include "intel_lrc.h"
> +#include "intel_ring.h"
>
> /* structures required */
> struct drm_i915_mocs_entry {
> diff --git a/drivers/gpu/drm/i915/gt/intel_renderstate.c b/drivers/gpu/drm/i915/gt/intel_renderstate.c
> index 6d05f9c64178..c4edc35e7d89 100644
> --- a/drivers/gpu/drm/i915/gt/intel_renderstate.c
> +++ b/drivers/gpu/drm/i915/gt/intel_renderstate.c
> @@ -27,6 +27,7 @@
>
> #include "i915_drv.h"
> #include "intel_renderstate.h"
> +#include "intel_ring.h"
>
> struct intel_renderstate {
> const struct intel_renderstate_rodata *rodata;
> diff --git a/drivers/gpu/drm/i915/gt/intel_ring.c b/drivers/gpu/drm/i915/gt/intel_ring.c
> new file mode 100644
> index 000000000000..98876b55f851
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/gt/intel_ring.c
> @@ -0,0 +1,314 @@
> +/*
> + * SPDX-License-Identifier: MIT
> + *
> + * Copyright © 2019 Intel Corporation
> + */
> +
> +#include "gem/i915_gem_object.h"
> +#include "i915_drv.h"
> +#include "i915_vma.h"
> +#include "intel_engine.h"
> +#include "intel_ring.h"
> +#include "intel_timeline.h"
> +
> +unsigned int intel_ring_update_space(struct intel_ring *ring)
> +{
> + unsigned int space;
> +
> + space = __intel_ring_space(ring->head, ring->emit, ring->size);
> +
> + ring->space = space;
> + return space;
> +}
> +
> +int intel_ring_pin(struct intel_ring *ring)
> +{
> + struct i915_vma *vma = ring->vma;
> + unsigned int flags;
> + void *addr;
> + int ret;
> +
> + if (atomic_fetch_inc(&ring->pin_count))
> + return 0;
> +
> + flags = PIN_GLOBAL;
> +
> + /* Ring wraparound at offset 0 sometimes hangs. No idea why. */
> + flags |= PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma);
> +
> + if (vma->obj->stolen)
> + flags |= PIN_MAPPABLE;
> + else
> + flags |= PIN_HIGH;
> +
> + ret = i915_vma_pin(vma, 0, 0, flags);
> + if (unlikely(ret))
> + goto err_unpin;
> +
> + if (i915_vma_is_map_and_fenceable(vma))
> + addr = (void __force *)i915_vma_pin_iomap(vma);
> + else
> + addr = i915_gem_object_pin_map(vma->obj,
> + i915_coherent_map_type(vma->vm->i915));
> + if (IS_ERR(addr)) {
> + ret = PTR_ERR(addr);
> + goto err_ring;
> + }
> +
> + i915_vma_make_unshrinkable(vma);
> +
> + GEM_BUG_ON(ring->vaddr);
> + ring->vaddr = addr;
> +
> + return 0;
> +
> +err_ring:
> + i915_vma_unpin(vma);
> +err_unpin:
> + atomic_dec(&ring->pin_count);
> + return ret;
> +}
> +
> +void intel_ring_reset(struct intel_ring *ring, u32 tail)
> +{
> + tail = intel_ring_wrap(ring, tail);
> + ring->tail = tail;
> + ring->head = tail;
> + ring->emit = tail;
> + intel_ring_update_space(ring);
> +}
> +
> +void intel_ring_unpin(struct intel_ring *ring)
> +{
> + if (!atomic_dec_and_test(&ring->pin_count))
> + return;
> +
> + /* Discard any unused bytes beyond that submitted to hw. */
> + intel_ring_reset(ring, ring->emit);
> +
> + GEM_BUG_ON(!ring->vma);
> + i915_vma_unset_ggtt_write(ring->vma);
> + if (i915_vma_is_map_and_fenceable(ring->vma))
> + i915_vma_unpin_iomap(ring->vma);
> + else
> + i915_gem_object_unpin_map(ring->vma->obj);
> +
> + GEM_BUG_ON(!ring->vaddr);
> + ring->vaddr = NULL;
> +
> + i915_vma_unpin(ring->vma);
> + i915_vma_make_purgeable(ring->vma);
> +}
> +
> +static struct i915_vma *create_ring_vma(struct i915_ggtt *ggtt, int size)
> +{
> + struct i915_address_space *vm = &ggtt->vm;
> + struct drm_i915_private *i915 = vm->i915;
> + struct drm_i915_gem_object *obj;
> + struct i915_vma *vma;
> +
> + obj = i915_gem_object_create_stolen(i915, size);
> + if (!obj)
> + obj = i915_gem_object_create_internal(i915, size);
> + if (IS_ERR(obj))
> + return ERR_CAST(obj);
> +
> + /*
> + * Mark ring buffers as read-only from GPU side (so no stray overwrites)
> + * if supported by the platform's GGTT.
> + */
> + if (vm->has_read_only)
> + i915_gem_object_set_readonly(obj);
> +
> + vma = i915_vma_instance(obj, vm, NULL);
> + i915_gem_object_put(obj);
> +
> + return vma;
> +}
> +
> +struct intel_ring *
> +intel_engine_create_ring(struct intel_engine_cs *engine, int size)
> +{
> + struct drm_i915_private *i915 = engine->i915;
> + struct intel_ring *ring;
> + struct i915_vma *vma;
> +
> + GEM_BUG_ON(!is_power_of_2(size));
> + GEM_BUG_ON(RING_CTL_SIZE(size) & ~RING_NR_PAGES);
> +
> + ring = kzalloc(sizeof(*ring), GFP_KERNEL);
> + if (!ring)
> + return ERR_PTR(-ENOMEM);
> +
> + kref_init(&ring->ref);
> +
> + ring->size = size;
> + /* Workaround an erratum on the i830 which causes a hang if
> + * the TAIL pointer points to within the last 2 cachelines
> + * of the buffer.
> + */
> + ring->effective_size = size;
> + if (IS_I830(i915) || IS_I845G(i915))
> + ring->effective_size -= 2 * CACHELINE_BYTES;
> +
> + intel_ring_update_space(ring);
> +
> + vma = create_ring_vma(engine->gt->ggtt, size);
> + if (IS_ERR(vma)) {
> + kfree(ring);
> + return ERR_CAST(vma);
> + }
> + ring->vma = vma;
> +
> + return ring;
> +}
> +
> +void intel_ring_free(struct kref *ref)
> +{
> + struct intel_ring *ring = container_of(ref, typeof(*ring), ref);
> +
> + i915_vma_put(ring->vma);
> +
> + kfree(ring);
> +}
> +
> +static noinline int
> +wait_for_space(struct intel_ring *ring,
> + struct intel_timeline *tl,
> + unsigned int bytes)
> +{
> + struct i915_request *target;
> + long timeout;
> +
> + if (intel_ring_update_space(ring) >= bytes)
> + return 0;
> +
> + GEM_BUG_ON(list_empty(&tl->requests));
> + list_for_each_entry(target, &tl->requests, link) {
> + if (target->ring != ring)
> + continue;
> +
> + /* Would completion of this request free enough space? */
> + if (bytes <= __intel_ring_space(target->postfix,
> + ring->emit, ring->size))
> + break;
> + }
> +
> + if (GEM_WARN_ON(&target->link == &tl->requests))
> + return -ENOSPC;
> +
> + timeout = i915_request_wait(target,
> + I915_WAIT_INTERRUPTIBLE,
> + MAX_SCHEDULE_TIMEOUT);
> + if (timeout < 0)
> + return timeout;
> +
> + i915_request_retire_upto(target);
> +
> + intel_ring_update_space(ring);
> + GEM_BUG_ON(ring->space < bytes);
> + return 0;
> +}
> +
> +u32 *intel_ring_begin(struct i915_request *rq, unsigned int num_dwords)
> +{
> + struct intel_ring *ring = rq->ring;
> + const unsigned int remain_usable = ring->effective_size - ring->emit;
> + const unsigned int bytes = num_dwords * sizeof(u32);
> + unsigned int need_wrap = 0;
> + unsigned int total_bytes;
> + u32 *cs;
> +
> + /* Packets must be qword aligned. */
> + GEM_BUG_ON(num_dwords & 1);
> +
> + total_bytes = bytes + rq->reserved_space;
> + GEM_BUG_ON(total_bytes > ring->effective_size);
> +
> + if (unlikely(total_bytes > remain_usable)) {
> + const int remain_actual = ring->size - ring->emit;
> +
> + if (bytes > remain_usable) {
> + /*
> + * Not enough space for the basic request. So need to
> + * flush out the remainder and then wait for
> + * base + reserved.
> + */
> + total_bytes += remain_actual;
> + need_wrap = remain_actual | 1;
> + } else {
> + /*
> + * The base request will fit but the reserved space
> + * falls off the end. So we don't need an immediate
> + * wrap and only need to effectively wait for the
> + * reserved size from the start of ringbuffer.
> + */
> + total_bytes = rq->reserved_space + remain_actual;
> + }
> + }
> +
> + if (unlikely(total_bytes > ring->space)) {
> + int ret;
> +
> + /*
> + * Space is reserved in the ringbuffer for finalising the
> + * request, as that cannot be allowed to fail. During request
> + * finalisation, reserved_space is set to 0 to stop the
> + * overallocation and the assumption is that then we never need
> + * to wait (which has the risk of failing with EINTR).
> + *
> + * See also i915_request_alloc() and i915_request_add().
> + */
> + GEM_BUG_ON(!rq->reserved_space);
> +
> + ret = wait_for_space(ring, rq->timeline, total_bytes);
> + if (unlikely(ret))
> + return ERR_PTR(ret);
> + }
> +
> + if (unlikely(need_wrap)) {
> + need_wrap &= ~1;
> + GEM_BUG_ON(need_wrap > ring->space);
> + GEM_BUG_ON(ring->emit + need_wrap > ring->size);
> + GEM_BUG_ON(!IS_ALIGNED(need_wrap, sizeof(u64)));
> +
> + /* Fill the tail with MI_NOOP */
> + memset64(ring->vaddr + ring->emit, 0, need_wrap / sizeof(u64));
> + ring->space -= need_wrap;
> + ring->emit = 0;
> + }
> +
> + GEM_BUG_ON(ring->emit > ring->size - bytes);
> + GEM_BUG_ON(ring->space < bytes);
> + cs = ring->vaddr + ring->emit;
> + GEM_DEBUG_EXEC(memset32(cs, POISON_INUSE, bytes / sizeof(*cs)));
> + ring->emit += bytes;
> + ring->space -= bytes;
> +
> + return cs;
> +}
> +
> +/* Align the ring tail to a cacheline boundary */
> +int intel_ring_cacheline_align(struct i915_request *rq)
> +{
> + int num_dwords;
> + void *cs;
> +
> + num_dwords = (rq->ring->emit & (CACHELINE_BYTES - 1)) / sizeof(u32);
> + if (num_dwords == 0)
> + return 0;
> +
> + num_dwords = CACHELINE_DWORDS - num_dwords;
> + GEM_BUG_ON(num_dwords & 1);
> +
> + cs = intel_ring_begin(rq, num_dwords);
> + if (IS_ERR(cs))
> + return PTR_ERR(cs);
> +
> + memset64(cs, (u64)MI_NOOP << 32 | MI_NOOP, num_dwords / 2);
> + intel_ring_advance(rq, cs + num_dwords);
> +
> + GEM_BUG_ON(rq->ring->emit & (CACHELINE_BYTES - 1));
> + return 0;
> +}
> +
> diff --git a/drivers/gpu/drm/i915/gt/intel_ring.h b/drivers/gpu/drm/i915/gt/intel_ring.h
> new file mode 100644
> index 000000000000..ea2839d9e044
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/gt/intel_ring.h
> @@ -0,0 +1,131 @@
> +/*
> + * SPDX-License-Identifier: MIT
> + *
> + * Copyright © 2019 Intel Corporation
> + */
> +
> +#ifndef INTEL_RING_H
> +#define INTEL_RING_H
> +
> +#include "i915_gem.h" /* GEM_BUG_ON */
> +#include "i915_request.h"
> +#include "intel_ring_types.h"
> +
> +struct intel_engine_cs;
> +
> +struct intel_ring *
> +intel_engine_create_ring(struct intel_engine_cs *engine, int size);
> +
> +u32 *intel_ring_begin(struct i915_request *rq, unsigned int num_dwords);
> +int intel_ring_cacheline_align(struct i915_request *rq);
> +
> +unsigned int intel_ring_update_space(struct intel_ring *ring);
> +
> +int intel_ring_pin(struct intel_ring *ring);
> +void intel_ring_unpin(struct intel_ring *ring);
> +void intel_ring_reset(struct intel_ring *ring, u32 tail);
> +
> +void intel_ring_free(struct kref *ref);
> +
> +static inline struct intel_ring *intel_ring_get(struct intel_ring *ring)
> +{
> + kref_get(&ring->ref);
> + return ring;
> +}
> +
> +static inline void intel_ring_put(struct intel_ring *ring)
> +{
> + kref_put(&ring->ref, intel_ring_free);
> +}
> +
> +static inline void intel_ring_advance(struct i915_request *rq, u32 *cs)
> +{
> + /* Dummy function.
> + *
> + * This serves as a placeholder in the code so that the reader
> + * can compare against the preceding intel_ring_begin() and
> + * check that the number of dwords emitted matches the space
> + * reserved for the command packet (i.e. the value passed to
> + * intel_ring_begin()).
> + */
> + GEM_BUG_ON((rq->ring->vaddr + rq->ring->emit) != cs);
> +}
> +
> +static inline u32 intel_ring_wrap(const struct intel_ring *ring, u32 pos)
> +{
> + return pos & (ring->size - 1);
> +}
> +
> +static inline bool
> +intel_ring_offset_valid(const struct intel_ring *ring,
> + unsigned int pos)
> +{
> + if (pos & -ring->size) /* must be strictly within the ring */
> + return false;
> +
> + if (!IS_ALIGNED(pos, 8)) /* must be qword aligned */
> + return false;
> +
> + return true;
> +}
> +
> +static inline u32 intel_ring_offset(const struct i915_request *rq, void *addr)
> +{
> + /* Don't write ring->size (equivalent to 0) as that hangs some GPUs. */
> + u32 offset = addr - rq->ring->vaddr;
> + GEM_BUG_ON(offset > rq->ring->size);
> + return intel_ring_wrap(rq->ring, offset);
> +}
> +
> +static inline void
> +assert_ring_tail_valid(const struct intel_ring *ring, unsigned int tail)
> +{
> + GEM_BUG_ON(!intel_ring_offset_valid(ring, tail));
> +
> + /*
> + * "Ring Buffer Use"
> + * Gen2 BSpec "1. Programming Environment" / 1.4.4.6
> + * Gen3 BSpec "1c Memory Interface Functions" / 2.3.4.5
> + * Gen4+ BSpec "1c Memory Interface and Command Stream" / 5.3.4.5
> + * "If the Ring Buffer Head Pointer and the Tail Pointer are on the
> + * same cacheline, the Head Pointer must not be greater than the Tail
> + * Pointer."
> + *
> + * We use ring->head as the last known location of the actual RING_HEAD,
> + * it may have advanced but in the worst case it is equally the same
> + * as ring->head and so we should never program RING_TAIL to advance
> + * into the same cacheline as ring->head.
> + */
> +#define cacheline(a) round_down(a, CACHELINE_BYTES)
> + GEM_BUG_ON(cacheline(tail) == cacheline(ring->head) &&
> + tail < ring->head);
> +#undef cacheline
> +}
> +
> +static inline unsigned int
> +intel_ring_set_tail(struct intel_ring *ring, unsigned int tail)
> +{
> + /* Whilst writes to the tail are strictly order, there is no
> + * serialisation between readers and the writers. The tail may be
> + * read by i915_request_retire() just as it is being updated
> + * by execlists, as although the breadcrumb is complete, the context
> + * switch hasn't been seen.
> + */
> + assert_ring_tail_valid(ring, tail);
> + ring->tail = tail;
> + return tail;
> +}
> +
> +static inline unsigned int
> +__intel_ring_space(unsigned int head, unsigned int tail, unsigned int size)
> +{
> + /*
> + * "If the Ring Buffer Head Pointer and the Tail Pointer are on the
> + * same cacheline, the Head Pointer must not be greater than the Tail
> + * Pointer."
> + */
> + GEM_BUG_ON(!is_power_of_2(size));
> + return (head - tail - CACHELINE_BYTES) & (size - 1);
> +}
> +
> +#endif /* INTEL_RING_H */
> diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c
> index 0f959694303c..14ad10acd548 100644
> --- a/drivers/gpu/drm/i915/gt/intel_timeline.c
> +++ b/drivers/gpu/drm/i915/gt/intel_timeline.c
> @@ -4,13 +4,13 @@
> * Copyright © 2016-2018 Intel Corporation
> */
>
> -#include "gt/intel_gt_types.h"
> -
> #include "i915_drv.h"
>
> #include "i915_active.h"
> #include "i915_syncmap.h"
> -#include "gt/intel_timeline.h"
> +#include "intel_gt.h"
> +#include "intel_ring.h"
> +#include "intel_timeline.h"
>
> #define ptr_set_bit(ptr, bit) ((typeof(ptr))((unsigned long)(ptr) | BIT(bit)))
> #define ptr_test_bit(ptr, bit) ((unsigned long)(ptr) & BIT(bit))
> diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c
> index af8a8183154a..7cb6dab4399d 100644
> --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
> +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
> @@ -7,6 +7,7 @@
> #include "i915_drv.h"
> #include "intel_context.h"
> #include "intel_gt.h"
> +#include "intel_ring.h"
> #include "intel_workarounds.h"
>
> /**
> diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c
> index 123db2c3f956..83f549d203a0 100644
> --- a/drivers/gpu/drm/i915/gt/mock_engine.c
> +++ b/drivers/gpu/drm/i915/gt/mock_engine.c
> @@ -23,6 +23,7 @@
> */
>
> #include "gem/i915_gem_context.h"
> +#include "gt/intel_ring.h"
>
> #include "i915_drv.h"
> #include "intel_context.h"
> diff --git a/drivers/gpu/drm/i915/gt/selftest_timeline.c b/drivers/gpu/drm/i915/gt/selftest_timeline.c
> index dac86f699a4c..f04a59fe5d2c 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_timeline.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_timeline.c
> @@ -9,6 +9,7 @@
> #include "intel_engine_pm.h"
> #include "intel_gt.h"
> #include "intel_gt_requests.h"
> +#include "intel_ring.h"
>
> #include "../selftests/i915_random.h"
> #include "../i915_selftest.h"
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> index 009e54a3764f..1b1691aaed28 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> @@ -6,12 +6,13 @@
> #include <linux/circ_buf.h>
>
> #include "gem/i915_gem_context.h"
> -
> #include "gt/intel_context.h"
> #include "gt/intel_engine_pm.h"
> #include "gt/intel_gt.h"
> #include "gt/intel_gt_pm.h"
> #include "gt/intel_lrc_reg.h"
> +#include "gt/intel_ring.h"
> +
> #include "intel_guc_submission.h"
>
> #include "i915_drv.h"
> diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c
> index e753b1e706e2..6a3ac8cde95d 100644
> --- a/drivers/gpu/drm/i915/gvt/cmd_parser.c
> +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c
> @@ -35,7 +35,9 @@
> */
>
> #include <linux/slab.h>
> +
> #include "i915_drv.h"
> +#include "gt/intel_ring.h"
> #include "gvt.h"
> #include "i915_pvinfo.h"
> #include "trace.h"
> diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.c b/drivers/gpu/drm/i915/gvt/mmio_context.c
> index 4208e40445b1..aaf15916d29a 100644
> --- a/drivers/gpu/drm/i915/gvt/mmio_context.c
> +++ b/drivers/gpu/drm/i915/gvt/mmio_context.c
> @@ -35,6 +35,7 @@
>
> #include "i915_drv.h"
> #include "gt/intel_context.h"
> +#include "gt/intel_ring.h"
> #include "gvt.h"
> #include "trace.h"
>
> diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
> index a5b942ee3ceb..377811f8853f 100644
> --- a/drivers/gpu/drm/i915/gvt/scheduler.c
> +++ b/drivers/gpu/drm/i915/gvt/scheduler.c
> @@ -38,6 +38,7 @@
> #include "gem/i915_gem_context.h"
> #include "gem/i915_gem_pm.h"
> #include "gt/intel_context.h"
> +#include "gt/intel_ring.h"
>
> #include "i915_drv.h"
> #include "gvt.h"
> diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
> index 4c190a548ba7..3e3495838a93 100644
> --- a/drivers/gpu/drm/i915/i915_active.c
> +++ b/drivers/gpu/drm/i915/i915_active.c
> @@ -8,6 +8,7 @@
>
> #include "gt/intel_engine_heartbeat.h"
> #include "gt/intel_engine_pm.h"
> +#include "gt/intel_ring.h"
>
> #include "i915_drv.h"
> #include "i915_active.h"
> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
> index 3130b0c7ed83..38d3de2dfaa6 100644
> --- a/drivers/gpu/drm/i915/i915_perf.c
> +++ b/drivers/gpu/drm/i915/i915_perf.c
> @@ -200,6 +200,7 @@
> #include "gt/intel_engine_user.h"
> #include "gt/intel_gt.h"
> #include "gt/intel_lrc_reg.h"
> +#include "gt/intel_ring.h"
>
> #include "i915_drv.h"
> #include "i915_perf.h"
> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> index 4575f368455d..932c5cf190b5 100644
> --- a/drivers/gpu/drm/i915/i915_request.c
> +++ b/drivers/gpu/drm/i915/i915_request.c
> @@ -31,6 +31,7 @@
>
> #include "gem/i915_gem_context.h"
> #include "gt/intel_context.h"
> +#include "gt/intel_ring.h"
>
> #include "i915_active.h"
> #include "i915_drv.h"
> --
> 2.24.0.rc0
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
More information about the Intel-gfx mailing list