[Intel-gfx] [PATCH 15/38] drm/i915: Allocate a status page for each timeline
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Mon Jan 21 11:18:30 UTC 2019
On 18/01/2019 14:00, Chris Wilson wrote:
> Allocate a page for use as a status page by a group of timelines, as we
> only need a dword of storage for each (rounded up to the cacheline for
> safety) we can pack multiple timelines into the same page. Each timeline
> will then be able to track its own HW seqno.
>
> v2: Reuse the common per-engine HWSP for the solitary ringbuffer
> timeline, so that we do not have to emit (using per-gen specialised
> vfuncs) the breadcrumb into the distinct timeline HWSP and instead can
> keep on using the common MI_STORE_DWORD_INDEX. However, to maintain the
> sleight-of-hand for the global/per-context seqno switchover, we will
> store both temporarily (and so use a custom offset for the shared timeline
> HWSP until the switch over).
>
> v3: Keep things simple and allocate a page for each timeline, page
> sharing comes next.
>
> v4: I was caught repeating the same MI_STORE_DWORD_IMM over and over
> again in selftests.
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> ---
> drivers/gpu/drm/i915/i915_timeline.c | 106 +++++-
> drivers/gpu/drm/i915/i915_timeline.h | 21 +-
> drivers/gpu/drm/i915/intel_engine_cs.c | 64 ++--
> drivers/gpu/drm/i915/intel_lrc.c | 22 +-
> drivers/gpu/drm/i915/intel_ringbuffer.c | 10 +-
> drivers/gpu/drm/i915/intel_ringbuffer.h | 6 +-
> .../drm/i915/selftests/i915_live_selftests.h | 1 +
> .../drm/i915/selftests/i915_mock_selftests.h | 2 +-
> .../gpu/drm/i915/selftests/i915_timeline.c | 314 +++++++++++++++++-
> drivers/gpu/drm/i915/selftests/mock_engine.c | 17 +-
> 10 files changed, 512 insertions(+), 51 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_timeline.c b/drivers/gpu/drm/i915/i915_timeline.c
> index 84550f17d3df..a7d902e9eaf1 100644
> --- a/drivers/gpu/drm/i915/i915_timeline.c
> +++ b/drivers/gpu/drm/i915/i915_timeline.c
> @@ -9,11 +9,38 @@
> #include "i915_timeline.h"
> #include "i915_syncmap.h"
>
> -void i915_timeline_init(struct drm_i915_private *i915,
> - struct i915_timeline *timeline,
> - const char *name)
> +static int hwsp_alloc(struct i915_timeline *timeline)
> +{
> + struct drm_i915_private *i915 = timeline->i915;
> + struct drm_i915_gem_object *obj;
> + struct i915_vma *vma;
> +
> + obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
> + if (IS_ERR(obj))
> + return PTR_ERR(obj);
> +
> + i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
> +
> + vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL);
> + if (IS_ERR(vma)) {
> + i915_gem_object_put(obj);
> + return PTR_ERR(vma);
> + }
> +
> + timeline->hwsp_ggtt = vma;
> + timeline->hwsp_offset = 0;
> +
> + return 0;
> +}
> +
> +int i915_timeline_init(struct drm_i915_private *i915,
> + struct i915_timeline *timeline,
> + const char *name,
> + struct i915_vma *global_hwsp)
> {
> struct i915_gt_timelines *gt = &i915->gt.timelines;
> + void *vaddr;
> + int err;
>
> /*
> * Ideally we want a set of engines on a single leaf as we expect
> @@ -25,10 +52,27 @@ void i915_timeline_init(struct drm_i915_private *i915,
>
> timeline->i915 = i915;
> timeline->name = name;
> + timeline->pin_count = 0;
> +
> + if (global_hwsp) {
> + timeline->hwsp_ggtt = i915_vma_get(global_hwsp);
> + timeline->hwsp_offset = I915_GEM_HWS_SEQNO_ADDR;
> + } else {
> + err = hwsp_alloc(timeline);
> + if (err)
> + return err;
> + }
>
> - mutex_lock(&gt->mutex);
> - list_add(&timeline->link, &gt->list);
> - mutex_unlock(&gt->mutex);
> + vaddr = i915_gem_object_pin_map(timeline->hwsp_ggtt->obj, I915_MAP_WB);
> + if (IS_ERR(vaddr)) {
> + i915_vma_put(timeline->hwsp_ggtt);
> + return PTR_ERR(vaddr);
> + }
> +
> + timeline->hwsp_seqno =
> + memset(vaddr + timeline->hwsp_offset,
> + 0,
> + sizeof(*timeline->hwsp_seqno));
>
> /* Called during early_init before we know how many engines there are */
>
> @@ -40,6 +84,12 @@ void i915_timeline_init(struct drm_i915_private *i915,
> INIT_LIST_HEAD(&timeline->requests);
>
> i915_syncmap_init(&timeline->sync);
> +
> + mutex_lock(&gt->mutex);
> + list_add(&timeline->link, &gt->list);
> + mutex_unlock(&gt->mutex);
> +
> + return 0;
> }
>
> void i915_timelines_init(struct drm_i915_private *i915)
> @@ -85,6 +135,7 @@ void i915_timeline_fini(struct i915_timeline *timeline)
> {
> struct i915_gt_timelines *gt = &timeline->i915->gt.timelines;
>
> + GEM_BUG_ON(timeline->pin_count);
> GEM_BUG_ON(!list_empty(&timeline->requests));
>
> i915_syncmap_free(&timeline->sync);
> @@ -92,23 +143,62 @@ void i915_timeline_fini(struct i915_timeline *timeline)
> mutex_lock(&gt->mutex);
> list_del(&timeline->link);
> mutex_unlock(&gt->mutex);
> +
> + i915_gem_object_unpin_map(timeline->hwsp_ggtt->obj);
> + i915_vma_put(timeline->hwsp_ggtt);
> }
>
> struct i915_timeline *
> -i915_timeline_create(struct drm_i915_private *i915, const char *name)
> +i915_timeline_create(struct drm_i915_private *i915,
> + const char *name,
> + struct i915_vma *global_hwsp)
> {
> struct i915_timeline *timeline;
> + int err;
>
> timeline = kzalloc(sizeof(*timeline), GFP_KERNEL);
> if (!timeline)
> return ERR_PTR(-ENOMEM);
>
> - i915_timeline_init(i915, timeline, name);
> + err = i915_timeline_init(i915, timeline, name, global_hwsp);
> + if (err) {
> + kfree(timeline);
> + return ERR_PTR(err);
> + }
> +
> kref_init(&timeline->kref);
>
> return timeline;
> }
>
> +int i915_timeline_pin(struct i915_timeline *tl)
> +{
> + int err;
> +
> + if (tl->pin_count++)
> + return 0;
> + GEM_BUG_ON(!tl->pin_count);
> +
> + err = i915_vma_pin(tl->hwsp_ggtt, 0, 0, PIN_GLOBAL | PIN_HIGH);
> + if (err)
> + goto unpin;
> +
> + return 0;
> +
> +unpin:
> + tl->pin_count = 0;
> + return err;
> +}
> +
> +void i915_timeline_unpin(struct i915_timeline *tl)
> +{
> + GEM_BUG_ON(!tl->pin_count);
> + if (--tl->pin_count)
> + return;
> +
> + __i915_vma_unpin(tl->hwsp_ggtt);
> +}
> +
> void __i915_timeline_free(struct kref *kref)
> {
> struct i915_timeline *timeline =
> diff --git a/drivers/gpu/drm/i915/i915_timeline.h b/drivers/gpu/drm/i915/i915_timeline.h
> index 87ad2dd31c20..0c3739d53d79 100644
> --- a/drivers/gpu/drm/i915/i915_timeline.h
> +++ b/drivers/gpu/drm/i915/i915_timeline.h
> @@ -32,6 +32,8 @@
> #include "i915_syncmap.h"
> #include "i915_utils.h"
>
> +struct i915_vma;
> +
> struct i915_timeline {
> u64 fence_context;
> u32 seqno;
> @@ -40,6 +42,11 @@ struct i915_timeline {
> #define TIMELINE_CLIENT 0 /* default subclass */
> #define TIMELINE_ENGINE 1
>
> + unsigned int pin_count;
> + const u32 *hwsp_seqno;
> + struct i915_vma *hwsp_ggtt;
> + u32 hwsp_offset;
> +
> /**
> * List of breadcrumbs associated with GPU requests currently
> * outstanding.
> @@ -71,9 +78,10 @@ struct i915_timeline {
> struct kref kref;
> };
>
> -void i915_timeline_init(struct drm_i915_private *i915,
> - struct i915_timeline *tl,
> - const char *name);
> +int i915_timeline_init(struct drm_i915_private *i915,
> + struct i915_timeline *tl,
> + const char *name,
> + struct i915_vma *hwsp);
> void i915_timeline_fini(struct i915_timeline *tl);
>
> static inline void
> @@ -96,7 +104,9 @@ i915_timeline_set_subclass(struct i915_timeline *timeline,
> }
>
> struct i915_timeline *
> -i915_timeline_create(struct drm_i915_private *i915, const char *name);
> +i915_timeline_create(struct drm_i915_private *i915,
> + const char *name,
> + struct i915_vma *global_hwsp);
>
> static inline struct i915_timeline *
> i915_timeline_get(struct i915_timeline *timeline)
> @@ -135,6 +145,9 @@ static inline bool i915_timeline_sync_is_later(struct i915_timeline *tl,
> return __i915_timeline_sync_is_later(tl, fence->context, fence->seqno);
> }
>
> +int i915_timeline_pin(struct i915_timeline *tl);
> +void i915_timeline_unpin(struct i915_timeline *tl);
> +
> void i915_timelines_init(struct drm_i915_private *i915);
> void i915_timelines_park(struct drm_i915_private *i915);
> void i915_timelines_fini(struct drm_i915_private *i915);
> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
> index 4b4b7358c482..c850d131d8c3 100644
> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
> @@ -484,26 +484,6 @@ static void intel_engine_init_execlist(struct intel_engine_cs *engine)
> execlists->queue = RB_ROOT_CACHED;
> }
>
> -/**
> - * intel_engines_setup_common - setup engine state not requiring hw access
> - * @engine: Engine to setup.
> - *
> - * Initializes @engine@ structure members shared between legacy and execlists
> - * submission modes which do not require hardware access.
> - *
> - * Typically done early in the submission mode specific engine setup stage.
> - */
> -void intel_engine_setup_common(struct intel_engine_cs *engine)
> -{
> - i915_timeline_init(engine->i915, &engine->timeline, engine->name);
> - i915_timeline_set_subclass(&engine->timeline, TIMELINE_ENGINE);
> -
> - intel_engine_init_execlist(engine);
> - intel_engine_init_hangcheck(engine);
> - intel_engine_init_batch_pool(engine);
> - intel_engine_init_cmd_parser(engine);
> -}
> -
> static void cleanup_status_page(struct intel_engine_cs *engine)
> {
> struct i915_vma *vma;
> @@ -601,6 +581,44 @@ static int init_status_page(struct intel_engine_cs *engine)
> return ret;
> }
>
> +/**
> + * intel_engines_setup_common - setup engine state not requiring hw access
> + * @engine: Engine to setup.
> + *
> + * Initializes @engine@ structure members shared between legacy and execlists
> + * submission modes which do not require hardware access.
> + *
> + * Typically done early in the submission mode specific engine setup stage.
> + */
> +int intel_engine_setup_common(struct intel_engine_cs *engine)
> +{
> + int err;
> +
> + err = init_status_page(engine);
> + if (err)
> + return err;
> +
> + err = i915_timeline_init(engine->i915,
> + &engine->timeline,
> + engine->name,
> + engine->status_page.vma);
> + if (err)
> + goto err_hwsp;
> +
> + i915_timeline_set_subclass(&engine->timeline, TIMELINE_ENGINE);
> +
> + intel_engine_init_execlist(engine);
> + intel_engine_init_hangcheck(engine);
> + intel_engine_init_batch_pool(engine);
> + intel_engine_init_cmd_parser(engine);
> +
> + return 0;
> +
> +err_hwsp:
> + cleanup_status_page(engine);
> + return err;
> +}
> +
> static void __intel_context_unpin(struct i915_gem_context *ctx,
> struct intel_engine_cs *engine)
> {
> @@ -653,14 +671,8 @@ int intel_engine_init_common(struct intel_engine_cs *engine)
> if (ret)
> goto err_unpin_preempt;
>
> - ret = init_status_page(engine);
> - if (ret)
> - goto err_breadcrumbs;
> -
> return 0;
>
> -err_breadcrumbs:
> - intel_engine_fini_breadcrumbs(engine);
> err_unpin_preempt:
> if (i915->preempt_context)
> __intel_context_unpin(i915->preempt_context, engine);
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index dabebb68759f..ba59241add47 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -2199,10 +2199,14 @@ logical_ring_default_irqs(struct intel_engine_cs *engine)
> engine->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT << shift;
> }
>
> -static void
> +static int
> logical_ring_setup(struct intel_engine_cs *engine)
> {
> - intel_engine_setup_common(engine);
> + int err;
> +
> + err = intel_engine_setup_common(engine);
> + if (err)
> + return err;
>
> /* Intentionally left blank. */
> engine->buffer = NULL;
> @@ -2212,6 +2216,8 @@ logical_ring_setup(struct intel_engine_cs *engine)
>
> logical_ring_default_vfuncs(engine);
> logical_ring_default_irqs(engine);
> +
> + return 0;
> }
>
> static int logical_ring_init(struct intel_engine_cs *engine)
> @@ -2260,7 +2266,9 @@ int logical_render_ring_init(struct intel_engine_cs *engine)
> {
> int ret;
>
> - logical_ring_setup(engine);
> + ret = logical_ring_setup(engine);
> + if (ret)
> + return ret;
>
> /* Override some for render ring. */
> engine->init_context = gen8_init_rcs_context;
> @@ -2290,7 +2298,11 @@ int logical_render_ring_init(struct intel_engine_cs *engine)
>
> int logical_xcs_ring_init(struct intel_engine_cs *engine)
> {
> - logical_ring_setup(engine);
> + int err;
> +
> + err = logical_ring_setup(engine);
> + if (err)
> + return err;
>
> return logical_ring_init(engine);
> }
> @@ -2624,7 +2636,7 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx,
> goto error_deref_obj;
> }
>
> - timeline = i915_timeline_create(ctx->i915, ctx->name);
> + timeline = i915_timeline_create(ctx->i915, ctx->name, NULL);
> if (IS_ERR(timeline)) {
> ret = PTR_ERR(timeline);
> goto error_deref_obj;
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index d72012b42f20..5887304bc3ae 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -1539,9 +1539,13 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine)
> struct intel_ring *ring;
> int err;
>
> - intel_engine_setup_common(engine);
> + err = intel_engine_setup_common(engine);
> + if (err)
> + return err;
>
> - timeline = i915_timeline_create(engine->i915, engine->name);
> + timeline = i915_timeline_create(engine->i915,
> + engine->name,
> + engine->status_page.vma);
> if (IS_ERR(timeline)) {
> err = PTR_ERR(timeline);
> goto err;
> @@ -1565,6 +1569,8 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine)
> if (err)
> goto err_unpin;
>
> + GEM_BUG_ON(ring->timeline->hwsp_ggtt != engine->status_page.vma);
> +
> return 0;
>
> err_unpin:
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index d1a82610e0c1..de0099ea926b 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -720,7 +720,9 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value)
> #define I915_GEM_HWS_INDEX_ADDR (I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT)
> #define I915_GEM_HWS_PREEMPT_INDEX 0x32
> #define I915_GEM_HWS_PREEMPT_ADDR (I915_GEM_HWS_PREEMPT_INDEX << MI_STORE_DWORD_INDEX_SHIFT)
> -#define I915_GEM_HWS_SCRATCH_INDEX 0x40
> +#define I915_GEM_HWS_SEQNO 0x40
> +#define I915_GEM_HWS_SEQNO_ADDR (I915_GEM_HWS_SEQNO << MI_STORE_DWORD_INDEX_SHIFT)
> +#define I915_GEM_HWS_SCRATCH_INDEX 0x80
> #define I915_GEM_HWS_SCRATCH_ADDR (I915_GEM_HWS_SCRATCH_INDEX << MI_STORE_DWORD_INDEX_SHIFT)
>
> #define I915_HWS_CSB_BUF0_INDEX 0x10
> @@ -826,7 +828,7 @@ intel_ring_set_tail(struct intel_ring *ring, unsigned int tail)
>
> void intel_engine_write_global_seqno(struct intel_engine_cs *engine, u32 seqno);
>
> -void intel_engine_setup_common(struct intel_engine_cs *engine);
> +int intel_engine_setup_common(struct intel_engine_cs *engine);
> int intel_engine_init_common(struct intel_engine_cs *engine);
> void intel_engine_cleanup_common(struct intel_engine_cs *engine);
>
> diff --git a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h
> index a15713cae3b3..76b4f87fc853 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h
> +++ b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h
> @@ -13,6 +13,7 @@ selftest(sanitycheck, i915_live_sanitycheck) /* keep first (igt selfcheck) */
> selftest(uncore, intel_uncore_live_selftests)
> selftest(workarounds, intel_workarounds_live_selftests)
> selftest(requests, i915_request_live_selftests)
> +selftest(timelines, i915_timeline_live_selftests)
> selftest(objects, i915_gem_object_live_selftests)
> selftest(dmabuf, i915_gem_dmabuf_live_selftests)
> selftest(coherency, i915_gem_coherency_live_selftests)
> diff --git a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
> index 1b70208eeea7..4a83a1c6c406 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
> +++ b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
> @@ -16,7 +16,7 @@ selftest(syncmap, i915_syncmap_mock_selftests)
> selftest(uncore, intel_uncore_mock_selftests)
> selftest(engine, intel_engine_cs_mock_selftests)
> selftest(breadcrumbs, intel_breadcrumbs_mock_selftests)
> -selftest(timelines, i915_gem_timeline_mock_selftests)
> +selftest(timelines, i915_timeline_mock_selftests)
> selftest(requests, i915_request_mock_selftests)
> selftest(objects, i915_gem_object_mock_selftests)
> selftest(dmabuf, i915_gem_dmabuf_mock_selftests)
> diff --git a/drivers/gpu/drm/i915/selftests/i915_timeline.c b/drivers/gpu/drm/i915/selftests/i915_timeline.c
> index 19f1c6a5c8fb..15a33ec7217d 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_timeline.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_timeline.c
> @@ -256,7 +256,7 @@ static int bench_sync(void *arg)
> return 0;
> }
>
> -int i915_gem_timeline_mock_selftests(void)
> +int i915_timeline_mock_selftests(void)
> {
> static const struct i915_subtest tests[] = {
> SUBTEST(igt_sync),
> @@ -265,3 +265,315 @@ int i915_gem_timeline_mock_selftests(void)
>
> return i915_subtests(tests, NULL);
> }
> +
> +static int emit_ggtt_store_dw(struct i915_request *rq, u32 addr, u32 value)
> +{
> + u32 *cs;
> +
> + cs = intel_ring_begin(rq, 4);
> + if (IS_ERR(cs))
> + return PTR_ERR(cs);
> +
> + if (INTEL_GEN(rq->i915) >= 8) {
> + *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
> + *cs++ = addr;
> + *cs++ = 0;
> + *cs++ = value;
> + } else if (INTEL_GEN(rq->i915) >= 4) {
> + *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
> + *cs++ = 0;
> + *cs++ = addr;
> + *cs++ = value;
> + } else {
> + *cs++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
> + *cs++ = addr;
> + *cs++ = value;
> + *cs++ = MI_NOOP;
> + }
> +
> + intel_ring_advance(rq, cs);
> +
> + return 0;
> +}
> +
> +static u32 hwsp_address(const struct i915_timeline *tl)
> +{
> + return i915_ggtt_offset(tl->hwsp_ggtt) + tl->hwsp_offset;
> +}
> +
> +static struct i915_request *
> +tl_write(struct i915_timeline *tl, struct intel_engine_cs *engine, u32 value)
> +{
> + struct i915_request *rq;
> + int err;
> +
> + lockdep_assert_held(tl->i915->drm.struct_mutex); /* lazy with rq refs */
> +
> + err = i915_timeline_pin(tl);
> + if (err)
> + return ERR_PTR(err);
> +
> + rq = i915_request_alloc(engine, engine->i915->kernel_context);
> + if (IS_ERR(rq))
> + goto out_unpin;
> +
> + err = emit_ggtt_store_dw(rq, hwsp_address(tl), value);
> + i915_request_add(rq);
> + if (err)
> + rq = ERR_PTR(err);
> +
> +out_unpin:
> + i915_timeline_unpin(tl);
> + return rq;
> +}
> +
> +static int live_hwsp_engine(void *arg)
> +{
> +#define NUM_TIMELINES 4096
> + struct drm_i915_private *i915 = arg;
> + struct i915_timeline **timelines;
> + struct intel_engine_cs *engine;
> + enum intel_engine_id id;
> + intel_wakeref_t wakeref;
> + unsigned long count, n;
> + int err = 0;
> +
> + /*
> + * Create a bunch of timelines and check we can write
> + * independently to each of their breadcrumb slots.
> + */
> +
> + timelines = kvmalloc_array(NUM_TIMELINES * I915_NUM_ENGINES,
> + sizeof(*timelines),
> + GFP_KERNEL);
> + if (!timelines)
> + return -ENOMEM;
> +
> + mutex_lock(&i915->drm.struct_mutex);
> + wakeref = intel_runtime_pm_get(i915);
> +
> + count = 0;
> + for_each_engine(engine, i915, id) {
> + if (!intel_engine_can_store_dword(engine))
> + continue;
> +
> + for (n = 0; n < NUM_TIMELINES; n++) {
> + struct i915_timeline *tl;
> + struct i915_request *rq;
> +
> + tl = i915_timeline_create(i915, "live", NULL);
> + if (IS_ERR(tl)) {
> + err = PTR_ERR(tl);
> + goto out;
> + }
> +
> + if (*tl->hwsp_seqno != tl->seqno) {
> + pr_err("Timeline %lu created with incorrect breadcrumb, found %x, expected %x\n",
> + count, *tl->hwsp_seqno, tl->seqno);
> + err = -EINVAL;
> + i915_timeline_put(tl);
> + goto out;
> + }
The above block is repeated three times, so you could wrap it together with
the timeline creation in a single helper.
> +
> + rq = tl_write(tl, engine, count);
> + if (IS_ERR(rq)) {
> + pr_err("Failed to write to timeline!\n");
> + i915_timeline_put(tl);
> + err = PTR_ERR(rq);
> + goto out;
> + }
> +
> + timelines[count++] = tl;
> + }
> + }
> +
> + err = i915_gem_wait_for_idle(i915, I915_WAIT_LOCKED, MAX_SCHEDULE_TIMEOUT);
> +
> +out:
> + for (n = 0; n < count; n++) {
> + struct i915_timeline *tl = timelines[n];
> +
> + if (!err && *tl->hwsp_seqno != n) {
> + pr_err("Invalid seqno stored in timeline %lu, found 0x%x\n",
> + n, *tl->hwsp_seqno);
> + err = -EINVAL;
> + }
> + i915_timeline_put(tl);
> + }
Okay, this loop is only repeated twice, so there is no strong argument to
consolidate it.
> +
> + intel_runtime_pm_put(i915, wakeref);
> + mutex_unlock(&i915->drm.struct_mutex);
> +
> + kvfree(timelines);
> +
> + return err;
> +#undef NUM_TIMELINES
> +}
> +
> +static int live_hwsp_alternate(void *arg)
> +{
> +#define NUM_TIMELINES 4096
> + struct drm_i915_private *i915 = arg;
> + struct i915_timeline **timelines;
> + struct intel_engine_cs *engine;
> + enum intel_engine_id id;
> + intel_wakeref_t wakeref;
> + unsigned long count, n;
> + int err = 0;
> +
> + /*
> + * Create a bunch of timelines and check we can write
> + * independently to each of their breadcrumb slots with adjacent
> + * engines.
> + */
> +
> + timelines = kvmalloc_array(NUM_TIMELINES * I915_NUM_ENGINES,
> + sizeof(*timelines),
> + GFP_KERNEL);
> + if (!timelines)
> + return -ENOMEM;
> +
> + mutex_lock(&i915->drm.struct_mutex);
> + wakeref = intel_runtime_pm_get(i915);
> +
> + count = 0;
> + for (n = 0; n < NUM_TIMELINES; n++) {
> + for_each_engine(engine, i915, id) {
> + struct i915_timeline *tl;
> + struct i915_request *rq;
> +
> + if (!intel_engine_can_store_dword(engine))
> + continue;
> +
> + tl = i915_timeline_create(i915, "live", NULL);
> + if (IS_ERR(tl)) {
> + err = PTR_ERR(tl);
> + goto out;
> + }
> +
> + if (*tl->hwsp_seqno != tl->seqno) {
> + pr_err("Timeline %lu created with incorrect breadcrumb, found %x, expected %x\n",
> + count, *tl->hwsp_seqno, tl->seqno);
> + err = -EINVAL;
> + i915_timeline_put(tl);
> + goto out;
> + }
> +
> + rq = tl_write(tl, engine, count);
> + if (IS_ERR(rq)) {
> + pr_err("Failed to write to timeline!\n");
> + i915_timeline_put(tl);
> + err = PTR_ERR(rq);
> + goto out;
> + }
> +
> + timelines[count++] = tl;
> + }
> + }
> +
> + err = i915_gem_wait_for_idle(i915, I915_WAIT_LOCKED, MAX_SCHEDULE_TIMEOUT);
> +
> +out:
> + for (n = 0; n < count; n++) {
> + struct i915_timeline *tl = timelines[n];
> +
> + if (!err && *tl->hwsp_seqno != n) {
> + pr_err("Invalid seqno stored in timeline %lu, found 0x%x\n",
> + n, *tl->hwsp_seqno);
> + err = -EINVAL;
> + }
> + i915_timeline_put(tl);
> + }
> +
> + intel_runtime_pm_put(i915, wakeref);
> + mutex_unlock(&i915->drm.struct_mutex);
> +
> + kvfree(timelines);
> +
> + return err;
> +#undef NUM_TIMELINES
> +}
> +
> +static int live_hwsp_recycle(void *arg)
> +{
> + struct drm_i915_private *i915 = arg;
> + struct intel_engine_cs *engine;
> + enum intel_engine_id id;
> + intel_wakeref_t wakeref;
> + unsigned long count;
> + int err = 0;
> +
> + /*
> + * Check seqno writes into one timeline at a time. We expect to
> + * recycle the breadcrumb slot between iterations and neither
> + * want to confuse ourselves or the GPU.
> + */
> +
> + mutex_lock(&i915->drm.struct_mutex);
> + wakeref = intel_runtime_pm_get(i915);
> +
> + count = 0;
> + for_each_engine(engine, i915, id) {
> + IGT_TIMEOUT(end_time);
> +
> + if (!intel_engine_can_store_dword(engine))
> + continue;
> +
> + do {
> + struct i915_timeline *tl;
> + struct i915_request *rq;
> +
> + tl = i915_timeline_create(i915, "live", NULL);
> + if (IS_ERR(tl)) {
> + err = PTR_ERR(tl);
> + goto out;
> + }
> +
> + if (*tl->hwsp_seqno != tl->seqno) {
> + pr_err("Timeline %lu created with incorrect breadcrumb, found %x, expected %x\n",
> + count, *tl->hwsp_seqno, tl->seqno);
> + err = -EINVAL;
> + i915_timeline_put(tl);
> + goto out;
> + }
> +
> + rq = tl_write(tl, engine, count);
> + if (IS_ERR(rq)) {
> + pr_err("Failed to write to timeline!\n");
> + i915_timeline_put(tl);
> + err = PTR_ERR(rq);
> + goto out;
> + }
> +
> + i915_request_wait(rq, I915_WAIT_LOCKED, MAX_SCHEDULE_TIMEOUT);
> + if (*tl->hwsp_seqno != count) {
> + pr_err("Invalid seqno stored in timeline %lu, found 0x%x\n",
> + count, *tl->hwsp_seqno);
> + err = -EINVAL;
> + }
> +
> + i915_timeline_put(tl);
> + count++;
> +
> + if (err)
> + goto out;
> + } while (!__igt_timeout(end_time, NULL));
> + }
> +
> +out:
> + intel_runtime_pm_put(i915, wakeref);
> + mutex_unlock(&i915->drm.struct_mutex);
> +
> + return err;
> +}
> +
> +int i915_timeline_live_selftests(struct drm_i915_private *i915)
> +{
> + static const struct i915_subtest tests[] = {
> + SUBTEST(live_hwsp_recycle),
> + SUBTEST(live_hwsp_engine),
> + SUBTEST(live_hwsp_alternate),
> + };
> +
> + return i915_subtests(tests, i915);
> +}
> diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c
> index 968a7e139a67..acd27c7e807b 100644
> --- a/drivers/gpu/drm/i915/selftests/mock_engine.c
> +++ b/drivers/gpu/drm/i915/selftests/mock_engine.c
> @@ -34,12 +34,20 @@ static struct intel_ring *mock_ring(struct intel_engine_cs *engine)
> {
> const unsigned long sz = PAGE_SIZE / 2;
> struct mock_ring *ring;
> + int err;
>
> ring = kzalloc(sizeof(*ring) + sz, GFP_KERNEL);
> if (!ring)
> return NULL;
>
> - i915_timeline_init(engine->i915, &ring->timeline, engine->name);
> + err = i915_timeline_init(engine->i915,
> + &ring->timeline,
> + engine->name,
> + NULL);
> + if (err) {
> + kfree(ring);
> + return NULL;
> + }
>
> ring->base.size = sz;
> ring->base.effective_size = sz;
> @@ -209,7 +217,11 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,
> engine->base.emit_breadcrumb = mock_emit_breadcrumb;
> engine->base.submit_request = mock_submit_request;
>
> - i915_timeline_init(i915, &engine->base.timeline, engine->base.name);
> + if (i915_timeline_init(i915,
> + &engine->base.timeline,
> + engine->base.name,
> + NULL))
> + goto err_free;
> i915_timeline_set_subclass(&engine->base.timeline, TIMELINE_ENGINE);
>
> intel_engine_init_breadcrumbs(&engine->base);
> @@ -227,6 +239,7 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,
> err_breadcrumbs:
> intel_engine_fini_breadcrumbs(&engine->base);
> i915_timeline_fini(&engine->base.timeline);
> +err_free:
> kfree(engine);
> return NULL;
> }
>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
Regards,
Tvrtko
More information about the Intel-gfx
mailing list