[PATCH 56/59] timeline-hwsp
Chris Wilson
chris at chris-wilson.co.uk
Fri Dec 21 00:47:31 UTC 2018
---
drivers/gpu/drm/i915/i915_drv.h | 4 ++
drivers/gpu/drm/i915/i915_request.c | 25 ++++++------
drivers/gpu/drm/i915/i915_timeline.c | 59 +++++++++++++++++++++++++++-
drivers/gpu/drm/i915/i915_timeline.h | 6 +++
drivers/gpu/drm/i915/intel_lrc.c | 9 +++--
5 files changed, 87 insertions(+), 16 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 2aeea38140b1..06451efdc8f1 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1961,6 +1961,10 @@ struct drm_i915_private {
struct mutex timeline_lock;
struct list_head timelines;
+ /* Pack multiple timelines' seqnos into the same page */
+ struct i915_vma *timeline_hwsp;
+ u64 timeline_free;
+
struct list_head active_rings;
struct list_head closed_vma;
u32 active_requests;
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index f523486d225d..152de321ea9c 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -182,11 +182,12 @@ static void free_capture_list(struct i915_request *request)
static void __retire_engine_request(struct intel_engine_cs *engine,
struct i915_request *rq)
{
- GEM_TRACE("%s(%s) fence %llx:%lld, global=%d, current %d\n",
+ GEM_TRACE("%s(%s) fence %llx:%lld, global=%d, current %d:%d\n",
__func__, engine->name,
rq->fence.context, rq->fence.seqno,
lower_32_bits(rq->global_seqno),
- i915_request_hwsp(rq));
+ i915_request_hwsp(rq),
+ intel_engine_get_seqno(engine));
GEM_BUG_ON(!i915_request_completed(rq));
@@ -244,11 +245,12 @@ static void i915_request_retire(struct i915_request *request)
{
struct i915_gem_active *active, *next;
- GEM_TRACE("%s fence %llx:%lld, global=%d, current %d\n",
+ GEM_TRACE("%s fence %llx:%lld, global=%d, current %d:%d\n",
request->engine->name,
request->fence.context, request->fence.seqno,
lower_32_bits(request->global_seqno),
- i915_request_hwsp(request));
+ i915_request_hwsp(request),
+ intel_engine_get_seqno(request->engine));
lockdep_assert_held(&request->i915->drm.struct_mutex);
GEM_BUG_ON(!i915_sw_fence_signaled(&request->submit));
@@ -307,11 +309,12 @@ void i915_request_retire_upto(struct i915_request *rq)
struct intel_ring *ring = rq->ring;
struct i915_request *tmp;
- GEM_TRACE("%s fence %llx:%lld, global=%d, current %d\n",
+ GEM_TRACE("%s fence %llx:%lld, global=%d, current %d:%d\n",
rq->engine->name,
rq->fence.context, rq->fence.seqno,
lower_32_bits(rq->global_seqno),
- i915_request_hwsp(rq));
+ i915_request_hwsp(rq),
+ intel_engine_get_seqno(rq->engine));
lockdep_assert_held(&rq->i915->drm.struct_mutex);
GEM_BUG_ON(!i915_request_completed(rq));
@@ -348,10 +351,11 @@ void __i915_request_submit(struct i915_request *request)
struct intel_engine_cs *engine = request->engine;
u32 seqno;
- GEM_TRACE("%s fence %llx:%lld -> global=%d, current %d\n",
+ GEM_TRACE("%s fence %llx:%lld -> global=%d, current %d:%d\n",
engine->name,
request->fence.context, request->fence.seqno,
engine->timeline.seqno + 1,
+ i915_request_hwsp(request),
intel_engine_get_seqno(engine));
GEM_BUG_ON(!irqs_disabled());
@@ -400,11 +404,12 @@ void __i915_request_unsubmit(struct i915_request *request)
{
struct intel_engine_cs *engine = request->engine;
- GEM_TRACE("%s fence %llx:%lld <- global=%d, current %d\n",
+ GEM_TRACE("%s fence %llx:%lld <- global=%d, current %d:%d\n",
engine->name,
request->fence.context, request->fence.seqno,
lower_32_bits(request->global_seqno),
- i915_request_hwsp(request));
+ i915_request_hwsp(request),
+ intel_engine_get_seqno(engine));
GEM_BUG_ON(!irqs_disabled());
lockdep_assert_held(&engine->timeline.lock);
@@ -421,7 +426,6 @@ void __i915_request_unsubmit(struct i915_request *request)
/* We may be recursing from the signal callback of another i915 fence */
spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
request->global_seqno = 0;
- GEM_DEBUG_EXEC(request->hwsp_seqno = POISON_INUSE);
if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags))
intel_engine_cancel_signaling(request);
spin_unlock(&request->lock);
@@ -604,7 +608,6 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
/* No zalloc, must clear what we need by hand */
rq->global_seqno = 0;
- GEM_DEBUG_EXEC(rq->hwsp_seqno = POISON_INUSE);
rq->signaling.wait.global_seqno = 0;
rq->file_priv = NULL;
rq->batch = NULL;
diff --git a/drivers/gpu/drm/i915/i915_timeline.c b/drivers/gpu/drm/i915/i915_timeline.c
index c4e7ad179d86..3aa4e3b33ffc 100644
--- a/drivers/gpu/drm/i915/i915_timeline.c
+++ b/drivers/gpu/drm/i915/i915_timeline.c
@@ -9,6 +9,52 @@
#include "i915_timeline.h"
#include "i915_syncmap.h"
+/* With timeline_lock held, give @timeline a slot in the shared seqno page. */
+static void get_seqno_locked(struct drm_i915_private *i915,
+			     struct i915_timeline *timeline)
+{
+	struct i915_vma *vma;
+	void *vaddr;
+	int offset;
+
+	offset = __builtin_ffsll(i915->gt.timeline_free); /* 0: no free slot */
+	if (offset-- == 0 && i915->gt.timeline_hwsp) {
+		i915_vma_put(i915->gt.timeline_hwsp);
+		i915->gt.timeline_hwsp = NULL;
+	}
+
+	vma = i915->gt.timeline_hwsp;
+	if (!vma) {
+		struct drm_i915_gem_object *bo;
+
+		BUILD_BUG_ON(BITS_PER_TYPE(u64) * CACHELINE_BYTES > PAGE_SIZE);
+		bo = i915_gem_object_create_internal(i915, PAGE_SIZE);
+		if (IS_ERR(bo)) /* reports failure via ERR_PTR, never NULL */
+			return;
+
+		vma = i915_vma_instance(bo, &i915->ggtt.vm, NULL);
+		if (IS_ERR(vma)) {
+			i915_gem_object_put(bo); /* don't leak bo */
+			return;
+		}
+
+		i915->gt.timeline_hwsp = vma;
+		i915->gt.timeline_free = ~0ull;
+		offset = 0;
+	}
+
+	vaddr = i915_gem_object_pin_map(vma->obj, I915_MAP_WB);
+	if (IS_ERR(vaddr))
+		return; /* NOTE(review): caller is never told of failure */
+
+	timeline->hwsp_ggtt = i915_vma_get(vma);
+	timeline->hwsp_offset = offset * CACHELINE_BYTES;
+	timeline->hwsp_seqno = memset(vaddr + timeline->hwsp_offset, 0,
+				      sizeof(*timeline->hwsp_seqno));
+
+	i915->gt.timeline_free &= ~BIT_ULL(offset);
+}
+
void i915_timeline_init(struct drm_i915_private *i915,
struct i915_timeline *timeline,
const char *name)
@@ -25,6 +71,7 @@ void i915_timeline_init(struct drm_i915_private *i915,
timeline->name = name;
mutex_lock(&i915->gt.timeline_lock);
+ get_seqno_locked(i915, timeline);
list_add(&timeline->link, &i915->gt.timelines);
mutex_unlock(&i915->gt.timeline_lock);
@@ -69,13 +116,21 @@ void i915_timelines_park(struct drm_i915_private *i915)
void i915_timeline_fini(struct i915_timeline *timeline)
{
+ struct drm_i915_private *i915 = timeline->i915;
+
GEM_BUG_ON(!list_empty(&timeline->requests));
i915_syncmap_free(&timeline->sync);
- mutex_lock(&timeline->i915->gt.timeline_lock);
+ mutex_lock(&i915->gt.timeline_lock);
list_del(&timeline->link);
- mutex_unlock(&timeline->i915->gt.timeline_lock);
+ if (timeline->hwsp_ggtt == i915->gt.timeline_hwsp) /* our page is still the shared one */
+ i915->gt.timeline_free |= /* so mark our slot free again */
+ BIT_ULL(timeline->hwsp_offset / CACHELINE_BYTES);
+ mutex_unlock(&i915->gt.timeline_lock);
+ /* Release the mapping pin and vma reference taken in get_seqno_locked(). */
+ i915_gem_object_unpin_map(timeline->hwsp_ggtt->obj); /* NOTE(review): hwsp_ggtt is not set when get_seqno_locked() returns early - confirm it cannot be NULL here */
+ i915_vma_put(timeline->hwsp_ggtt);
}
struct i915_timeline *
diff --git a/drivers/gpu/drm/i915/i915_timeline.h b/drivers/gpu/drm/i915/i915_timeline.h
index df0ddcd6459d..826f16771a3d 100644
--- a/drivers/gpu/drm/i915/i915_timeline.h
+++ b/drivers/gpu/drm/i915/i915_timeline.h
@@ -32,6 +32,8 @@
#include "i915_syncmap.h"
#include "i915_utils.h"
+struct i915_vma;
+
struct i915_timeline {
u64 fence_context;
u32 seqno;
@@ -41,6 +43,10 @@ struct i915_timeline {
#define TIMELINE_ENGINE 1
#define TIMELINE_VIRTUAL 2
+ const u32 *hwsp_seqno;
+ struct i915_vma *hwsp_ggtt;
+ u32 hwsp_offset;
+
/**
* List of breadcrumbs associated with GPU requests currently
* outstanding.
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 7481e394edbb..f7e92d78b649 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -472,11 +472,12 @@ static void execlists_submit_ports(struct intel_engine_cs *engine)
desc = execlists_update_context(rq);
GEM_DEBUG_EXEC(port[n].context_id = upper_32_bits(desc));
- GEM_TRACE("%s in[%d]: ctx=%d.%d, global=%d (fence %llx:%lld) (current %d), prio=%d\n",
+ GEM_TRACE("%s in[%d]: ctx=%d.%d, global=%d (fence %llx:%lld) (current %d:%d), prio=%d\n",
engine->name, n,
port[n].context_id, count,
lower_32_bits(rq->global_seqno),
rq->fence.context, rq->fence.seqno,
+ i915_request_hwsp(rq),
intel_engine_get_seqno(engine),
rq_prio(rq));
} else {
@@ -904,11 +905,12 @@ execlists_cancel_port_requests(struct intel_engine_execlists * const execlists)
while (num_ports-- && port_isset(port)) {
struct i915_request *rq = port_request(port);
- GEM_TRACE("%s:port%u global=%d (fence %llx:%lld), (current %d)\n",
+ GEM_TRACE("%s:port%u global=%d (fence %llx:%lld), (current %d:%d)\n",
rq->engine->name,
(unsigned int)(port - execlists->port),
lower_32_bits(rq->global_seqno),
rq->fence.context, rq->fence.seqno,
+ i915_request_hwsp(rq),
intel_engine_get_seqno(rq->engine));
GEM_BUG_ON(!execlists->active);
@@ -1132,12 +1134,13 @@ static void process_csb(struct intel_engine_cs *engine)
EXECLISTS_ACTIVE_USER));
rq = port_unpack(port, &count);
- GEM_TRACE("%s out[0]: ctx=%d.%d, global=%d (fence %llx:%lld) (current %d), prio=%d\n",
+ GEM_TRACE("%s out[0]: ctx=%d.%d, global=%d (fence %llx:%lld) (current %d:%d), prio=%d\n",
engine->name,
port->context_id, count,
rq ? lower_32_bits(rq->global_seqno) : 0,
rq ? rq->fence.context : 0,
rq ? rq->fence.seqno : 0,
+ rq ? i915_request_hwsp(rq) : 0,
intel_engine_get_seqno(engine),
rq ? rq_prio(rq) : 0);
--
2.20.1
More information about the Intel-gfx-trybot
mailing list