[Intel-gfx] [PATCH v2 06/12] drm/i915: vgpu workload submission pv support
Xiaolin Zhang
xiaolin.zhang at intel.com
Fri Oct 9 00:04:37 UTC 2020
To improve efficiency and reduce the complexity of vgpu workload
submission, implement a PV version of the workload submission backend.
Per-engine submission data is kept in the shared memory, which
eliminates execlists CSB processing and the context switch interrupt
from the submission routine.
A new PV_SUBMISSION pv_cap controls this level of PV support on both
the guest and the host side.
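In short, the guest-side fast path boils down to the following
(a condensed, slightly simplified sketch of pv_submit() added below;
all names are taken from this patch):

	struct intel_vgpu_pv_submission *sub_data =
		pv->submission_data[engine->id];

	/* write the lrc descriptors for the ports into shared memory */
	for (n = 0; out != end; n++)
		sub_data->descs[n] = execlists_update_context(*out++);

	spin_lock(&pv->submission_lock[engine->id]);
	sub_data->submitted = true;
	/* one MMIO write notifies GVT, replacing ELSP writes + CSB handling */
	writel(PV_CMD_SUBMIT_WORKLOAD, execlists->submit_reg);
	/* wait for GVT to clear 'submitted' once it picks up the workload */
	err = wait_for_atomic_us(!READ_ONCE(sub_data->submitted), 1000);
	spin_unlock(&pv->submission_lock[engine->id]);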
Signed-off-by: Xiaolin Zhang <xiaolin.zhang at intel.com>
---
drivers/gpu/drm/i915/Makefile | 2 +-
drivers/gpu/drm/i915/gt/intel_lrc.c | 2 +
drivers/gpu/drm/i915/i915_vgpu.c | 68 +++++-
drivers/gpu/drm/i915/i915_vgpu.h | 28 +++
drivers/gpu/drm/i915/intel_pv_submission.c | 325 +++++++++++++++++++++++++++++
5 files changed, 418 insertions(+), 7 deletions(-)
create mode 100644 drivers/gpu/drm/i915/intel_pv_submission.c
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index e5574e50..13d1739 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -269,7 +269,7 @@ i915-$(CONFIG_DRM_I915_SELFTEST) += \
selftests/librapl.o
# virtual gpu code
-i915-y += i915_vgpu.o
+i915-y += i915_vgpu.o intel_pv_submission.o
ifeq ($(CONFIG_DRM_I915_GVT),y)
i915-y += intel_gvt.o
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 2875370..d5b6de8 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -5079,6 +5079,8 @@ void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
if (IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
engine->flags |= I915_ENGINE_HAS_TIMESLICES;
}
+ } else {
+ intel_vgpu_pv_config_caps(engine->i915, PV_SUBMISSION, engine);
}
if (INTEL_GEN(engine->i915) >= 12)
diff --git a/drivers/gpu/drm/i915/i915_vgpu.c b/drivers/gpu/drm/i915/i915_vgpu.c
index b11fcae..e69a2dd 100644
--- a/drivers/gpu/drm/i915/i915_vgpu.c
+++ b/drivers/gpu/drm/i915/i915_vgpu.c
@@ -101,6 +101,7 @@ void intel_vgpu_detect(struct drm_i915_private *dev_priv)
/* guest driver PV capability */
dev_priv->vgpu.pv_caps = PV_PPGTT | PV_GGTT;
+ dev_priv->vgpu.pv_caps |= PV_SUBMISSION;
if (intel_vgpu_pv_detect_caps(dev_priv, shared_area)) {
drm_info(&dev_priv->drm,
@@ -122,6 +123,7 @@ void intel_vgpu_destroy(struct drm_i915_private *i915)
if (!intel_vgpu_active(i915) || !pv)
return;
+ kfree(pv->submission);
__free_page(virt_to_page(pv->mem));
kfree(pv);
}
@@ -606,7 +608,8 @@ static u32 intel_vgpu_pv_get_next_fence(struct intel_vgpu_pv *pv)
}
static int intel_vgpu_pv_send(struct drm_i915_private *i915,
- const u32 *action, u32 len, u32 *status)
+ const u32 *action, u32 len, u32 *status,
+ void __iomem *addr)
{
struct i915_virtual_gpu *vgpu = &i915->vgpu;
struct intel_vgpu_pv *pv = vgpu->pv;
@@ -624,7 +627,10 @@ static int intel_vgpu_pv_send(struct drm_i915_private *i915,
if (unlikely(err))
goto unlink;
- i915->vgpu.pv->notify(i915);
+ if (addr)
+ writel(VGT_G2V_PV_SEND_TRIGGER, addr + vgtif_offset(g2v_notify));
+ else
+ i915->vgpu.pv->notify(i915);
err = intel_vgpu_pv_wait_desc_update(i915, desc, fence, status);
if (unlikely(err))
@@ -652,7 +658,7 @@ static int intel_vgpu_pv_send_cmd_buf(struct drm_i915_private *i915,
spin_lock_irqsave(&vgpu->pv->lock, flags);
- ret = intel_vgpu_pv_send(i915, action, len, &status);
+ ret = intel_vgpu_pv_send(i915, action, len, &status, NULL);
if (unlikely(ret < 0)) {
drm_err(&i915->drm, "PV: send action %#X failed; err=%d status=%#X\n",
action[0], ret, status);
@@ -671,6 +677,19 @@ static void intel_vgpu_pv_notify_mmio(struct drm_i915_private *dev_priv)
vgtif_reg(g2v_notify), VGT_G2V_PV_SEND_TRIGGER);
}
+static void
+intel_vgpu_pv_register_cap_gpa(struct drm_i915_private *i915,
+ struct intel_vgpu_pv_cap_addr *cap_addr,
+ void __iomem *shared_area)
+{
+ u32 data[32];
+ u32 status = ~0;
+
+ data[0] = PV_CMD_REGISTER_CAP_GPA;
+ memcpy(&data[1], cap_addr, sizeof(*cap_addr));
+ intel_vgpu_pv_send(i915, data, 1 + sizeof(*cap_addr) / sizeof(u32), &status, shared_area);
+}
+
/*
* shared_page setup for VGPU PV features
*/
@@ -680,17 +699,21 @@ static int intel_vgpu_pv_setup_shared_page(struct drm_i915_private *i915,
void __iomem *addr;
struct intel_vgpu_pv *pv;
struct intel_vgpu_pv_shared_page *base;
- u64 gpa;
+ struct intel_vgpu_pv_cap_addr cap_addr;
+ void *sub_base;
+ u64 gpa, sub_gpa;
u16 ver_maj, ver_min;
int ret = 0;
+ int i;
+ u32 size;
/* We allocate 1 page shared between guest and GVT for data exchange.
* _______________________________
* |version|PV_DESCs(SEND) |
* |_______________________________PAGE/8
- * | |
+ * |PV_RSVD |
* |_______________________________PAGE/4
- * | |
+ * |PV_SUBMISSION |
* | |
* | |
* |_______________________________PAGE/2
@@ -758,6 +781,33 @@ static int intel_vgpu_pv_setup_shared_page(struct drm_i915_private *i915,
pv->notify = intel_vgpu_pv_notify_mmio;
spin_lock_init(&pv->lock);
+ /* setup PV per engine data exchange structure */
+ if (i915->vgpu.pv_caps & PV_SUBMISSION) {
+ sub_base = (void *)base + PV_SUB_OFF;
+ sub_gpa = gpa + PV_SUB_OFF;
+
+ size = sizeof(struct intel_vgpu_pv_submission);
+ if (size * I915_NUM_ENGINES > (PV_CMD_OFF - PV_SUB_OFF)) {
+ pv->submission = kmalloc_array(I915_NUM_ENGINES, size, GFP_KERNEL);
+ if (!pv->submission) {
+ ret = -ENOMEM;
+ goto err;
+ }
+ sub_base = pv->submission;
+ sub_gpa = virt_to_phys(pv->submission);
+ }
+
+ for (i = 0; i < I915_NUM_ENGINES; i++) {
+ pv->submission_data[i] = sub_base + size * i;
+ pv->submission_data[i]->submitted = false;
+ spin_lock_init(&pv->submission_lock[i]);
+ }
+
+ cap_addr.cap = PV_SUBMISSION;
+ cap_addr.gpa = sub_gpa;
+ intel_vgpu_pv_register_cap_gpa(i915, &cap_addr, shared_area);
+ }
+
return ret;
err:
__free_page(virt_to_page(base));
@@ -772,6 +822,7 @@ void intel_vgpu_pv_config_caps(struct drm_i915_private *i915,
{
struct i915_ppgtt *ppgtt;
struct i915_ggtt *ggtt;
+ struct intel_engine_cs *engine;
if (!intel_vgpu_pv_check_cap(i915, cap))
return;
@@ -787,6 +838,11 @@ void intel_vgpu_pv_config_caps(struct drm_i915_private *i915,
ggtt->vm.vma_ops.bind_vma = intel_vgpu_pv_ggtt_bind;
ggtt->vm.vma_ops.unbind_vma = intel_vgpu_pv_ggtt_unbind_nop;
}
+
+ if (cap == PV_SUBMISSION) {
+ engine = (struct intel_engine_cs *)data;
+ intel_vgpu_pv_set_submission(engine);
+ }
}
/*
diff --git a/drivers/gpu/drm/i915/i915_vgpu.h b/drivers/gpu/drm/i915/i915_vgpu.h
index 39acd93..aab5df3 100644
--- a/drivers/gpu/drm/i915/i915_vgpu.h
+++ b/drivers/gpu/drm/i915/i915_vgpu.h
@@ -25,6 +25,7 @@
#define _I915_VGPU_H_
#include <linux/types.h>
+#include "gt/intel_engine_types.h"
struct drm_i915_private;
struct i915_ggtt;
@@ -32,6 +33,8 @@ struct i915_ggtt;
#define PV_MAJOR 0
#define PV_MINOR 1
#define PV_DESC_OFF (PAGE_SIZE / 256)
+#define PV_RSVD_OFF (PAGE_SIZE / 8)
+#define PV_SUB_OFF (PAGE_SIZE / 4)
#define PV_CMD_OFF (PAGE_SIZE / 2)
/* intel vGPU PV capabilities */
@@ -39,6 +42,7 @@ enum intel_vgpu_pv_caps {
PV_NONE = 0,
PV_PPGTT = BIT(0),
PV_GGTT = BIT(1),
+ PV_SUBMISSION = BIT(2),
};
/* vgpu PV commands */
@@ -48,6 +52,8 @@ enum intel_vgpu_pv_cmd {
PV_CMD_UNBIND_PPGTT,
PV_CMD_BIND_GGTT,
PV_CMD_UNBIND_GGTT,
+ PV_CMD_REGISTER_CAP_GPA,
+ PV_CMD_SUBMIT_WORKLOAD
};
/* A shared memory(4KB) between GVTg and vgpu allocated by guest */
@@ -56,6 +62,11 @@ struct intel_vgpu_pv_shared_page {
u16 ver_minor;
};
+struct intel_vgpu_pv_cap_addr {
+ u32 cap;
+ u64 gpa;
+};
+
/* PV virtual memory address for GGTT/PPGTT */
struct intel_vgpu_pv_vma {
u32 size; /* num of pages */
@@ -65,6 +76,15 @@ struct intel_vgpu_pv_vma {
u64 pml4; /* ppgtt handler */
};
+/* PV workload submission */
+struct intel_vgpu_pv_submission {
+ bool submitted;
+ /* workload lrc descriptor */
+ u64 descs[EXECLIST_MAX_PORTS];
+ /* guest logical context handler */
+ u64 ctx_gpa[EXECLIST_MAX_PORTS];
+} __packed;
+
/*
* Definition of the command transport message header (DW0)
*
@@ -108,6 +128,13 @@ struct intel_vgpu_pv_ct_buf {
struct intel_vgpu_pv {
struct intel_vgpu_pv_shared_page *mem;
+ /* protect per-engine submission data from concurrent submissions */
+ spinlock_t submission_lock[I915_NUM_ENGINES];
+ /* per engine PV workload submission data area */
+ struct intel_vgpu_pv_submission *submission_data[I915_NUM_ENGINES];
+ /* dynamically allocated per-engine PV workload submission data */
+ void *submission;
+
/* PV command buffer support */
struct intel_vgpu_pv_ct_buf ctb;
u32 next_fence;
@@ -136,4 +163,5 @@ bool intel_vgpu_pv_detect_caps(struct drm_i915_private *i915,
void __iomem *shared_area);
void intel_vgpu_pv_config_caps(struct drm_i915_private *i915,
enum intel_vgpu_pv_caps cap, void *data);
+void intel_vgpu_pv_set_submission(struct intel_engine_cs *engine);
#endif /* _I915_VGPU_H_ */
diff --git a/drivers/gpu/drm/i915/intel_pv_submission.c b/drivers/gpu/drm/i915/intel_pv_submission.c
new file mode 100644
index 0000000..f4d4a65
--- /dev/null
+++ b/drivers/gpu/drm/i915/intel_pv_submission.c
@@ -0,0 +1,325 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2018 Intel Corporation
+ */
+
+#include "i915_vgpu.h"
+#include "gt/intel_lrc_reg.h"
+#include "gt/intel_gt_pm.h"
+#include "gt/intel_ring.h"
+#include "i915_trace.h"
+
+#define CTX_DESC_FORCE_RESTORE BIT_ULL(2)
+
+static u64 execlists_update_context(struct i915_request *rq)
+{
+ struct intel_context *ce = rq->context;
+ u64 desc = ce->lrc.desc;
+ u32 tail, prev;
+
+ tail = intel_ring_set_tail(rq->ring, rq->tail);
+ prev = ce->lrc_reg_state[CTX_RING_TAIL];
+ if (unlikely(intel_ring_direction(rq->ring, tail, prev) <= 0))
+ desc |= CTX_DESC_FORCE_RESTORE;
+ ce->lrc_reg_state[CTX_RING_TAIL] = tail;
+ rq->tail = rq->wa_tail;
+ ce->lrc.desc &= ~CTX_DESC_FORCE_RESTORE;
+ return desc;
+}
+
+static inline struct i915_priolist *to_priolist(struct rb_node *rb)
+{
+ return rb_entry(rb, struct i915_priolist, node);
+}
+
+static struct i915_request *schedule_in(struct i915_request *rq, int idx)
+{
+ __intel_gt_pm_get(rq->engine->gt);
+ return i915_request_get(rq);
+}
+
+static void schedule_out(struct i915_request *rq)
+{
+ intel_gt_pm_put_async(rq->engine->gt);
+ i915_request_put(rq);
+}
+
+static void pv_reset_prepare(struct intel_engine_cs *engine)
+{
+ struct intel_engine_execlists * const execlists = &engine->execlists;
+
+ __tasklet_disable_sync_once(&execlists->tasklet);
+}
+
+static void
+cancel_port_requests(struct intel_engine_execlists * const execlists)
+{
+ struct i915_request * const *port, *rq;
+
+ /* Note we are only using the inflight and not the pending queue */
+ for (port = execlists->active; (rq = *port); port++)
+ schedule_out(rq);
+ execlists->active =
+ memset(execlists->inflight, 0, sizeof(execlists->inflight));
+}
+
+static void pv_reset_rewind(struct intel_engine_cs *engine, bool stalled)
+{
+ struct intel_engine_execlists * const execlists = &engine->execlists;
+ struct i915_request *rq;
+ unsigned long flags;
+
+ spin_lock_irqsave(&engine->active.lock, flags);
+
+ cancel_port_requests(execlists);
+
+ /* Push back any incomplete requests for replay after the reset. */
+ rq = execlists_unwind_incomplete_requests(execlists);
+ if (!rq)
+ goto out_unlock;
+
+ if (!i915_request_started(rq))
+ stalled = false;
+
+ __i915_request_reset(rq, stalled);
+ intel_lr_context_reset(engine, rq->context, rq->head, stalled);
+
+out_unlock:
+ spin_unlock_irqrestore(&engine->active.lock, flags);
+}
+
+static void pv_reset_finish(struct intel_engine_cs *engine)
+{
+ struct intel_engine_execlists * const execlists = &engine->execlists;
+
+ if (__tasklet_enable(&execlists->tasklet))
+ /* And kick in case we missed a new request submission. */
+ tasklet_hi_schedule(&execlists->tasklet);
+}
+
+static void pv_reset_cancel(struct intel_engine_cs *engine)
+{
+ struct intel_engine_execlists * const execlists = &engine->execlists;
+ struct i915_request *rq, *rn;
+ struct rb_node *rb;
+ unsigned long flags;
+
+ spin_lock_irqsave(&engine->active.lock, flags);
+
+ /* Cancel the requests on the HW and clear the ELSP tracker. */
+ cancel_port_requests(execlists);
+
+ /* Mark all executing requests as skipped. */
+ list_for_each_entry(rq, &engine->active.requests, sched.link) {
+ i915_request_set_error_once(rq, -EIO);
+ i915_request_mark_complete(rq);
+ }
+
+ /* Flush the queued requests to the timeline list (for retiring). */
+ while ((rb = rb_first_cached(&execlists->queue))) {
+ struct i915_priolist *p = to_priolist(rb);
+ int i;
+
+ priolist_for_each_request_consume(rq, rn, p, i) {
+ list_del_init(&rq->sched.link);
+ __i915_request_submit(rq);
+ dma_fence_set_error(&rq->fence, -EIO);
+ i915_request_mark_complete(rq);
+ }
+
+ rb_erase_cached(&p->node, &execlists->queue);
+ i915_priolist_free(p);
+ }
+
+ execlists->queue_priority_hint = INT_MIN;
+ execlists->queue = RB_ROOT_CACHED;
+
+ spin_unlock_irqrestore(&engine->active.lock, flags);
+}
+
+static void pv_submit(struct intel_engine_cs *engine,
+ struct i915_request **out, struct i915_request **end)
+{
+ struct intel_engine_execlists * const execlists = &engine->execlists;
+ struct intel_vgpu_pv *pv = engine->i915->vgpu.pv;
+ struct intel_vgpu_pv_submission *sub_data = pv->submission_data[engine->id];
+ struct i915_request *rq;
+ int n, err;
+
+ memset(sub_data->descs, 0, sizeof(sub_data->descs));
+ n = 0;
+
+ do {
+ rq = *out++;
+ sub_data->descs[n] = execlists_update_context(rq);
+ n++;
+ } while (out != end);
+
+ spin_lock(&pv->submission_lock[engine->id]);
+ sub_data->submitted = true;
+ writel(PV_CMD_SUBMIT_WORKLOAD, execlists->submit_reg);
+
+#define done (READ_ONCE(sub_data->submitted) == false)
+ err = wait_for_atomic_us(done, 1000);
+#undef done
+ spin_unlock(&pv->submission_lock[engine->id]);
+
+ if (unlikely(err))
+ DRM_ERROR("PV (%s) workload submission failed\n", engine->name);
+}
+
+static void pv_dequeue(struct intel_engine_cs *engine)
+{
+ struct intel_engine_execlists * const execlists = &engine->execlists;
+ struct i915_request **first = execlists->inflight;
+ struct i915_request ** const last_port = first + execlists->port_mask;
+ struct i915_request *last = first[0];
+ struct i915_request **port;
+ struct rb_node *rb;
+ bool submit = false;
+
+ lockdep_assert_held(&engine->active.lock);
+
+ if (last) {
+ if (*++first)
+ return;
+ last = NULL;
+ }
+
+ port = first;
+ while ((rb = rb_first_cached(&execlists->queue))) {
+ struct i915_priolist *p = to_priolist(rb);
+ struct i915_request *rq, *rn;
+ int i;
+
+ priolist_for_each_request_consume(rq, rn, p, i) {
+ if (last && rq->context != last->context) {
+ if (port == last_port)
+ goto done;
+
+ *port = schedule_in(last, port - execlists->inflight);
+ port++;
+ }
+
+ list_del_init(&rq->sched.link);
+ __i915_request_submit(rq);
+ submit = true;
+ last = rq;
+ }
+
+ rb_erase_cached(&p->node, &execlists->queue);
+ i915_priolist_free(p);
+ }
+done:
+ execlists->queue_priority_hint =
+ rb ? to_priolist(rb)->priority : INT_MIN;
+ if (submit) {
+ *port = schedule_in(last, port - execlists->inflight);
+ *++port = NULL;
+ pv_submit(engine, first, port);
+ }
+ execlists->active = execlists->inflight;
+}
+
+static void vgpu_pv_submission_tasklet(unsigned long data)
+{
+ struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
+ struct intel_engine_execlists * const execlists = &engine->execlists;
+ struct i915_request **port, *rq;
+ unsigned long flags;
+ struct intel_vgpu_pv *pv = engine->i915->vgpu.pv;
+ struct intel_vgpu_pv_submission *sub_data = pv->submission_data[engine->id];
+
+ spin_lock_irqsave(&engine->active.lock, flags);
+
+ for (port = execlists->inflight; (rq = *port); port++) {
+ if (!i915_request_completed(rq))
+ break;
+
+ schedule_out(rq);
+ }
+
+ if (port != execlists->inflight) {
+ int idx = port - execlists->inflight;
+ int rem = ARRAY_SIZE(execlists->inflight) - idx;
+
+ memmove(execlists->inflight, port, rem * sizeof(*port));
+ }
+
+ if (!sub_data->submitted)
+ pv_dequeue(engine);
+
+ spin_unlock_irqrestore(&engine->active.lock, flags);
+}
+
+static inline bool
+reset_in_progress(const struct intel_engine_execlists *execlists)
+{
+ return unlikely(!__tasklet_is_enabled(&execlists->tasklet));
+}
+
+static inline int rq_prio(const struct i915_request *rq)
+{
+ return READ_ONCE(rq->sched.attr.priority);
+}
+
+static void queue_request(struct intel_engine_cs *engine,
+ struct i915_request *rq)
+{
+ GEM_BUG_ON(!list_empty(&rq->sched.link));
+ list_add_tail(&rq->sched.link,
+ i915_sched_lookup_priolist(engine, rq_prio(rq)));
+ set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
+}
+
+static void pv_submit_request(struct i915_request *request)
+{
+ struct intel_engine_cs *engine = request->engine;
+ unsigned long flags;
+ struct intel_engine_execlists *execlists = &engine->execlists;
+
+ /* Will be called from irq-context when using foreign fences. */
+ spin_lock_irqsave(&engine->active.lock, flags);
+
+ queue_request(engine, request);
+
+ if (rq_prio(request) <= execlists->queue_priority_hint)
+ goto out;
+
+ execlists->queue_priority_hint = rq_prio(request);
+ if (reset_in_progress(execlists))
+ goto out;
+
+ tasklet_hi_schedule(&execlists->tasklet);
+out:
+ spin_unlock_irqrestore(&engine->active.lock, flags);
+}
+
+void intel_vgpu_pv_set_submission(struct intel_engine_cs *engine)
+{
+ /*
+ * We inherit a bunch of functions from execlists that we'd like
+ * to keep using:
+ *
+ * engine->submit_request = execlists_submit_request;
+ * engine->cancel_requests = execlists_cancel_requests;
+ * engine->schedule = execlists_schedule;
+ *
+ * But we need to override the actual submission backend in order
+ * to talk to the GVT with PV notification message.
+ */
+ engine->submit_request = pv_submit_request;
+ engine->execlists.tasklet.func = vgpu_pv_submission_tasklet;
+
+ /* do not use execlists park/unpark */
+ engine->park = NULL;
+ engine->unpark = NULL;
+
+ engine->reset.prepare = pv_reset_prepare;
+ engine->reset.rewind = pv_reset_rewind;
+ engine->reset.cancel = pv_reset_cancel;
+ engine->reset.finish = pv_reset_finish;
+
+ engine->flags &= ~I915_ENGINE_SUPPORTS_STATS;
+ engine->flags |= I915_ENGINE_NEEDS_BREADCRUMB_TASKLET;
+}
--
2.7.4