[Intel-gfx] [RFC 26/29] drm/i915: gvt: workload scheduler

Zhi Wang zhi.a.wang at intel.com
Thu Jan 28 02:21:48 PST 2016


This patch introduces the GVT workload scheduler routines.

The GVT workload scheduler is responsible for picking and executing GVT
workloads from the currently scheduled vGPU. Before a workload is submitted to
host i915, the guest execlist context is shadowed into the host GVT shadow
context, and the instructions in the guest ring buffer are copied into the GVT
shadow ring buffer. The GVT-g workload scheduler then scans the instructions in
the guest ring buffer and submits the workload to host i915.

Signed-off-by: Zhi Wang <zhi.a.wang at intel.com>
---
 drivers/gpu/drm/i915/gvt/Makefile    |   2 +-
 drivers/gpu/drm/i915/gvt/debug.h     |   4 +
 drivers/gpu/drm/i915/gvt/gvt.c       |   4 +
 drivers/gpu/drm/i915/gvt/gvt.h       |   2 +
 drivers/gpu/drm/i915/gvt/scheduler.c | 485 +++++++++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/gvt/scheduler.h |  26 +-
 6 files changed, 519 insertions(+), 4 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/gvt/scheduler.c

diff --git a/drivers/gpu/drm/i915/gvt/Makefile b/drivers/gpu/drm/i915/gvt/Makefile
index bb3170b..46f71db 100644
--- a/drivers/gpu/drm/i915/gvt/Makefile
+++ b/drivers/gpu/drm/i915/gvt/Makefile
@@ -1,6 +1,6 @@
 GVT_SOURCE := gvt.o params.o aperture_gm.o mmio.o handlers.o instance.o \
 		trace_points.o interrupt.o gtt.o cfg_space.o opregion.o utility.o \
-		fb_decoder.o display.o edid.o control.o execlist.o
+		fb_decoder.o display.o edid.o control.o execlist.o scheduler.o
 
 ccflags-y			+= -I$(src) -I$(src)/.. -Wall -Werror -Wno-unused-function
 i915_gvt-y			:= $(GVT_SOURCE)
diff --git a/drivers/gpu/drm/i915/gvt/debug.h b/drivers/gpu/drm/i915/gvt/debug.h
index 88b7d48..c4c03ac 100644
--- a/drivers/gpu/drm/i915/gvt/debug.h
+++ b/drivers/gpu/drm/i915/gvt/debug.h
@@ -75,6 +75,7 @@ enum {
 	GVT_DBG_RENDER = (1 << 4),
 	GVT_DBG_EDID = (1 << 5),
 	GVT_DBG_EL = (1 << 6),
+	GVT_DBG_SCHED = (1 << 7),
 };
 
 #define gvt_dbg_core(fmt, args...) \
@@ -89,4 +90,7 @@ enum {
 #define gvt_dbg_el(fmt, args...) \
 	gvt_dbg(GVT_DBG_EL, fmt, ##args)
 
+#define gvt_dbg_sched(fmt, args...) \
+	gvt_dbg(GVT_DBG_SCHED, fmt, ##args) /* gvt_dbg() tagged GVT_DBG_SCHED */
+
 #endif
diff --git a/drivers/gpu/drm/i915/gvt/gvt.c b/drivers/gpu/drm/i915/gvt/gvt.c
index 77fe5d39..8b56c00 100644
--- a/drivers/gpu/drm/i915/gvt/gvt.c
+++ b/drivers/gpu/drm/i915/gvt/gvt.c
@@ -283,6 +283,7 @@ static bool init_service_thread(struct pgt_device *pdev)
 static void clean_pgt_device(struct pgt_device *pdev)
 {
 	clean_service_thread(pdev);
+	gvt_clean_workload_scheduler(pdev);
 	gvt_clean_control_interface(pdev);
 	gvt_clean_gtt(pdev);
 	gvt_irq_exit(pdev);
@@ -330,6 +331,9 @@ err:
 
 static bool post_init_pgt_device(struct pgt_device *pdev)
 {
+	if (!gvt_init_workload_scheduler(pdev))
+		return false;
+
 	return true;
 }
 
diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h
index 02e5a6e..83f1017 100644
--- a/drivers/gpu/drm/i915/gvt/gvt.h
+++ b/drivers/gpu/drm/i915/gvt/gvt.h
@@ -223,6 +223,8 @@ struct pgt_device {
 
 	struct gvt_gtt_info gtt;
 	struct gvt_device_control control;
+
+	struct gvt_workload_scheduler workload_scheduler;
 };
 
 /* request types to wake up main thread */
diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
new file mode 100644
index 0000000..cdf179f
--- /dev/null
+++ b/drivers/gpu/drm/i915/gvt/scheduler.c
@@ -0,0 +1,485 @@
+/*
+ * Copyright(c) 2011-2016 Intel Corporation. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "gvt.h"
+
+#include <linux/kthread.h>
+
+/*
+ * Fill the host shadow context for @workload's ring from the guest context at
+ * the workload's LRCA. Context pages 2 and up are copied wholesale; on page 1
+ * (the ring context) only selected registers and the area after the register
+ * block are copied, and gvt_set_context_pdp_root_pointer() is applied with
+ * the workload's shadow page table.
+ *
+ * Returns false if a guest context page cannot be translated, else true.
+ */
+static bool populate_shadow_context(struct gvt_workload *workload)
+{
+	struct vgt_device *vgt = workload->vgt;
+	struct pgt_device *pdev = vgt->pdev;
+	int ring_id = workload->ring_id;
+	struct drm_i915_gem_object *state_obj =
+		pdev->workload_scheduler.shadow_ctx->engine[ring_id].state;
+	struct execlist_ring_context *guest_regs, *shadow_regs;
+	unsigned long lrca_pn, nr_pages;
+	void *guest_va, *shadow_va;
+	int pn;
+
+	gvt_dbg_sched("ring id %d workload lrca %x", ring_id, workload->ctx_desc.lrca);
+
+	lrca_pn = workload->ctx_desc.lrca;
+	nr_pages = intel_lr_context_size(&pdev->dev_priv->ring[ring_id]);
+	nr_pages >>= PAGE_SHIFT;
+
+	/* Page 0 is not copied at all; page 1 is handled separately below. */
+	for (pn = 2; pn < nr_pages; pn++) {
+		guest_va = gvt_gma_to_va(vgt->gtt.ggtt_mm,
+				(lrca_pn + pn) << GTT_PAGE_SHIFT);
+		if (!guest_va) {
+			gvt_err("invalid guest context descriptor");
+			return false;
+		}
+
+		shadow_va = kmap_atomic(i915_gem_object_get_page(state_obj,
+					LRC_PPHWSP_PN + pn));
+		hypervisor_read_va(vgt, guest_va, shadow_va, GTT_PAGE_SIZE, 1);
+		kunmap_atomic(shadow_va);
+	}
+
+	guest_regs = gvt_gma_to_va(vgt->gtt.ggtt_mm,
+			(lrca_pn + 1) << GTT_PAGE_SHIFT);
+	if (!guest_regs) {
+		gvt_err("invalid guest context descriptor");
+		return false;
+	}
+
+	shadow_regs = kmap_atomic(i915_gem_object_get_page(state_obj,
+				LRC_PPHWSP_PN + 1));
+
+#define COPY_REG(name) \
+	hypervisor_read_va(vgt, &guest_regs->name.val, \
+		&shadow_regs->name.val, 4, 1)
+
+	COPY_REG(ctx_ctrl);
+	COPY_REG(ctx_timestamp);
+	if (ring_id == RCS) {
+		COPY_REG(bb_per_ctx_ptr);
+		COPY_REG(rcs_indirect_ctx);
+		COPY_REG(rcs_indirect_ctx_offset);
+	}
+#undef COPY_REG
+
+	gvt_set_context_pdp_root_pointer(vgt, shadow_regs,
+			workload->shadow_mm->shadow_page_table);
+
+	/* Copy the remainder of the page that follows the register block. */
+	hypervisor_read_va(vgt, (void *)guest_regs + sizeof(*guest_regs),
+			(void *)shadow_regs + sizeof(*shadow_regs),
+			GTT_PAGE_SIZE - sizeof(*guest_regs), 1);
+
+	kunmap_atomic(shadow_regs);
+	return true;
+}
+
+static void shadow_context_schedule_in(void *data)
+{
+	struct gvt_workload *scheduled = data;
+	/* Mark the shadow context active, then wake shadow_ctx_status_wq waiters. */
+	atomic_set(&scheduled->shadow_ctx_active, 1);
+	wake_up(&scheduled->shadow_ctx_status_wq);
+}
+
+static void shadow_context_schedule_out(void *data)
+{
+	struct gvt_workload *scheduled = data;
+	/* Mark the shadow context inactive, then wake shadow_ctx_status_wq waiters. */
+	atomic_set(&scheduled->shadow_ctx_active, 0);
+	wake_up(&scheduled->shadow_ctx_status_wq);
+}
+
+/*
+ * Allocate an i915 request on the shadow context, populate the shadow context
+ * from the guest and queue the request. Returns true once queued. On failure
+ * returns false with workload->status set to a negative errno and
+ * workload->req reset to NULL, so callers never see an ERR_PTR request.
+ */
+static bool dispatch_workload(struct gvt_workload *workload)
+{
+	struct pgt_device *pdev = workload->vgt->pdev;
+	int ring_id = workload->ring_id;
+	struct intel_context *shadow_ctx = pdev->workload_scheduler.shadow_ctx;
+	struct drm_i915_private *dev_priv = pdev->dev_priv;
+
+	gvt_dbg_sched("ring id %d prepare to dispatch workload %p",
+		ring_id, workload);
+
+	workload->req = i915_gem_request_alloc(&dev_priv->ring[ring_id],
+					       shadow_ctx);
+	if (IS_ERR_OR_NULL(workload->req)) {
+		gvt_err("fail to allocate gem request");
+		workload->status = workload->req ?
+			PTR_ERR(workload->req) : -ENOMEM;
+		workload->req = NULL;
+		return false;
+	}
+
+	gvt_dbg_sched("ring id %d get i915 gem request %p",
+			ring_id, workload->req);
+
+	mutex_lock(&pdev->lock);
+	if (!populate_shadow_context(workload)) {
+		workload->status = -EINVAL;
+		i915_gem_request_cancel(workload->req);
+		workload->req = NULL;
+		mutex_unlock(&pdev->lock);
+		return false;
+	}
+	mutex_unlock(&pdev->lock);
+
+	gvt_dbg_sched("ring id %d submit workload to i915 %p",
+			ring_id, workload->req);
+
+	shadow_ctx->gvt_context_private_data[ring_id] = workload;
+	shadow_ctx->gvt_context_addressing_mode[ring_id] =
+		workload->ctx_desc.addressing_mode << 3;
+	shadow_ctx->gvt_context_schedule_in = shadow_context_schedule_in;
+	shadow_ctx->gvt_context_schedule_out = shadow_context_schedule_out;
+
+	/* Hold our own reference; complete_current_workload() drops it. */
+	i915_gem_request_reference(workload->req);
+	i915_add_request_no_flush(workload->req);
+	workload->dispatched = true;
+	return true;
+}
+
+/*
+ * Choose the workload the thread for @ring_id should run next, under
+ * pdev->lock. Returns NULL when there is no current instance, a reschedule
+ * is pending, or the current instance's queue for the ring is empty.
+ */
+static struct gvt_workload *pick_next_workload(
+		struct pgt_device *pdev, int ring_id)
+{
+	struct gvt_workload_scheduler *sched = &pdev->workload_scheduler;
+	struct gvt_workload *picked = NULL;
+	struct list_head *queue;
+
+	mutex_lock(&pdev->lock);
+
+	/* Bail out: no current instance / reschedule pending / empty queue. */
+	if (!sched->current_instance) {
+		gvt_dbg_sched("ring id %d stop - no current instance", ring_id);
+		goto unlock;
+	}
+
+	if (sched->need_reschedule) {
+		gvt_dbg_sched("ring id %d stop - will reschedule", ring_id);
+		goto unlock;
+	}
+
+	queue = workload_q_head(sched->current_instance, ring_id);
+	if (list_empty(queue)) {
+		gvt_dbg_sched("ring id %d stop - no avaiable workload", ring_id);
+		goto unlock;
+	}
+
+	/*
+	 * A current workload is still set, presumably because an earlier
+	 * dispatch attempt did not go through: hand it out again.
+	 */
+	picked = sched->current_workload[ring_id];
+	if (picked) {
+		gvt_dbg_sched("ring id %d still have current workload %p",
+				ring_id, picked);
+		goto unlock;
+	}
+
+	/*
+	 * Take the head of the queue as the new current workload. While it is
+	 * set, the schedule policy routines wait for it to become NULL before
+	 * scheduling the instance out.
+	 */
+	picked = container_of(queue->next, struct gvt_workload, list);
+	sched->current_workload[ring_id] = picked;
+
+	gvt_dbg_sched("ring id %d pick new workload %p", ring_id, picked);
+
+	atomic_inc(&picked->vgt->running_workload_num);
+unlock:
+	mutex_unlock(&pdev->lock);
+
+	return picked;
+}
+
+/*
+ * Write the shadow context for @workload's ring back to the guest context at
+ * the workload's LRCA: context pages 2 and up are copied wholesale, the guest
+ * ring_header is set from workload->rb_tail, and on the ring-context page
+ * ctx_ctrl, ctx_timestamp and everything after the register block are copied.
+ * Logs an error and returns early if a guest page cannot be translated.
+ */
+static void update_guest_context(struct gvt_workload *workload)
+{
+	struct vgt_device *vgt = workload->vgt;
+	struct pgt_device *pdev = vgt->pdev;
+	struct gvt_workload_scheduler *scheduler = &pdev->workload_scheduler;
+	int ring_id = workload->ring_id;
+	struct intel_context *shadow_ctx = scheduler->shadow_ctx;
+	struct drm_i915_gem_object *ctx_obj = shadow_ctx->engine[ring_id].state;
+	struct execlist_ring_context *guest_ring_context, *shadow_ring_context;
+	struct page *page;
+	void *src, *dst;
+	unsigned long guest_context_pn, context_page_num;
+	int i;
+
+	gvt_dbg_sched("ring id %d workload lrca %x", ring_id, workload->ctx_desc.lrca);
+
+	guest_context_pn = workload->ctx_desc.lrca;
+	context_page_num = intel_lr_context_size(&pdev->dev_priv->ring[ring_id]);
+	context_page_num = context_page_num >> PAGE_SHIFT;
+
+	for (i = 2; i < context_page_num; i++) {
+		dst = gvt_gma_to_va(vgt->gtt.ggtt_mm,
+				(guest_context_pn + i) << GTT_PAGE_SHIFT);
+		if (!dst) {
+			gvt_err("invalid guest context descriptor");
+			return;
+		}
+
+		page = i915_gem_object_get_page(ctx_obj, LRC_PPHWSP_PN + i);
+		src = kmap_atomic(page);
+		hypervisor_write_va(vgt, dst, src, GTT_PAGE_SIZE, 1);
+		/* Unmap src (the kmap_atomic() mapping), not dst. */
+		kunmap_atomic(src);
+	}
+
+	guest_ring_context = gvt_gma_to_va(vgt->gtt.ggtt_mm,
+			(guest_context_pn + 1) << GTT_PAGE_SHIFT);
+	if (!guest_ring_context) {
+		gvt_err("invalid guest context descriptor");
+		return;
+	}
+
+	hypervisor_write_va(vgt, &guest_ring_context->ring_header.val,
+		&workload->rb_tail, 4, 1);
+
+	page = i915_gem_object_get_page(ctx_obj, LRC_PPHWSP_PN + 1);
+	shadow_ring_context = kmap_atomic(page);
+
+#define COPY_REG(name) \
+	hypervisor_write_va(vgt, &guest_ring_context->name.val, \
+		&shadow_ring_context->name.val, 4, 1)
+
+	COPY_REG(ctx_ctrl);
+	COPY_REG(ctx_timestamp);
+#undef COPY_REG
+
+	hypervisor_write_va(vgt,
+			(void *)guest_ring_context + sizeof(*guest_ring_context),
+			(void *)shadow_ring_context + sizeof(*shadow_ring_context),
+			GTT_PAGE_SIZE - sizeof(*guest_ring_context), 1);
+
+	kunmap_atomic(shadow_ring_context);
+}
+
+/*
+ * Finish the current workload of @ring_id under pdev->lock: on status 0 wait
+ * for the shadow context to go inactive and write it back to the guest, then
+ * release the request, retire the workload and wake idle waiters.
+ */
+static void complete_current_workload(struct pgt_device *pdev, int ring_id)
+{
+	struct gvt_workload_scheduler *scheduler = &pdev->workload_scheduler;
+	struct gvt_workload *workload;
+
+	mutex_lock(&pdev->lock);
+	workload = scheduler->current_workload[ring_id];
+
+	if (!workload->status) {
+		wait_event(workload->shadow_ctx_status_wq,
+				!atomic_read(&workload->shadow_ctx_active));
+		update_guest_context(workload);
+	}
+	if (workload->req)
+		i915_gem_request_unreference(workload->req);
+
+	gvt_dbg_sched("ring id %d complete workload %p status %d",
+			ring_id, workload, workload->status);
+
+	scheduler->current_workload[ring_id] = NULL;
+	atomic_dec(&workload->vgt->running_workload_num);
+	list_del_init(&workload->list);
+	workload->complete(workload);
+
+	/* Unconditional: an unlocked waitqueue_active() check can miss a waiter. */
+	wake_up(&scheduler->workload_complete_wq);
+	mutex_unlock(&pdev->lock);
+}
+
+struct workload_thread_param {	/* start-up argument of workload_thread(), which frees it */
+	struct pgt_device *pdev;	/* device the thread works for */
+	int ring_id;			/* index of the ring the thread serves */
+};
+
+/*
+ * Per-ring scheduler thread. @priv is a heap-allocated workload_thread_param,
+ * freed here. Until kthread_stop(): wait for pick_next_workload() to return a
+ * workload, dispatch it under struct_mutex, wait for it and complete it.
+ */
+static int workload_thread(void *priv)
+{
+	struct workload_thread_param *p = (struct workload_thread_param *)priv;
+	struct pgt_device *pdev = p->pdev;
+	int ring_id = p->ring_id;
+	struct gvt_workload_scheduler *scheduler = &pdev->workload_scheduler;
+	struct gvt_workload *workload = NULL;
+	int r;
+
+	kfree(p);
+	gvt_dbg_core("workload thread for ring %d started", ring_id);
+
+	while (!kthread_should_stop()) {
+		r = wait_event_interruptible(scheduler->waitq[ring_id],
+				kthread_should_stop() ||
+				(workload = pick_next_workload(pdev, ring_id)));
+		if (r)
+			gvt_warn("workload thread waken up by unexpected signal!");
+		if (kthread_should_stop())
+			break;
+		/* An interrupted wait leaves no workload picked: wait again. */
+		if (!workload)
+			continue;
+
+		gvt_dbg_sched("ring id %d next workload %p vgt %d",
+				workload->ring_id, workload, workload->vgt->id);
+
+		/* Always take i915 big lock first */
+		r = i915_mutex_lock_interruptible(pdev->dev_priv->dev);
+		if (r < 0) {
+			gvt_warn("i915 submission channel is not available, retry");
+			/* Really sleep one tick; plain schedule_timeout() would not. */
+			schedule_timeout_uninterruptible(1);
+			continue;
+		}
+
+		gvt_dbg_sched("ring id %d will dispatch workload %p",
+				workload->ring_id, workload);
+
+		if (!dispatch_workload(workload)) {
+			gvt_warn("fail to dispatch workload, skip");
+			goto complete;
+		}
+
+		gvt_dbg_sched("ring id %d wait workload %p",
+				workload->ring_id, workload);
+		workload->status = i915_wait_request(workload->req);
+		if (workload->status != 0)
+			gvt_warn("fail to wait workload, skip");
+complete:
+		gvt_dbg_sched("will complete workload %p, status: %d",
+				workload, workload->status);
+		complete_current_workload(pdev, ring_id);
+		mutex_unlock(&pdev->dev_priv->dev->struct_mutex);
+	}
+	return 0;
+}
+
+/* Block until @vgt has no running workload; return at once if already idle. */
+void gvt_wait_instance_idle(struct vgt_device *vgt)
+{
+	wait_queue_head_t *done_wq =
+		&vgt->pdev->workload_scheduler.workload_complete_wq;
+
+	if (!atomic_read(&vgt->running_workload_num))
+		return;
+
+	gvt_dbg_sched("wait instance idle");
+	wait_event(*done_wq, !atomic_read(&vgt->running_workload_num));
+}
+
+/* Stop the workload threads and drop the shadow context; NULL ones are skipped. */
+void gvt_clean_workload_scheduler(struct pgt_device *pdev)
+{
+	struct gvt_workload_scheduler *scheduler = &pdev->workload_scheduler;
+	int i;
+
+	gvt_dbg_core("clean workload scheduler");
+	for (i = 0; i < I915_NUM_RINGS; i++) {
+		if (scheduler->thread[i]) {
+			kthread_stop(scheduler->thread[i]);
+			scheduler->thread[i] = NULL;
+		}
+	}
+	if (scheduler->shadow_ctx)
+		i915_gem_context_unreference(scheduler->shadow_ctx);
+	scheduler->shadow_ctx = NULL;
+}
+
+/*
+ * Create the shadow context and start one workload thread per ring.
+ * Returns true on success, false (after undoing partial setup) otherwise.
+ */
+bool gvt_init_workload_scheduler(struct pgt_device *pdev)
+{
+	struct gvt_workload_scheduler *scheduler = &pdev->workload_scheduler;
+	struct workload_thread_param *param = NULL;
+	int i;
+
+	gvt_dbg_core("init workload scheduler");
+	memset(scheduler, 0, sizeof(*scheduler));
+	init_waitqueue_head(&scheduler->workload_complete_wq);
+
+	scheduler->shadow_ctx = i915_gem_create_gvt_context(pdev->dev_priv->dev);
+	if (IS_ERR_OR_NULL(scheduler->shadow_ctx)) {
+		gvt_err("fail to create shadow context");
+		/* Nothing to tear down yet; leave no bogus context pointer. */
+		scheduler->shadow_ctx = NULL;
+		return false;
+	}
+
+	for (i = 0; i < I915_NUM_RINGS; i++) {
+		init_waitqueue_head(&scheduler->waitq[i]);
+		param = kzalloc(sizeof(*param), GFP_KERNEL);
+		if (!param) {
+			gvt_err("fail to allocate workload thread param");
+			goto err;
+		}
+		param->pdev = pdev;
+		param->ring_id = i;
+		scheduler->thread[i] = kthread_run(workload_thread, param,
+			"gvt workload %d", i);
+		/* kthread_run() reports failure as ERR_PTR(), never NULL. */
+		if (IS_ERR(scheduler->thread[i])) {
+			gvt_err("fail to create workload thread");
+			scheduler->thread[i] = NULL;
+			goto err;
+		}
+	}
+	return true;
+err:
+	/* kfree(NULL) is a no-op; a started thread owns and frees its param. */
+	kfree(param);
+	gvt_clean_workload_scheduler(pdev);
+	return false;
+}
diff --git a/drivers/gpu/drm/i915/gvt/scheduler.h b/drivers/gpu/drm/i915/gvt/scheduler.h
index dd24fda..c4e7fa2 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.h
+++ b/drivers/gpu/drm/i915/gvt/scheduler.h
@@ -24,6 +24,19 @@
 #ifndef _GVT_SCHEDULER_H_
 #define _GVT_SCHEDULER_H_
 
+struct gvt_workload_scheduler {
+	struct vgt_device *current_instance;	/* vGPU whose queues pick_next_workload() serves */
+	struct vgt_device *next_instance;	/* NOTE(review): unused in scheduler.c; presumably the vGPU to switch to */
+	struct gvt_workload *current_workload[I915_NUM_RINGS];	/* picked workload, per ring */
+	bool need_reschedule;	/* while set, pick_next_workload() returns NULL */
+
+	struct intel_context *shadow_ctx;	/* host context the i915 requests are allocated on */
+
+	wait_queue_head_t workload_complete_wq;	/* woken by complete_current_workload() */
+	struct task_struct *thread[I915_NUM_RINGS];	/* one workload_thread() per ring */
+	wait_queue_head_t waitq[I915_NUM_RINGS];	/* per-ring thread sleeps here; woken by queue_workload() */
+};
+
 struct gvt_workload {
 	struct vgt_device *vgt;
 	int ring_id;
@@ -52,8 +65,15 @@ struct gvt_workload {
 #define workload_q_head(vgt, ring_id) \
 	(&(vgt->virtual_execlist_info[ring_id].workload_q_head))
 
-#define queue_workload(workload) \
-	list_add_tail(&workload->list, \
-	workload_q_head(workload->vgt, workload->ring_id))
+#define queue_workload(workload) do { \
+	list_add_tail(&(workload)->list, workload_q_head((workload)->vgt, (workload)->ring_id)); \
+	wake_up(&(workload)->vgt->pdev->workload_scheduler.waitq[(workload)->ring_id]); \
+} while (0) /* enqueue, then wake the ring's thread; @workload is evaluated repeatedly */
+
+bool gvt_init_workload_scheduler(struct pgt_device *pdev);
+
+void gvt_clean_workload_scheduler(struct pgt_device *pdev);
+
+void gvt_wait_instance_idle(struct vgt_device *vgt);
 
 #endif
-- 
1.9.1



More information about the Intel-gfx mailing list