[PATCH 3/3] drm/i915/gvt: Add error rating system

fred gao fred.gao at intel.com
Wed Jul 19 06:47:02 UTC 2017


This patch implements an error rating system to track errors that
occur during guest command scanning and workload preparation; the
guest enters failsafe mode once its accumulated score reaches a
limit.

Generally, there are 3 types of errors: a) a command may be unknown;
b) a command may access an invalid address space; c) an internal
error may occur. A penalty score is defined for each of them.
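
Below is an illustrative sketch (not part of the patch itself) of how
the scoring works; the constants mirror the definitions added to gvt.h
in this patch, while the helper vgpu_rate_error() is hypothetical:

	/* penalty charged per error type (see the gvt.h hunk below) */
	#define UNKNOWN_CMD  10
	#define INVALID_ADDR 20
	#define INTERNAL_ERR 20
	/* limit at which the vGPU is put into failsafe mode */
	#define FAILSAFE_MODE_SCORE 20

	/* hypothetical helper: charge a penalty, then check the limit */
	static void vgpu_rate_error(struct intel_vgpu *vgpu, int penalty)
	{
		vgpu->score += penalty;
		if (vgpu->score >= FAILSAFE_MODE_SCORE)
			enter_failsafe_mode(vgpu, GVT_FAILSAFE_INTERNAL_ERROR);
	}

In the patch itself the penalty is added inline at each error site and
the limit is only checked at workload submission time and in the
workload thread, so entering failsafe mode is deferred to the next
workload boundary.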

Signed-off-by: fred gao <fred.gao at intel.com>
---
 drivers/gpu/drm/i915/gvt/cmd_parser.c | 48 +++++++++++++++++++++++++++--------
 drivers/gpu/drm/i915/gvt/execlist.c   |  9 +++++--
 drivers/gpu/drm/i915/gvt/gtt.c        |  3 ++-
 drivers/gpu/drm/i915/gvt/gvt.h        | 11 ++++++++
 drivers/gpu/drm/i915/gvt/handlers.c   |  6 +++++-
 drivers/gpu/drm/i915/gvt/scheduler.c  |  7 +++--
 6 files changed, 68 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c
index df300d4..740f9e4 100644
--- a/drivers/gpu/drm/i915/gvt/cmd_parser.c
+++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c
@@ -837,6 +837,7 @@ static int cmd_reg_handler(struct parser_exec_state *s,
 	struct intel_gvt *gvt = vgpu->gvt;
 
 	if (offset + 4 > gvt->device_info.mmio_size) {
+		vgpu->score += INVALID_ADDR;
 		gvt_vgpu_err("%s access to (%x) outside of MMIO range\n",
 				cmd, offset);
 		return -EINVAL;
@@ -854,8 +855,10 @@ static int cmd_reg_handler(struct parser_exec_state *s,
 	}
 
 	if (is_force_nonpriv_mmio(offset) &&
-	    force_nonpriv_reg_handler(s, offset, index))
+		force_nonpriv_reg_handler(s, offset, index)) {
+		vgpu->score += UNKNOWN_CMD;
 		return -EINVAL;
+	}
 
 	if (offset == i915_mmio_reg_offset(DERRMR) ||
 		offset == i915_mmio_reg_offset(FORCEWAKE_MT)) {
@@ -896,8 +899,10 @@ static int cmd_handler_lri(struct parser_exec_state *s)
 			else
 				ret |= (cmd_reg_inhibit(s, i)) ? -EINVAL : 0;
 		}
-		if (ret)
+		if (ret) {
+			s->vgpu->score += UNKNOWN_CMD;
 			break;
+		}
 		ret |= cmd_reg_handler(s, cmd_reg(s, i), i, "lri");
 	}
 	return ret;
@@ -913,8 +918,10 @@ static int cmd_handler_lrr(struct parser_exec_state *s)
 			ret |= ((cmd_reg_inhibit(s, i) ||
 					(cmd_reg_inhibit(s, i + 1)))) ?
 				-EINVAL : 0;
-		if (ret)
+		if (ret) {
+			s->vgpu->score += UNKNOWN_CMD;
 			break;
+		}
 		ret |= cmd_reg_handler(s, cmd_reg(s, i), i, "lrr-src");
 		ret |= cmd_reg_handler(s, cmd_reg(s, i + 1), i, "lrr-dst");
 	}
@@ -935,8 +942,10 @@ static int cmd_handler_lrm(struct parser_exec_state *s)
 	for (i = 1; i < cmd_len;) {
 		if (IS_BROADWELL(gvt->dev_priv))
 			ret |= (cmd_reg_inhibit(s, i)) ? -EINVAL : 0;
-		if (ret)
+		if (ret) {
+			s->vgpu->score += UNKNOWN_CMD;
 			break;
+		}
 		ret |= cmd_reg_handler(s, cmd_reg(s, i), i, "lrm");
 		if (cmd_val(s, 0) & (1 << 22)) {
 			gma = cmd_gma(s, i + 1);
@@ -1115,8 +1124,10 @@ static int gen8_decode_mi_display_flip(struct parser_exec_state *s,
 	dword2 = cmd_val(s, 2);
 
 	v = (dword0 & GENMASK(21, 19)) >> 19;
-	if (WARN_ON(v >= ARRAY_SIZE(gen8_plane_code)))
+	if (WARN_ON(v >= ARRAY_SIZE(gen8_plane_code))) {
+		s->vgpu->score += UNKNOWN_CMD;
 		return -EINVAL;
+	}
 
 	info->pipe = gen8_plane_code[v].pipe;
 	info->plane = gen8_plane_code[v].plane;
@@ -1136,6 +1147,7 @@ static int gen8_decode_mi_display_flip(struct parser_exec_state *s,
 		info->surf_reg = SPRSURF(info->pipe);
 	} else {
 		WARN_ON(1);
+		s->vgpu->score += UNKNOWN_CMD;
 		return -EINVAL;
 	}
 	return 0;
@@ -1184,6 +1196,7 @@ static int skl_decode_mi_display_flip(struct parser_exec_state *s,
 		break;
 
 	default:
+		s->vgpu->score += UNKNOWN_CMD;
 		gvt_vgpu_err("unknown plane code %d\n", plane);
 		return -EINVAL;
 	}
@@ -1384,7 +1397,7 @@ static inline int cmd_address_audit(struct parser_exec_state *s,
 		}
 	} else if ((!vgpu_gmadr_is_valid(s->vgpu, guest_gma)) ||
 			(!vgpu_gmadr_is_valid(s->vgpu,
-					      guest_gma + op_size - 1))) {
+					guest_gma + op_size - 1))) {
 		ret = -EINVAL;
 		goto err;
 	}
@@ -1393,6 +1406,7 @@ static inline int cmd_address_audit(struct parser_exec_state *s,
 	gvt_vgpu_err("cmd_parser: Malicious %s detected, addr=0x%lx, len=%d!\n",
 			s->info->name, guest_gma, op_size);
 
+	vgpu->score += INVALID_ADDR;
 	pr_err("cmd dump: ");
 	for (i = 0; i < cmd_length(s); i++) {
 		if (!(i % 4))
@@ -1438,6 +1452,7 @@ static inline int unexpected_cmd(struct parser_exec_state *s)
 	struct intel_vgpu *vgpu = s->vgpu;
 
 	gvt_vgpu_err("Unexpected %s in command buffer!\n", s->info->name);
+	vgpu->score += UNKNOWN_CMD;
 
 	return -EINVAL;
 }
@@ -1576,11 +1591,11 @@ static int batch_buffer_needs_scan(struct parser_exec_state *s)
 	return 1;
 }
 
-static uint32_t find_bb_size(struct parser_exec_state *s)
+static int find_bb_size(struct parser_exec_state *s)
 {
 	unsigned long gma = 0;
 	struct cmd_info *info;
-	uint32_t bb_size = 0;
+	int bb_size = 0;
 	uint32_t cmd_len = 0;
 	bool met_bb_end = false;
 	struct intel_vgpu *vgpu = s->vgpu;
@@ -1592,15 +1607,18 @@ static uint32_t find_bb_size(struct parser_exec_state *s)
 
 	info = get_cmd_info(s->vgpu->gvt, cmd, s->ring_id);
 	if (info == NULL) {
+		vgpu->score += UNKNOWN_CMD;
 		gvt_vgpu_err("unknown cmd 0x%x, opcode=0x%x\n",
 				cmd, get_opcode(cmd, s->ring_id));
 		return -EINVAL;
 	}
 	do {
-		copy_gma_to_hva(s->vgpu, s->vgpu->gtt.ggtt_mm,
-				gma, gma + 4, &cmd);
+		if (copy_gma_to_hva(s->vgpu, s->vgpu->gtt.ggtt_mm,
+				gma, gma + 4, &cmd) < 0)
+			return -EINVAL;
 		info = get_cmd_info(s->vgpu->gvt, cmd, s->ring_id);
 		if (info == NULL) {
+			vgpu->score += UNKNOWN_CMD;
 			gvt_vgpu_err("unknown cmd 0x%x, opcode=0x%x\n",
 				cmd, get_opcode(cmd, s->ring_id));
 			return -EINVAL;
@@ -1637,6 +1655,8 @@ static int perform_bb_shadow(struct parser_exec_state *s)
 
 	/* get the size of the batch buffer */
 	bb_size = find_bb_size(s);
+	if (bb_size < 0)
+		return -EINVAL;
 
 	/* allocate shadow batch buffer */
 	entry_obj = kmalloc(sizeof(*entry_obj), GFP_KERNEL);
@@ -1692,6 +1712,7 @@ static int perform_bb_shadow(struct parser_exec_state *s)
 	return 0;
 
 unmap_src:
+	vgpu->score += INTERNAL_ERR;
 	i915_gem_object_unpin_map(entry_obj->obj);
 put_obj:
 	i915_gem_object_put(entry_obj->obj);
@@ -1707,12 +1728,14 @@ static int cmd_handler_mi_batch_buffer_start(struct parser_exec_state *s)
 	struct intel_vgpu *vgpu = s->vgpu;
 
 	if (s->buf_type == BATCH_BUFFER_2ND_LEVEL) {
+		vgpu->score += INVALID_ADDR;
 		gvt_vgpu_err("Found MI_BATCH_BUFFER_START in 2nd level BB\n");
 		return -EINVAL;
 	}
 
 	second_level = BATCH_BUFFER_2ND_LEVEL_BIT(cmd_val(s, 0)) == 1;
 	if (second_level && (s->buf_type != BATCH_BUFFER_INSTRUCTION)) {
+		vgpu->score += INVALID_ADDR;
 		gvt_vgpu_err("Jumping to 2nd level BB from RB is not allowed\n");
 		return -EINVAL;
 	}
@@ -2426,6 +2449,7 @@ static int cmd_parser_exec(struct parser_exec_state *s)
 
 	info = get_cmd_info(s->vgpu->gvt, cmd, s->ring_id);
 	if (info == NULL) {
+		vgpu->score += UNKNOWN_CMD;
 		gvt_vgpu_err("unknown cmd 0x%x, opcode=0x%x\n",
 				cmd, get_opcode(cmd, s->ring_id));
 		return -EINVAL;
@@ -2480,6 +2504,7 @@ static int command_scan(struct parser_exec_state *s,
 		if (s->buf_type == RING_BUFFER_INSTRUCTION) {
 			if (!(s->ip_gma >= rb_start) ||
 				!(s->ip_gma < gma_bottom)) {
+				vgpu->score += INVALID_ADDR;
 				gvt_vgpu_err("ip_gma %lx out of ring scope."
 					"(base:0x%lx, bottom: 0x%lx)\n",
 					s->ip_gma, rb_start,
@@ -2488,6 +2513,7 @@ static int command_scan(struct parser_exec_state *s,
 				return -EINVAL;
 			}
 			if (gma_out_of_range(s->ip_gma, gma_head, gma_tail)) {
+				vgpu->score += INVALID_ADDR;
 				gvt_vgpu_err("ip_gma %lx out of range."
 					"base 0x%lx head 0x%lx tail 0x%lx\n",
 					s->ip_gma, rb_start,
@@ -2695,11 +2721,13 @@ static int shadow_indirect_ctx(struct intel_shadow_wa_ctx *wa_ctx)
 	if (IS_ERR(map)) {
 		gvt_vgpu_err("failed to vmap shadow indirect ctx\n");
 		ret = PTR_ERR(map);
+		vgpu->score += INTERNAL_ERR;
 		goto put_obj;
 	}
 
 	ret = i915_gem_object_set_to_cpu_domain(obj, false);
 	if (ret) {
+		vgpu->score += INTERNAL_ERR;
 		gvt_vgpu_err("failed to set shadow indirect ctx to CPU\n");
 		goto unmap_src;
 	}
diff --git a/drivers/gpu/drm/i915/gvt/execlist.c b/drivers/gpu/drm/i915/gvt/execlist.c
index ec8ff32..091d50e 100644
--- a/drivers/gpu/drm/i915/gvt/execlist.c
+++ b/drivers/gpu/drm/i915/gvt/execlist.c
@@ -526,6 +526,7 @@ static int prepare_execlist_workload(struct intel_vgpu_workload *workload)
 	intel_vgpu_unpin_mm(workload->shadow_mm);
 
 err_ret:
+	vgpu->score += INTERNAL_ERR;
 	return ret;
 }
 
@@ -743,11 +744,15 @@ static int submit_context(struct intel_vgpu *vgpu, int ring_id,
 	 */
 	if (list_empty(workload_q_head(vgpu, ring_id))) {
 		mutex_lock(&dev_priv->drm.struct_mutex);
-		intel_gvt_scan_and_shadow_workload(workload);
+		ret = intel_gvt_scan_and_shadow_workload(workload);
 		mutex_unlock(&dev_priv->drm.struct_mutex);
 	}
 
-	queue_workload(workload);
+	if (ret) {
+		if (vgpu->score >= FAILSAFE_MODE_SCORE)
+			enter_failsafe_mode(vgpu, GVT_FAILSAFE_INTERNAL_ERROR);
+	} else
+		queue_workload(workload);
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c
index 8f46019..040b47d 100644
--- a/drivers/gpu/drm/i915/gvt/gtt.c
+++ b/drivers/gpu/drm/i915/gvt/gtt.c
@@ -1709,7 +1709,7 @@ unsigned long intel_vgpu_gma_to_gpa(struct intel_vgpu_mm *mm, unsigned long gma)
 	int ret;
 
 	if (mm->type != INTEL_GVT_MM_GGTT && mm->type != INTEL_GVT_MM_PPGTT)
-		return INTEL_GVT_INVALID_ADDR;
+		goto err;
 
 	if (mm->type == INTEL_GVT_MM_GGTT) {
 		if (!vgpu_gmadr_is_valid(vgpu, gma))
@@ -1766,6 +1766,7 @@ unsigned long intel_vgpu_gma_to_gpa(struct intel_vgpu_mm *mm, unsigned long gma)
 			mm->page_table_level, gma, gpa);
 	return gpa;
 err:
+	vgpu->score += INVALID_ADDR;
 	gvt_vgpu_err("invalid mm type: %d gma %lx\n", mm->type, gma);
 	return INTEL_GVT_INVALID_ADDR;
 }
diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h
index 4afe080..744dc1c 100644
--- a/drivers/gpu/drm/i915/gvt/gvt.h
+++ b/drivers/gpu/drm/i915/gvt/gvt.h
@@ -171,6 +171,7 @@ struct intel_vgpu {
 	void *reserve_ring_buffer_va[I915_NUM_ENGINES];
 	/* dynamically allocated once ring buffer size > 1 pages */
 	void *dynamic_ring_buffer_va[I915_NUM_ENGINES];
+	int score;
 
 #if IS_ENABLED(CONFIG_DRM_I915_GVT_KVMGT)
 	struct {
@@ -193,6 +194,13 @@ struct intel_vgpu {
 #endif
 };
 
+
+#define UNKNOWN_CMD  10
+#define INVALID_ADDR 20
+#define INTERNAL_ERR 20
+
+#define FAILSAFE_MODE_SCORE 20
+
 struct intel_gvt_gm {
 	unsigned long vgpu_allocated_low_gm_size;
 	unsigned long vgpu_allocated_high_gm_size;
@@ -478,6 +486,8 @@ int intel_vgpu_emulate_opregion_request(struct intel_vgpu *vgpu, u32 swsci);
 void populate_pvinfo_page(struct intel_vgpu *vgpu);
 
 int intel_gvt_scan_and_shadow_workload(struct intel_vgpu_workload *workload);
+void enter_failsafe_mode(struct intel_vgpu *vgpu, int reason);
+
 
 struct intel_gvt_ops {
 	int (*emulate_cfg_read)(struct intel_vgpu *, unsigned int, void *,
@@ -500,6 +510,7 @@ struct intel_gvt_ops {
 enum {
 	GVT_FAILSAFE_UNSUPPORTED_GUEST,
 	GVT_FAILSAFE_INSUFFICIENT_RESOURCE,
+	GVT_FAILSAFE_INTERNAL_ERROR,
 };
 
 static inline void mmio_hw_access_pre(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c
index 17febe8..cefe28f 100644
--- a/drivers/gpu/drm/i915/gvt/handlers.c
+++ b/drivers/gpu/drm/i915/gvt/handlers.c
@@ -149,7 +149,7 @@ static int render_mmio_to_ring_id(struct intel_gvt *gvt, unsigned int reg)
 	(num * 8 + i915_mmio_reg_offset(FENCE_REG_GEN6_LO(0)))
 
 
-static void enter_failsafe_mode(struct intel_vgpu *vgpu, int reason)
+void enter_failsafe_mode(struct intel_vgpu *vgpu, int reason)
 {
 	switch (reason) {
 	case GVT_FAILSAFE_UNSUPPORTED_GUEST:
@@ -157,6 +157,10 @@ static void enter_failsafe_mode(struct intel_vgpu *vgpu, int reason)
 		break;
 	case GVT_FAILSAFE_INSUFFICIENT_RESOURCE:
 		pr_err("Graphics resource is not enough for the guest\n");
+		break;
+	case GVT_FAILSAFE_INTERNAL_ERROR:
+		pr_err("Internal error for the guest\n");
+		break;
 	default:
 		break;
 	}
diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
index 6a7d405..3cf76bd 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.c
+++ b/drivers/gpu/drm/i915/gvt/scheduler.c
@@ -188,7 +188,6 @@ static int copy_workload_to_ring_buffer(struct intel_vgpu_workload *workload)
 {
 	struct intel_vgpu *vgpu = workload->vgpu;
 	int ring_id = workload->ring_id;
-	struct i915_gem_context *shadow_ctx = vgpu->shadow_ctx;
 	void *shadow_ring_buffer_va;
 	u32 *cs;
 	int ret = 0;
@@ -198,6 +197,7 @@ static int copy_workload_to_ring_buffer(struct intel_vgpu_workload *workload)
 	if (IS_ERR(cs)) {
 		gvt_vgpu_err("fail to alloc size =%ld shadow  ring buffer\n",
 			workload->rb_len);
+		vgpu->score += INTERNAL_ERR;
 		ret = PTR_ERR(cs);
 		goto out;
 	}
@@ -313,7 +313,6 @@ static void intel_gvt_release_shadow_workload(struct intel_vgpu_workload *worklo
 	struct i915_gem_context *shadow_ctx = workload->vgpu->shadow_ctx;
 	struct drm_i915_private *dev_priv = workload->vgpu->gvt->dev_priv;
 	struct intel_engine_cs *engine = dev_priv->engine[ring_id];
-	struct drm_i915_gem_request *rq;
 
 	if (workload->shadowed)
 		return;
@@ -638,6 +637,10 @@ static int workload_thread(void *priv)
 					FORCEWAKE_ALL);
 
 		intel_runtime_pm_put(gvt->dev_priv);
+		if (ret) {
+			if (vgpu->score >= FAILSAFE_MODE_SCORE)
+				enter_failsafe_mode(vgpu, GVT_FAILSAFE_INTERNAL_ERROR);
+		}
 	}
 	return 0;
 }
-- 
2.7.4
