[PATCH 6/6] drm/i915/gvt: Add error rating system

fred gao fred.gao at intel.com
Mon Aug 7 04:13:23 UTC 2017


This patch implements an error rating system that tracks errors found
during guest command scanning and workload preparation. The guest
enters failsafe mode once its accumulated score reaches a limit.

Generally, there are 3 types of errors: a) a command is unknown;
b) a command accesses an invalid address space; c) a GVT internal
error occurs. A penalty score is defined for each of them.

v2:
- remove the rating of some internal i915 errors.  (Zhenyu)

Signed-off-by: fred gao <fred.gao at intel.com>
---
 drivers/gpu/drm/i915/gvt/cmd_parser.c | 45 +++++++++++++++++++++++++++--------
 drivers/gpu/drm/i915/gvt/execlist.c   | 11 ++++++---
 drivers/gpu/drm/i915/gvt/gtt.c        |  3 ++-
 drivers/gpu/drm/i915/gvt/gvt.h        | 11 +++++++++
 drivers/gpu/drm/i915/gvt/handlers.c   |  4 +++-
 drivers/gpu/drm/i915/gvt/scheduler.c  |  2 ++
 6 files changed, 61 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c
index 03c82df..bfedfd7 100644
--- a/drivers/gpu/drm/i915/gvt/cmd_parser.c
+++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c
@@ -837,6 +837,7 @@ static int cmd_reg_handler(struct parser_exec_state *s,
 	struct intel_gvt *gvt = vgpu->gvt;
 
 	if (offset + 4 > gvt->device_info.mmio_size) {
+		vgpu->score += INVALID_ADDR;
 		gvt_vgpu_err("%s access to (%x) outside of MMIO range\n",
 				cmd, offset);
 		return -EINVAL;
@@ -854,8 +855,10 @@ static int cmd_reg_handler(struct parser_exec_state *s,
 	}
 
 	if (is_force_nonpriv_mmio(offset) &&
-	    force_nonpriv_reg_handler(s, offset, index))
+		force_nonpriv_reg_handler(s, offset, index)) {
+		vgpu->score += UNKNOWN_CMD;
 		return -EINVAL;
+	}
 
 	if (offset == i915_mmio_reg_offset(DERRMR) ||
 		offset == i915_mmio_reg_offset(FORCEWAKE_MT)) {
@@ -896,8 +899,10 @@ static int cmd_handler_lri(struct parser_exec_state *s)
 			else
 				ret |= (cmd_reg_inhibit(s, i)) ? -EINVAL : 0;
 		}
-		if (ret)
+		if (ret) {
+			s->vgpu->score += UNKNOWN_CMD;
 			break;
+		}
 		ret |= cmd_reg_handler(s, cmd_reg(s, i), i, "lri");
 	}
 	return ret;
@@ -913,8 +918,10 @@ static int cmd_handler_lrr(struct parser_exec_state *s)
 			ret |= ((cmd_reg_inhibit(s, i) ||
 					(cmd_reg_inhibit(s, i + 1)))) ?
 				-EINVAL : 0;
-		if (ret)
+		if (ret) {
+			s->vgpu->score += UNKNOWN_CMD;
 			break;
+		}
 		ret |= cmd_reg_handler(s, cmd_reg(s, i), i, "lrr-src");
 		ret |= cmd_reg_handler(s, cmd_reg(s, i + 1), i, "lrr-dst");
 	}
@@ -935,8 +942,10 @@ static int cmd_handler_lrm(struct parser_exec_state *s)
 	for (i = 1; i < cmd_len;) {
 		if (IS_BROADWELL(gvt->dev_priv))
 			ret |= (cmd_reg_inhibit(s, i)) ? -EINVAL : 0;
-		if (ret)
+		if (ret) {
+			s->vgpu->score += UNKNOWN_CMD;
 			break;
+		}
 		ret |= cmd_reg_handler(s, cmd_reg(s, i), i, "lrm");
 		if (cmd_val(s, 0) & (1 << 22)) {
 			gma = cmd_gma(s, i + 1);
@@ -1115,8 +1124,10 @@ static int gen8_decode_mi_display_flip(struct parser_exec_state *s,
 	dword2 = cmd_val(s, 2);
 
 	v = (dword0 & GENMASK(21, 19)) >> 19;
-	if (WARN_ON(v >= ARRAY_SIZE(gen8_plane_code)))
+	if (WARN_ON(v >= ARRAY_SIZE(gen8_plane_code))) {
+		s->vgpu->score += UNKNOWN_CMD;
 		return -EINVAL;
+	}
 
 	info->pipe = gen8_plane_code[v].pipe;
 	info->plane = gen8_plane_code[v].plane;
@@ -1136,6 +1147,7 @@ static int gen8_decode_mi_display_flip(struct parser_exec_state *s,
 		info->surf_reg = SPRSURF(info->pipe);
 	} else {
 		WARN_ON(1);
+		s->vgpu->score += UNKNOWN_CMD;
 		return -EINVAL;
 	}
 	return 0;
@@ -1184,6 +1196,7 @@ static int skl_decode_mi_display_flip(struct parser_exec_state *s,
 		break;
 
 	default:
+		s->vgpu->score += UNKNOWN_CMD;
 		gvt_vgpu_err("unknown plane code %d\n", plane);
 		return -EINVAL;
 	}
@@ -1384,7 +1397,7 @@ static inline int cmd_address_audit(struct parser_exec_state *s,
 		}
 	} else if ((!vgpu_gmadr_is_valid(s->vgpu, guest_gma)) ||
 			(!vgpu_gmadr_is_valid(s->vgpu,
-					      guest_gma + op_size - 1))) {
+					guest_gma + op_size - 1))) {
 		ret = -EINVAL;
 		goto err;
 	}
@@ -1393,6 +1406,7 @@ static inline int cmd_address_audit(struct parser_exec_state *s,
 	gvt_vgpu_err("cmd_parser: Malicious %s detected, addr=0x%lx, len=%d!\n",
 			s->info->name, guest_gma, op_size);
 
+	vgpu->score += INVALID_ADDR;
 	pr_err("cmd dump: ");
 	for (i = 0; i < cmd_length(s); i++) {
 		if (!(i % 4))
@@ -1438,6 +1452,7 @@ static inline int unexpected_cmd(struct parser_exec_state *s)
 	struct intel_vgpu *vgpu = s->vgpu;
 
 	gvt_vgpu_err("Unexpected %s in command buffer!\n", s->info->name);
+	vgpu->score += UNKNOWN_CMD;
 
 	return -EINVAL;
 }
@@ -1576,11 +1591,11 @@ static int batch_buffer_needs_scan(struct parser_exec_state *s)
 	return 1;
 }
 
-static uint32_t find_bb_size(struct parser_exec_state *s)
+static int find_bb_size(struct parser_exec_state *s)
 {
 	unsigned long gma = 0;
 	struct cmd_info *info;
-	uint32_t bb_size = 0;
+	int bb_size = 0;
 	uint32_t cmd_len = 0;
 	bool met_bb_end = false;
 	struct intel_vgpu *vgpu = s->vgpu;
@@ -1592,15 +1607,18 @@ static uint32_t find_bb_size(struct parser_exec_state *s)
 
 	info = get_cmd_info(s->vgpu->gvt, cmd, s->ring_id);
 	if (info == NULL) {
+		vgpu->score += UNKNOWN_CMD;
 		gvt_vgpu_err("unknown cmd 0x%x, opcode=0x%x\n",
 				cmd, get_opcode(cmd, s->ring_id));
 		return -EINVAL;
 	}
 	do {
-		copy_gma_to_hva(s->vgpu, s->vgpu->gtt.ggtt_mm,
-				gma, gma + 4, &cmd);
+		if (copy_gma_to_hva(s->vgpu, s->vgpu->gtt.ggtt_mm,
+				gma, gma + 4, &cmd) < 0)
+			return -EINVAL;
 		info = get_cmd_info(s->vgpu->gvt, cmd, s->ring_id);
 		if (info == NULL) {
+			vgpu->score += UNKNOWN_CMD;
 			gvt_vgpu_err("unknown cmd 0x%x, opcode=0x%x\n",
 				cmd, get_opcode(cmd, s->ring_id));
 			return -EINVAL;
@@ -1637,6 +1655,8 @@ static int perform_bb_shadow(struct parser_exec_state *s)
 
 	/* get the size of the batch buffer */
 	bb_size = find_bb_size(s);
+	if (bb_size < 0)
+		return -EINVAL;
 
 	/* allocate shadow batch buffer */
 	entry_obj = kmalloc(sizeof(*entry_obj), GFP_KERNEL);
@@ -1707,12 +1727,14 @@ static int cmd_handler_mi_batch_buffer_start(struct parser_exec_state *s)
 	struct intel_vgpu *vgpu = s->vgpu;
 
 	if (s->buf_type == BATCH_BUFFER_2ND_LEVEL) {
+		vgpu->score += INVALID_ADDR;
 		gvt_vgpu_err("Found MI_BATCH_BUFFER_START in 2nd level BB\n");
 		return -EINVAL;
 	}
 
 	second_level = BATCH_BUFFER_2ND_LEVEL_BIT(cmd_val(s, 0)) == 1;
 	if (second_level && (s->buf_type != BATCH_BUFFER_INSTRUCTION)) {
+		vgpu->score += INVALID_ADDR;
 		gvt_vgpu_err("Jumping to 2nd level BB from RB is not allowed\n");
 		return -EINVAL;
 	}
@@ -2426,6 +2448,7 @@ static int cmd_parser_exec(struct parser_exec_state *s)
 
 	info = get_cmd_info(s->vgpu->gvt, cmd, s->ring_id);
 	if (info == NULL) {
+		vgpu->score += UNKNOWN_CMD;
 		gvt_vgpu_err("unknown cmd 0x%x, opcode=0x%x\n",
 				cmd, get_opcode(cmd, s->ring_id));
 		return -EINVAL;
@@ -2480,6 +2503,7 @@ static int command_scan(struct parser_exec_state *s,
 		if (s->buf_type == RING_BUFFER_INSTRUCTION) {
 			if (!(s->ip_gma >= rb_start) ||
 				!(s->ip_gma < gma_bottom)) {
+				vgpu->score += INVALID_ADDR;
 				gvt_vgpu_err("ip_gma %lx out of ring scope."
 					"(base:0x%lx, bottom: 0x%lx)\n",
 					s->ip_gma, rb_start,
@@ -2488,6 +2512,7 @@ static int command_scan(struct parser_exec_state *s,
 				return -EINVAL;
 			}
 			if (gma_out_of_range(s->ip_gma, gma_head, gma_tail)) {
+				vgpu->score += INVALID_ADDR;
 				gvt_vgpu_err("ip_gma %lx out of range."
 					"base 0x%lx head 0x%lx tail 0x%lx\n",
 					s->ip_gma, rb_start,
diff --git a/drivers/gpu/drm/i915/gvt/execlist.c b/drivers/gpu/drm/i915/gvt/execlist.c
index ed7f8ca..430ad53 100644
--- a/drivers/gpu/drm/i915/gvt/execlist.c
+++ b/drivers/gpu/drm/i915/gvt/execlist.c
@@ -526,6 +526,7 @@ static int prepare_execlist_workload(struct intel_vgpu_workload *workload)
 	intel_vgpu_unpin_mm(workload->shadow_mm);
 
 err_ret:
+	vgpu->score = GVT_INTERNAL_ERR;
 	return ret;
 }
 
@@ -743,12 +744,16 @@ static int submit_context(struct intel_vgpu *vgpu, int ring_id,
 	 */
 	if (list_empty(workload_q_head(vgpu, ring_id))) {
 		mutex_lock(&dev_priv->drm.struct_mutex);
-		intel_gvt_scan_and_shadow_workload(workload);
+		ret = intel_gvt_scan_and_shadow_workload(workload);
 		mutex_unlock(&dev_priv->drm.struct_mutex);
 	}
 
-	queue_workload(workload);
-	return 0;
+	if (ret == 0)
+		queue_workload(workload);
+	else if (vgpu->score >= FAILSAFE_MODE_SCORE)
+		enter_failsafe_mode(vgpu, GVT_FAILSAFE_INTERNAL_ERR);
+
+	return ret;
 }
 
 int intel_vgpu_submit_execlist(struct intel_vgpu *vgpu, int ring_id)
diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c
index 8f46019..040b47d 100644
--- a/drivers/gpu/drm/i915/gvt/gtt.c
+++ b/drivers/gpu/drm/i915/gvt/gtt.c
@@ -1709,7 +1709,7 @@ unsigned long intel_vgpu_gma_to_gpa(struct intel_vgpu_mm *mm, unsigned long gma)
 	int ret;
 
 	if (mm->type != INTEL_GVT_MM_GGTT && mm->type != INTEL_GVT_MM_PPGTT)
-		return INTEL_GVT_INVALID_ADDR;
+		goto err;
 
 	if (mm->type == INTEL_GVT_MM_GGTT) {
 		if (!vgpu_gmadr_is_valid(vgpu, gma))
@@ -1766,6 +1766,7 @@ unsigned long intel_vgpu_gma_to_gpa(struct intel_vgpu_mm *mm, unsigned long gma)
 			mm->page_table_level, gma, gpa);
 	return gpa;
 err:
+	vgpu->score += INVALID_ADDR;
 	gvt_vgpu_err("invalid mm type: %d gma %lx\n", mm->type, gma);
 	return INTEL_GVT_INVALID_ADDR;
 }
diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h
index ae4e7d8..84f7d9b 100644
--- a/drivers/gpu/drm/i915/gvt/gvt.h
+++ b/drivers/gpu/drm/i915/gvt/gvt.h
@@ -172,6 +172,7 @@ struct intel_vgpu {
 	int reserve_ring_buffer_size[I915_NUM_ENGINES];
 	/* dynamically allocated once ring buffer size > 1 pages */
 	void *dynamic_ring_buffer_va[I915_NUM_ENGINES];
+	int score;
 
 #if IS_ENABLED(CONFIG_DRM_I915_GVT_KVMGT)
 	struct {
@@ -194,6 +195,13 @@ struct intel_vgpu {
 #endif
 };
 
+
+#define UNKNOWN_CMD  10
+#define INVALID_ADDR 20
+#define GVT_INTERNAL_ERR 20
+
+#define FAILSAFE_MODE_SCORE 20
+
 struct intel_gvt_gm {
 	unsigned long vgpu_allocated_low_gm_size;
 	unsigned long vgpu_allocated_high_gm_size;
@@ -479,6 +487,8 @@ int intel_vgpu_emulate_opregion_request(struct intel_vgpu *vgpu, u32 swsci);
 void populate_pvinfo_page(struct intel_vgpu *vgpu);
 
 int intel_gvt_scan_and_shadow_workload(struct intel_vgpu_workload *workload);
+void enter_failsafe_mode(struct intel_vgpu *vgpu, int reason);
+
 
 struct intel_gvt_ops {
 	int (*emulate_cfg_read)(struct intel_vgpu *, unsigned int, void *,
@@ -501,6 +511,7 @@ struct intel_gvt_ops {
 enum {
 	GVT_FAILSAFE_UNSUPPORTED_GUEST,
 	GVT_FAILSAFE_INSUFFICIENT_RESOURCE,
+	GVT_FAILSAFE_INTERNAL_ERR,
 };
 
 static inline void mmio_hw_access_pre(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c
index 17febe8..5b2bf1e 100644
--- a/drivers/gpu/drm/i915/gvt/handlers.c
+++ b/drivers/gpu/drm/i915/gvt/handlers.c
@@ -149,7 +149,7 @@ static int render_mmio_to_ring_id(struct intel_gvt *gvt, unsigned int reg)
 	(num * 8 + i915_mmio_reg_offset(FENCE_REG_GEN6_LO(0)))
 
 
-static void enter_failsafe_mode(struct intel_vgpu *vgpu, int reason)
+void enter_failsafe_mode(struct intel_vgpu *vgpu, int reason)
 {
 	switch (reason) {
 	case GVT_FAILSAFE_UNSUPPORTED_GUEST:
@@ -157,6 +157,8 @@ static void enter_failsafe_mode(struct intel_vgpu *vgpu, int reason)
 		break;
 	case GVT_FAILSAFE_INSUFFICIENT_RESOURCE:
 		pr_err("Graphics resource is not enough for the guest\n");
+	case GVT_FAILSAFE_INTERNAL_ERR:
+		pr_err("GVT Internal error  for the guest\n");
 	default:
 		break;
 	}
diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
index 2cbf6be..305630e 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.c
+++ b/drivers/gpu/drm/i915/gvt/scheduler.c
@@ -636,6 +636,8 @@ static int workload_thread(void *priv)
 					FORCEWAKE_ALL);
 
 		intel_runtime_pm_put(gvt->dev_priv);
+		if (ret && (vgpu->score >= FAILSAFE_MODE_SCORE))
+			enter_failsafe_mode(vgpu, GVT_FAILSAFE_INTERNAL_ERR);
 	}
 	return 0;
 }
-- 
2.7.4



More information about the intel-gvt-dev mailing list