[PATCH 6/6] drm/i915/gvt: Add error rating system

fred gao fred.gao at intel.com
Thu Aug 10 15:31:40 UTC 2017


This patch implements an error rating system to track errors during
guest command scanning and workload state preparation; the guest will
enter failsafe mode once its error score reaches a defined limit.

Generally, there are 3 types of errors: a) some commands might be
unknown; b) some commands access an invalid address space; c) some gvt
internal errors occur. A penalty score is defined for each of them.

v2:
- remove some internal i915 errors rating.  (Zhenyu)

Signed-off-by: fred gao <fred.gao at intel.com>
---
 drivers/gpu/drm/i915/gvt/cmd_parser.c | 43 +++++++++++++++++++++++++++--------
 drivers/gpu/drm/i915/gvt/execlist.c   | 10 +++++---
 drivers/gpu/drm/i915/gvt/gtt.c        |  3 ++-
 drivers/gpu/drm/i915/gvt/gvt.h        | 11 +++++++++
 drivers/gpu/drm/i915/gvt/handlers.c   |  4 +++-
 drivers/gpu/drm/i915/gvt/scheduler.c  |  2 ++
 6 files changed, 59 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c
index e53efc0..2c96b2b 100644
--- a/drivers/gpu/drm/i915/gvt/cmd_parser.c
+++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c
@@ -837,6 +837,7 @@ static int cmd_reg_handler(struct parser_exec_state *s,
 	struct intel_gvt *gvt = vgpu->gvt;
 
 	if (offset + 4 > gvt->device_info.mmio_size) {
+		vgpu->score += INVALID_ADDR;
 		gvt_vgpu_err("%s access to (%x) outside of MMIO range\n",
 				cmd, offset);
 		return -EINVAL;
@@ -854,8 +855,10 @@ static int cmd_reg_handler(struct parser_exec_state *s,
 	}
 
 	if (is_force_nonpriv_mmio(offset) &&
-	    force_nonpriv_reg_handler(s, offset, index))
+		force_nonpriv_reg_handler(s, offset, index)) {
+		vgpu->score += UNKNOWN_CMD;
 		return -EINVAL;
+	}
 
 	if (offset == i915_mmio_reg_offset(DERRMR) ||
 		offset == i915_mmio_reg_offset(FORCEWAKE_MT)) {
@@ -896,8 +899,10 @@ static int cmd_handler_lri(struct parser_exec_state *s)
 			else
 				ret |= (cmd_reg_inhibit(s, i)) ? -EINVAL : 0;
 		}
-		if (ret)
+		if (ret) {
+			s->vgpu->score += UNKNOWN_CMD;
 			break;
+		}
 		ret |= cmd_reg_handler(s, cmd_reg(s, i), i, "lri");
 	}
 	return ret;
@@ -913,8 +918,10 @@ static int cmd_handler_lrr(struct parser_exec_state *s)
 			ret |= ((cmd_reg_inhibit(s, i) ||
 					(cmd_reg_inhibit(s, i + 1)))) ?
 				-EINVAL : 0;
-		if (ret)
+		if (ret) {
+			s->vgpu->score += UNKNOWN_CMD;
 			break;
+		}
 		ret |= cmd_reg_handler(s, cmd_reg(s, i), i, "lrr-src");
 		ret |= cmd_reg_handler(s, cmd_reg(s, i + 1), i, "lrr-dst");
 	}
@@ -935,8 +942,10 @@ static int cmd_handler_lrm(struct parser_exec_state *s)
 	for (i = 1; i < cmd_len;) {
 		if (IS_BROADWELL(gvt->dev_priv))
 			ret |= (cmd_reg_inhibit(s, i)) ? -EINVAL : 0;
-		if (ret)
+		if (ret) {
+			s->vgpu->score += UNKNOWN_CMD;
 			break;
+		}
 		ret |= cmd_reg_handler(s, cmd_reg(s, i), i, "lrm");
 		if (cmd_val(s, 0) & (1 << 22)) {
 			gma = cmd_gma(s, i + 1);
@@ -1115,8 +1124,10 @@ static int gen8_decode_mi_display_flip(struct parser_exec_state *s,
 	dword2 = cmd_val(s, 2);
 
 	v = (dword0 & GENMASK(21, 19)) >> 19;
-	if (WARN_ON(v >= ARRAY_SIZE(gen8_plane_code)))
+	if (WARN_ON(v >= ARRAY_SIZE(gen8_plane_code))) {
+		s->vgpu->score += UNKNOWN_CMD;
 		return -EINVAL;
+	}
 
 	info->pipe = gen8_plane_code[v].pipe;
 	info->plane = gen8_plane_code[v].plane;
@@ -1136,6 +1147,7 @@ static int gen8_decode_mi_display_flip(struct parser_exec_state *s,
 		info->surf_reg = SPRSURF(info->pipe);
 	} else {
 		WARN_ON(1);
+		s->vgpu->score += UNKNOWN_CMD;
 		return -EINVAL;
 	}
 	return 0;
@@ -1184,6 +1196,7 @@ static int skl_decode_mi_display_flip(struct parser_exec_state *s,
 		break;
 
 	default:
+		s->vgpu->score += UNKNOWN_CMD;
 		gvt_vgpu_err("unknown plane code %d\n", plane);
 		return -EINVAL;
 	}
@@ -1393,6 +1406,7 @@ static inline int cmd_address_audit(struct parser_exec_state *s,
 	gvt_vgpu_err("cmd_parser: Malicious %s detected, addr=0x%lx, len=%d!\n",
 			s->info->name, guest_gma, op_size);
 
+	vgpu->score += INVALID_ADDR;
 	pr_err("cmd dump: ");
 	for (i = 0; i < cmd_length(s); i++) {
 		if (!(i % 4))
@@ -1438,6 +1452,7 @@ static inline int unexpected_cmd(struct parser_exec_state *s)
 	struct intel_vgpu *vgpu = s->vgpu;
 
 	gvt_vgpu_err("Unexpected %s in command buffer!\n", s->info->name);
+	vgpu->score += UNKNOWN_CMD;
 
 	return -EINVAL;
 }
@@ -1576,11 +1591,11 @@ static int batch_buffer_needs_scan(struct parser_exec_state *s)
 	return 1;
 }
 
-static uint32_t find_bb_size(struct parser_exec_state *s)
+static int find_bb_size(struct parser_exec_state *s)
 {
 	unsigned long gma = 0;
 	struct cmd_info *info;
-	uint32_t bb_size = 0;
+	int bb_size = 0;
 	uint32_t cmd_len = 0;
 	bool met_bb_end = false;
 	struct intel_vgpu *vgpu = s->vgpu;
@@ -1592,15 +1607,18 @@ static uint32_t find_bb_size(struct parser_exec_state *s)
 
 	info = get_cmd_info(s->vgpu->gvt, cmd, s->ring_id);
 	if (info == NULL) {
+		vgpu->score += UNKNOWN_CMD;
 		gvt_vgpu_err("unknown cmd 0x%x, opcode=0x%x\n",
 				cmd, get_opcode(cmd, s->ring_id));
 		return -EINVAL;
 	}
 	do {
-		copy_gma_to_hva(s->vgpu, s->vgpu->gtt.ggtt_mm,
-				gma, gma + 4, &cmd);
+		if (copy_gma_to_hva(s->vgpu, s->vgpu->gtt.ggtt_mm,
+				gma, gma + 4, &cmd) < 0)
+			return -EINVAL;
 		info = get_cmd_info(s->vgpu->gvt, cmd, s->ring_id);
 		if (info == NULL) {
+			vgpu->score += UNKNOWN_CMD;
 			gvt_vgpu_err("unknown cmd 0x%x, opcode=0x%x\n",
 				cmd, get_opcode(cmd, s->ring_id));
 			return -EINVAL;
@@ -1637,6 +1655,8 @@ static int perform_bb_shadow(struct parser_exec_state *s)
 
 	/* get the size of the batch buffer */
 	bb_size = find_bb_size(s);
+	if (bb_size < 0)
+		return -EINVAL;
 
 	/* allocate shadow batch buffer */
 	entry_obj = kmalloc(sizeof(*entry_obj), GFP_KERNEL);
@@ -1707,12 +1727,14 @@ static int cmd_handler_mi_batch_buffer_start(struct parser_exec_state *s)
 	struct intel_vgpu *vgpu = s->vgpu;
 
 	if (s->buf_type == BATCH_BUFFER_2ND_LEVEL) {
+		vgpu->score += INVALID_ADDR;
 		gvt_vgpu_err("Found MI_BATCH_BUFFER_START in 2nd level BB\n");
 		return -EINVAL;
 	}
 
 	second_level = BATCH_BUFFER_2ND_LEVEL_BIT(cmd_val(s, 0)) == 1;
 	if (second_level && (s->buf_type != BATCH_BUFFER_INSTRUCTION)) {
+		vgpu->score += INVALID_ADDR;
 		gvt_vgpu_err("Jumping to 2nd level BB from RB is not allowed\n");
 		return -EINVAL;
 	}
@@ -2426,6 +2448,7 @@ static int cmd_parser_exec(struct parser_exec_state *s)
 
 	info = get_cmd_info(s->vgpu->gvt, cmd, s->ring_id);
 	if (info == NULL) {
+		vgpu->score += UNKNOWN_CMD;
 		gvt_vgpu_err("unknown cmd 0x%x, opcode=0x%x\n",
 				cmd, get_opcode(cmd, s->ring_id));
 		return -EINVAL;
@@ -2480,6 +2503,7 @@ static int command_scan(struct parser_exec_state *s,
 		if (s->buf_type == RING_BUFFER_INSTRUCTION) {
 			if (!(s->ip_gma >= rb_start) ||
 				!(s->ip_gma < gma_bottom)) {
+				vgpu->score += INVALID_ADDR;
 				gvt_vgpu_err("ip_gma %lx out of ring scope."
 					"(base:0x%lx, bottom: 0x%lx)\n",
 					s->ip_gma, rb_start,
@@ -2488,6 +2512,7 @@ static int command_scan(struct parser_exec_state *s,
 				return -EINVAL;
 			}
 			if (gma_out_of_range(s->ip_gma, gma_head, gma_tail)) {
+				vgpu->score += INVALID_ADDR;
 				gvt_vgpu_err("ip_gma %lx out of range."
 					"base 0x%lx head 0x%lx tail 0x%lx\n",
 					s->ip_gma, rb_start,
diff --git a/drivers/gpu/drm/i915/gvt/execlist.c b/drivers/gpu/drm/i915/gvt/execlist.c
index 329faeb..d9341c2 100644
--- a/drivers/gpu/drm/i915/gvt/execlist.c
+++ b/drivers/gpu/drm/i915/gvt/execlist.c
@@ -538,6 +538,7 @@ static int prepare_execlist_workload(struct intel_vgpu_workload *workload)
 err_unpin_mm:
 	intel_vgpu_unpin_mm(workload->shadow_mm);
 err_ret:
+	vgpu->score = GVT_INTERNAL_ERR;
 	return ret;
 }
 
@@ -771,13 +772,16 @@ static int submit_context(struct intel_vgpu *vgpu, int ring_id,
 	if (list_empty(workload_q_head(vgpu, ring_id))) {
 		intel_runtime_pm_get(dev_priv);
 		mutex_lock(&dev_priv->drm.struct_mutex);
-		intel_gvt_scan_and_shadow_workload(workload);
+		ret = intel_gvt_scan_and_shadow_workload(workload);
 		mutex_unlock(&dev_priv->drm.struct_mutex);
 		intel_runtime_pm_put(dev_priv);
 	}
 
-	queue_workload(workload);
-	return 0;
+	if (ret == 0)
+		queue_workload(workload);
+	else if (vgpu->score >= FAILSAFE_MODE_SCORE)
+		enter_failsafe_mode(vgpu, GVT_FAILSAFE_INTERNAL_ERR);
+	return ret;
 }
 
 int intel_vgpu_submit_execlist(struct intel_vgpu *vgpu, int ring_id)
diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c
index 0bd028f..31aaf8e 100644
--- a/drivers/gpu/drm/i915/gvt/gtt.c
+++ b/drivers/gpu/drm/i915/gvt/gtt.c
@@ -1727,7 +1727,7 @@ unsigned long intel_vgpu_gma_to_gpa(struct intel_vgpu_mm *mm, unsigned long gma)
 	int ret;
 
 	if (mm->type != INTEL_GVT_MM_GGTT && mm->type != INTEL_GVT_MM_PPGTT)
-		return INTEL_GVT_INVALID_ADDR;
+		goto err;
 
 	if (mm->type == INTEL_GVT_MM_GGTT) {
 		if (!vgpu_gmadr_is_valid(vgpu, gma))
@@ -1797,6 +1797,7 @@ unsigned long intel_vgpu_gma_to_gpa(struct intel_vgpu_mm *mm, unsigned long gma)
 			mm->page_table_level, gma, gpa);
 	return gpa;
 err:
+	vgpu->score += INVALID_ADDR;
 	gvt_vgpu_err("invalid mm type: %d gma %lx\n", mm->type, gma);
 	return INTEL_GVT_INVALID_ADDR;
 }
diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h
index 8960544..69bcd44 100644
--- a/drivers/gpu/drm/i915/gvt/gvt.h
+++ b/drivers/gpu/drm/i915/gvt/gvt.h
@@ -168,6 +168,7 @@ struct intel_vgpu {
 	/* 1/2K for each reserve ring buffer */
 	void *reserve_ring_buffer_va[I915_NUM_ENGINES];
 	int reserve_ring_buffer_size[I915_NUM_ENGINES];
+	int score;
 	DECLARE_BITMAP(tlb_handle_pending, I915_NUM_ENGINES);
 	struct i915_gem_context *shadow_ctx;
 	DECLARE_BITMAP(shadow_ctx_desc_updated, I915_NUM_ENGINES);
@@ -190,6 +191,13 @@ struct intel_vgpu {
 #endif
 };
 
+
+#define UNKNOWN_CMD  10
+#define INVALID_ADDR 20
+#define GVT_INTERNAL_ERR 20
+
+#define FAILSAFE_MODE_SCORE 20
+
 struct intel_gvt_gm {
 	unsigned long vgpu_allocated_low_gm_size;
 	unsigned long vgpu_allocated_high_gm_size;
@@ -487,6 +495,8 @@ int intel_vgpu_emulate_opregion_request(struct intel_vgpu *vgpu, u32 swsci);
 void populate_pvinfo_page(struct intel_vgpu *vgpu);
 
 int intel_gvt_scan_and_shadow_workload(struct intel_vgpu_workload *workload);
+void enter_failsafe_mode(struct intel_vgpu *vgpu, int reason);
+
 
 struct intel_gvt_ops {
 	int (*emulate_cfg_read)(struct intel_vgpu *, unsigned int, void *,
@@ -509,6 +519,7 @@ struct intel_gvt_ops {
 enum {
 	GVT_FAILSAFE_UNSUPPORTED_GUEST,
 	GVT_FAILSAFE_INSUFFICIENT_RESOURCE,
+	GVT_FAILSAFE_INTERNAL_ERR,
 };
 
 static inline void mmio_hw_access_pre(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c
index 825abfc..fcadb80 100644
--- a/drivers/gpu/drm/i915/gvt/handlers.c
+++ b/drivers/gpu/drm/i915/gvt/handlers.c
@@ -157,7 +157,7 @@ static int render_mmio_to_ring_id(struct intel_gvt *gvt, unsigned int reg)
 	(num * 8 + i915_mmio_reg_offset(FENCE_REG_GEN6_LO(0)))
 
 
-static void enter_failsafe_mode(struct intel_vgpu *vgpu, int reason)
+void enter_failsafe_mode(struct intel_vgpu *vgpu, int reason)
 {
 	switch (reason) {
 	case GVT_FAILSAFE_UNSUPPORTED_GUEST:
@@ -165,6 +165,8 @@ static void enter_failsafe_mode(struct intel_vgpu *vgpu, int reason)
 		break;
 	case GVT_FAILSAFE_INSUFFICIENT_RESOURCE:
 		pr_err("Graphics resource is not enough for the guest\n");
+	case GVT_FAILSAFE_INTERNAL_ERR:
+		pr_err("GVT Internal error  for the guest\n");
 	default:
 		break;
 	}
diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
index 26c955d..51c680b 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.c
+++ b/drivers/gpu/drm/i915/gvt/scheduler.c
@@ -640,6 +640,8 @@ static int workload_thread(void *priv)
 					FORCEWAKE_ALL);
 
 		intel_runtime_pm_put(gvt->dev_priv);
+		if (ret && (vgpu->score >= FAILSAFE_MODE_SCORE))
+			enter_failsafe_mode(vgpu, GVT_FAILSAFE_INTERNAL_ERR);
 	}
 	return 0;
 }
-- 
2.7.4



More information about the intel-gvt-dev mailing list