[PATCH 3/3] drm/i915/gvt: Add error rating system

Zhenyu Wang zhenyuw at linux.intel.com
Wed Jul 19 09:05:47 UTC 2017


On 2017.07.19 14:47:02 +0800, fred gao wrote:
> This patch is to implement the error rating system to track the
> guest cmd scan and prepare workload states, the guest will enter
> into failsafe mode once it reaches some limit.
> 
> Generally, there are 3 types of errors: a) some commands might be
> unknown;  b) some cmd access invalid address space; c) some internal
> errors. A penalty score is defined for each of them.

What are internal errors? It looks like you set that for e.g. memory
allocation failures, etc. Why count those as bad guest behavior?

> 
> Signed-off-by: fred gao <fred.gao at intel.com>
> ---
>  drivers/gpu/drm/i915/gvt/cmd_parser.c | 48 +++++++++++++++++++++++++++--------
>  drivers/gpu/drm/i915/gvt/execlist.c   |  9 +++++--
>  drivers/gpu/drm/i915/gvt/gtt.c        |  3 ++-
>  drivers/gpu/drm/i915/gvt/gvt.h        | 11 ++++++++
>  drivers/gpu/drm/i915/gvt/handlers.c   |  4 ++-
>  drivers/gpu/drm/i915/gvt/scheduler.c  |  7 +++--
>  6 files changed, 66 insertions(+), 16 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c
> index df300d4..740f9e4 100644
> --- a/drivers/gpu/drm/i915/gvt/cmd_parser.c
> +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c
> @@ -837,6 +837,7 @@ static int cmd_reg_handler(struct parser_exec_state *s,
>  	struct intel_gvt *gvt = vgpu->gvt;
>  
>  	if (offset + 4 > gvt->device_info.mmio_size) {
> +		vgpu->score += INVALID_ADDR;
>  		gvt_vgpu_err("%s access to (%x) outside of MMIO range\n",
>  				cmd, offset);
>  		return -EINVAL;
> @@ -854,8 +855,10 @@ static int cmd_reg_handler(struct parser_exec_state *s,
>  	}
>  
>  	if (is_force_nonpriv_mmio(offset) &&
> -	    force_nonpriv_reg_handler(s, offset, index))
> +		force_nonpriv_reg_handler(s, offset, index)) {
> +		vgpu->score += UNKNOWN_CMD;
>  		return -EINVAL;
> +	}
>  
>  	if (offset == i915_mmio_reg_offset(DERRMR) ||
>  		offset == i915_mmio_reg_offset(FORCEWAKE_MT)) {
> @@ -896,8 +899,10 @@ static int cmd_handler_lri(struct parser_exec_state *s)
>  			else
>  				ret |= (cmd_reg_inhibit(s, i)) ? -EINVAL : 0;
>  		}
> -		if (ret)
> +		if (ret) {
> +			s->vgpu->score += UNKNOWN_CMD;
>  			break;
> +		}
>  		ret |= cmd_reg_handler(s, cmd_reg(s, i), i, "lri");
>  	}
>  	return ret;
> @@ -913,8 +918,10 @@ static int cmd_handler_lrr(struct parser_exec_state *s)
>  			ret |= ((cmd_reg_inhibit(s, i) ||
>  					(cmd_reg_inhibit(s, i + 1)))) ?
>  				-EINVAL : 0;
> -		if (ret)
> +		if (ret) {
> +			s->vgpu->score += UNKNOWN_CMD;
>  			break;
> +		}
>  		ret |= cmd_reg_handler(s, cmd_reg(s, i), i, "lrr-src");
>  		ret |= cmd_reg_handler(s, cmd_reg(s, i + 1), i, "lrr-dst");
>  	}
> @@ -935,8 +942,10 @@ static int cmd_handler_lrm(struct parser_exec_state *s)
>  	for (i = 1; i < cmd_len;) {
>  		if (IS_BROADWELL(gvt->dev_priv))
>  			ret |= (cmd_reg_inhibit(s, i)) ? -EINVAL : 0;
> -		if (ret)
> +		if (ret) {
> +			s->vgpu->score += UNKNOWN_CMD;
>  			break;
> +		}
>  		ret |= cmd_reg_handler(s, cmd_reg(s, i), i, "lrm");
>  		if (cmd_val(s, 0) & (1 << 22)) {
>  			gma = cmd_gma(s, i + 1);
> @@ -1115,8 +1124,10 @@ static int gen8_decode_mi_display_flip(struct parser_exec_state *s,
>  	dword2 = cmd_val(s, 2);
>  
>  	v = (dword0 & GENMASK(21, 19)) >> 19;
> -	if (WARN_ON(v >= ARRAY_SIZE(gen8_plane_code)))
> +	if (WARN_ON(v >= ARRAY_SIZE(gen8_plane_code))) {
> +		s->vgpu->score += UNKNOWN_CMD;
>  		return -EINVAL;
> +	}
>  
>  	info->pipe = gen8_plane_code[v].pipe;
>  	info->plane = gen8_plane_code[v].plane;
> @@ -1136,6 +1147,7 @@ static int gen8_decode_mi_display_flip(struct parser_exec_state *s,
>  		info->surf_reg = SPRSURF(info->pipe);
>  	} else {
>  		WARN_ON(1);
> +		s->vgpu->score += UNKNOWN_CMD;
>  		return -EINVAL;
>  	}
>  	return 0;
> @@ -1184,6 +1196,7 @@ static int skl_decode_mi_display_flip(struct parser_exec_state *s,
>  		break;
>  
>  	default:
> +		s->vgpu->score += UNKNOWN_CMD;
>  		gvt_vgpu_err("unknown plane code %d\n", plane);
>  		return -EINVAL;
>  	}
> @@ -1384,7 +1397,7 @@ static inline int cmd_address_audit(struct parser_exec_state *s,
>  		}
>  	} else if ((!vgpu_gmadr_is_valid(s->vgpu, guest_gma)) ||
>  			(!vgpu_gmadr_is_valid(s->vgpu,
> -					      guest_gma + op_size - 1))) {
> +					guest_gma + op_size - 1))) {
>  		ret = -EINVAL;
>  		goto err;
>  	}
> @@ -1393,6 +1406,7 @@ static inline int cmd_address_audit(struct parser_exec_state *s,
>  	gvt_vgpu_err("cmd_parser: Malicious %s detected, addr=0x%lx, len=%d!\n",
>  			s->info->name, guest_gma, op_size);
>  
> +	vgpu->score += INVALID_ADDR;
>  	pr_err("cmd dump: ");
>  	for (i = 0; i < cmd_length(s); i++) {
>  		if (!(i % 4))
> @@ -1438,6 +1452,7 @@ static inline int unexpected_cmd(struct parser_exec_state *s)
>  	struct intel_vgpu *vgpu = s->vgpu;
>  
>  	gvt_vgpu_err("Unexpected %s in command buffer!\n", s->info->name);
> +	vgpu->score += UNKNOWN_CMD;
>  
>  	return -EINVAL;
>  }
> @@ -1576,11 +1591,11 @@ static int batch_buffer_needs_scan(struct parser_exec_state *s)
>  	return 1;
>  }
>  
> -static uint32_t find_bb_size(struct parser_exec_state *s)
> +static int find_bb_size(struct parser_exec_state *s)
>  {
>  	unsigned long gma = 0;
>  	struct cmd_info *info;
> -	uint32_t bb_size = 0;
> +	int bb_size = 0;
>  	uint32_t cmd_len = 0;
>  	bool met_bb_end = false;
>  	struct intel_vgpu *vgpu = s->vgpu;
> @@ -1592,15 +1607,18 @@ static uint32_t find_bb_size(struct parser_exec_state *s)
>  
>  	info = get_cmd_info(s->vgpu->gvt, cmd, s->ring_id);
>  	if (info == NULL) {
> +		vgpu->score += UNKNOWN_CMD;
>  		gvt_vgpu_err("unknown cmd 0x%x, opcode=0x%x\n",
>  				cmd, get_opcode(cmd, s->ring_id));
>  		return -EINVAL;
>  	}
>  	do {
> -		copy_gma_to_hva(s->vgpu, s->vgpu->gtt.ggtt_mm,
> -				gma, gma + 4, &cmd);
> +		if (copy_gma_to_hva(s->vgpu, s->vgpu->gtt.ggtt_mm,
> +				gma, gma + 4, &cmd) < 0)
> +			return -EINVAL;
>  		info = get_cmd_info(s->vgpu->gvt, cmd, s->ring_id);
>  		if (info == NULL) {
> +			vgpu->score += UNKNOWN_CMD;
>  			gvt_vgpu_err("unknown cmd 0x%x, opcode=0x%x\n",
>  				cmd, get_opcode(cmd, s->ring_id));
>  			return -EINVAL;
> @@ -1637,6 +1655,8 @@ static int perform_bb_shadow(struct parser_exec_state *s)
>  
>  	/* get the size of the batch buffer */
>  	bb_size = find_bb_size(s);
> +	if (bb_size < 0)
> +		return -EINVAL;
>  
>  	/* allocate shadow batch buffer */
>  	entry_obj = kmalloc(sizeof(*entry_obj), GFP_KERNEL);
> @@ -1692,6 +1712,7 @@ static int perform_bb_shadow(struct parser_exec_state *s)
>  	return 0;
>  
>  unmap_src:
> +	vgpu->score += INTERNAL_ERR;
>  	i915_gem_object_unpin_map(entry_obj->obj);
>  put_obj:
>  	i915_gem_object_put(entry_obj->obj);
> @@ -1707,12 +1728,14 @@ static int cmd_handler_mi_batch_buffer_start(struct parser_exec_state *s)
>  	struct intel_vgpu *vgpu = s->vgpu;
>  
>  	if (s->buf_type == BATCH_BUFFER_2ND_LEVEL) {
> +		vgpu->score += INVALID_ADDR;
>  		gvt_vgpu_err("Found MI_BATCH_BUFFER_START in 2nd level BB\n");
>  		return -EINVAL;
>  	}
>  
>  	second_level = BATCH_BUFFER_2ND_LEVEL_BIT(cmd_val(s, 0)) == 1;
>  	if (second_level && (s->buf_type != BATCH_BUFFER_INSTRUCTION)) {
> +		vgpu->score += INVALID_ADDR;
>  		gvt_vgpu_err("Jumping to 2nd level BB from RB is not allowed\n");
>  		return -EINVAL;
>  	}
> @@ -2426,6 +2449,7 @@ static int cmd_parser_exec(struct parser_exec_state *s)
>  
>  	info = get_cmd_info(s->vgpu->gvt, cmd, s->ring_id);
>  	if (info == NULL) {
> +		vgpu->score += UNKNOWN_CMD;
>  		gvt_vgpu_err("unknown cmd 0x%x, opcode=0x%x\n",
>  				cmd, get_opcode(cmd, s->ring_id));
>  		return -EINVAL;
> @@ -2480,6 +2504,7 @@ static int command_scan(struct parser_exec_state *s,
>  		if (s->buf_type == RING_BUFFER_INSTRUCTION) {
>  			if (!(s->ip_gma >= rb_start) ||
>  				!(s->ip_gma < gma_bottom)) {
> +				vgpu->score += INVALID_ADDR;
>  				gvt_vgpu_err("ip_gma %lx out of ring scope."
>  					"(base:0x%lx, bottom: 0x%lx)\n",
>  					s->ip_gma, rb_start,
> @@ -2488,6 +2513,7 @@ static int command_scan(struct parser_exec_state *s,
>  				return -EINVAL;
>  			}
>  			if (gma_out_of_range(s->ip_gma, gma_head, gma_tail)) {
> +				vgpu->score += INVALID_ADDR;
>  				gvt_vgpu_err("ip_gma %lx out of range."
>  					"base 0x%lx head 0x%lx tail 0x%lx\n",
>  					s->ip_gma, rb_start,
> @@ -2695,11 +2721,13 @@ static int shadow_indirect_ctx(struct intel_shadow_wa_ctx *wa_ctx)
>  	if (IS_ERR(map)) {
>  		gvt_vgpu_err("failed to vmap shadow indirect ctx\n");
>  		ret = PTR_ERR(map);
> +		vgpu->score += INTERNAL_ERR;
>  		goto put_obj;
>  	}
>  
>  	ret = i915_gem_object_set_to_cpu_domain(obj, false);
>  	if (ret) {
> +		vgpu->score += INTERNAL_ERR;
>  		gvt_vgpu_err("failed to set shadow indirect ctx to CPU\n");
>  		goto unmap_src;
>  	}
> diff --git a/drivers/gpu/drm/i915/gvt/execlist.c b/drivers/gpu/drm/i915/gvt/execlist.c
> index ec8ff32..091d50e 100644
> --- a/drivers/gpu/drm/i915/gvt/execlist.c
> +++ b/drivers/gpu/drm/i915/gvt/execlist.c
> @@ -526,6 +526,7 @@ static int prepare_execlist_workload(struct intel_vgpu_workload *workload)
>  	intel_vgpu_unpin_mm(workload->shadow_mm);
>  
>  err_ret:
> +	vgpu->score = INTERNAL_ERR;
>  	return ret;
>  }
>  
> @@ -743,11 +744,15 @@ static int submit_context(struct intel_vgpu *vgpu, int ring_id,
>  	 */
>  	if (list_empty(workload_q_head(vgpu, ring_id))) {
>  		mutex_lock(&dev_priv->drm.struct_mutex);
> -		intel_gvt_scan_and_shadow_workload(workload);
> +		ret = intel_gvt_scan_and_shadow_workload(workload);
>  		mutex_unlock(&dev_priv->drm.struct_mutex);
>  	}
>  
> -	queue_workload(workload);
> +	if (ret) {
> +		if (vgpu->score >= FAILSAFE_MODE_SCORE)
> +			enter_failsafe_mode(vgpu, GVT_FAILSAFE_INTERNAL_ERROR);
> +	} else
> +		queue_workload(workload);
>  	return 0;
>  }
>  
> diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c
> index 8f46019..040b47d 100644
> --- a/drivers/gpu/drm/i915/gvt/gtt.c
> +++ b/drivers/gpu/drm/i915/gvt/gtt.c
> @@ -1709,7 +1709,7 @@ unsigned long intel_vgpu_gma_to_gpa(struct intel_vgpu_mm *mm, unsigned long gma)
>  	int ret;
>  
>  	if (mm->type != INTEL_GVT_MM_GGTT && mm->type != INTEL_GVT_MM_PPGTT)
> -		return INTEL_GVT_INVALID_ADDR;
> +		goto err;
>  
>  	if (mm->type == INTEL_GVT_MM_GGTT) {
>  		if (!vgpu_gmadr_is_valid(vgpu, gma))
> @@ -1766,6 +1766,7 @@ unsigned long intel_vgpu_gma_to_gpa(struct intel_vgpu_mm *mm, unsigned long gma)
>  			mm->page_table_level, gma, gpa);
>  	return gpa;
>  err:
> +	vgpu->score += INVALID_ADDR;
>  	gvt_vgpu_err("invalid mm type: %d gma %lx\n", mm->type, gma);
>  	return INTEL_GVT_INVALID_ADDR;
>  }
> diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h
> index 4afe080..744dc1c 100644
> --- a/drivers/gpu/drm/i915/gvt/gvt.h
> +++ b/drivers/gpu/drm/i915/gvt/gvt.h
> @@ -171,6 +171,7 @@ struct intel_vgpu {
>  	void *reserve_ring_buffer_va[I915_NUM_ENGINES];
>  	/* dynamically allocated once ring buffer size > 1 pages */
>  	void *dynamic_ring_buffer_va[I915_NUM_ENGINES];
> +	int score;
>  
>  #if IS_ENABLED(CONFIG_DRM_I915_GVT_KVMGT)
>  	struct {
> @@ -193,6 +194,13 @@ struct intel_vgpu {
>  #endif
>  };
>  
> +
> +#define UNKNOWN_CMD  10
> +#define INVALID_ADDR 20
> +#define INTERNAL_ERR 20
> +
> +#define FAILSAFE_MODE_SCORE 20
> +
>  struct intel_gvt_gm {
>  	unsigned long vgpu_allocated_low_gm_size;
>  	unsigned long vgpu_allocated_high_gm_size;
> @@ -478,6 +486,8 @@ int intel_vgpu_emulate_opregion_request(struct intel_vgpu *vgpu, u32 swsci);
>  void populate_pvinfo_page(struct intel_vgpu *vgpu);
>  
>  int intel_gvt_scan_and_shadow_workload(struct intel_vgpu_workload *workload);
> +void enter_failsafe_mode(struct intel_vgpu *vgpu, int reason);
> +
>  
>  struct intel_gvt_ops {
>  	int (*emulate_cfg_read)(struct intel_vgpu *, unsigned int, void *,
> @@ -500,6 +510,7 @@ struct intel_gvt_ops {
>  enum {
>  	GVT_FAILSAFE_UNSUPPORTED_GUEST,
>  	GVT_FAILSAFE_INSUFFICIENT_RESOURCE,
> +	GVT_FAILSAFE_INTERNAL_ERROR,
>  };
>  
>  static inline void mmio_hw_access_pre(struct drm_i915_private *dev_priv)
> diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c
> index 17febe8..cefe28f 100644
> --- a/drivers/gpu/drm/i915/gvt/handlers.c
> +++ b/drivers/gpu/drm/i915/gvt/handlers.c
> @@ -149,7 +149,7 @@ static int render_mmio_to_ring_id(struct intel_gvt *gvt, unsigned int reg)
>  	(num * 8 + i915_mmio_reg_offset(FENCE_REG_GEN6_LO(0)))
>  
>  
> -static void enter_failsafe_mode(struct intel_vgpu *vgpu, int reason)
> +void enter_failsafe_mode(struct intel_vgpu *vgpu, int reason)
>  {
>  	switch (reason) {
>  	case GVT_FAILSAFE_UNSUPPORTED_GUEST:
> @@ -157,6 +157,8 @@ static void enter_failsafe_mode(struct intel_vgpu *vgpu, int reason)
>  		break;
>  	case GVT_FAILSAFE_INSUFFICIENT_RESOURCE:
>  		pr_err("Graphics resource is not enough for the guest\n");
> +	case GVT_FAILSAFE_INTERNAL_ERROR:
> +		pr_err("Internal error  for the guest\n");
>  	default:
>  		break;
>  	}
> diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
> index 6a7d405..3cf76bd 100644
> --- a/drivers/gpu/drm/i915/gvt/scheduler.c
> +++ b/drivers/gpu/drm/i915/gvt/scheduler.c
> @@ -188,7 +188,6 @@ static int copy_workload_to_ring_buffer(struct intel_vgpu_workload *workload)
>  {
>  	struct intel_vgpu *vgpu = workload->vgpu;
>  	int ring_id = workload->ring_id;
> -	struct i915_gem_context *shadow_ctx = vgpu->shadow_ctx;
>  	void *shadow_ring_buffer_va;
>  	u32 *cs;
>  	int ret = 0;
> @@ -198,6 +197,7 @@ static int copy_workload_to_ring_buffer(struct intel_vgpu_workload *workload)
>  	if (IS_ERR(cs)) {
>  		gvt_vgpu_err("fail to alloc size =%ld shadow  ring buffer\n",
>  			workload->rb_len);
> +		vgpu->score += INTERNAL_ERR;
>  		ret = PTR_ERR(cs);
>  		goto out;
>  	}
> @@ -313,7 +313,6 @@ static void intel_gvt_release_shadow_workload(struct intel_vgpu_workload *worklo
>  	struct i915_gem_context *shadow_ctx = workload->vgpu->shadow_ctx;
>  	struct drm_i915_private *dev_priv = workload->vgpu->gvt->dev_priv;
>  	struct intel_engine_cs *engine = dev_priv->engine[ring_id];
> -	struct drm_i915_gem_request *rq;
>  
>  	if (workload->shadowed)
>  		return;
> @@ -638,6 +637,10 @@ static int workload_thread(void *priv)
>  					FORCEWAKE_ALL);
>  
>  		intel_runtime_pm_put(gvt->dev_priv);
> +		if (ret) {
> +			if (vgpu->score >= FAILSAFE_MODE_SCORE)
> +				enter_failsafe_mode(vgpu, GVT_FAILSAFE_INTERNAL_ERROR);
> +		}
>  	}
>  	return 0;
>  }
> -- 
> 2.7.4
> 
> _______________________________________________
> intel-gvt-dev mailing list
> intel-gvt-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gvt-dev

-- 
Open Source Technology Center, Intel ltd.

$gpg --keyserver wwwkeys.pgp.net --recv-keys 4D781827
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 195 bytes
Desc: not available
URL: <https://lists.freedesktop.org/archives/intel-gvt-dev/attachments/20170719/75b1168f/attachment-0001.sig>


More information about the intel-gvt-dev mailing list