[PATCH 3/3] drm/i915/gvt: Add error rating system
Zhenyu Wang
zhenyuw at linux.intel.com
Wed Jul 19 09:05:47 UTC 2017
On 2017.07.19 14:47:02 +0800, fred gao wrote:
> This patch implements an error rating system to track errors during
> guest cmd scan and workload preparation; the guest will enter
> failsafe mode once its score reaches a limit.
>
> Generally, there are 3 types of errors: a) some commands might be
> unknown; b) some commands access invalid address space; c) some
> internal errors occur. A penalty score is defined for each of them.
What are "internal errors"? It looks like you set that for e.g. mem alloc
failure, etc. Why count that as bad guest behavior?
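
Also note how the constants interact: with FAILSAFE_MODE_SCORE at 20, a
single INVALID_ADDR or INTERNAL_ERR hit already reaches the threshold, so
accumulation only really matters for UNKNOWN_CMD. Here is a minimal
userspace sketch of the accounting, with the penalty constants copied
from your gvt.h hunk below (struct vgpu_model and charge() are
hypothetical stand-ins for illustration, not GVT code):

#include <stdio.h>

/* Penalty constants as defined in the gvt.h hunk of this patch. */
#define UNKNOWN_CMD         10
#define INVALID_ADDR        20
#define INTERNAL_ERR        20
#define FAILSAFE_MODE_SCORE 20

/* Stand-in for the score field added to struct intel_vgpu. */
struct vgpu_model {
	int score;
};

/* Add a penalty and report whether the failsafe threshold is met. */
static void charge(struct vgpu_model *vgpu, int penalty, const char *why)
{
	vgpu->score += penalty;
	printf("%-12s penalty=%2d total=%2d%s\n", why, penalty, vgpu->score,
	       vgpu->score >= FAILSAFE_MODE_SCORE ? "  -> failsafe" : "");
}

int main(void)
{
	struct vgpu_model vgpu = { .score = 0 };

	charge(&vgpu, UNKNOWN_CMD, "unknown cmd");   /* 10, still ok */
	charge(&vgpu, UNKNOWN_CMD, "unknown cmd");   /* 20, failsafe */
	charge(&vgpu, INVALID_ADDR, "invalid addr"); /* 40           */
	return 0;
}

So two unknown commands, one bad address, or one failed allocation on the
host side all look the same to the threshold check. If internal errors
shouldn't count as bad guest behavior, the INTERNAL_ERR penalty (or the
threshold) probably needs a second look.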
>
> Signed-off-by: fred gao <fred.gao at intel.com>
> ---
> drivers/gpu/drm/i915/gvt/cmd_parser.c | 48 +++++++++++++++++++++++++++--------
> drivers/gpu/drm/i915/gvt/execlist.c | 9 +++++--
> drivers/gpu/drm/i915/gvt/gtt.c | 3 ++-
> drivers/gpu/drm/i915/gvt/gvt.h | 11 ++++++++
> drivers/gpu/drm/i915/gvt/handlers.c | 4 ++-
> drivers/gpu/drm/i915/gvt/scheduler.c | 7 +++--
> 6 files changed, 66 insertions(+), 16 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c
> index df300d4..740f9e4 100644
> --- a/drivers/gpu/drm/i915/gvt/cmd_parser.c
> +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c
> @@ -837,6 +837,7 @@ static int cmd_reg_handler(struct parser_exec_state *s,
> struct intel_gvt *gvt = vgpu->gvt;
>
> if (offset + 4 > gvt->device_info.mmio_size) {
> + vgpu->score += INVALID_ADDR;
> gvt_vgpu_err("%s access to (%x) outside of MMIO range\n",
> cmd, offset);
> return -EINVAL;
> @@ -854,8 +855,10 @@ static int cmd_reg_handler(struct parser_exec_state *s,
> }
>
> if (is_force_nonpriv_mmio(offset) &&
> - force_nonpriv_reg_handler(s, offset, index))
> + force_nonpriv_reg_handler(s, offset, index)) {
> + vgpu->score += UNKNOWN_CMD;
> return -EINVAL;
> + }
>
> if (offset == i915_mmio_reg_offset(DERRMR) ||
> offset == i915_mmio_reg_offset(FORCEWAKE_MT)) {
> @@ -896,8 +899,10 @@ static int cmd_handler_lri(struct parser_exec_state *s)
> else
> ret |= (cmd_reg_inhibit(s, i)) ? -EINVAL : 0;
> }
> - if (ret)
> + if (ret) {
> + s->vgpu->score += UNKNOWN_CMD;
> break;
> + }
> ret |= cmd_reg_handler(s, cmd_reg(s, i), i, "lri");
> }
> return ret;
> @@ -913,8 +918,10 @@ static int cmd_handler_lrr(struct parser_exec_state *s)
> ret |= ((cmd_reg_inhibit(s, i) ||
> (cmd_reg_inhibit(s, i + 1)))) ?
> -EINVAL : 0;
> - if (ret)
> + if (ret) {
> + s->vgpu->score += UNKNOWN_CMD;
> break;
> + }
> ret |= cmd_reg_handler(s, cmd_reg(s, i), i, "lrr-src");
> ret |= cmd_reg_handler(s, cmd_reg(s, i + 1), i, "lrr-dst");
> }
> @@ -935,8 +942,10 @@ static int cmd_handler_lrm(struct parser_exec_state *s)
> for (i = 1; i < cmd_len;) {
> if (IS_BROADWELL(gvt->dev_priv))
> ret |= (cmd_reg_inhibit(s, i)) ? -EINVAL : 0;
> - if (ret)
> + if (ret) {
> + s->vgpu->score += UNKNOWN_CMD;
> break;
> + }
> ret |= cmd_reg_handler(s, cmd_reg(s, i), i, "lrm");
> if (cmd_val(s, 0) & (1 << 22)) {
> gma = cmd_gma(s, i + 1);
> @@ -1115,8 +1124,10 @@ static int gen8_decode_mi_display_flip(struct parser_exec_state *s,
> dword2 = cmd_val(s, 2);
>
> v = (dword0 & GENMASK(21, 19)) >> 19;
> - if (WARN_ON(v >= ARRAY_SIZE(gen8_plane_code)))
> + if (WARN_ON(v >= ARRAY_SIZE(gen8_plane_code))) {
> + s->vgpu->score += UNKNOWN_CMD;
> return -EINVAL;
> + }
>
> info->pipe = gen8_plane_code[v].pipe;
> info->plane = gen8_plane_code[v].plane;
> @@ -1136,6 +1147,7 @@ static int gen8_decode_mi_display_flip(struct parser_exec_state *s,
> info->surf_reg = SPRSURF(info->pipe);
> } else {
> WARN_ON(1);
> + s->vgpu->score += UNKNOWN_CMD;
> return -EINVAL;
> }
> return 0;
> @@ -1184,6 +1196,7 @@ static int skl_decode_mi_display_flip(struct parser_exec_state *s,
> break;
>
> default:
> + s->vgpu->score += UNKNOWN_CMD;
> gvt_vgpu_err("unknown plane code %d\n", plane);
> return -EINVAL;
> }
> @@ -1384,7 +1397,7 @@ static inline int cmd_address_audit(struct parser_exec_state *s,
> }
> } else if ((!vgpu_gmadr_is_valid(s->vgpu, guest_gma)) ||
> (!vgpu_gmadr_is_valid(s->vgpu,
> - guest_gma + op_size - 1))) {
> + guest_gma + op_size - 1))) {
> ret = -EINVAL;
> goto err;
> }
> @@ -1393,6 +1406,7 @@ static inline int cmd_address_audit(struct parser_exec_state *s,
> gvt_vgpu_err("cmd_parser: Malicious %s detected, addr=0x%lx, len=%d!\n",
> s->info->name, guest_gma, op_size);
>
> + vgpu->score += INVALID_ADDR;
> pr_err("cmd dump: ");
> for (i = 0; i < cmd_length(s); i++) {
> if (!(i % 4))
> @@ -1438,6 +1452,7 @@ static inline int unexpected_cmd(struct parser_exec_state *s)
> struct intel_vgpu *vgpu = s->vgpu;
>
> gvt_vgpu_err("Unexpected %s in command buffer!\n", s->info->name);
> + vgpu->score += UNKNOWN_CMD;
>
> return -EINVAL;
> }
> @@ -1576,11 +1591,11 @@ static int batch_buffer_needs_scan(struct parser_exec_state *s)
> return 1;
> }
>
> -static uint32_t find_bb_size(struct parser_exec_state *s)
> +static int find_bb_size(struct parser_exec_state *s)
> {
> unsigned long gma = 0;
> struct cmd_info *info;
> - uint32_t bb_size = 0;
> + int bb_size = 0;
> uint32_t cmd_len = 0;
> bool met_bb_end = false;
> struct intel_vgpu *vgpu = s->vgpu;
> @@ -1592,15 +1607,18 @@ static uint32_t find_bb_size(struct parser_exec_state *s)
>
> info = get_cmd_info(s->vgpu->gvt, cmd, s->ring_id);
> if (info == NULL) {
> + vgpu->score += UNKNOWN_CMD;
> gvt_vgpu_err("unknown cmd 0x%x, opcode=0x%x\n",
> cmd, get_opcode(cmd, s->ring_id));
> return -EINVAL;
> }
> do {
> - copy_gma_to_hva(s->vgpu, s->vgpu->gtt.ggtt_mm,
> - gma, gma + 4, &cmd);
> + if (copy_gma_to_hva(s->vgpu, s->vgpu->gtt.ggtt_mm,
> + gma, gma + 4, &cmd) < 0)
> + return -EINVAL;
> info = get_cmd_info(s->vgpu->gvt, cmd, s->ring_id);
> if (info == NULL) {
> + vgpu->score += UNKNOWN_CMD;
> gvt_vgpu_err("unknown cmd 0x%x, opcode=0x%x\n",
> cmd, get_opcode(cmd, s->ring_id));
> return -EINVAL;
> @@ -1637,6 +1655,8 @@ static int perform_bb_shadow(struct parser_exec_state *s)
>
> /* get the size of the batch buffer */
> bb_size = find_bb_size(s);
> + if (bb_size < 0)
> + return -EINVAL;
>
> /* allocate shadow batch buffer */
> entry_obj = kmalloc(sizeof(*entry_obj), GFP_KERNEL);
> @@ -1692,6 +1712,7 @@ static int perform_bb_shadow(struct parser_exec_state *s)
> return 0;
>
> unmap_src:
> + vgpu->score += INTERNAL_ERR;
> i915_gem_object_unpin_map(entry_obj->obj);
> put_obj:
> i915_gem_object_put(entry_obj->obj);
> @@ -1707,12 +1728,14 @@ static int cmd_handler_mi_batch_buffer_start(struct parser_exec_state *s)
> struct intel_vgpu *vgpu = s->vgpu;
>
> if (s->buf_type == BATCH_BUFFER_2ND_LEVEL) {
> + vgpu->score += INVALID_ADDR;
> gvt_vgpu_err("Found MI_BATCH_BUFFER_START in 2nd level BB\n");
> return -EINVAL;
> }
>
> second_level = BATCH_BUFFER_2ND_LEVEL_BIT(cmd_val(s, 0)) == 1;
> if (second_level && (s->buf_type != BATCH_BUFFER_INSTRUCTION)) {
> + vgpu->score += INVALID_ADDR;
> gvt_vgpu_err("Jumping to 2nd level BB from RB is not allowed\n");
> return -EINVAL;
> }
> @@ -2426,6 +2449,7 @@ static int cmd_parser_exec(struct parser_exec_state *s)
>
> info = get_cmd_info(s->vgpu->gvt, cmd, s->ring_id);
> if (info == NULL) {
> + vgpu->score += UNKNOWN_CMD;
> gvt_vgpu_err("unknown cmd 0x%x, opcode=0x%x\n",
> cmd, get_opcode(cmd, s->ring_id));
> return -EINVAL;
> @@ -2480,6 +2504,7 @@ static int command_scan(struct parser_exec_state *s,
> if (s->buf_type == RING_BUFFER_INSTRUCTION) {
> if (!(s->ip_gma >= rb_start) ||
> !(s->ip_gma < gma_bottom)) {
> + vgpu->score += INVALID_ADDR;
> gvt_vgpu_err("ip_gma %lx out of ring scope."
> "(base:0x%lx, bottom: 0x%lx)\n",
> s->ip_gma, rb_start,
> @@ -2488,6 +2513,7 @@ static int command_scan(struct parser_exec_state *s,
> return -EINVAL;
> }
> if (gma_out_of_range(s->ip_gma, gma_head, gma_tail)) {
> + vgpu->score += INVALID_ADDR;
> gvt_vgpu_err("ip_gma %lx out of range."
> "base 0x%lx head 0x%lx tail 0x%lx\n",
> s->ip_gma, rb_start,
> @@ -2695,11 +2721,13 @@ static int shadow_indirect_ctx(struct intel_shadow_wa_ctx *wa_ctx)
> if (IS_ERR(map)) {
> gvt_vgpu_err("failed to vmap shadow indirect ctx\n");
> ret = PTR_ERR(map);
> + vgpu->score += INTERNAL_ERR;
> goto put_obj;
> }
>
> ret = i915_gem_object_set_to_cpu_domain(obj, false);
> if (ret) {
> + vgpu->score += INTERNAL_ERR;
> gvt_vgpu_err("failed to set shadow indirect ctx to CPU\n");
> goto unmap_src;
> }
> diff --git a/drivers/gpu/drm/i915/gvt/execlist.c b/drivers/gpu/drm/i915/gvt/execlist.c
> index ec8ff32..091d50e 100644
> --- a/drivers/gpu/drm/i915/gvt/execlist.c
> +++ b/drivers/gpu/drm/i915/gvt/execlist.c
> @@ -526,6 +526,7 @@ static int prepare_execlist_workload(struct intel_vgpu_workload *workload)
> intel_vgpu_unpin_mm(workload->shadow_mm);
>
> err_ret:
> + vgpu->score = INTERNAL_ERR;
> return ret;
> }
>
> @@ -743,11 +744,15 @@ static int submit_context(struct intel_vgpu *vgpu, int ring_id,
> */
> if (list_empty(workload_q_head(vgpu, ring_id))) {
> mutex_lock(&dev_priv->drm.struct_mutex);
> - intel_gvt_scan_and_shadow_workload(workload);
> + ret = intel_gvt_scan_and_shadow_workload(workload);
> mutex_unlock(&dev_priv->drm.struct_mutex);
> }
>
> - queue_workload(workload);
> + if (ret) {
> + if (vgpu->score >= FAILSAFE_MODE_SCORE)
> + enter_failsafe_mode(vgpu, GVT_FAILSAFE_INTERNAL_ERROR);
> + } else
> + queue_workload(workload);
> return 0;
> }
>
> diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c
> index 8f46019..040b47d 100644
> --- a/drivers/gpu/drm/i915/gvt/gtt.c
> +++ b/drivers/gpu/drm/i915/gvt/gtt.c
> @@ -1709,7 +1709,7 @@ unsigned long intel_vgpu_gma_to_gpa(struct intel_vgpu_mm *mm, unsigned long gma)
> int ret;
>
> if (mm->type != INTEL_GVT_MM_GGTT && mm->type != INTEL_GVT_MM_PPGTT)
> - return INTEL_GVT_INVALID_ADDR;
> + goto err;
>
> if (mm->type == INTEL_GVT_MM_GGTT) {
> if (!vgpu_gmadr_is_valid(vgpu, gma))
> @@ -1766,6 +1766,7 @@ unsigned long intel_vgpu_gma_to_gpa(struct intel_vgpu_mm *mm, unsigned long gma)
> mm->page_table_level, gma, gpa);
> return gpa;
> err:
> + vgpu->score += INVALID_ADDR;
> gvt_vgpu_err("invalid mm type: %d gma %lx\n", mm->type, gma);
> return INTEL_GVT_INVALID_ADDR;
> }
> diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h
> index 4afe080..744dc1c 100644
> --- a/drivers/gpu/drm/i915/gvt/gvt.h
> +++ b/drivers/gpu/drm/i915/gvt/gvt.h
> @@ -171,6 +171,7 @@ struct intel_vgpu {
> void *reserve_ring_buffer_va[I915_NUM_ENGINES];
> /* dynamically allocated once ring buffer size > 1 pages */
> void *dynamic_ring_buffer_va[I915_NUM_ENGINES];
> + int score;
>
> #if IS_ENABLED(CONFIG_DRM_I915_GVT_KVMGT)
> struct {
> @@ -193,6 +194,13 @@ struct intel_vgpu {
> #endif
> };
>
> +
> +#define UNKNOWN_CMD 10
> +#define INVALID_ADDR 20
> +#define INTERNAL_ERR 20
> +
> +#define FAILSAFE_MODE_SCORE 20
> +
> struct intel_gvt_gm {
> unsigned long vgpu_allocated_low_gm_size;
> unsigned long vgpu_allocated_high_gm_size;
> @@ -478,6 +486,8 @@ int intel_vgpu_emulate_opregion_request(struct intel_vgpu *vgpu, u32 swsci);
> void populate_pvinfo_page(struct intel_vgpu *vgpu);
>
> int intel_gvt_scan_and_shadow_workload(struct intel_vgpu_workload *workload);
> +void enter_failsafe_mode(struct intel_vgpu *vgpu, int reason);
> +
>
> struct intel_gvt_ops {
> int (*emulate_cfg_read)(struct intel_vgpu *, unsigned int, void *,
> @@ -500,6 +510,7 @@ struct intel_gvt_ops {
> enum {
> GVT_FAILSAFE_UNSUPPORTED_GUEST,
> GVT_FAILSAFE_INSUFFICIENT_RESOURCE,
> + GVT_FAILSAFE_INTERNAL_ERROR,
> };
>
> static inline void mmio_hw_access_pre(struct drm_i915_private *dev_priv)
> diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c
> index 17febe8..cefe28f 100644
> --- a/drivers/gpu/drm/i915/gvt/handlers.c
> +++ b/drivers/gpu/drm/i915/gvt/handlers.c
> @@ -149,7 +149,7 @@ static int render_mmio_to_ring_id(struct intel_gvt *gvt, unsigned int reg)
> (num * 8 + i915_mmio_reg_offset(FENCE_REG_GEN6_LO(0)))
>
>
> -static void enter_failsafe_mode(struct intel_vgpu *vgpu, int reason)
> +void enter_failsafe_mode(struct intel_vgpu *vgpu, int reason)
> {
> switch (reason) {
> case GVT_FAILSAFE_UNSUPPORTED_GUEST:
> @@ -157,6 +157,8 @@ static void enter_failsafe_mode(struct intel_vgpu *vgpu, int reason)
> break;
> case GVT_FAILSAFE_INSUFFICIENT_RESOURCE:
> pr_err("Graphics resource is not enough for the guest\n");
> + case GVT_FAILSAFE_INTERNAL_ERROR:
> + pr_err("Internal error for the guest\n");
> default:
> break;
> }
> diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
> index 6a7d405..3cf76bd 100644
> --- a/drivers/gpu/drm/i915/gvt/scheduler.c
> +++ b/drivers/gpu/drm/i915/gvt/scheduler.c
> @@ -188,7 +188,6 @@ static int copy_workload_to_ring_buffer(struct intel_vgpu_workload *workload)
> {
> struct intel_vgpu *vgpu = workload->vgpu;
> int ring_id = workload->ring_id;
> - struct i915_gem_context *shadow_ctx = vgpu->shadow_ctx;
> void *shadow_ring_buffer_va;
> u32 *cs;
> int ret = 0;
> @@ -198,6 +197,7 @@ static int copy_workload_to_ring_buffer(struct intel_vgpu_workload *workload)
> if (IS_ERR(cs)) {
> gvt_vgpu_err("fail to alloc size =%ld shadow ring buffer\n",
> workload->rb_len);
> + vgpu->score += INTERNAL_ERR;
> ret = PTR_ERR(cs);
> goto out;
> }
> @@ -313,7 +313,6 @@ static void intel_gvt_release_shadow_workload(struct intel_vgpu_workload *worklo
> struct i915_gem_context *shadow_ctx = workload->vgpu->shadow_ctx;
> struct drm_i915_private *dev_priv = workload->vgpu->gvt->dev_priv;
> struct intel_engine_cs *engine = dev_priv->engine[ring_id];
> - struct drm_i915_gem_request *rq;
>
> if (workload->shadowed)
> return;
> @@ -638,6 +637,10 @@ static int workload_thread(void *priv)
> FORCEWAKE_ALL);
>
> intel_runtime_pm_put(gvt->dev_priv);
> + if (ret) {
> + if (vgpu->score >= FAILSAFE_MODE_SCORE)
> + enter_failsafe_mode(vgpu, GVT_FAILSAFE_INTERNAL_ERROR);
> + }
> }
> return 0;
> }
> --
> 2.7.4
>
--
Open Source Technology Center, Intel ltd.
$gpg --keyserver wwwkeys.pgp.net --recv-keys 4D781827