[PATCH 01/10] drm/i915/gvt: parse init context to update cmd accessible reg whitelist
Yan Zhao
yan.y.zhao at intel.com
Tue Dec 1 06:04:30 UTC 2020
On Tue, Dec 01, 2020 at 01:46:04PM +0800, Zhenyu Wang wrote:
> On 2020.11.25 08:37:41 +0800, Yan Zhao wrote:
> > The Logical Context is actually a big batch buffer consisting of
> > multiple LRI commands + saved registers. It comprises the Ring Context
> > (the first 0x50 dwords) and the Engine Context. The registers defined
> > in the Engine Context are command accessible and therefore safe to
> > execute in a VM context. However, not all of them are currently tracked
> > in the existing register whitelist. Here we kick the hardware to
> > generate a dummy Engine Context and then scan it to update the
> > whitelist dynamically. Based on the updated list, later patches will
> > audit future VM Engine Contexts and reject undesired LRIs (those
> > outside of what the hardware generates).
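(As background for the discussion below: the Engine Context image written
back by the hardware is essentially a sequence of MI_LOAD_REGISTER_IMM
packets, i.e. a header dword followed by register offset/value pairs.
Conceptually the scan just harvests those offsets into the whitelist,
roughly as in the sketch below. The patch itself reuses command_scan() and
cmd_reg_handler() for this instead of open-coding it; is_mi_lri(),
lri_reg_count(), ctx_vaddr and ctx_end are only illustrative names.)

        u32 *dw = ctx_vaddr + RING_CTX_SIZE / sizeof(u32); /* skip Ring Context */

        while (dw < ctx_end) {
                if (is_mi_lri(*dw)) {                   /* illustrative helper */
                        int i, n = lri_reg_count(*dw);  /* number of offset/value pairs */

                        /* DW1/DW2, DW3/DW4, ... hold the offset/value pairs */
                        for (i = 0; i < n; i++)
                                intel_gvt_mmio_set_cmd_accessible(gvt, dw[1 + 2 * i]);
                        dw += 1 + 2 * n;
                } else {
                        dw++;   /* the real parser decodes every command's length */
                }
        }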
> >
> > Cc: Kevin Tian <kevin.tian at intel.com>
> > Signed-off-by: Wang Zhi <zhi.a.wang at intel.com>
> > Signed-off-by: Yan Zhao <yan.y.zhao at intel.com>
> > ---
> > drivers/gpu/drm/i915/gvt/cmd_parser.c | 153 +++++++++++++++++++++++++-
> > drivers/gpu/drm/i915/gvt/cmd_parser.h | 2 +
> > drivers/gpu/drm/i915/gvt/gvt.h | 4 +
> > drivers/gpu/drm/i915/gvt/vgpu.c | 4 +-
> > 4 files changed, 159 insertions(+), 4 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c
> > index 16b582cb97ed..b1e508471c40 100644
> > --- a/drivers/gpu/drm/i915/gvt/cmd_parser.c
> > +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c
> > @@ -38,10 +38,15 @@
> >
> > #include "i915_drv.h"
> > #include "gt/intel_ring.h"
> > +#include "gt/intel_gt_requests.h"
> > #include "gvt.h"
> > #include "i915_pvinfo.h"
> > #include "trace.h"
> >
> > +#include "gem/i915_gem_context.h"
> > +#include "gem/i915_gem_pm.h"
> > +#include "gt/intel_context.h"
> > +
> > #define INVALID_OP (~0U)
> >
> > #define OP_LEN_MI 9
> > @@ -454,6 +459,7 @@ enum {
> > RING_BUFFER_INSTRUCTION,
> > BATCH_BUFFER_INSTRUCTION,
> > BATCH_BUFFER_2ND_LEVEL,
> > + RING_BUFFER_CTX,
> > };
> >
> > enum {
> > @@ -495,6 +501,7 @@ struct parser_exec_state {
> > */
> > int saved_buf_addr_type;
> > bool is_ctx_wa;
> > + bool is_init_ctx;
> >
> > const struct cmd_info *info;
> >
> > @@ -708,6 +715,11 @@ static inline u32 cmd_val(struct parser_exec_state *s, int index)
> > return *cmd_ptr(s, index);
> > }
> >
> > +static inline bool is_init_ctx(struct parser_exec_state *s)
> > +{
> > + return (s->buf_type == RING_BUFFER_CTX && s->is_init_ctx);
> > +}
> > +
> > static void parser_exec_state_dump(struct parser_exec_state *s)
> > {
> > int cnt = 0;
> > @@ -721,7 +733,8 @@ static void parser_exec_state_dump(struct parser_exec_state *s)
> >
> > gvt_dbg_cmd(" %s %s ip_gma(%08lx) ",
> > s->buf_type == RING_BUFFER_INSTRUCTION ?
> > - "RING_BUFFER" : "BATCH_BUFFER",
> > + "RING_BUFFER" : ((s->buf_type == RING_BUFFER_CTX) ?
> > + "CTX_BUFFER" : "BATCH_BUFFER"),
> > s->buf_addr_type == GTT_BUFFER ?
> > "GTT" : "PPGTT", s->ip_gma);
> >
> > @@ -756,7 +769,8 @@ static inline void update_ip_va(struct parser_exec_state *s)
> > if (WARN_ON(s->ring_head == s->ring_tail))
> > return;
> >
> > - if (s->buf_type == RING_BUFFER_INSTRUCTION) {
> > + if (s->buf_type == RING_BUFFER_INSTRUCTION ||
> > + s->buf_type == RING_BUFFER_CTX) {
> > unsigned long ring_top = s->ring_start + s->ring_size;
> >
> > if (s->ring_head > s->ring_tail) {
> > @@ -936,6 +950,11 @@ static int cmd_reg_handler(struct parser_exec_state *s,
> > return -EFAULT;
> > }
> >
> > + if (is_init_ctx(s)) {
> > + intel_gvt_mmio_set_cmd_accessible(gvt, offset);
> > + return 0;
> > + }
> > +
> > if (!intel_gvt_mmio_is_cmd_accessible(gvt, offset)) {
> > gvt_vgpu_err("%s access to non-render register (%x)\n",
> > cmd, offset);
> > @@ -1215,6 +1234,8 @@ static int cmd_handler_mi_batch_buffer_end(struct parser_exec_state *s)
> > s->buf_type = BATCH_BUFFER_INSTRUCTION;
> > ret = ip_gma_set(s, s->ret_ip_gma_bb);
> > s->buf_addr_type = s->saved_buf_addr_type;
> > + } else if (s->buf_type == RING_BUFFER_CTX) {
> > + ret = ip_gma_set(s, s->ring_tail);
> > } else {
> > s->buf_type = RING_BUFFER_INSTRUCTION;
> > s->buf_addr_type = GTT_BUFFER;
> > @@ -2763,7 +2784,8 @@ static int command_scan(struct parser_exec_state *s,
> > gma_bottom = rb_start + rb_len;
> >
> > while (s->ip_gma != gma_tail) {
> > - if (s->buf_type == RING_BUFFER_INSTRUCTION) {
> > + if (s->buf_type == RING_BUFFER_INSTRUCTION ||
> > + s->buf_type == RING_BUFFER_CTX) {
> > if (!(s->ip_gma >= rb_start) ||
> > !(s->ip_gma < gma_bottom)) {
> > gvt_vgpu_err("ip_gma %lx out of ring scope."
> > @@ -3056,6 +3078,131 @@ int intel_gvt_scan_and_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx)
> > return 0;
> > }
> >
> > +/* generate dummy contexts by sending empty requests to HW, and let
> > + * the HW fill the Engine Contexts. These dummy contexts are used for
> > + * initialization purposes (updating the reg whitelist), so they are
> > + * referred to as init contexts here.
> > + */
> > +void intel_gvt_update_reg_whitelist(struct intel_vgpu *vgpu)
> > +{
> > + struct intel_gvt *gvt = vgpu->gvt;
> > + struct drm_i915_private *dev_priv = gvt->gt->i915;
> > + struct intel_engine_cs *engine;
> > + enum intel_engine_id id;
> > + struct drm_i915_gem_object *ctx_obj[I915_NUM_ENGINES] = {};
> > + const unsigned long start = LRC_STATE_PN * PAGE_SIZE;
> > + struct i915_request *rq;
> > + struct intel_vgpu_submission *s = &vgpu->submission;
> > + struct i915_request *requests[I915_NUM_ENGINES] = {};
> > + bool is_ctx_pinned[I915_NUM_ENGINES] = {};
> > + int ret;
> > +
> > + if (gvt->is_reg_whitelist_updated)
> > + return;
> > +
> > + for_each_engine(engine, &dev_priv->gt, id) {
> > + ret = intel_context_pin(s->shadow[id]);
> > + if (ret) {
> > + gvt_vgpu_err("fail to pin shadow ctx\n");
> > + goto out;
> > + }
> > + is_ctx_pinned[id] = true;
> > + }
> > +
> > + for_each_engine(engine, &dev_priv->gt, id) {
> > + rq = i915_request_create(s->shadow[id]);
> > + if (IS_ERR(rq)) {
> > + gvt_vgpu_err("fail to alloc default request\n");
> > + ret = -EIO;
> > + goto out;
> > + }
> > + requests[id] = i915_request_get(rq);
> > + i915_request_add(rq);
> > + }
>
> Why split this into different iterations?
Yes, the two iterations can be combined into one. I will combine them in
the next version.
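Roughly like this (untested sketch on top of this patch, keeping the same
variables and error paths):

        for_each_engine(engine, &dev_priv->gt, id) {
                ret = intel_context_pin(s->shadow[id]);
                if (ret) {
                        gvt_vgpu_err("fail to pin shadow ctx\n");
                        goto out;
                }
                is_ctx_pinned[id] = true;

                rq = i915_request_create(s->shadow[id]);
                if (IS_ERR(rq)) {
                        gvt_vgpu_err("fail to alloc default request\n");
                        ret = -EIO;
                        goto out;
                }
                requests[id] = i915_request_get(rq);
                i915_request_add(rq);
        }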
>
> > +
> > + if (intel_gt_wait_for_idle(&dev_priv->gt,
> > + I915_GEM_IDLE_TIMEOUT) == -ETIME) {
> > + ret = -EIO;
> > + goto out;
> > + }
> > +
> > + for_each_engine(engine, &dev_priv->gt, id) {
> > + struct i915_request *rq;
> > +
> > + rq = requests[id];
> > + GEM_BUG_ON(!i915_request_completed(rq));
> > + GEM_BUG_ON(!intel_context_is_pinned(rq->context));
> > + ctx_obj[id] = rq->context->state->obj;
> > + }
> > +
>
> and these two iterations?
This iteration cannot be combined with the previous one because there is
only one intel_gt_wait_for_idle() call covering all of the rings.
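To illustrate the required ordering (submit_empty_request() and
collect_engine_context() are just placeholder names for the loops in the
hunk above):

        for_each_engine(engine, &dev_priv->gt, id)
                submit_empty_request(engine);   /* i915_request_create/add() */

        /* a single wait covers the requests of all engines */
        if (intel_gt_wait_for_idle(&dev_priv->gt, I915_GEM_IDLE_TIMEOUT) == -ETIME)
                return -EIO;

        for_each_engine(engine, &dev_priv->gt, id)
                collect_engine_context(engine); /* ctx_obj[id] = rq->context->state->obj */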
>
> > + /* scan init ctx to update cmd accessible list */
> > + for_each_engine(engine, &dev_priv->gt, id) {
> > + int size = engine->context_size - PAGE_SIZE;
> > + void *vaddr;
> > + struct parser_exec_state s;
> > + struct drm_i915_gem_object *obj;
> > +
> > + if (!ctx_obj[id]) {
> > + ret = -EIO;
> > + goto out;
> > + }
> > +
> > + obj = ctx_obj[id];
> > +
> > + i915_gem_object_set_cache_coherency(obj,
> > + I915_CACHE_LLC);
> > +
> > + vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
> > + if (IS_ERR(vaddr)) {
> > + gvt_err("failed to pin init ctx obj, ring=%d, err=%lx\n",
> > + id, PTR_ERR(vaddr));
> > + goto out;
> > + }
> > +
> > + s.buf_type = RING_BUFFER_CTX;
> > + s.buf_addr_type = GTT_BUFFER;
> > + s.vgpu = vgpu;
> > + s.engine = engine;
> > + s.ring_start = 0;
> > + s.ring_size = size;
> > + s.ring_head = 0;
> > + s.ring_tail = size;
> > + s.rb_va = vaddr + start;
> > + s.workload = NULL;
> > + s.is_ctx_wa = false;
> > + s.is_init_ctx = true;
> > +
> > + /* skip the Ring Context, i.e. the first 0x50 dwords (RING_CTX_SIZE bytes) */
> > + ret = ip_gma_set(&s, RING_CTX_SIZE);
> > + if (ret) {
> > + i915_gem_object_unpin_map(obj);
> > + goto out;
> > + }
> > +
> > + ret = command_scan(&s, 0, size, 0, size);
> > + if (ret)
> > + gvt_err("Scan init ctx error\n");
> > +
> > + i915_gem_object_unpin_map(obj);
> > + }
> > +
> > +out:
> > + if (!ret)
> > + gvt->is_reg_whitelist_updated = true;
> > +
> > + for (id = 0; id < ARRAY_SIZE(requests); id++) {
> > + if (!requests[id])
> > + continue;
> > + i915_request_put(requests[id]);
> > + }
> > +
> > + for_each_engine(engine, &dev_priv->gt, id) {
> > + if (!is_ctx_pinned[id])
> > + continue;
> > + intel_context_unpin(s->shadow[id]);
> > + }
> > +}
> > +
> > static int init_cmd_table(struct intel_gvt *gvt)
> > {
> > unsigned int gen_type = intel_gvt_get_device_type(gvt);
> > diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.h b/drivers/gpu/drm/i915/gvt/cmd_parser.h
> > index ab25d151932a..09ca2b8a63c8 100644
> > --- a/drivers/gpu/drm/i915/gvt/cmd_parser.h
> > +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.h
> > @@ -50,4 +50,6 @@ int intel_gvt_scan_and_shadow_ringbuffer(struct intel_vgpu_workload *workload);
> >
> > int intel_gvt_scan_and_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx);
> >
> > +void intel_gvt_update_reg_whitelist(struct intel_vgpu *vgpu);
> > +
> > #endif
> > diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h
> > index a81cf0f01e78..c470e185bc00 100644
> > --- a/drivers/gpu/drm/i915/gvt/gvt.h
> > +++ b/drivers/gpu/drm/i915/gvt/gvt.h
> > @@ -327,6 +327,7 @@ struct intel_gvt {
> > u32 *mocs_mmio_offset_list;
> > u32 mocs_mmio_offset_list_cnt;
> > } engine_mmio_list;
> > + bool is_reg_whitelist_updated;
> >
> > struct dentry *debugfs_root;
> > };
> > @@ -410,6 +411,9 @@ int intel_gvt_load_firmware(struct intel_gvt *gvt);
> > #define vgpu_fence_base(vgpu) (vgpu->fence.base)
> > #define vgpu_fence_sz(vgpu) (vgpu->fence.size)
> >
> > +/* ring context size in bytes, i.e. the first 0x50 dwords */
> > +#define RING_CTX_SIZE 320
> > +
> > struct intel_vgpu_creation_params {
> > __u64 handle;
> > __u64 low_gm_sz; /* in MB */
> > diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c
> > index f6d7e33c7099..7fc4a3e9a560 100644
> > --- a/drivers/gpu/drm/i915/gvt/vgpu.c
> > +++ b/drivers/gpu/drm/i915/gvt/vgpu.c
> > @@ -499,9 +499,11 @@ struct intel_vgpu *intel_gvt_create_vgpu(struct intel_gvt *gvt,
> >
> > mutex_lock(&gvt->lock);
> > vgpu = __intel_gvt_create_vgpu(gvt, ¶m);
> > - if (!IS_ERR(vgpu))
> > + if (!IS_ERR(vgpu)) {
> > /* calculate left instance change for types */
> > intel_gvt_update_vgpu_types(gvt);
> > + intel_gvt_update_reg_whitelist(vgpu);
>
> Do we need to generate this on every vgpu creation? On the same HW, the
> init ctx should be the same, right?
>
intel_gvt_update_reg_whitelist() is called after each vgpu creation, but it
returns immediately if gvt->is_reg_whitelist_updated is already true.
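In other words the whole body is guarded, so the dummy contexts are only
generated when the first vgpu of a gvt instance is created (outline of the
hunk above, not new code):

        void intel_gvt_update_reg_whitelist(struct intel_vgpu *vgpu)
        {
                struct intel_gvt *gvt = vgpu->gvt;
                int ret = 0;

                if (gvt->is_reg_whitelist_updated)      /* later vgpus return here */
                        return;

                /* ... pin shadow contexts, submit empty requests, wait,
                 *     scan the resulting init contexts (sets ret on failure) ...
                 */

                if (!ret)       /* mark as done only when the scan succeeded */
                        gvt->is_reg_whitelist_updated = true;
                /* ... put the requests and unpin the shadow contexts ... */
        }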
Thanks
Yan
> > + }
> > mutex_unlock(&gvt->lock);
> >
> > return vgpu;
> > --
> > 2.17.1
> >