[PATCH 01/10] drm/i915/gvt: parse init context to update cmd accessible reg whitelist
Zhenyu Wang
zhenyuw at linux.intel.com
Tue Dec 1 06:05:39 UTC 2020
On 2020.12.01 14:04:30 +0800, Yan Zhao wrote:
> On Tue, Dec 01, 2020 at 01:46:04PM +0800, Zhenyu Wang wrote:
> > On 2020.11.25 08:37:41 +0800, Yan Zhao wrote:
> > > The Logical Context is effectively a big batch buffer consisting of
> > > multiple LRI commands + saved registers. It comprises the Ring Context
> > > (the first 0x50 dwords) and the Engine Context. The registers defined
> > > in the Engine Context are command accessible, and safe to execute in a
> > > VM Context. However, not all of them are currently tracked in the
> > > existing register whitelist. Here we kick the hardware to generate a
> > > dummy Engine Context and then scan it to update the whitelist
> > > dynamically. Based on the updated list, later patches will audit
> > > future VM Engine Contexts and disallow undesired LRIs (those outside
> > > what the hardware generates).
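> > >
> > > To illustrate (a minimal sketch, not the parser this patch adds --
> > > the helper below and its name are made up, and bounds checking is
> > > elided for brevity): the Engine Context is a stream of
> > > MI_LOAD_REGISTER_IMM headers followed by (offset, value) pairs, so
> > > collecting the command-accessible offsets amounts to walking that
> > > stream. The real path in this patch instead funnels each offset
> > > through cmd_reg_handler() -> intel_gvt_mmio_set_cmd_accessible().
> > >
> > > static void scan_engine_ctx_lris(const u32 *ctx, unsigned int len_dw,
> > >                                  void (*mark_accessible)(u32 offset))
> > > {
> > >         unsigned int i = 0;
> > >
> > >         while (i < len_dw) {
> > >                 u32 cmd = ctx[i];
> > >
> > >                 /* MI_LOAD_REGISTER_IMM: MI opcode 0x22 in bits 28:23,
> > >                  * bits 7:0 = total dword length - 2
> > >                  */
> > >                 if ((cmd >> 23) == 0x22) {
> > >                         unsigned int n = (cmd & 0xff) + 2;
> > >                         unsigned int j;
> > >
> > >                         /* (offset, value) pairs follow the header */
> > >                         for (j = i + 1; j + 1 < i + n; j += 2)
> > >                                 mark_accessible(ctx[j]);
> > >                         i += n;
> > >                 } else {
> > >                         i++;    /* e.g. MI_NOOP padding */
> > >                 }
> > >         }
> > > }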
> > >
> > > Cc: Kevin Tian <kevin.tian at intel.com>
> > > Signed-off-by: Wang Zhi <zhi.a.wang at intel.com>
> > > Signed-off-by: Yan Zhao <yan.y.zhao at intel.com>
> > > ---
> > > drivers/gpu/drm/i915/gvt/cmd_parser.c | 153 +++++++++++++++++++++++++-
> > > drivers/gpu/drm/i915/gvt/cmd_parser.h | 2 +
> > > drivers/gpu/drm/i915/gvt/gvt.h | 4 +
> > > drivers/gpu/drm/i915/gvt/vgpu.c | 4 +-
> > > 4 files changed, 159 insertions(+), 4 deletions(-)
> > >
> > > diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c
> > > index 16b582cb97ed..b1e508471c40 100644
> > > --- a/drivers/gpu/drm/i915/gvt/cmd_parser.c
> > > +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c
> > > @@ -38,10 +38,15 @@
> > >
> > > #include "i915_drv.h"
> > > #include "gt/intel_ring.h"
> > > +#include "gt/intel_gt_requests.h"
> > > #include "gvt.h"
> > > #include "i915_pvinfo.h"
> > > #include "trace.h"
> > >
> > > +#include "gem/i915_gem_context.h"
> > > +#include "gem/i915_gem_pm.h"
> > > +#include "gt/intel_context.h"
> > > +
> > > #define INVALID_OP (~0U)
> > >
> > > #define OP_LEN_MI 9
> > > @@ -454,6 +459,7 @@ enum {
> > > RING_BUFFER_INSTRUCTION,
> > > BATCH_BUFFER_INSTRUCTION,
> > > BATCH_BUFFER_2ND_LEVEL,
> > > + RING_BUFFER_CTX,
> > > };
> > >
> > > enum {
> > > @@ -495,6 +501,7 @@ struct parser_exec_state {
> > > */
> > > int saved_buf_addr_type;
> > > bool is_ctx_wa;
> > > + bool is_init_ctx;
> > >
> > > const struct cmd_info *info;
> > >
> > > @@ -708,6 +715,11 @@ static inline u32 cmd_val(struct parser_exec_state *s, int index)
> > > return *cmd_ptr(s, index);
> > > }
> > >
> > > +static inline bool is_init_ctx(struct parser_exec_state *s)
> > > +{
> > > + return (s->buf_type == RING_BUFFER_CTX && s->is_init_ctx);
> > > +}
> > > +
> > > static void parser_exec_state_dump(struct parser_exec_state *s)
> > > {
> > > int cnt = 0;
> > > @@ -721,7 +733,8 @@ static void parser_exec_state_dump(struct parser_exec_state *s)
> > >
> > > gvt_dbg_cmd(" %s %s ip_gma(%08lx) ",
> > > s->buf_type == RING_BUFFER_INSTRUCTION ?
> > > - "RING_BUFFER" : "BATCH_BUFFER",
> > > + "RING_BUFFER" : ((s->buf_type == RING_BUFFER_CTX) ?
> > > + "CTX_BUFFER" : "BATCH_BUFFER"),
> > > s->buf_addr_type == GTT_BUFFER ?
> > > "GTT" : "PPGTT", s->ip_gma);
> > >
> > > @@ -756,7 +769,8 @@ static inline void update_ip_va(struct parser_exec_state *s)
> > > if (WARN_ON(s->ring_head == s->ring_tail))
> > > return;
> > >
> > > - if (s->buf_type == RING_BUFFER_INSTRUCTION) {
> > > + if (s->buf_type == RING_BUFFER_INSTRUCTION ||
> > > + s->buf_type == RING_BUFFER_CTX) {
> > > unsigned long ring_top = s->ring_start + s->ring_size;
> > >
> > > if (s->ring_head > s->ring_tail) {
> > > @@ -936,6 +950,11 @@ static int cmd_reg_handler(struct parser_exec_state *s,
> > > return -EFAULT;
> > > }
> > >
> > > + if (is_init_ctx(s)) {
> > > + intel_gvt_mmio_set_cmd_accessible(gvt, offset);
> > > + return 0;
> > > + }
> > > +
> > > if (!intel_gvt_mmio_is_cmd_accessible(gvt, offset)) {
> > > gvt_vgpu_err("%s access to non-render register (%x)\n",
> > > cmd, offset);
> > > @@ -1215,6 +1234,8 @@ static int cmd_handler_mi_batch_buffer_end(struct parser_exec_state *s)
> > > s->buf_type = BATCH_BUFFER_INSTRUCTION;
> > > ret = ip_gma_set(s, s->ret_ip_gma_bb);
> > > s->buf_addr_type = s->saved_buf_addr_type;
> > > + } else if (s->buf_type == RING_BUFFER_CTX) {
> > > + ret = ip_gma_set(s, s->ring_tail);
> > > } else {
> > > s->buf_type = RING_BUFFER_INSTRUCTION;
> > > s->buf_addr_type = GTT_BUFFER;
> > > @@ -2763,7 +2784,8 @@ static int command_scan(struct parser_exec_state *s,
> > > gma_bottom = rb_start + rb_len;
> > >
> > > while (s->ip_gma != gma_tail) {
> > > - if (s->buf_type == RING_BUFFER_INSTRUCTION) {
> > > + if (s->buf_type == RING_BUFFER_INSTRUCTION ||
> > > + s->buf_type == RING_BUFFER_CTX) {
> > > if (!(s->ip_gma >= rb_start) ||
> > > !(s->ip_gma < gma_bottom)) {
> > > gvt_vgpu_err("ip_gma %lx out of ring scope."
> > > @@ -3056,6 +3078,131 @@ int intel_gvt_scan_and_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx)
> > > return 0;
> > > }
> > >
> > > +/* Generate dummy contexts by sending empty requests to HW, and let
> > > + * the HW fill the Engine Contexts. These dummy contexts are used for
> > > + * initialization purposes (updating the reg whitelist), so they are
> > > + * referred to as init contexts here.
> > > + */
> > > +void intel_gvt_update_reg_whitelist(struct intel_vgpu *vgpu)
> > > +{
> > > + struct intel_gvt *gvt = vgpu->gvt;
> > > + struct drm_i915_private *dev_priv = gvt->gt->i915;
> > > + struct intel_engine_cs *engine;
> > > + enum intel_engine_id id;
> > > + struct drm_i915_gem_object *ctx_obj[I915_NUM_ENGINES] = {};
> > > + const unsigned long start = LRC_STATE_PN * PAGE_SIZE;
> > > + struct i915_request *rq;
> > > + struct intel_vgpu_submission *s = &vgpu->submission;
> > > + struct i915_request *requests[I915_NUM_ENGINES] = {};
> > > + bool is_ctx_pinned[I915_NUM_ENGINES] = {};
> > > + int ret;
> > > +
> > > + if (gvt->is_reg_whitelist_updated)
> > > + return;
> > > +
> > > + for_each_engine(engine, &dev_priv->gt, id) {
> > > + ret = intel_context_pin(s->shadow[id]);
> > > + if (ret) {
> > > + gvt_vgpu_err("fail to pin shadow ctx\n");
> > > + goto out;
> > > + }
> > > + is_ctx_pinned[id] = true;
> > > + }
> > > +
> > > + for_each_engine(engine, &dev_priv->gt, id) {
> > > + rq = i915_request_create(s->shadow[id]);
> > > + if (IS_ERR(rq)) {
> > > + gvt_vgpu_err("fail to alloc default request\n");
> > > + ret = -EIO;
> > > + goto out;
> > > + }
> > > + requests[id] = i915_request_get(rq);
> > > + i915_request_add(rq);
> > > + }
> >
> > Why split this into different iterations?
> yes, the two iterations can be combined into one.
> will combine them.
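> 
> Something like this combined loop, I suppose (untested sketch, just the
> two quoted loops merged):
> 
> 	for_each_engine(engine, &dev_priv->gt, id) {
> 		ret = intel_context_pin(s->shadow[id]);
> 		if (ret) {
> 			gvt_vgpu_err("fail to pin shadow ctx\n");
> 			goto out;
> 		}
> 		is_ctx_pinned[id] = true;
> 
> 		rq = i915_request_create(s->shadow[id]);
> 		if (IS_ERR(rq)) {
> 			gvt_vgpu_err("fail to alloc default request\n");
> 			ret = -EIO;
> 			goto out;
> 		}
> 		requests[id] = i915_request_get(rq);
> 		i915_request_add(rq);
> 	}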
>
> >
> > > +
> > > + if (intel_gt_wait_for_idle(&dev_priv->gt,
> > > + I915_GEM_IDLE_TIMEOUT) == -ETIME) {
> > > + ret = -EIO;
> > > + goto out;
> > > + }
> > > +
> > > + for_each_engine(engine, &dev_priv->gt, id) {
> > > + struct i915_request *rq;
> > > +
> > > + rq = requests[id];
> > > + GEM_BUG_ON(!i915_request_completed(rq));
> > > + GEM_BUG_ON(!intel_context_is_pinned(rq->context));
> > > + ctx_obj[id] = rq->context->state->obj;
> > > + }
> > > +
> >
> > and these two iterations?
> This iteration cannot be combined with the previous one because there's
> only one intel_gt_wait_for_idle() call for all rings.
>
Yeah, I mean this one above and the one below, which handles each
engine's ctx, should be combined into one, right?
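
I.e. roughly (sketch only):

	for_each_engine(engine, &dev_priv->gt, id) {
		struct i915_request *rq = requests[id];

		GEM_BUG_ON(!i915_request_completed(rq));
		GEM_BUG_ON(!intel_context_is_pinned(rq->context));

		/* ... then pin_map + command_scan rq->context->state->obj
		 * right here instead of stashing it in ctx_obj[] first.
		 */
	}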
> >
> > > + /* scan init ctx to update cmd accessible list */
> > > + for_each_engine(engine, &dev_priv->gt, id) {
> > > + int size = engine->context_size - PAGE_SIZE;
> > > + void *vaddr;
> > > + struct parser_exec_state s;
> > > + struct drm_i915_gem_object *obj;
> > > +
> > > + if (!ctx_obj[id]) {
> > > + ret = -EIO;
> > > + goto out;
> > > + }
> > > +
> > > + obj = ctx_obj[id];
> > > +
> > > + i915_gem_object_set_cache_coherency(obj,
> > > + I915_CACHE_LLC);
> > > +
> > > + vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
> > > + if (IS_ERR(vaddr)) {
> > > + gvt_err("failed to pin init ctx obj, ring=%d, err=%lx\n",
> > > + id, PTR_ERR(vaddr));
> > > + ret = PTR_ERR(vaddr);
> > > + goto out;
> > > + }
> > > +
> > > + s.buf_type = RING_BUFFER_CTX;
> > > + s.buf_addr_type = GTT_BUFFER;
> > > + s.vgpu = vgpu;
> > > + s.engine = engine;
> > > + s.ring_start = 0;
> > > + s.ring_size = size;
> > > + s.ring_head = 0;
> > > + s.ring_tail = size;
> > > + s.rb_va = vaddr + start;
> > > + s.workload = NULL;
> > > + s.is_ctx_wa = false;
> > > + s.is_init_ctx = true;
> > > +
> > > + /* skip the Ring Context, i.e. the first RING_CTX_SIZE bytes (0x50 dwords) */
> > > + ret = ip_gma_set(&s, RING_CTX_SIZE);
> > > + if (ret) {
> > > + i915_gem_object_unpin_map(obj);
> > > + goto out;
> > > + }
> > > +
> > > + ret = command_scan(&s, 0, size, 0, size);
> > > + if (ret)
> > > + gvt_err("Scan init ctx error\n");
> > > +
> > > + i915_gem_object_unpin_map(obj);
> > > + }
> > > +
> > > +out:
> > > + if (!ret)
> > > + gvt->is_reg_whitelist_updated = true;
> > > +
> > > + for (id = 0; id < ARRAY_SIZE(requests); id++) {
> > > + if (!requests[id])
> > > + continue;
> > > + i915_request_put(requests[id]);
> > > + }
> > > +
> > > + for_each_engine(engine, &dev_priv->gt, id) {
> > > + if (!is_ctx_pinned[id])
> > > + continue;
> > > + intel_context_unpin(s->shadow[id]);
> > > + }
> > > +}
> > > +
> > > static int init_cmd_table(struct intel_gvt *gvt)
> > > {
> > > unsigned int gen_type = intel_gvt_get_device_type(gvt);
> > > diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.h b/drivers/gpu/drm/i915/gvt/cmd_parser.h
> > > index ab25d151932a..09ca2b8a63c8 100644
> > > --- a/drivers/gpu/drm/i915/gvt/cmd_parser.h
> > > +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.h
> > > @@ -50,4 +50,6 @@ int intel_gvt_scan_and_shadow_ringbuffer(struct intel_vgpu_workload *workload);
> > >
> > > int intel_gvt_scan_and_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx);
> > >
> > > +void intel_gvt_update_reg_whitelist(struct intel_vgpu *vgpu);
> > > +
> > > #endif
> > > diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h
> > > index a81cf0f01e78..c470e185bc00 100644
> > > --- a/drivers/gpu/drm/i915/gvt/gvt.h
> > > +++ b/drivers/gpu/drm/i915/gvt/gvt.h
> > > @@ -327,6 +327,7 @@ struct intel_gvt {
> > > u32 *mocs_mmio_offset_list;
> > > u32 mocs_mmio_offset_list_cnt;
> > > } engine_mmio_list;
> > > + bool is_reg_whitelist_updated;
> > >
> > > struct dentry *debugfs_root;
> > > };
> > > @@ -410,6 +411,9 @@ int intel_gvt_load_firmware(struct intel_gvt *gvt);
> > > #define vgpu_fence_base(vgpu) (vgpu->fence.base)
> > > #define vgpu_fence_sz(vgpu) (vgpu->fence.size)
> > >
> > > +/* ring context size, i.e. the first 0x50 dwords (0x50 * 4 = 320 bytes) */
> > > +#define RING_CTX_SIZE 320
> > > +
> > > struct intel_vgpu_creation_params {
> > > __u64 handle;
> > > __u64 low_gm_sz; /* in MB */
> > > diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c
> > > index f6d7e33c7099..7fc4a3e9a560 100644
> > > --- a/drivers/gpu/drm/i915/gvt/vgpu.c
> > > +++ b/drivers/gpu/drm/i915/gvt/vgpu.c
> > > @@ -499,9 +499,11 @@ struct intel_vgpu *intel_gvt_create_vgpu(struct intel_gvt *gvt,
> > >
> > > mutex_lock(&gvt->lock);
> > > vgpu = __intel_gvt_create_vgpu(gvt, &param);
> > > - if (!IS_ERR(vgpu))
> > > + if (!IS_ERR(vgpu)) {
> > > /* calculate left instance change for types */
> > > intel_gvt_update_vgpu_types(gvt);
> > > + intel_gvt_update_reg_whitelist(vgpu);
> >
> > Do we need to generate this on every vgpu creation? On the same HW, the
> > init ctx should be the same, right?
> >
> intel_gvt_update_reg_whitelist() is called after each vgpu creation, but
> it returns immediately if gvt->is_reg_whitelist_updated is true.
>
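> For reference, the create path in this patch is:
> 
>     vgpu = __intel_gvt_create_vgpu(gvt, &param);
>     if (!IS_ERR(vgpu)) {
>             intel_gvt_update_vgpu_types(gvt);
>             intel_gvt_update_reg_whitelist(vgpu);
>     }
> 
> so only the first vgpu creation actually triggers the scan.
> 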
> Thanks
> Yan
>
>
> > > + }
> > > mutex_unlock(&gvt->lock);
> > >
> > > return vgpu;
> > > --
> > > 2.17.1
> > >
> >
> > --
> >
> > $gpg --keyserver wwwkeys.pgp.net --recv-keys 4D781827
>
>
--
$gpg --keyserver wwwkeys.pgp.net --recv-keys 4D781827