[Intel-gfx] [PATCH 4/7] drm/i915/perf: Whitelist OA report trigger registers
Umesh Nerlige Ramappa
umesh.nerlige.ramappa at intel.com
Fri Nov 13 22:12:09 UTC 2020
On Fri, Nov 13, 2020 at 10:03:56AM -0800, Umesh Nerlige Ramappa wrote:
>OA reports can be triggered into the OA buffer by writing into the
>OAREPORTTRIG registers. Whitelist the registers to allow non-privileged
>user to trigger reports.
>
>Whitelist registers only if perf_stream_paranoid is set to 0. In
>i915_perf_open_ioctl, this setting is checked and the whitelist is
>enabled accordingly. On closing the perf fd, the whitelist is removed.
>
>This ensures that the access to the whitelist is gated by
>perf_stream_paranoid.
>
>v2:
>- Move related change to this patch (Lionel)
>- Bump up perf revision (Lionel)
>
>v3: Pardon whitelisted registers for selftest (Umesh)
>v4: Document supported gens for the feature (Lionel)
>v5: Whitelist registers only if perf_stream_paranoid is set to 0 (Jon)
>v6: Move oa whitelist array to i915_perf (Chris)
>v7: Fix OA writing beyond the wal->list memory (CI)
>v8: Protect write to engine whitelist registers
>
>v9: (Umesh)
>- Use uncore->lock to protect write to forcepriv regs
>- In case wal->count falls to zero on _wa_remove, make sure you still
> clear the registers. Remove wal->count check when applying whitelist.
>
>v10: (Umesh)
>- Split patches modifying intel_workarounds
>- On some platforms there are no whitelisted regs. intel_engine_resume
> applies whitelist on these platforms too and the wal->count gates such
> platforms. Bring back the wal->count check.
>- intel_engine_allow/deny_user_register_access modifies the engine
> whitelist and the wal->count. Use uncore->lock to serialize it with
> intel_engine_apply_whitelist.
>- Grow the wal->list when adding whitelist registers after driver load.
>
>v11:
>- Fix memory leak in _wa_list_grow (Chris)
>- Serialize kfree with engine resume using uncore->lock (Umesh)
>- Grow the list only if wal->count is not aligned (Umesh)
>
>Signed-off-by: Piotr Maciejewski <piotr.maciejewski at intel.com>
>Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa at intel.com>
>Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
>---
> drivers/gpu/drm/i915/gt/intel_workarounds.c | 138 ++++++++++++++++++
> drivers/gpu/drm/i915/gt/intel_workarounds.h | 7 +
> .../gpu/drm/i915/gt/intel_workarounds_types.h | 5 +
> drivers/gpu/drm/i915/i915_perf.c | 79 +++++++++-
> drivers/gpu/drm/i915/i915_perf_types.h | 8 +
> 5 files changed, 234 insertions(+), 3 deletions(-)
>
>diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c
>index 0f9d2a65dcfe..722f11e43dad 100644
>--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
>+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
>@@ -114,6 +114,80 @@ static void wa_init_finish(struct i915_wa_list *wal)
> wal->wa_count, wal->name, wal->engine_name);
> }
>
>+static int _wa_index(struct i915_wa_list *wal, i915_reg_t reg)
>+{
>+ unsigned int addr = i915_mmio_reg_offset(reg);
>+ int start = 0, end = wal->count;
>+
>+ /* addr and wal->list[].reg, both include the R/W flags */
>+ while (start < end) {
>+ unsigned int mid = start + (end - start) / 2;
>+
>+ if (i915_mmio_reg_offset(wal->list[mid].reg) < addr)
>+ start = mid + 1;
>+ else if (i915_mmio_reg_offset(wal->list[mid].reg) > addr)
>+ end = mid;
>+ else
>+ return mid;
>+ }
>+
>+ return -ENOENT;
>+}
>+
>+static int _wa_list_grow(struct i915_wa_list *wal, size_t count)
>+{
>+ struct i915_wa *list;
>+ unsigned long flags;
>+
>+ list = kmalloc_array(ALIGN(count + 1, WA_LIST_CHUNK), sizeof(*list),
>+ GFP_KERNEL);
>+ if (!list) {
>+ DRM_ERROR("No space for workaround init!\n");
>+ return -ENOMEM;
>+ }
>+
>+ if (wal->list)
>+ memcpy(list, wal->list, sizeof(*list) * count);
>+
>+ /*
>+ * Most wal->lists are only modified during driver init and do not
>+ * require to be serialized with application of workarounds. The one
>+ * case where this is required is when OA adds/removes whitelist entries
>+ * from the wal->list and engine resume is in progress.
>+ */
>+ if (wal->engine)
>+ spin_lock_irqsave(&wal->engine->uncore->lock, flags);
>+
>+ kfree(wal->list);
if (wal->list)
kfree(wal->list);
>+ wal->list = list;
>+
>+ if (wal->engine)
>+ spin_unlock_irqrestore(&wal->engine->uncore->lock, flags);
>+
>+ return 0;
>+}
>+
>+static void _wa_remove(struct i915_wa_list *wal, i915_reg_t reg, u32 flags)
>+{
>+ int index;
>+ struct i915_wa *wa = wal->list;
>+
>+ reg.reg |= flags;
>+
>+ index = _wa_index(wal, reg);
>+ if (GEM_DEBUG_WARN_ON(index < 0))
>+ return;
>+
>+ memset(wa + index, 0, sizeof(*wa));
>+
>+ while (index < wal->count - 1) {
>+ swap(wa[index], wa[index + 1]);
>+ index++;
>+ }
>+
>+ wal->count--;
>+}
>+
> static void _wa_add(struct i915_wa_list *wal, const struct i915_wa *wa)
> {
> unsigned int addr = i915_mmio_reg_offset(wa->reg);
>@@ -2080,6 +2154,70 @@ void intel_engine_init_workarounds(struct intel_engine_cs *engine)
> wa_init_finish(wal);
> }
>
>+int intel_engine_allow_user_register_access(struct intel_engine_cs *engine,
>+ struct i915_whitelist_reg *reg,
>+ u32 count)
>+{
>+ unsigned long flags;
>+ struct i915_wa_list *wal;
>+ int ret;
>+
>+ if (!engine || !reg || !count)
>+ return -EINVAL;
>+
>+ wal = &engine->whitelist;
>+
>+ /*
>+ * i915 compacts the wa list by calling wa_init_finish during driver
>+ * load. If we want to add additional workarounds after driver load,
>+ * we need to grow the list. _wa_list_grow will add at least one free
>+ * slot for a workaround. Any additional slot required are added by
>+ * _wa_add in the below for loop.
>+ *
>+ * Once we remove the workarounds, we compact the list again in
>+ * intel_engine_deny_user_register_access by calling wa_init_finish.
>+ *
>+ * Note that we want to grow the list here only if wal->count is not
>+ * aligned. If it is aligned, whitelist_reg_ext will grow the list.
>+ *
>+ */
>+ if (!IS_ALIGNED(wal->count, WA_LIST_CHUNK)) {
>+ ret = _wa_list_grow(wal, wal->count);
>+ if (ret < 0)
>+ return ret;
>+ }
>+
>+ spin_lock_irqsave(&engine->uncore->lock, flags);
>+ for (; count--; reg++)
>+ whitelist_reg_ext(wal, reg->reg, reg->flags);
>+
>+ __engine_apply_whitelist(engine);
>+ spin_unlock_irqrestore(&engine->uncore->lock, flags);
>+
>+ return 0;
>+}
>+
>+void intel_engine_deny_user_register_access(struct intel_engine_cs *engine,
>+ struct i915_whitelist_reg *reg,
>+ u32 count)
>+{
>+ unsigned long flags;
>+ struct i915_wa_list *wal;
>+
>+ if (!engine || !reg || !count)
>+ return;
>+
>+ wal = &engine->whitelist;
>+ spin_lock_irqsave(&engine->uncore->lock, flags);
>+ for (; count--; reg++)
>+ _wa_remove(wal, reg->reg, reg->flags);
>+
>+ __engine_apply_whitelist(engine);
>+ spin_unlock_irqrestore(&engine->uncore->lock, flags);
>+
>+ wa_init_finish(wal);
>+}
>+
> void intel_engine_apply_workarounds(struct intel_engine_cs *engine)
> {
> wa_list_apply(engine->uncore, &engine->wa_list);
>diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.h b/drivers/gpu/drm/i915/gt/intel_workarounds.h
>index 8c9c769c2204..558c21b7d4cb 100644
>--- a/drivers/gpu/drm/i915/gt/intel_workarounds.h
>+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.h
>@@ -37,4 +37,11 @@ void intel_engine_apply_workarounds(struct intel_engine_cs *engine);
> int intel_engine_verify_workarounds(struct intel_engine_cs *engine,
> const char *from);
>
>+int intel_engine_allow_user_register_access(struct intel_engine_cs *engine,
>+ struct i915_whitelist_reg *reg,
>+ u32 count);
>+void intel_engine_deny_user_register_access(struct intel_engine_cs *engine,
>+ struct i915_whitelist_reg *reg,
>+ u32 count);
>+
> #endif
>diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds_types.h b/drivers/gpu/drm/i915/gt/intel_workarounds_types.h
>index e562fd43697b..53372546be9b 100644
>--- a/drivers/gpu/drm/i915/gt/intel_workarounds_types.h
>+++ b/drivers/gpu/drm/i915/gt/intel_workarounds_types.h
>@@ -11,6 +11,11 @@
>
> #include "i915_reg.h"
>
>+struct i915_whitelist_reg {
>+ i915_reg_t reg;
>+ u32 flags;
>+};
>+
> struct i915_wa {
> i915_reg_t reg;
> u32 clr;
>diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
>index df5166d89d82..64cf27187b40 100644
>--- a/drivers/gpu/drm/i915/i915_perf.c
>+++ b/drivers/gpu/drm/i915/i915_perf.c
>@@ -1362,12 +1362,56 @@ free_noa_wait(struct i915_perf_stream *stream)
> i915_vma_unpin_and_release(&stream->noa_wait, 0);
> }
>
>+static struct i915_whitelist_reg gen9_oa_wl_regs[] = {
>+ { OAREPORTTRIG2, RING_FORCE_TO_NONPRIV_ACCESS_RW },
>+ { OAREPORTTRIG6, RING_FORCE_TO_NONPRIV_ACCESS_RW },
>+};
>+
>+static struct i915_whitelist_reg gen12_oa_wl_regs[] = {
>+ { GEN12_OAG_OAREPORTTRIG2, RING_FORCE_TO_NONPRIV_ACCESS_RW },
>+ { GEN12_OAG_OAREPORTTRIG6, RING_FORCE_TO_NONPRIV_ACCESS_RW },
>+};
>+
>+static int intel_engine_apply_oa_whitelist(struct i915_perf_stream *stream)
>+{
>+ struct intel_engine_cs *engine = stream->engine;
>+ int ret;
>+
>+ if (i915_perf_stream_paranoid ||
>+ !(stream->sample_flags & SAMPLE_OA_REPORT) ||
>+ !stream->perf->oa_wl)
>+ return 0;
>+
>+ ret = intel_engine_allow_user_register_access(engine,
>+ stream->perf->oa_wl,
>+ stream->perf->num_oa_wl);
>+ if (ret < 0)
>+ return ret;
>+
>+ stream->oa_whitelisted = true;
>+ return 0;
>+}
>+
>+static void intel_engine_remove_oa_whitelist(struct i915_perf_stream *stream)
>+{
>+ struct intel_engine_cs *engine = stream->engine;
>+
>+ if (!stream->oa_whitelisted)
>+ return;
>+
>+ intel_engine_deny_user_register_access(engine,
>+ stream->perf->oa_wl,
>+ stream->perf->num_oa_wl);
>+}
>+
> static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
> {
> struct i915_perf *perf = stream->perf;
>
> BUG_ON(stream != perf->exclusive_stream);
>
>+ intel_engine_remove_oa_whitelist(stream);
>+
> /*
> * Unset exclusive_stream first, it will be checked while disabling
> * the metric set on gen8+.
>@@ -1463,7 +1507,8 @@ static void gen8_init_oa_buffer(struct i915_perf_stream *stream)
> * bit."
> */
> intel_uncore_write(uncore, GEN8_OABUFFER, gtt_offset |
>- OABUFFER_SIZE_16M | GEN8_OABUFFER_MEM_SELECT_GGTT);
>+ OABUFFER_SIZE_16M | GEN8_OABUFFER_MEM_SELECT_GGTT |
>+ GEN7_OABUFFER_EDGE_TRIGGER);
> intel_uncore_write(uncore, GEN8_OATAILPTR, gtt_offset & GEN8_OATAILPTR_MASK);
>
> /* Mark that we need updated tail pointers to read from... */
>@@ -1516,7 +1561,8 @@ static void gen12_init_oa_buffer(struct i915_perf_stream *stream)
> * bit."
> */
> intel_uncore_write(uncore, GEN12_OAG_OABUFFER, gtt_offset |
>- OABUFFER_SIZE_16M | GEN8_OABUFFER_MEM_SELECT_GGTT);
>+ OABUFFER_SIZE_16M | GEN8_OABUFFER_MEM_SELECT_GGTT |
>+ GEN7_OABUFFER_EDGE_TRIGGER);
> intel_uncore_write(uncore, GEN12_OAG_OATAILPTR,
> gtt_offset & GEN12_OAG_OATAILPTR_MASK);
>
>@@ -3497,6 +3543,20 @@ i915_perf_open_ioctl_locked(struct i915_perf *perf,
> if (!(param->flags & I915_PERF_FLAG_DISABLED))
> i915_perf_enable_locked(stream);
>
>+ /*
>+ * OA whitelist allows non-privileged access to some OA counters for
>+ * triggering reports into the OA buffer. This is only allowed if
>+ * perf_stream_paranoid is set to 0 by the sysadmin.
>+ *
>+ * We want to make sure this is almost the last thing we do before
>+ * returning the stream fd. If we do end up checking for errors in code
>+ * that follows this, we MUST call intel_engine_remove_oa_whitelist in
>+ * the error handling path to remove the whitelisted registers.
>+ */
>+ ret = intel_engine_apply_oa_whitelist(stream);
>+ if (ret < 0)
>+ goto err_flags;
>+
> /* Take a reference on the driver that will be kept with stream_fd
> * until its release.
> */
>@@ -4323,6 +4383,8 @@ void i915_perf_init(struct drm_i915_private *i915)
> perf->ctx_flexeu0_offset = 0x3de;
>
> perf->gen8_valid_ctx_bit = BIT(16);
>+ perf->oa_wl = gen9_oa_wl_regs;
>+ perf->num_oa_wl = ARRAY_SIZE(gen9_oa_wl_regs);
> }
> } else if (IS_GEN_RANGE(i915, 10, 11)) {
> perf->oa_formats = gen8_plus_oa_formats;
>@@ -4340,6 +4402,9 @@ void i915_perf_init(struct drm_i915_private *i915)
> perf->ops.disable_metric_set = gen10_disable_metric_set;
> perf->ops.oa_hw_tail_read = gen8_oa_hw_tail_read;
>
>+ perf->oa_wl = gen9_oa_wl_regs;
>+ perf->num_oa_wl = ARRAY_SIZE(gen9_oa_wl_regs);
>+
> if (IS_GEN(i915, 10)) {
> perf->ctx_oactxctrl_offset = 0x128;
> perf->ctx_flexeu0_offset = 0x3de;
>@@ -4366,6 +4431,9 @@ void i915_perf_init(struct drm_i915_private *i915)
>
> perf->ctx_flexeu0_offset = 0;
> perf->ctx_oactxctrl_offset = 0x144;
>+
>+ perf->oa_wl = gen12_oa_wl_regs;
>+ perf->num_oa_wl = ARRAY_SIZE(gen12_oa_wl_regs);
> }
> }
>
>@@ -4468,8 +4536,13 @@ int i915_perf_ioctl_version(void)
> *
> * 5: Add DRM_I915_PERF_PROP_POLL_OA_PERIOD parameter that controls the
> * interval for the hrtimer used to check for OA data.
>+ *
>+ * 6: Whitelist OATRIGGER registers to allow user to trigger reports
>+ * into the OA buffer. This applies only to gen8+. The feature can
>+ * only be accessed if perf_stream_paranoid is set to 0 by privileged
>+ * user.
> */
>- return 5;
>+ return 6;
> }
>
> #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
>diff --git a/drivers/gpu/drm/i915/i915_perf_types.h b/drivers/gpu/drm/i915/i915_perf_types.h
>index a36a455ae336..cceb54012053 100644
>--- a/drivers/gpu/drm/i915/i915_perf_types.h
>+++ b/drivers/gpu/drm/i915/i915_perf_types.h
>@@ -311,6 +311,11 @@ struct i915_perf_stream {
> * buffer should be checked for available data.
> */
> u64 poll_oa_period;
>+
>+ /**
>+ * @oa_whitelisted: Indicates that the oa registers are whitelisted.
>+ */
>+ bool oa_whitelisted;
> };
>
> /**
>@@ -431,6 +436,9 @@ struct i915_perf {
> u32 ctx_oactxctrl_offset;
> u32 ctx_flexeu0_offset;
>
>+ struct i915_whitelist_reg *oa_wl;
>+ size_t num_oa_wl;
>+
> /**
> * The RPT_ID/reason field for Gen8+ includes a bit
> * to determine if the CTX ID in the report is valid
>--
>2.20.1
>
>_______________________________________________
>Intel-gfx mailing list
>Intel-gfx at lists.freedesktop.org
>https://lists.freedesktop.org/mailman/listinfo/intel-gfx
More information about the Intel-gfx
mailing list