[Intel-gfx] [PATCH] drm/i915/mtl: Add support for 32 bit OAG formats in MTL
Lucas De Marchi
lucas.demarchi at intel.com
Tue Nov 29 07:15:52 UTC 2022
On Mon, Nov 28, 2022 at 05:21:46PM -0800, Umesh Nerlige Ramappa wrote:
>As part of OA support for MTL,
>
>- Enable 32 bit OAG formats for MTL.
>- 0x20cc is repurposed on MTL. Use a separate mux table to verify oa
> configs passed by user.
>- Similar to ACM, OA/CS timestamp is mismatched on MTL. Add MTL to the
> WA.
>- On MTL, gt->scratch was using stolen lmem. An MI_SRM to stolen lmem is
> hanging. Add a page in noa_wait BO to save/restore GPR registers for
> the noa_wait logic.
why are all these changes squashed in a single patch?
>
>Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa at intel.com>
>---
> drivers/gpu/drm/i915/gt/intel_gt_types.h | 6 ---
> drivers/gpu/drm/i915/i915_perf.c | 49 ++++++++++++++++++------
> 2 files changed, 38 insertions(+), 17 deletions(-)
>
>diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h
>index c1d9cd255e06..13dffe0a3d20 100644
>--- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
>+++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
>@@ -296,12 +296,6 @@ enum intel_gt_scratch_field {
>
> /* 8 bytes */
> INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA = 256,
>-
>- /* 6 * 8 bytes */
>- INTEL_GT_SCRATCH_FIELD_PERF_CS_GPR = 2048,
>-
>- /* 4 bytes */
>- INTEL_GT_SCRATCH_FIELD_PERF_PREDICATE_RESULT_1 = 2096,
> };
>
> #endif /* __INTEL_GT_TYPES_H__ */
>diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
>index 00e09bb18b13..a735b9540113 100644
>--- a/drivers/gpu/drm/i915/i915_perf.c
>+++ b/drivers/gpu/drm/i915/i915_perf.c
>@@ -1842,8 +1842,7 @@ static u32 *save_restore_register(struct i915_perf_stream *stream, u32 *cs,
> for (d = 0; d < dword_count; d++) {
> *cs++ = cmd;
> *cs++ = i915_mmio_reg_offset(reg) + 4 * d;
>- *cs++ = intel_gt_scratch_offset(stream->engine->gt,
>- offset) + 4 * d;
>+ *cs++ = i915_ggtt_offset(stream->noa_wait) + offset + 4 * d;
> *cs++ = 0;
> }
>
>@@ -1876,7 +1875,13 @@ static int alloc_noa_wait(struct i915_perf_stream *stream)
> MI_PREDICATE_RESULT_2_ENGINE(base) :
> MI_PREDICATE_RESULT_1(RENDER_RING_BASE);
>
>- bo = i915_gem_object_create_internal(i915, 4096);
>+ /*
>+ * gt->scratch was being used to save/restore the GPR registers, but on
>+ * MTL the scratch uses stolen lmem. An MI_SRM to this memory region
>+ * causes an engine hang. Instead allocate an additional page here to
Hmm... is it because of the PTE being wrong? "Stolen lmem" on MTL is
still system memory... do we know why we'd need this change?
Lucas De Marchi
>+ * save/restore GPR registers
>+ */
>+ bo = i915_gem_object_create_internal(i915, 8192);
> if (IS_ERR(bo)) {
> drm_err(&i915->drm,
> "Failed to allocate NOA wait batchbuffer\n");
>@@ -1910,14 +1915,19 @@ static int alloc_noa_wait(struct i915_perf_stream *stream)
> goto err_unpin;
> }
>
>+ stream->noa_wait = vma;
>+
>+#define GPR_SAVE_OFFSET 4096
>+#define PREDICATE_SAVE_OFFSET 4160
>+
> /* Save registers. */
> for (i = 0; i < N_CS_GPR; i++)
> cs = save_restore_register(
> stream, cs, true /* save */, CS_GPR(i),
>- INTEL_GT_SCRATCH_FIELD_PERF_CS_GPR + 8 * i, 2);
>+ GPR_SAVE_OFFSET + 8 * i, 2);
> cs = save_restore_register(
> stream, cs, true /* save */, mi_predicate_result,
>- INTEL_GT_SCRATCH_FIELD_PERF_PREDICATE_RESULT_1, 1);
>+ PREDICATE_SAVE_OFFSET, 1);
>
> /* First timestamp snapshot location. */
> ts0 = cs;
>@@ -2033,10 +2043,10 @@ static int alloc_noa_wait(struct i915_perf_stream *stream)
> for (i = 0; i < N_CS_GPR; i++)
> cs = save_restore_register(
> stream, cs, false /* restore */, CS_GPR(i),
>- INTEL_GT_SCRATCH_FIELD_PERF_CS_GPR + 8 * i, 2);
>+ GPR_SAVE_OFFSET + 8 * i, 2);
> cs = save_restore_register(
> stream, cs, false /* restore */, mi_predicate_result,
>- INTEL_GT_SCRATCH_FIELD_PERF_PREDICATE_RESULT_1, 1);
>+ PREDICATE_SAVE_OFFSET, 1);
>
> /* And return to the ring. */
> *cs++ = MI_BATCH_BUFFER_END;
>@@ -2046,7 +2056,6 @@ static int alloc_noa_wait(struct i915_perf_stream *stream)
> i915_gem_object_flush_map(bo);
> __i915_gem_object_release_map(bo);
>
>- stream->noa_wait = vma;
> goto out_ww;
>
> err_unpin:
>@@ -3127,8 +3136,11 @@ get_sseu_config(struct intel_sseu *out_sseu,
> */
> u32 i915_perf_oa_timestamp_frequency(struct drm_i915_private *i915)
> {
>- /* Wa_18013179988:dg2 */
>- if (IS_DG2(i915)) {
>+ /*
>+ * Wa_18013179988:dg2
>+ * Wa_14015846243:mtl
>+ */
>+ if (IS_DG2(i915) || IS_METEORLAKE(i915)) {
> intel_wakeref_t wakeref;
> u32 reg, shift;
>
>@@ -4306,6 +4318,17 @@ static const struct i915_range gen12_oa_mux_regs[] = {
> {}
> };
>
>+/*
>+ * Ref: 14010536224:
>+ * 0x20cc is repurposed on MTL, so use a separate array for MTL.
>+ */
>+static const struct i915_range mtl_oa_mux_regs[] = {
>+ { .start = 0x0d00, .end = 0x0d04 }, /* RPM_CONFIG[0-1] */
>+ { .start = 0x0d0c, .end = 0x0d2c }, /* NOA_CONFIG[0-8] */
>+ { .start = 0x9840, .end = 0x9840 }, /* GDT_CHICKEN_BITS */
>+ { .start = 0x9884, .end = 0x9888 }, /* NOA_WRITE */
>+};
>+
> static bool gen7_is_valid_b_counter_addr(struct i915_perf *perf, u32 addr)
> {
> return reg_in_range_table(addr, gen7_oa_b_counters);
>@@ -4349,7 +4372,10 @@ static bool xehp_is_valid_b_counter_addr(struct i915_perf *perf, u32 addr)
>
> static bool gen12_is_valid_mux_addr(struct i915_perf *perf, u32 addr)
> {
>- return reg_in_range_table(addr, gen12_oa_mux_regs);
>+ if (IS_METEORLAKE(perf->i915))
>+ return reg_in_range_table(addr, mtl_oa_mux_regs);
>+ else
>+ return reg_in_range_table(addr, gen12_oa_mux_regs);
> }
>
> static u32 mask_reg_value(u32 reg, u32 val)
>@@ -4746,6 +4772,7 @@ static void oa_init_supported_formats(struct i915_perf *perf)
> break;
>
> case INTEL_DG2:
>+ case INTEL_METEORLAKE:
> oa_format_add(perf, I915_OAR_FORMAT_A32u40_A4u32_B8_C8);
> oa_format_add(perf, I915_OA_FORMAT_A24u40_A14u32_B8_C8);
> break;
>--
>2.36.1
>
More information about the Intel-gfx
mailing list