[Intel-gfx] [PATCH] drm/i915/mtl: Add support for 32 bit OAG formats in MTL

Umesh Nerlige Ramappa umesh.nerlige.ramappa at intel.com
Tue Nov 29 18:40:14 UTC 2022


On Mon, Nov 28, 2022 at 11:15:52PM -0800, Lucas De Marchi wrote:
>On Mon, Nov 28, 2022 at 05:21:46PM -0800, Umesh Nerlige Ramappa wrote:
>>As part of OA support for MTL,
>>
>>- Enable 32 bit OAG formats for MTL.
>>- 0x200c is repurposed on MTL. Use a separate mux table to verify oa
>> configs passed by user.
>>- Similar to ACM, OA/CS timestamp is mismatched on MTL. Add MTL to the
>> WA.
>>- On MTL, gt->scratch was using stolen lmem. An MI_SRM to stolen lmem is
>> hanging. Add a page in noa_wait BO to save/restore GPR registers for
>> the noa_wait logic.
>
>why are all these changes squashed in a single patch?

I don't have a good reason other than we had it this way to enable OA 
for MTL. I can break them into separate patches in the next revision.

>
>>
>>Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa at intel.com>
>>---
>>drivers/gpu/drm/i915/gt/intel_gt_types.h |  6 ---
>>drivers/gpu/drm/i915/i915_perf.c         | 49 ++++++++++++++++++------
>>2 files changed, 38 insertions(+), 17 deletions(-)
>>
>>diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h
>>index c1d9cd255e06..13dffe0a3d20 100644
>>--- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
>>+++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
>>@@ -296,12 +296,6 @@ enum intel_gt_scratch_field {
>>
>>	/* 8 bytes */
>>	INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA = 256,
>>-
>>-	/* 6 * 8 bytes */
>>-	INTEL_GT_SCRATCH_FIELD_PERF_CS_GPR = 2048,
>>-
>>-	/* 4 bytes */
>>-	INTEL_GT_SCRATCH_FIELD_PERF_PREDICATE_RESULT_1 = 2096,
>>};
>>
>>#endif /* __INTEL_GT_TYPES_H__ */
>>diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
>>index 00e09bb18b13..a735b9540113 100644
>>--- a/drivers/gpu/drm/i915/i915_perf.c
>>+++ b/drivers/gpu/drm/i915/i915_perf.c
>>@@ -1842,8 +1842,7 @@ static u32 *save_restore_register(struct i915_perf_stream *stream, u32 *cs,
>>	for (d = 0; d < dword_count; d++) {
>>		*cs++ = cmd;
>>		*cs++ = i915_mmio_reg_offset(reg) + 4 * d;
>>-		*cs++ = intel_gt_scratch_offset(stream->engine->gt,
>>-						offset) + 4 * d;
>>+		*cs++ = i915_ggtt_offset(stream->noa_wait) + offset + 4 * d;
>>		*cs++ = 0;
>>	}
>>
>>@@ -1876,7 +1875,13 @@ static int alloc_noa_wait(struct i915_perf_stream *stream)
>>					  MI_PREDICATE_RESULT_2_ENGINE(base) :
>>					  MI_PREDICATE_RESULT_1(RENDER_RING_BASE);
>>
>>-	bo = i915_gem_object_create_internal(i915, 4096);
>>+	/*
>>+	 * gt->scratch was being used to save/restore the GPR registers, but on
>>+	 * MTL the scratch uses stolen lmem. An MI_SRM to this memory region
>>+	 * causes an engine hang. Instead allocate an additional page here to
>
>humn.. is it because of the pte being wrong?"stolen lmem" in mtl is
>still system memory... do we know why we'd need this change?

This memory is accessed by the CS to save/restore some registers and 
that was causing a hang, so I guess mapping this memory to ggtt did not 
work at the time. I am not sure if this was fixed later, but using a 
memory that's owned by OA seems to work fine.

Thanks,
Umesh
>
>Lucas De Marchi
>
>
>>+	 * save/restore GPR registers
>>+	 */
>>+	bo = i915_gem_object_create_internal(i915, 8192);
>>	if (IS_ERR(bo)) {
>>		drm_err(&i915->drm,
>>			"Failed to allocate NOA wait batchbuffer\n");
>>@@ -1910,14 +1915,19 @@ static int alloc_noa_wait(struct i915_perf_stream *stream)
>>		goto err_unpin;
>>	}
>>
>>+	stream->noa_wait = vma;
>>+
>>+#define GPR_SAVE_OFFSET 4096
>>+#define PREDICATE_SAVE_OFFSET 4160
>>+
>>	/* Save registers. */
>>	for (i = 0; i < N_CS_GPR; i++)
>>		cs = save_restore_register(
>>			stream, cs, true /* save */, CS_GPR(i),
>>-			INTEL_GT_SCRATCH_FIELD_PERF_CS_GPR + 8 * i, 2);
>>+			GPR_SAVE_OFFSET + 8 * i, 2);
>>	cs = save_restore_register(
>>		stream, cs, true /* save */, mi_predicate_result,
>>-		INTEL_GT_SCRATCH_FIELD_PERF_PREDICATE_RESULT_1, 1);
>>+		PREDICATE_SAVE_OFFSET, 1);
>>
>>	/* First timestamp snapshot location. */
>>	ts0 = cs;
>>@@ -2033,10 +2043,10 @@ static int alloc_noa_wait(struct i915_perf_stream *stream)
>>	for (i = 0; i < N_CS_GPR; i++)
>>		cs = save_restore_register(
>>			stream, cs, false /* restore */, CS_GPR(i),
>>-			INTEL_GT_SCRATCH_FIELD_PERF_CS_GPR + 8 * i, 2);
>>+			GPR_SAVE_OFFSET + 8 * i, 2);
>>	cs = save_restore_register(
>>		stream, cs, false /* restore */, mi_predicate_result,
>>-		INTEL_GT_SCRATCH_FIELD_PERF_PREDICATE_RESULT_1, 1);
>>+		PREDICATE_SAVE_OFFSET, 1);
>>
>>	/* And return to the ring. */
>>	*cs++ = MI_BATCH_BUFFER_END;
>>@@ -2046,7 +2056,6 @@ static int alloc_noa_wait(struct i915_perf_stream *stream)
>>	i915_gem_object_flush_map(bo);
>>	__i915_gem_object_release_map(bo);
>>
>>-	stream->noa_wait = vma;
>>	goto out_ww;
>>
>>err_unpin:
>>@@ -3127,8 +3136,11 @@ get_sseu_config(struct intel_sseu *out_sseu,
>> */
>>u32 i915_perf_oa_timestamp_frequency(struct drm_i915_private *i915)
>>{
>>-	/* Wa_18013179988:dg2 */
>>-	if (IS_DG2(i915)) {
>>+	/*
>>+	 * Wa_18013179988:dg2
>>+	 * Wa_14015846243:mtl
>>+	 */
>>+	if (IS_DG2(i915) || IS_METEORLAKE(i915)) {
>>		intel_wakeref_t wakeref;
>>		u32 reg, shift;
>>
>>@@ -4306,6 +4318,17 @@ static const struct i915_range gen12_oa_mux_regs[] = {
>>	{}
>>};
>>
>>+/*
>>+ * Ref: 14010536224:
>>+ * 0x20cc is repurposed on MTL, so use a separate array for MTL.
>>+ */
>>+static const struct i915_range mtl_oa_mux_regs[] = {
>>+	{ .start = 0x0d00, .end = 0x0d04 },	/* RPM_CONFIG[0-1] */
>>+	{ .start = 0x0d0c, .end = 0x0d2c },	/* NOA_CONFIG[0-8] */
>>+	{ .start = 0x9840, .end = 0x9840 },	/* GDT_CHICKEN_BITS */
>>+	{ .start = 0x9884, .end = 0x9888 },	/* NOA_WRITE */
>>+};
>>+
>>static bool gen7_is_valid_b_counter_addr(struct i915_perf *perf, u32 addr)
>>{
>>	return reg_in_range_table(addr, gen7_oa_b_counters);
>>@@ -4349,7 +4372,10 @@ static bool xehp_is_valid_b_counter_addr(struct i915_perf *perf, u32 addr)
>>
>>static bool gen12_is_valid_mux_addr(struct i915_perf *perf, u32 addr)
>>{
>>-	return reg_in_range_table(addr, gen12_oa_mux_regs);
>>+	if (IS_METEORLAKE(perf->i915))
>>+		return reg_in_range_table(addr, mtl_oa_mux_regs);
>>+	else
>>+		return reg_in_range_table(addr, gen12_oa_mux_regs);
>>}
>>
>>static u32 mask_reg_value(u32 reg, u32 val)
>>@@ -4746,6 +4772,7 @@ static void oa_init_supported_formats(struct i915_perf *perf)
>>		break;
>>
>>	case INTEL_DG2:
>>+	case INTEL_METEORLAKE:
>>		oa_format_add(perf, I915_OAR_FORMAT_A32u40_A4u32_B8_C8);
>>		oa_format_add(perf, I915_OA_FORMAT_A24u40_A14u32_B8_C8);
>>		break;
>>-- 
>>2.36.1
>>


More information about the Intel-gfx mailing list