[Intel-gfx] [PATCH v3 04/16] drm/i915/perf: Determine gen12 oa ctx offset at runtime

Jani Nikula jani.nikula at linux.intel.com
Wed Oct 12 16:12:20 UTC 2022


On Tue, 11 Oct 2022, Umesh Nerlige Ramappa <umesh.nerlige.ramappa at intel.com> wrote:
> On Tue, Oct 11, 2022 at 08:47:00PM +0300, Jani Nikula wrote:
>>On Mon, 10 Oct 2022, Umesh Nerlige Ramappa <umesh.nerlige.ramappa at intel.com> wrote:
>>> Some SKUs of same gen12 platform may have different oactxctrl
>>> offsets. For gen12, determine oactxctrl offsets at runtime.
>>>
>>> v2: (Lionel)
>>> - Move MI definitions to intel_gpu_commands.h
>>> - Ensure __find_reg_in_lri does read past context image size
>>>
>>> v3: (Ashutosh)
>>> - Drop unnecessary use of double underscores
>>> - fix find_reg_in_lri
>>> - Return error if oa context offset is U32_MAX
>>> - Error out if oa_ctx_ctrl_offset does not find offset
>>>
>>> Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa at intel.com>
>>> ---
>>>  drivers/gpu/drm/i915/gt/intel_gpu_commands.h |   4 +
>>>  drivers/gpu/drm/i915/i915_perf.c             | 154 +++++++++++++++----
>>>  drivers/gpu/drm/i915/i915_perf_oa_regs.h     |   2 +-
>>>  3 files changed, 129 insertions(+), 31 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
>>> index d4e9702d3c8e..f50ea92910d9 100644
>>> --- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
>>> +++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
>>> @@ -187,6 +187,10 @@
>>>  #define   MI_BATCH_RESOURCE_STREAMER REG_BIT(10)
>>>  #define   MI_BATCH_PREDICATE         REG_BIT(15) /* HSW+ on RCS only*/
>>>
>>> +#define MI_OPCODE(x)		(((x) >> 23) & 0x3f)
>>> +#define IS_MI_LRI_CMD(x)	(MI_OPCODE(x) == MI_OPCODE(MI_INSTR(0x22, 0)))
>>> +#define MI_LRI_LEN(x)		(((x) & 0xff) + 1)
>>> +
>>>  /*
>>>   * 3D instructions used by the kernel
>>>   */
>>> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
>>> index cd57b5836386..b292aa39633e 100644
>>> --- a/drivers/gpu/drm/i915/i915_perf.c
>>> +++ b/drivers/gpu/drm/i915/i915_perf.c
>>> @@ -1358,6 +1358,68 @@ static int gen12_get_render_context_id(struct i915_perf_stream *stream)
>>>  	return 0;
>>>  }
>>>
>>> +#define valid_oactxctrl_offset(x) ((x) && (x) != U32_MAX)
>>> +static bool oa_find_reg_in_lri(u32 *state, u32 reg, u32 *offset, u32 end)
>>> +{
>>> +	u32 idx = *offset;
>>> +	u32 len = min(MI_LRI_LEN(state[idx]) + idx, end);
>>
>>I don't understand any of this stuff, but why are the offset, index and
>>length u32 instead of just, say, int?
>
> I can use int, but the ce->engine->context_size is u32 and we are 
> parsing the context image here, so I have just used the same type for 
> offset, index, length.
>
> Any guideline/recommendation to choose int over u32?

If it's an index in general, looping, regular computation, just plain
old int will often do. Use u32 and friends when you actually need a
specific size to map to hardware or registers etc.

I guess it's not so clear cut here, and mixing types not necessarily a
bright idea here either.

BR,
Jani.


>
>>
>>> +	bool found = false;
>>> +
>>> +	idx++;
>>> +	for (; idx < len; idx += 2) {
>>
>>I find the initialization of idx and len confusing, and thereby the
>>entire loop too.
>
> Considering that the context image is a collection of MI_LRI commands 
> with each command having this format:
>
> dword 0: MI_LRI
> dword 1: reg offset
> dword 2: reg value
> dword 3: reg offset
> dword 4: reg value
> ...
>
> oa_context_image_offset() and oa_find_reg_in_lri() are parsing this 
> context image to look for a specific reg_offset. Once the offset is 
> known, the OA code programs the reg_value for the context.
>
> Thanks,
> Umesh
>
>>
>>BR,
>>Jani.
>>
>>
>>> +		if (state[idx] == reg) {
>>> +			found = true;
>>> +			break;
>>> +		}
>>> +	}
>>> +
>>> +	*offset = idx;
>>> +	return found;
>>> +}
>>> +
>>> +static u32 oa_context_image_offset(struct intel_context *ce, u32 reg)
>>> +{
>>> +	u32 offset, len = (ce->engine->context_size - PAGE_SIZE) / 4;
>>> +	u32 *state = ce->lrc_reg_state;
>>> +
>>> +	for (offset = 0; offset < len; ) {
>>> +		if (IS_MI_LRI_CMD(state[offset])) {
>>> +			if (oa_find_reg_in_lri(state, reg, &offset, len))
>>> +				break;
>>> +		} else {
>>> +			offset++;
>>> +		}
>>> +	}
>>> +
>>> +	return offset < len ? offset : U32_MAX;
>>> +}
>>> +
>>> +static int set_oa_ctx_ctrl_offset(struct intel_context *ce)
>>> +{
>>> +	i915_reg_t reg = GEN12_OACTXCONTROL(ce->engine->mmio_base);
>>> +	struct i915_perf *perf = &ce->engine->i915->perf;
>>> +	u32 offset = perf->ctx_oactxctrl_offset;
>>> +
>>> +	/* Do this only once. Failure is stored as offset of U32_MAX */
>>> +	if (offset)
>>> +		goto exit;
>>> +
>>> +	offset = oa_context_image_offset(ce, i915_mmio_reg_offset(reg));
>>> +	perf->ctx_oactxctrl_offset = offset;
>>> +
>>> +	drm_dbg(&ce->engine->i915->drm,
>>> +		"%s oa ctx control at 0x%08x dword offset\n",
>>> +		ce->engine->name, offset);
>>> +
>>> +exit:
>>> +	return valid_oactxctrl_offset(offset) ? 0 : -ENODEV;
>>> +}
>>> +
>>> +static bool engine_supports_mi_query(struct intel_engine_cs *engine)
>>> +{
>>> +	return engine->class == RENDER_CLASS;
>>> +}
>>> +
>>>  /**
>>>   * oa_get_render_ctx_id - determine and hold ctx hw id
>>>   * @stream: An i915-perf stream opened for OA metrics
>>> @@ -1377,6 +1439,21 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
>>>  	if (IS_ERR(ce))
>>>  		return PTR_ERR(ce);
>>>
>>> +	if (engine_supports_mi_query(stream->engine)) {
>>> +		/*
>>> +		 * We are enabling perf query here. If we don't find the context
>>> +		 * offset here, just return an error.
>>> +		 */
>>> +		ret = set_oa_ctx_ctrl_offset(ce);
>>> +		if (ret) {
>>> +			intel_context_unpin(ce);
>>> +			drm_err(&stream->perf->i915->drm,
>>> +				"Enabling perf query failed for %s\n",
>>> +				stream->engine->name);
>>> +			return ret;
>>> +		}
>>> +	}
>>> +
>>>  	switch (GRAPHICS_VER(ce->engine->i915)) {
>>>  	case 7: {
>>>  		/*
>>> @@ -2408,10 +2485,11 @@ static int gen12_configure_oar_context(struct i915_perf_stream *stream,
>>>  	int err;
>>>  	struct intel_context *ce = stream->pinned_ctx;
>>>  	u32 format = stream->oa_buffer.format;
>>> +	u32 offset = stream->perf->ctx_oactxctrl_offset;
>>>  	struct flex regs_context[] = {
>>>  		{
>>>  			GEN8_OACTXCONTROL,
>>> -			stream->perf->ctx_oactxctrl_offset + 1,
>>> +			offset + 1,
>>>  			active ? GEN8_OA_COUNTER_RESUME : 0,
>>>  		},
>>>  	};
>>> @@ -2436,15 +2514,18 @@ static int gen12_configure_oar_context(struct i915_perf_stream *stream,
>>>  		},
>>>  	};
>>>
>>> -	/* Modify the context image of pinned context with regs_context*/
>>> -	err = intel_context_lock_pinned(ce);
>>> -	if (err)
>>> -		return err;
>>> +	/* Modify the context image of pinned context with regs_context */
>>> +	if (valid_oactxctrl_offset(offset)) {
>>> +		err = intel_context_lock_pinned(ce);
>>> +		if (err)
>>> +			return err;
>>>
>>> -	err = gen8_modify_context(ce, regs_context, ARRAY_SIZE(regs_context));
>>> -	intel_context_unlock_pinned(ce);
>>> -	if (err)
>>> -		return err;
>>> +		err = gen8_modify_context(ce, regs_context,
>>> +					  ARRAY_SIZE(regs_context));
>>> +		intel_context_unlock_pinned(ce);
>>> +		if (err)
>>> +			return err;
>>> +	}
>>>
>>>  	/* Apply regs_lri using LRI with pinned context */
>>>  	return gen8_modify_self(ce, regs_lri, ARRAY_SIZE(regs_lri), active);
>>> @@ -2566,6 +2647,7 @@ lrc_configure_all_contexts(struct i915_perf_stream *stream,
>>>  			   const struct i915_oa_config *oa_config,
>>>  			   struct i915_active *active)
>>>  {
>>> +	u32 ctx_oactxctrl = stream->perf->ctx_oactxctrl_offset;
>>>  	/* The MMIO offsets for Flex EU registers aren't contiguous */
>>>  	const u32 ctx_flexeu0 = stream->perf->ctx_flexeu0_offset;
>>>  #define ctx_flexeuN(N) (ctx_flexeu0 + 2 * (N) + 1)
>>> @@ -2576,7 +2658,7 @@ lrc_configure_all_contexts(struct i915_perf_stream *stream,
>>>  		},
>>>  		{
>>>  			GEN8_OACTXCONTROL,
>>> -			stream->perf->ctx_oactxctrl_offset + 1,
>>> +			ctx_oactxctrl + 1,
>>>  		},
>>>  		{ EU_PERF_CNTL0, ctx_flexeuN(0) },
>>>  		{ EU_PERF_CNTL1, ctx_flexeuN(1) },
>>> @@ -4545,6 +4627,37 @@ static void oa_init_supported_formats(struct i915_perf *perf)
>>>  	}
>>>  }
>>>
>>> +static void i915_perf_init_info(struct drm_i915_private *i915)
>>> +{
>>> +	struct i915_perf *perf = &i915->perf;
>>> +
>>> +	switch (GRAPHICS_VER(i915)) {
>>> +	case 8:
>>> +		perf->ctx_oactxctrl_offset = 0x120;
>>> +		perf->ctx_flexeu0_offset = 0x2ce;
>>> +		perf->gen8_valid_ctx_bit = BIT(25);
>>> +		break;
>>> +	case 9:
>>> +		perf->ctx_oactxctrl_offset = 0x128;
>>> +		perf->ctx_flexeu0_offset = 0x3de;
>>> +		perf->gen8_valid_ctx_bit = BIT(16);
>>> +		break;
>>> +	case 11:
>>> +		perf->ctx_oactxctrl_offset = 0x124;
>>> +		perf->ctx_flexeu0_offset = 0x78e;
>>> +		perf->gen8_valid_ctx_bit = BIT(16);
>>> +		break;
>>> +	case 12:
>>> +		/*
>>> +		 * Calculate offset at runtime in oa_pin_context for gen12 and
>>> +		 * cache the value in perf->ctx_oactxctrl_offset.
>>> +		 */
>>> +		break;
>>> +	default:
>>> +		MISSING_CASE(GRAPHICS_VER(i915));
>>> +	}
>>> +}
>>> +
>>>  /**
>>>   * i915_perf_init - initialize i915-perf state on module bind
>>>   * @i915: i915 device instance
>>> @@ -4583,6 +4696,7 @@ void i915_perf_init(struct drm_i915_private *i915)
>>>  		 * execlist mode by default.
>>>  		 */
>>>  		perf->ops.read = gen8_oa_read;
>>> +		i915_perf_init_info(i915);
>>>
>>>  		if (IS_GRAPHICS_VER(i915, 8, 9)) {
>>>  			perf->ops.is_valid_b_counter_reg =
>>> @@ -4602,18 +4716,6 @@ void i915_perf_init(struct drm_i915_private *i915)
>>>  			perf->ops.enable_metric_set = gen8_enable_metric_set;
>>>  			perf->ops.disable_metric_set = gen8_disable_metric_set;
>>>  			perf->ops.oa_hw_tail_read = gen8_oa_hw_tail_read;
>>> -
>>> -			if (GRAPHICS_VER(i915) == 8) {
>>> -				perf->ctx_oactxctrl_offset = 0x120;
>>> -				perf->ctx_flexeu0_offset = 0x2ce;
>>> -
>>> -				perf->gen8_valid_ctx_bit = BIT(25);
>>> -			} else {
>>> -				perf->ctx_oactxctrl_offset = 0x128;
>>> -				perf->ctx_flexeu0_offset = 0x3de;
>>> -
>>> -				perf->gen8_valid_ctx_bit = BIT(16);
>>> -			}
>>>  		} else if (GRAPHICS_VER(i915) == 11) {
>>>  			perf->ops.is_valid_b_counter_reg =
>>>  				gen7_is_valid_b_counter_addr;
>>> @@ -4627,11 +4729,6 @@ void i915_perf_init(struct drm_i915_private *i915)
>>>  			perf->ops.enable_metric_set = gen8_enable_metric_set;
>>>  			perf->ops.disable_metric_set = gen11_disable_metric_set;
>>>  			perf->ops.oa_hw_tail_read = gen8_oa_hw_tail_read;
>>> -
>>> -			perf->ctx_oactxctrl_offset = 0x124;
>>> -			perf->ctx_flexeu0_offset = 0x78e;
>>> -
>>> -			perf->gen8_valid_ctx_bit = BIT(16);
>>>  		} else if (GRAPHICS_VER(i915) == 12) {
>>>  			perf->ops.is_valid_b_counter_reg =
>>>  				gen12_is_valid_b_counter_addr;
>>> @@ -4645,9 +4742,6 @@ void i915_perf_init(struct drm_i915_private *i915)
>>>  			perf->ops.enable_metric_set = gen12_enable_metric_set;
>>>  			perf->ops.disable_metric_set = gen12_disable_metric_set;
>>>  			perf->ops.oa_hw_tail_read = gen12_oa_hw_tail_read;
>>> -
>>> -			perf->ctx_flexeu0_offset = 0;
>>> -			perf->ctx_oactxctrl_offset = 0x144;
>>>  		}
>>>  	}
>>>
>>> diff --git a/drivers/gpu/drm/i915/i915_perf_oa_regs.h b/drivers/gpu/drm/i915/i915_perf_oa_regs.h
>>> index f31c9f13a9fc..0ef3562ff4aa 100644
>>> --- a/drivers/gpu/drm/i915/i915_perf_oa_regs.h
>>> +++ b/drivers/gpu/drm/i915/i915_perf_oa_regs.h
>>> @@ -97,7 +97,7 @@
>>>  #define  GEN12_OAR_OACONTROL_COUNTER_FORMAT_SHIFT 1
>>>  #define  GEN12_OAR_OACONTROL_COUNTER_ENABLE       (1 << 0)
>>>
>>> -#define GEN12_OACTXCONTROL _MMIO(0x2360)
>>> +#define GEN12_OACTXCONTROL(base) _MMIO((base) + 0x360)
>>>  #define GEN12_OAR_OASTATUS _MMIO(0x2968)
>>>
>>>  /* Gen12 OAG unit */
>>
>>-- 
>>Jani Nikula, Intel Open Source Graphics Center

-- 
Jani Nikula, Intel Open Source Graphics Center


More information about the Intel-gfx mailing list