[PATCH 07/17] drm/xe/oa/uapi: Define and parse OA stream properties

Dixit, Ashutosh ashutosh.dixit at intel.com
Sat Jan 20 02:48:25 UTC 2024


On Tue, 19 Dec 2023 15:23:59 -0800, Umesh Nerlige Ramappa wrote:
>

Hi Umesh,

> On Thu, Dec 07, 2023 at 10:43:19PM -0800, Ashutosh Dixit wrote:
> > Properties for OA streams are specified by user space, when the stream is
> > opened, as a chain of drm_xe_ext_set_property struct's. Parse and validate
> > these stream properties.
> >
> > Signed-off-by: Ashutosh Dixit <ashutosh.dixit at intel.com>
> > ---
> > drivers/gpu/drm/xe/xe_oa.c   | 372 +++++++++++++++++++++++++++++++++++
> > drivers/gpu/drm/xe/xe_oa.h   |   2 +
> > drivers/gpu/drm/xe/xe_perf.c |   2 +
> > include/uapi/drm/xe_drm.h    | 114 +++++++++++
> > 4 files changed, 490 insertions(+)
> >
> > diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c
> > index 6a903bf4f87d1..9b0bd58fcbc06 100644
> > --- a/drivers/gpu/drm/xe/xe_oa.c
> > +++ b/drivers/gpu/drm/xe/xe_oa.c
> > @@ -3,10 +3,13 @@
> >  * Copyright © 2023 Intel Corporation
> >  */
> >
> > +#include <linux/nospec.h>
> > #include <linux/sysctl.h>
> >
> > +#include "regs/xe_gt_regs.h"
> > #include "regs/xe_oa_regs.h"
> > #include "xe_device.h"
> > +#include "xe_exec_queue.h"
> > #include "xe_gt.h"
> > #include "xe_mmio.h"
> > #include "xe_oa.h"
> > @@ -46,6 +49,20 @@ struct xe_oa_config {
> >	struct rcu_head rcu;
> > };
> >
> > +struct xe_oa_open_param {
> > +	u32 oa_unit_id;
> > +	bool sample;
> > +	u32 metric_set;
> > +	enum xe_oa_format_name oa_format;
> > +	int period_exponent;
> > +	u32 poll_period_us;
> > +	u32 open_flags;
> > +	int exec_queue_id;
> > +	int engine_instance;
> > +	struct xe_exec_queue *exec_q;
> > +	struct xe_hw_engine *hwe;
> > +};
> > +
> > #define DRM_FMT(x) DRM_XE_OA_FMT_TYPE_##x
> >
> > static const struct xe_oa_format oa_formats[] = {
> > @@ -88,6 +105,361 @@ static void xe_oa_config_put(struct xe_oa_config *oa_config)
> >	kref_put(&oa_config->ref, xe_oa_config_release);
> > }
> >
> > +/*
> > + * OA timestamp frequency = CS timestamp frequency in most platforms. On some
> > + * platforms OA unit ignores the CTC_SHIFT and the 2 timestamps differ. In such
> > + * cases, return the adjusted CS timestamp frequency to the user.
> > + */
> > +u32 xe_oa_timestamp_frequency(struct xe_gt *gt)
> > +{
> > +	u32 reg, shift;
> > +
> > +	/*
> > +	 * Wa_18013179988:dg2
> > +	 * Wa_14015568240:pvc
> > +	 * Wa_14015846243:mtl
> > +	 */
> > +	switch (gt_to_xe(gt)->info.platform) {
> > +	case XE_DG2:
> > +	case XE_PVC:
> > +	case XE_METEORLAKE:
> > +		xe_device_mem_access_get(gt_to_xe(gt));
> > +		reg = xe_mmio_read32(gt, RPM_CONFIG0);
> > +		xe_device_mem_access_put(gt_to_xe(gt));
> > +
> > +		shift = REG_FIELD_GET(RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK, reg);
> > +		return gt->info.reference_clock << (3 - shift);
> > +
> > +	default:
> > +		return gt->info.reference_clock;
> > +	}
> > +}
> > +
> > +static u64 oa_exponent_to_ns(struct xe_gt *gt, int exponent)
> > +{
> > +	u64 nom = (2ULL << exponent) * NSEC_PER_SEC;
> > +	u32 den = xe_oa_timestamp_frequency(gt);
> > +
> > +	return div_u64(nom + den - 1, den);
> > +}
> > +
> > +static bool engine_supports_oa_format(const struct xe_hw_engine *hwe, int type)
> > +{
> > +	switch (hwe->oa_unit->type) {
> > +	case DRM_XE_OA_UNIT_TYPE_OAG:
> > +		return type == DRM_XE_OA_FMT_TYPE_OAG || type == DRM_XE_OA_FMT_TYPE_OAR ||
> > +			type == DRM_XE_OA_FMT_TYPE_OAC || type == DRM_XE_OA_FMT_TYPE_PEC;
> > +	case DRM_XE_OA_UNIT_TYPE_OAM:
> > +		return type == DRM_XE_OA_FMT_TYPE_OAM || type == DRM_XE_OA_FMT_TYPE_OAM_MPEC;
> > +	default:
> > +		return false;
> > +	}
> > +}
> > +
> > +static int decode_oa_format(struct xe_oa *oa, u64 fmt, enum xe_oa_format_name *name)
> > +{
> > +	u32 counter_size = FIELD_GET(DRM_XE_OA_FORMAT_MASK_COUNTER_SIZE, fmt);
> > +	u32 counter_sel = FIELD_GET(DRM_XE_OA_FORMAT_MASK_COUNTER_SEL, fmt);
> > +	u32 bc_report = FIELD_GET(DRM_XE_OA_FORMAT_MASK_BC_REPORT, fmt);
> > +	u32 type = FIELD_GET(DRM_XE_OA_FORMAT_MASK_FMT_TYPE, fmt);
> > +	int idx;
> > +
> > +	for_each_set_bit(idx, oa->format_mask, XE_OA_FORMAT_MAX) {
> > +		const struct xe_oa_format *f = &oa->oa_formats[idx];
> > +
> > +		if (counter_size == f->counter_size && bc_report == f->bc_report &&
> > +		    type == f->type && counter_sel == f->counter_select) {
> > +			*name = idx;
> > +			return 0;
> > +		}
> > +	}
> > +
> > +	return -EINVAL;
> > +}
> > +
> > +u16 xe_oa_unit_id(struct xe_hw_engine *hwe)
> > +{
> > +	return hwe->oa_unit && hwe->oa_unit->num_engines ?
> > +		hwe->oa_unit->oa_unit_id : U16_MAX;
> > +}
> > +
> > +static int xe_oa_assign_hwe(struct xe_oa *oa, struct xe_oa_open_param *param)
> > +{
> > +	struct xe_gt *gt;
> > +	int i, ret = 0;
> > +
> > +	if (param->exec_q) {
> > +		/* When we have an exec_q, get hwe from the exec_q */
> > +		for_each_gt(gt, oa->xe, i) {
>
> Looks like the exec_queue can submit to a specific gt. I think we should
> try to get the hwe from the same gt as the exec_q. Basically this:
>
> if (param->exec_q->gt != gt)
>	continue;
>
> or you can just drop the for loop and assume that xe_gt_hw_engine is not
> supposed to fail for this gt (exec_q->gt).

Thanks for spotting this. I've gone ahead and changed to the second option
above in v8.

>
> > +			param->hwe = xe_gt_hw_engine(gt, param->exec_q->class,
> > +						     param->engine_instance, true);
> > +			if (param->hwe)
> > +				break;
> > +		}
> > +		if (param->hwe && (xe_oa_unit_id(param->hwe) != param->oa_unit_id)) {
> > +			drm_dbg(&oa->xe->drm, "OA unit ID mismatch for exec_q\n");
> > +			ret = -EINVAL;
> > +		}
> > +	} else {
> > +		struct xe_hw_engine *hwe;
> > +		enum xe_hw_engine_id id;
> > +
> > +		/* Else just get the first hwe attached to the oa unit */
> > +		for_each_gt(gt, oa->xe, i) {
> > +			for_each_hw_engine(hwe, gt, id) {
> > +				if (xe_oa_unit_id(hwe) == param->oa_unit_id) {
> > +					param->hwe = hwe;
> > +					goto out;
> > +				}
> > +			}
> > +		}
> > +	}
> > +out:
> > +	if (!param->hwe) {
> > +		drm_dbg(&oa->xe->drm, "Unable to find hwe for OA unit ID %d\n",
> > +			param->oa_unit_id);
> > +		ret = -EINVAL;
> > +	}
> > +
> > +	return ret;
> > +}
> > +
> > +static int xe_oa_set_prop_oa_unit_id(struct xe_oa *oa, u64 value,
> > +				     struct xe_oa_open_param *param)
> > +{
> > +	if (value >= oa->oa_unit_ids) {
> > +		drm_dbg(&oa->xe->drm, "OA unit ID out of range %lld\n", value);
> > +		return -EINVAL;
> > +	}
> > +	param->oa_unit_id = value;
> > +	return 0;
> > +}
> > +
> > +static int xe_oa_set_prop_sample_oa(struct xe_oa *oa, u64 value,
> > +				    struct xe_oa_open_param *param)
> > +{
> > +	param->sample = value;
> > +	return 0;
> > +}
> > +
> > +static int xe_oa_set_prop_metric_set(struct xe_oa *oa, u64 value,
> > +				     struct xe_oa_open_param *param)
> > +{
> > +	param->metric_set = value;
> > +	return 0;
> > +}
> > +
> > +static int xe_oa_set_prop_oa_format(struct xe_oa *oa, u64 value,
> > +				    struct xe_oa_open_param *param)
> > +{
> > +	int ret = decode_oa_format(oa, value, &param->oa_format);
> > +
> > +	if (ret) {
> > +		drm_dbg(&oa->xe->drm, "Unsupported OA report format %#llx\n", value);
> > +		return ret;
> > +	}
> > +	return 0;
> > +}
> > +
> > +static int xe_oa_set_prop_oa_exponent(struct xe_oa *oa, u64 value,
> > +				      struct xe_oa_open_param *param)
> > +{
> > +#define OA_EXPONENT_MAX 31
> > +
> > +	if (value > OA_EXPONENT_MAX) {
> > +		drm_dbg(&oa->xe->drm, "OA timer exponent too high (> %u)\n", OA_EXPONENT_MAX);
> > +		return -EINVAL;
> > +	}
> > +	param->period_exponent = value;
>
> I think i915 has some additional logic where only root can sample at really
> high frequencies, but well, this is a root only use case, so I don't know
> what that logic achieved. Hoping that you intended to drop that logic,
> which is okay. Just confirming.

Good point, I have now removed that logic. This also deleted stuff like
xe_oa_max_sample_rate and xe_oa_sample_rate_hard_limit, so the previous
patch "drm/xe/oa/uapi: Add oa_max_sample_rate sysctl" is now dropped.

>
> > +	return 0;
> > +}
> > +
> > +static int xe_oa_set_prop_poll_oa_period(struct xe_oa *oa, u64 value,
> > +					 struct xe_oa_open_param *param)
> > +{
> > +	if (value < 100) {
> > +		drm_dbg(&oa->xe->drm, "OA timer too small (%lldus < 100us)\n", value);
> > +		return -EINVAL;
> > +	}
> > +	param->poll_period_us = value;
>
> I am not sure if anyone ended up using this at all. This will be unused if
> we add interrupt support in future. Any thoughts on adding interrupt
> support in future?

I think features like interrupt will need to be implemented incrementally
in the future, as long as they don't violate (or cause changes) in the
current uapi (which I hope we can merge before pondering these sort of
features).

> Also note that if we throttle poll to only signal the user after a set
> number of reports are available, then this parameter is not of much use.
> The poll throttling itself will reduce the CPU overhead that this was
> trying to address. Are there plans to bring that feature to XE (poll
> throttling)?

If it's needed, we will add it (incrementally) later. For now I have gone
ahead and dropped DRM_XE_OA_PROPERTY_POLL_OA_PERIOD_US.

>
> > +	return 0;
> > +}
> > +
> > +static int xe_oa_set_prop_open_flags(struct xe_oa *oa, u64 value,
> > +				     struct xe_oa_open_param *param)
> > +{
> > +	u32 known_open_flags =
> > +		DRM_XE_OA_FLAG_FD_CLOEXEC | DRM_XE_OA_FLAG_FD_NONBLOCK | DRM_XE_OA_FLAG_DISABLED;
> > +
> > +	if (value & ~known_open_flags) {
> > +		drm_dbg(&oa->xe->drm, "Unknown open_flag %#llx\n", value);
> > +		return -EINVAL;
> > +	}
> > +	param->open_flags = value;
> > +	return 0;
> > +}
> > +
> > +static int xe_oa_set_prop_exec_queue_id(struct xe_oa *oa, u64 value,
> > +					struct xe_oa_open_param *param)
> > +{
> > +	param->exec_queue_id = value;
> > +	return 0;
> > +}
> > +
> > +static int xe_oa_set_prop_engine_instance(struct xe_oa *oa, u64 value,
> > +					  struct xe_oa_open_param *param)
> > +{
> > +	param->engine_instance = value;
> > +	return 0;
> > +}
> > +
> > +typedef int (*xe_oa_set_property_fn)(struct xe_oa *oa, u64 value,
> > +				     struct xe_oa_open_param *param);
> > +static const xe_oa_set_property_fn xe_oa_set_property_funcs[] = {
> > +	[DRM_XE_OA_PROPERTY_OA_UNIT_ID] = xe_oa_set_prop_oa_unit_id,
> > +	[DRM_XE_OA_PROPERTY_SAMPLE_OA] = xe_oa_set_prop_sample_oa,
> > +	[DRM_XE_OA_PROPERTY_OA_METRIC_SET] = xe_oa_set_prop_metric_set,
> > +	[DRM_XE_OA_PROPERTY_OA_FORMAT] = xe_oa_set_prop_oa_format,
> > +	[DRM_XE_OA_PROPERTY_OA_EXPONENT] = xe_oa_set_prop_oa_exponent,
> > +	[DRM_XE_OA_PROPERTY_POLL_OA_PERIOD_US] = xe_oa_set_prop_poll_oa_period,
> > +	[DRM_XE_OA_PROPERTY_OPEN_FLAGS] = xe_oa_set_prop_open_flags,
> > +	[DRM_XE_OA_PROPERTY_EXEC_QUEUE_ID] = xe_oa_set_prop_exec_queue_id,
> > +	[DRM_XE_OA_PROPERTY_OA_ENGINE_INSTANCE] = xe_oa_set_prop_engine_instance,
> > +};
> > +
> > +static int xe_oa_user_ext_set_property(struct xe_oa *oa, u64 extension,
> > +				       struct xe_oa_open_param *param)
> > +{
> > +	u64 __user *address = u64_to_user_ptr(extension);
> > +	struct drm_xe_ext_set_property ext;
> > +	int err;
> > +	u32 idx;
> > +
> > +	err = __copy_from_user(&ext, address, sizeof(ext));
> > +	if (XE_IOCTL_DBG(oa->xe, err))
> > +		return -EFAULT;
> > +
> > +	if (XE_IOCTL_DBG(oa->xe, ext.property >= ARRAY_SIZE(xe_oa_set_property_funcs)) ||
> > +	    XE_IOCTL_DBG(oa->xe, ext.pad))
> > +		return -EINVAL;
> > +
> > +	idx = array_index_nospec(ext.property, ARRAY_SIZE(xe_oa_set_property_funcs));
> > +	return xe_oa_set_property_funcs[idx](oa, ext.value, param);
> > +}
> > +
> > +typedef int (*xe_oa_user_extension_fn)(struct xe_oa *oa, u64 extension,
> > +				       struct xe_oa_open_param *param);
> > +static const xe_oa_user_extension_fn xe_oa_user_extension_funcs[] = {
> > +	[DRM_XE_OA_EXTENSION_SET_PROPERTY] = xe_oa_user_ext_set_property,
> > +};
> > +
> > +static int xe_oa_user_extensions(struct xe_oa *oa, u64 extension,
> > +				 struct xe_oa_open_param *param)
> > +{
> > +	u64 __user *address = u64_to_user_ptr(extension);
> > +	struct xe_user_extension ext;
> > +	int err;
> > +	u32 idx;
> > +
> > +	err = __copy_from_user(&ext, address, sizeof(ext));
> > +	if (XE_IOCTL_DBG(oa->xe, err))
> > +		return -EFAULT;
> > +
> > +	if (XE_IOCTL_DBG(oa->xe, ext.pad) ||
> > +	    XE_IOCTL_DBG(oa->xe, ext.name >= ARRAY_SIZE(xe_oa_user_extension_funcs)))
> > +		return -EINVAL;
> > +
> > +	idx = array_index_nospec(ext.name, ARRAY_SIZE(xe_oa_user_extension_funcs));
> > +	err = xe_oa_user_extension_funcs[idx](oa, extension, param);
> > +	if (XE_IOCTL_DBG(oa->xe, err))
> > +		return err;
> > +
> > +	if (ext.next_extension)
> > +		return xe_oa_user_extensions(oa, ext.next_extension, param);
>
> What if the user passed a circular list of extensions? If it will result in
> an issue, we should also add a test for it.

Good catch. I have added a check for the number of extensions passed, the
same technique which is used in exec_queue_user_extensions.

>
> > +
> > +	return 0;
> > +}
> > +
> > +int xe_oa_stream_open_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
> > +{
> > +	struct xe_oa *oa = &to_xe_device(dev)->oa;
> > +	struct xe_file *xef = to_xe_file(file);
> > +	struct drm_xe_oa_open_param dparam;
> > +	struct xe_oa_open_param param = {};
> > +	const struct xe_oa_format *f;
> > +	bool privileged_op = true;
> > +	int ret;
> > +
> > +	if (!oa->xe) {
> > +		drm_dbg(&oa->xe->drm, "xe oa interface not available for this system\n");
> > +		return -ENODEV;
> > +	}
> > +
> > +	ret = __copy_from_user(&dparam, data, sizeof(dparam));
> > +	if (XE_IOCTL_DBG(oa->xe, ret))
> > +		return -EFAULT;
> > +
> > +	ret = xe_oa_user_extensions(oa, dparam.extensions, &param);
> > +	if (ret)
> > +		return ret;
> > +
> > +	if (param.exec_queue_id > 0) {
> > +		param.exec_q = xe_exec_queue_lookup(xef, param.exec_queue_id);
> > +		if (XE_IOCTL_DBG(oa->xe, !param.exec_q))
> > +			return -ENOENT;
> > +	}
> > +
> > +	/*
> > +	 * Query based sampling (using MI_REPORT_PERF_COUNT) with OAR/OAC,
> > +	 * without global stream access, can be an unprivileged operation
> > +	 */
> > +	if (param.exec_q && !param.sample)
> > +		privileged_op = false;
> > +
> > +	if (privileged_op && xe_perf_stream_paranoid && !perfmon_capable()) {
> > +		drm_dbg(&oa->xe->drm, "Insufficient privileges to open xe perf stream\n");
> > +		ret = -EACCES;
> > +		goto err_exec_q;
> > +	}
> > +
> > +	if (!param.exec_q && !param.sample) {
> > +		drm_dbg(&oa->xe->drm, "Only OA report sampling supported\n");
> > +		ret = -EINVAL;
> > +		goto err_exec_q;
> > +	}
> > +
> > +	ret = xe_oa_assign_hwe(oa, &param);
> > +	if (ret)
> > +		goto err_exec_q;
> > +
> > +	f = &oa->oa_formats[param.oa_format];
> > +	if (!param.oa_format || !f->size ||
> > +	    !engine_supports_oa_format(param.hwe, f->type)) {
> > +		drm_dbg(&oa->xe->drm, "Invalid OA format %d type %d size %d for class %d\n",
> > +			param.oa_format, f->type, f->size, param.hwe->class);
> > +		ret = -EINVAL;
> > +		goto err_exec_q;
> > +	}
> > +
> > +	if (param.period_exponent > 0) {
> > +		u64 oa_period, oa_freq_hz;
> > +
> > +		oa_period = oa_exponent_to_ns(param.hwe->gt, param.period_exponent);
> > +		oa_freq_hz = div64_u64(NSEC_PER_SEC, oa_period);
> > +		if (oa_freq_hz > xe_oa_max_sample_rate && !perfmon_capable()) {
> > +			drm_dbg(&oa->xe->drm,
> > +				"OA exponent would exceed the max sampling frequency (sysctl dev.xe.oa_max_sample_rate) %uHz without CAP_PERFMON or CAP_SYS_ADMIN privileges\n",
> > +				xe_oa_max_sample_rate);
> > +			ret = -EACCES;
> > +			goto err_exec_q;
> > +		}
> > +	}
> > +err_exec_q:
> > +	if (ret < 0 && param.exec_q)
> > +		xe_exec_queue_put(param.exec_q);
> > +	return ret;
> > +}
> > +
> > static bool xe_oa_is_valid_flex_addr(struct xe_oa *oa, u32 addr)
> > {
> >	static const struct xe_reg flex_eu_regs[] = {
> > diff --git a/drivers/gpu/drm/xe/xe_oa.h b/drivers/gpu/drm/xe/xe_oa.h
> > index e4863f8681b14..a0f9a876ea6b4 100644
> > --- a/drivers/gpu/drm/xe/xe_oa.h
> > +++ b/drivers/gpu/drm/xe/xe_oa.h
> > @@ -19,6 +19,8 @@ void xe_oa_unregister(struct xe_device *xe);
> > int xe_oa_sysctl_register(void);
> > void xe_oa_sysctl_unregister(void);
> >
> > +int xe_oa_stream_open_ioctl(struct drm_device *dev, void *data,
> > +			    struct drm_file *file);
> > int xe_oa_add_config_ioctl(struct drm_device *dev, void *data,
> >			   struct drm_file *file);
> > int xe_oa_remove_config_ioctl(struct drm_device *dev, void *data,
> > diff --git a/drivers/gpu/drm/xe/xe_perf.c b/drivers/gpu/drm/xe/xe_perf.c
> > index 2aee4c7989486..2c0615481b7df 100644
> > --- a/drivers/gpu/drm/xe/xe_perf.c
> > +++ b/drivers/gpu/drm/xe/xe_perf.c
> > @@ -16,6 +16,8 @@ static int xe_oa_ioctl(struct drm_device *dev, struct drm_xe_perf_param *arg,
> >		       struct drm_file *file)
> > {
> >	switch (arg->perf_op) {
> > +	case DRM_XE_PERF_OP_STREAM_OPEN:
> > +		return xe_oa_stream_open_ioctl(dev, (void *)arg->param, file);
> >	case DRM_XE_PERF_OP_ADD_CONFIG:
> >		return xe_oa_add_config_ioctl(dev, (void *)arg->param, file);
> >	case DRM_XE_PERF_OP_REMOVE_CONFIG:
> > diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
> > index f17134828c093..8156301df7315 100644
> > --- a/include/uapi/drm/xe_drm.h
> > +++ b/include/uapi/drm/xe_drm.h
> > @@ -1192,6 +1192,120 @@ enum drm_xe_oa_format_type {
> >	DRM_XE_OA_FMT_TYPE_PEC,
> > };
> >
> > +/** enum drm_xe_oa_property_id - OA stream property id's */
> > +enum drm_xe_oa_property_id {
> > +	/**
> > +	 * @DRM_XE_OA_PROPERTY_OA_UNIT_ID: ID of the OA unit on which to open
> > +	 * the OA stream, see @oa_unit_id in 'struct
> > +	 * drm_xe_query_oa_units'. Defaults to 0 if not provided.
> > +	 */
> > +	DRM_XE_OA_PROPERTY_OA_UNIT_ID = 1,
> > +
> > +	/**
> > +	 * @DRM_XE_OA_PROPERTY_SAMPLE_OA: A value of 1 requests the inclusion of
> > +	 * raw OA unit reports as part of stream samples.
> > +	 */
> > +	DRM_XE_OA_PROPERTY_SAMPLE_OA,
> > +
> > +	/**
> > +	 * @DRM_XE_OA_PROPERTY_OA_METRIC_SET: OA metrics defining contents of OA
> > +	 * reportst, previously added via @@DRM_XE_PERF_OP_ADD_CONFIG.
>
> typo: reports

Done, thanks.

>
> > +	 */
> > +	DRM_XE_OA_PROPERTY_OA_METRIC_SET,
> > +
> > +	/** @DRM_XE_OA_PROPERTY_OA_FORMAT: Perf counter report format */
> > +	DRM_XE_OA_PROPERTY_OA_FORMAT,
> > +	/**
> > +	 * OA_FORMAT's are specified the same way as in Bspec, in terms of
> > +	 * the following quantities: a. enum @drm_xe_oa_format_type
> > +	 * b. Counter select c. Counter size and d. BC report
> > +	 */
> > +#define DRM_XE_OA_FORMAT_MASK_FMT_TYPE		(0xff << 0)
> > +#define DRM_XE_OA_FORMAT_MASK_COUNTER_SEL	(0xff << 8)
> > +#define DRM_XE_OA_FORMAT_MASK_COUNTER_SIZE	(0xff << 16)
> > +#define DRM_XE_OA_FORMAT_MASK_BC_REPORT		(0xff << 24)
>
> indentation/alignment is off I guess

Once again, indentation/alignment is fine. It is the presence of the
additional '+' which disturbs the tab's and makes it appear that alignment
is off.

>
> > +
> > +	/**
> > +	 * @DRM_XE_OA_PROPERTY_OA_EXPONENT: Requests periodic OA unit sampling
> > +	 * with sampling frequency proportional to 2^(period_exponent + 1)
> > +	 */
> > +	DRM_XE_OA_PROPERTY_OA_EXPONENT,
> > +
> > +	/**
> > +	 * @DRM_XE_OA_PROPERTY_POLL_OA_PERIOD_US: Timer interval in microseconds
> > +	 * to check OA buffer for available data. Minimum allowed value is 100
> > +	 * microseconds. A default value is used by the driver if this parameter
> > +	 * is skipped. Larger timer values will reduce cpu consumption during OA
> > +	 * perf captures, but excessively large values could result in data loss
> > +	 * due to OA buffer overwrites.
> > +	 */
> > +	DRM_XE_OA_PROPERTY_POLL_OA_PERIOD_US,
>
> Again, very likely not used, but please confirm with the UMDs though.

This is dropped for now. See above.

>
> > +
> > +	/**
> > +	 * @DRM_XE_OA_PROPERTY_OPEN_FLAGS: CLOEXEC and NONBLOCK flags are
> > +	 * directly applied to returned OA fd. DISABLED opens the OA stream in a
> > +	 * DISABLED state (see @DRM_XE_PERF_IOCTL_ENABLE).
> > +	 */
> > +	DRM_XE_OA_PROPERTY_OPEN_FLAGS,
> > +#define DRM_XE_OA_FLAG_FD_CLOEXEC	(1 << 0)
> > +#define DRM_XE_OA_FLAG_FD_NONBLOCK	(1 << 1)
> > +#define DRM_XE_OA_FLAG_DISABLED		(1 << 2)
>
> oh, overlooked this before commenting earlier on the fcntl stuff. Looks
> like you already were passing this in params. Anyways, fcntl should be good
> for CLOEXEC/NONBLOCK.

Yes, CLOEXEC/NONBLOCK are dropped (verified fcntl works fine). DISABLED is
converted into a new property DRM_XE_OA_PROPERTY_OA_DISABLED.

>
> > +
> > +	/**
> > +	 * @DRM_XE_OA_PROPERTY_EXEC_QUEUE_ID: Open the stream for a specific
> > +	 * @exec_queue_id. Perf queries can be executed on this exec queue.
> > +	 */
> > +	DRM_XE_OA_PROPERTY_EXEC_QUEUE_ID,
> > +
> > +	/**
> > +	 * @DRM_XE_OA_PROPERTY_OA_ENGINE_INSTANCE: Optional engine instance to
> > +	 * pass along with @DRM_XE_OA_PROPERTY_EXEC_QUEUE_ID or will default to 0.
> > +	 */
> > +	DRM_XE_OA_PROPERTY_OA_ENGINE_INSTANCE,
> > +
> > +	DRM_XE_OA_PROPERTY_MAX /* non-ABI */
> > +};
> > +
> > +/**
> > + * struct drm_xe_oa_open_param - Params for opening an OA stream
> > + *
> > + * Stream params are specified as a chain of @drm_xe_ext_set_property
> > + * struct's, with @property values from enum @drm_xe_oa_property_id and
> > + * @xe_user_extension base.name set to @DRM_XE_OA_EXTENSION_SET_PROPERTY
> > + */
> > +struct drm_xe_oa_open_param {
> > +#define DRM_XE_OA_EXTENSION_SET_PROPERTY	0
> > +	/** @extensions: Pointer to the first extension struct */
> > +	__u64 extensions;
> > +};
> > +
> > +/** enum drm_xe_oa_record_type - Type of OA packet read from OA fd */
> > +enum drm_xe_oa_record_type {
> > +	/** @DRM_XE_OA_RECORD_SAMPLE: Regular OA data sample */
> > +	DRM_XE_OA_RECORD_SAMPLE = 1,
> > +
> > +	/** @DRM_XE_OA_RECORD_OA_REPORT_LOST: Status indicating lost OA reports */
> > +	DRM_XE_OA_RECORD_OA_REPORT_LOST = 2,
> > +
> > +	/**
> > +	 * @DRM_XE_OA_RECORD_OA_BUFFER_LOST: Status indicating lost OA
> > +	 * reports and OA buffer reset in the process
> > +	 */
> > +	DRM_XE_OA_RECORD_OA_BUFFER_LOST = 3,
> > +
> > +	DRM_XE_OA_RECORD_MAX /* non-ABI */
> > +};
> > +
> > +/** struct drm_xe_oa_record_header - Header for OA packets read from OA fd */
> > +struct drm_xe_oa_record_header {
> > +	/** @type: Of enum @drm_xe_oa_record_type */
> > +	__u16 type;
> > +	/** @pad: MBZ */
> > +	__u16 pad;
> > +	/** @size: size in bytes */
> > +	__u32 size;
> > +};
>
> I think we want to drop the header completely, but I guess that's still
> wip. Any plans to allow read/write of STATUS reg? I am thinking i915 can
> clear the register automatically but store the last cleared value that can
> be returned to the user on a read or something similar where user only ever
> needs to read it. I don't think user will try to recover anything if there
> is an error. In case of error the capture is just reinitiated. Again, need
> UMD confirmation here.

I have eliminated the report header, so the read just provides the OA
buffer data in integral number of reports (num_reports = read_size /
format_size). Also provided a new stream fd ioctl DRM_XE_PERF_IOCTL_STATUS,
which returns the status reg and clears it (so for now user can only read
the status register, not write it). Take a look at xe_oa_status_locked() in
the patch "drm/xe/oa/uapi: Read file_operation". If there is an error, it's
the user's responsibility to reinitiate the capture (by say using
disable/enable).

>
> > +
> > /**
> >  * struct drm_xe_oa_config - OA metric configuration
> >  *
> > --
> > 2.41.0
> >

Thanks.
--
Ashutosh


More information about the Intel-xe mailing list