[Intel-xe] [PATCH 01/10] drm/xe/oa: Introduce OA uapi
Umesh Nerlige Ramappa
umesh.nerlige.ramappa at intel.com
Wed Aug 16 01:11:55 UTC 2023
On Tue, Aug 15, 2023 at 03:03:45PM -0700, Dixit, Ashutosh wrote:
>On Mon, 07 Aug 2023 18:31:50 -0700, Ashutosh Dixit wrote:
>>
>
>A couple of other comments on the uAPI.
>
>> OA uapi allows userspace to:
>> * Read streams of performance counters written by hardware
>> * Configure (and reconfigure) which sets of perf counters are captured as
>> part of OA streams
>> * Configure other properties (such as format and periodicity) of such
>> captures.
>> * Query associated parameters such as OA unit timestamp freq, oa_unit_id's
>> for hw engines and OA ioctl version
>>
>> Signed-off-by: Ashutosh Dixit <ashutosh.dixit at intel.com>
>> ---
>> include/uapi/drm/xe_drm.h | 257 +++++++++++++++++++++++++++++++++++++-
>> 1 file changed, 256 insertions(+), 1 deletion(-)
>>
>> diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
>> index 86f16d50e9ccc..b4ab07c285245 100644
>> --- a/include/uapi/drm/xe_drm.h
>> +++ b/include/uapi/drm/xe_drm.h
>> @@ -111,6 +111,9 @@ struct xe_user_extension {
>> #define DRM_XE_WAIT_USER_FENCE 0x0b
>> #define DRM_XE_VM_MADVISE 0x0c
>> #define DRM_XE_EXEC_QUEUE_GET_PROPERTY 0x0d
>> +#define DRM_XE_OA_OPEN 0x36
>> +#define DRM_XE_OA_ADD_CONFIG 0x37
>> +#define DRM_XE_OA_REMOVE_CONFIG 0x38
>>
>> /* Must be kept compact -- no holes */
>> #define DRM_IOCTL_XE_DEVICE_QUERY DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_DEVICE_QUERY, struct drm_xe_device_query)
>> @@ -127,6 +130,9 @@ struct xe_user_extension {
>> #define DRM_IOCTL_XE_EXEC_QUEUE_SET_PROPERTY DRM_IOW(DRM_COMMAND_BASE + DRM_XE_EXEC_QUEUE_SET_PROPERTY, struct drm_xe_exec_queue_set_property)
>> #define DRM_IOCTL_XE_WAIT_USER_FENCE DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_WAIT_USER_FENCE, struct drm_xe_wait_user_fence)
>> #define DRM_IOCTL_XE_VM_MADVISE DRM_IOW(DRM_COMMAND_BASE + DRM_XE_VM_MADVISE, struct drm_xe_vm_madvise)
>> +#define DRM_IOCTL_XE_OA_OPEN DRM_IOW(DRM_COMMAND_BASE + DRM_XE_OA_OPEN, struct drm_xe_oa_open_param)
>> +#define DRM_IOCTL_XE_OA_ADD_CONFIG DRM_IOW(DRM_COMMAND_BASE + DRM_XE_OA_ADD_CONFIG, struct drm_xe_oa_config)
>> +#define DRM_IOCTL_XE_OA_REMOVE_CONFIG DRM_IOW(DRM_COMMAND_BASE + DRM_XE_OA_REMOVE_CONFIG, __u64)
>
>In current and future HW, there are other HW blocks which generate
>performance counter streams. There is an ask to expose these streams also
>using the uapi introduced here. Specifically the ask is to replace "OA" in
>these and the ioctls below by "PERF" (similar to i915) and support OA
>streams as only one of the perf counter stream types supported under
>PERF. But the PERF interface would be able to support other types of perf
>streams too.
>
>We are planning to make this change in the next version of these patches.
>
>>
>> /**
>> * enum drm_xe_memory_class - Supported memory classes.
>> @@ -261,7 +267,8 @@ struct drm_xe_query_config {
>> #define XE_QUERY_CONFIG_GT_COUNT 4
>> #define XE_QUERY_CONFIG_MEM_REGION_COUNT 5
>> #define XE_QUERY_CONFIG_MAX_ENGINE_PRIORITY 6
>> -#define XE_QUERY_CONFIG_NUM_PARAM (XE_QUERY_CONFIG_MAX_ENGINE_PRIORITY + 1)
>> +#define XE_QUERY_OA_IOCTL_VERSION 7
>> +#define XE_QUERY_CONFIG_NUM_PARAM (XE_QUERY_OA_IOCTL_VERSION + 1)
>> /** @info: array of elements containing the config info */
>> __u64 info[];
>> };
>> @@ -298,6 +305,7 @@ struct drm_xe_query_gts {
>> __u64 native_mem_regions; /* bit mask of instances from drm_xe_query_mem_usage */
>> __u64 slow_mem_regions; /* bit mask of instances from drm_xe_query_mem_usage */
>> __u64 inaccessible_mem_regions; /* bit mask of instances from drm_xe_query_mem_usage */
>> + __u64 oa_timestamp_freq;
>> __u64 reserved[8];
>> } gts[];
>> };
>> @@ -753,6 +761,7 @@ struct drm_xe_engine_class_instance {
>>
>> __u16 engine_instance;
>> __u16 gt_id;
>> + __u16 oa_unit_id;
>> };
>>
>> struct drm_xe_exec_queue_create {
>> @@ -1056,6 +1065,252 @@ struct drm_xe_vm_madvise {
>> __u64 reserved[2];
>> };
>>
>> +enum drm_xe_oa_format {
>> + XE_OA_FORMAT_C4_B8 = 7,
>> +
>> + /* Gen8+ */
>> + XE_OA_FORMAT_A12,
>> + XE_OA_FORMAT_A12_B8_C8,
>> + XE_OA_FORMAT_A32u40_A4u32_B8_C8,
>> +
>> + /* DG2 */
>> + XE_OAR_FORMAT_A32u40_A4u32_B8_C8,
>> + XE_OA_FORMAT_A24u40_A14u32_B8_C8,
>> +
>> + /* MTL OAM */
>> + XE_OAM_FORMAT_MPEC8u64_B8_C8,
>> + XE_OAM_FORMAT_MPEC8u32_B8_C8,
>> +
>> + XE_OA_FORMAT_MAX /* non-ABI */
>> +};
>> +
>> +enum drm_xe_oa_property_id {
>> + /**
>> + * Open the stream for a specific exec queue id (as used with
>> + * drm_xe_exec). A stream opened for a specific exec queue id this
>> + * way won't typically require root privileges.
>> + */
>> + DRM_XE_OA_PROP_EXEC_QUEUE_ID = 1,
>> +
>> + /**
>> + * A value of 1 requests the inclusion of raw OA unit reports as
>> + * part of stream samples.
>> + */
>> + DRM_XE_OA_PROP_SAMPLE_OA,
>> +
>> + /**
>> + * The value specifies which set of OA unit metrics should be
>> + * configured, defining the contents of any OA unit reports.
>> + */
>> + DRM_XE_OA_PROP_OA_METRICS_SET,
>> +
>> + /**
>> + * The value specifies the size and layout of OA unit reports.
>> + */
>> + DRM_XE_OA_PROP_OA_FORMAT,
>> +
>> + /**
>> + * Specifying this property implicitly requests periodic OA unit
>> + * sampling and (at least on Haswell) the sampling frequency is derived
>> + * from this exponent as follows:
>> + *
>> + * 80ns * 2^(period_exponent + 1)
>> + */
>> + DRM_XE_OA_PROP_OA_EXPONENT,
>> +
>> + /**
>> + * Specifying this property is only valid when specifying a context to
>> + * filter with DRM_XE_OA_PROP_ENGINE_ID. Specifying this property
>> + * will hold preemption of the particular engine we want to gather
>> + * performance data about.
>> + */
>> + DRM_XE_OA_PROP_HOLD_PREEMPTION,
>> +
>> + /**
>> + * Specifying this pins all contexts to the specified SSEU power
>> + * configuration for the duration of the recording.
>> + *
>> + * This parameter's value is a pointer to a struct
>> + * drm_xe_gem_context_param_sseu (TBD).
>> + */
>> + DRM_XE_OA_PROP_GLOBAL_SSEU,
>> +
>> + /**
>> + * This optional parameter specifies the timer interval in nanoseconds
>> + * at which the xe driver will check the OA buffer for available data.
>> + * Minimum allowed value is 100 microseconds. A default value is used by
>> + * the driver if this parameter is not specified. Note that larger timer
>> + * values will reduce cpu consumption during OA perf captures. However,
>> + * excessively large values would potentially result in OA buffer
>> + * overwrites as captures reach end of the OA buffer.
>> + */
>> + DRM_XE_OA_PROP_POLL_OA_PERIOD,
>> +
>> + /**
>> + * Multiple engines may be mapped to the same OA unit. The OA unit is
>> + * identified by class:instance of any engine mapped to it.
>> + *
>> + * This parameter specifies the engine class and must be passed along
>> + * with DRM_XE_OA_PROP_OA_ENGINE_INSTANCE.
>> + */
>> + DRM_XE_OA_PROP_OA_ENGINE_CLASS,
>> +
>> + /**
>> + * This parameter specifies the engine instance and must be passed along
>> + * with DRM_XE_OA_PROP_OA_ENGINE_CLASS.
>> + */
>> + DRM_XE_OA_PROP_OA_ENGINE_INSTANCE,
>
>Because oa_unit_id has been introduced above in
>drm_xe_engine_class_instance, should we remove these engine class/instance
>properties? Since it seems OA streams can be opened directly against the oa
>unit identified by oa_unit_id. And the driver can itself figure out the
>engine class/instance from the oa_unit_id?
Agree for the OA buffer use case.
For query, we have used the class:instance to enable query support for
specific engine instance. If we can work around that somehow, then we
can do away with these 2 params. More like enable query for all engines
that belong to the OAG unit. For render, it is straightforward since we
have one render per OAG unit. For compute, multiple compute instances
can map to one OAG unit and only one instance can support the query. So
user may want to choose which instance that is. Although, I would just
simplify it and support only instance 0 of compute. If UMD is okay with
it, then we should be good.
Media engines do not support query, so we should be good for OAM.
Thanks,
Umesh
>
>Separately, there is also the question of whether we want to share the Xe
>OA IGT code with i915 as was done for this series. Since duplicating the
>IGT code as well as all the perf tools seems a bit much.
>
>Thanks.
>--
>Ashutosh
More information about the Intel-xe
mailing list