[PATCH v2 5/8] RFC drm/xe/uapi: Add configs for Engine busyness

Umesh Nerlige Ramappa umesh.nerlige.ramappa at intel.com
Thu Dec 21 02:29:33 UTC 2023


On Thu, Dec 07, 2023 at 06:27:59PM +0530, Riana Tauro wrote:
>GuC provides engine busyness ticks as a 64-bit counter which counts
>in clock ticks.
>
>Add configs to the uapi to expose Engine busyness via PMU.
>
>v2: add "__" prefix for internal helpers
>    add a simple helper for application usage (Aravind)
>
>Cc: Aravind Iddamsetty <aravind.iddamsetty at linux.intel.com>
>Signed-off-by: Riana Tauro <riana.tauro at intel.com>
>---
> include/uapi/drm/xe_drm.h | 23 +++++++++++++++++++++++
> 1 file changed, 23 insertions(+)
>
>diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
>index b5e7a4f673fa..074d63cb79df 100644
>--- a/include/uapi/drm/xe_drm.h
>+++ b/include/uapi/drm/xe_drm.h
>@@ -1107,6 +1107,10 @@ struct drm_xe_wait_user_fence {
>  *	fd = syscall(__NR_perf_event_open, &attr, -1, cpu, -1, 0);
>  */
>
>+enum drm_xe_pmu_engine_sample {
>+	DRM_XE_PMU_SAMPLE_BUSY_TICKS = 0,
>+};
>+
> /*
>  * Top bits of every counter are GT id.
>  */
>@@ -1115,6 +1119,25 @@ struct drm_xe_wait_user_fence {
> #define ___DRM_XE_PMU_OTHER(gt, x) \
> 	(((__u64)(x)) | ((__u64)(gt) << __DRM_XE_PMU_GT_SHIFT))
>
>+#define __DRM_XE_PMU_SAMPLE_BITS (4)
>+#define __DRM_XE_PMU_SAMPLE_INSTANCE_BITS (8)
>+#define __DRM_XE_PMU_CLASS_SHIFT \
>+	(__DRM_XE_PMU_SAMPLE_BITS + __DRM_XE_PMU_SAMPLE_INSTANCE_BITS)
>+
>+/*
>+ * Engine configs offset - 0x1000
>+ */
>+#define __DRM_XE_PMU_ENGINE_OFFSET(gt) \
>+	(___DRM_XE_PMU_OTHER(gt, 0xfff) + 1)

I think this should be (___DRM_XE_PMU_OTHER(gt, 0xfffff) + 1), because
class is also 8 bits: 4 sample bits + 8 instance bits + 8 class bits is
20 bits, i.e. 0xfffff. This part is no different from i915, so I am not
sure whether you intended to change it for XE.

>+
>+#define __DRM_XE_PMU_ENGINE(gt, class, instance, sample) \
>+	(((class) << __DRM_XE_PMU_CLASS_SHIFT | \
>+	(instance) << __DRM_XE_PMU_SAMPLE_BITS | \
>+	(sample)) + __DRM_XE_PMU_ENGINE_OFFSET(gt))

What's the idea here? Engines are limited to a specific GT and the user 
is also passing in the GT in the config?

I think the gt should just be shifted to __DRM_XE_PMU_GT_SHIFT and ORed with the engine counter, more like:

#define ___DRM_XE_PMU_GT_EVENT(gt, x) \
	(((__u64)(x)) | ((__u64)(gt) << __DRM_XE_PMU_GT_SHIFT))


#define __DRM_XE_PMU_ENGINE_EVENT(class, instance, sample) \
	(((class) << __DRM_XE_PMU_CLASS_SHIFT | \
	(instance) << __DRM_XE_PMU_SAMPLE_BITS | \
	(sample)))

#define DRM_XE_PMU_ENGINE_BUSY_TICKS(gt, class, instance) \
	___DRM_XE_PMU_GT_EVENT(gt, __DRM_XE_PMU_ENGINE_EVENT(class, instance, DRM_XE_PMU_SAMPLE_BUSY_TICKS))

Just an example, naming is up to you.

For group counters, the logic you have is fine as long as you set class to 8 bits:

#define DRM_XE_PMU_RENDER_GROUP_BUSY(gt) \
	___DRM_XE_PMU_GT_EVENT(gt, ___DRM_XE_PMU_GT_EVENT(gt, 0xfffff) + 1 + 0)

#define DRM_XE_PMU_COPY_GROUP_BUSY(gt) \
	___DRM_XE_PMU_GT_EVENT(gt, ___DRM_XE_PMU_GT_EVENT(gt, 0xfffff) + 1 + 1)

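Here 0xfffff is the same as __DRM_XE_PMU_ENGINE_EVENT(0xff, 0xff, 0xf), so
___DRM_XE_PMU_GT_EVENT(gt, 0xfffff) is the same as
___DRM_XE_PMU_GT_EVENT(gt, __DRM_XE_PMU_ENGINE_EVENT(0xff, 0xff, 0xf)).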

Thanks,
Umesh


>+
>+#define DRM_XE_PMU_ENGINE_BUSY_TICKS(gt, class, instance) \
>+	__DRM_XE_PMU_ENGINE(gt, class, instance, DRM_XE_PMU_SAMPLE_BUSY_TICKS)
>+
> #define DRM_XE_PMU_RENDER_GROUP_BUSY(gt)	___DRM_XE_PMU_OTHER(gt, 0)
> #define DRM_XE_PMU_COPY_GROUP_BUSY(gt)		___DRM_XE_PMU_OTHER(gt, 1)
> #define DRM_XE_PMU_MEDIA_GROUP_BUSY(gt)		___DRM_XE_PMU_OTHER(gt, 2)
>-- 
>2.40.0
>


More information about the Intel-xe mailing list