[PATCH v2 5/8] RFC drm/xe/uapi: Add configs for Engine busyness
Umesh Nerlige Ramappa
umesh.nerlige.ramappa at intel.com
Thu Dec 21 02:29:33 UTC 2023
On Thu, Dec 07, 2023 at 06:27:59PM +0530, Riana Tauro wrote:
>GuC provides engine busyness ticks as a 64 bit counter which count
>as clock ticks.
>
>Add configs to the uapi to expose Engine busyness via PMU.
>
>v2: add "__" prefix for internal helpers
> add a simple helper for application usage (Aravind)
>
>Cc: Aravind Iddamsetty <aravind.iddamsetty at linux.intel.com>
>Signed-off-by: Riana Tauro <riana.tauro at intel.com>
>---
> include/uapi/drm/xe_drm.h | 23 +++++++++++++++++++++++
> 1 file changed, 23 insertions(+)
>
>diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
>index b5e7a4f673fa..074d63cb79df 100644
>--- a/include/uapi/drm/xe_drm.h
>+++ b/include/uapi/drm/xe_drm.h
>@@ -1107,6 +1107,10 @@ struct drm_xe_wait_user_fence {
> * fd = syscall(__NR_perf_event_open, &attr, -1, cpu, -1, 0);
> */
>
>+enum drm_xe_pmu_engine_sample {
>+ DRM_XE_PMU_SAMPLE_BUSY_TICKS = 0,
>+};
>+
> /*
> * Top bits of every counter are GT id.
> */
>@@ -1115,6 +1119,25 @@ struct drm_xe_wait_user_fence {
> #define ___DRM_XE_PMU_OTHER(gt, x) \
> (((__u64)(x)) | ((__u64)(gt) << __DRM_XE_PMU_GT_SHIFT))
>
>+#define __DRM_XE_PMU_SAMPLE_BITS (4)
>+#define __DRM_XE_PMU_SAMPLE_INSTANCE_BITS (8)
>+#define __DRM_XE_PMU_CLASS_SHIFT \
>+ (__DRM_XE_PMU_SAMPLE_BITS + __DRM_XE_PMU_SAMPLE_INSTANCE_BITS)
>+
>+/*
>+ * Engine configs offset - 0x1000
>+ */
>+#define __DRM_XE_PMU_ENGINE_OFFSET(gt) \
>+ (___DRM_XE_PMU_OTHER(gt, 0xfff) + 1)
I think this should be (___DRM_XE_PMU_OTHER(gt, 0xfffff) + 1), because the
class is also 8 bits: 4 sample bits + 8 instance bits + 8 class bits = 20 bits.
This part is no different from i915, so I'm not sure whether you intended to
change it for XE.
>+
>+#define __DRM_XE_PMU_ENGINE(gt, class, instance, sample) \
>+ (((class) << __DRM_XE_PMU_CLASS_SHIFT | \
>+ (instance) << __DRM_XE_PMU_SAMPLE_BITS | \
>+ (sample)) + __DRM_XE_PMU_ENGINE_OFFSET(gt))
What's the idea here? Engines are limited to a specific GT and the user
is also passing in the GT in the config?
I think the gt should just be shifted by __DRM_XE_PMU_GT_SHIFT and ORed with the engine counter, something like:
#define ___DRM_XE_PMU_GT_EVENT(gt, x) \
(((__u64)(x)) | ((__u64)(gt) << __DRM_XE_PMU_GT_SHIFT))
#define __DRM_XE_PMU_ENGINE_EVENT(class, instance, sample) \
(((class) << __DRM_XE_PMU_CLASS_SHIFT | \
(instance) << __DRM_XE_PMU_SAMPLE_BITS | \
(sample)))
#define DRM_XE_PMU_ENGINE_BUSY_TICKS(gt, class, instance) \
___DRM_XE_PMU_GT_EVENT(gt, __DRM_XE_PMU_ENGINE_EVENT(class, instance, DRM_XE_PMU_SAMPLE_BUSY_TICKS))
Just an example, naming is up to you.
For group counters, the logic you have is fine as long as you set class to 8 bits:
#define DRM_XE_PMU_RENDER_GROUP_BUSY(gt) \
___DRM_XE_PMU_GT_EVENT(gt, ___DRM_XE_PMU_GT_EVENT(gt, 0xfffff) + 1 + 0)
#define DRM_XE_PMU_COPY_GROUP_BUSY(gt) \
___DRM_XE_PMU_GT_EVENT(gt, ___DRM_XE_PMU_GT_EVENT(gt, 0xfffff) + 1 + 1)
___DRM_XE_PMU_GT_EVENT(gt, 0xfffff) is the same as __DRM_XE_PMU_ENGINE_EVENT(0xff, 0xff, 0xf) with the GT id ORed into the top bits.
Thanks,
Umesh
>+
>+#define DRM_XE_PMU_ENGINE_BUSY_TICKS(gt, class, instance) \
>+ __DRM_XE_PMU_ENGINE(gt, class, instance, DRM_XE_PMU_SAMPLE_BUSY_TICKS)
>+
> #define DRM_XE_PMU_RENDER_GROUP_BUSY(gt) ___DRM_XE_PMU_OTHER(gt, 0)
> #define DRM_XE_PMU_COPY_GROUP_BUSY(gt) ___DRM_XE_PMU_OTHER(gt, 1)
> #define DRM_XE_PMU_MEDIA_GROUP_BUSY(gt) ___DRM_XE_PMU_OTHER(gt, 2)
>--
>2.40.0
>
More information about the Intel-xe
mailing list