[Intel-xe] [04/21] drm/xe/oa: Module init/exit and probe/remove
Lionel Landwerlin
lionel.g.landwerlin at intel.com
Fri Oct 20 07:08:18 UTC 2023
On 19/09/2023 19:10, Ashutosh Dixit wrote:
> Perform OA initialization at module init and probe time:
>
> * Setup perf_stream_paranoid and oa_max_sample_rate files in /proc
> * Setup metrics sysfs directories to expose which metrics configurations
> are available
> * Setup OA groups which associate hw engines with OA units
> * Initialize OA units
>
> Signed-off-by: Ashutosh Dixit <ashutosh.dixit at intel.com>
> Reviewed-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa at intel.com>
> ---
> drivers/gpu/drm/xe/Makefile | 1 +
> drivers/gpu/drm/xe/xe_device.c | 11 +
> drivers/gpu/drm/xe/xe_device_types.h | 4 +
> drivers/gpu/drm/xe/xe_gt_types.h | 4 +
> drivers/gpu/drm/xe/xe_hw_engine_types.h | 2 +
> drivers/gpu/drm/xe/xe_module.c | 5 +
> drivers/gpu/drm/xe/xe_oa.c | 309 ++++++++++++++++++++++++
> drivers/gpu/drm/xe/xe_oa.h | 18 ++
> 8 files changed, 354 insertions(+)
> create mode 100644 drivers/gpu/drm/xe/xe_oa.c
> create mode 100644 drivers/gpu/drm/xe/xe_oa.h
>
> diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
> index cc95a46b5e4d3..a40c4827b9c85 100644
> --- a/drivers/gpu/drm/xe/Makefile
> +++ b/drivers/gpu/drm/xe/Makefile
> @@ -84,6 +84,7 @@ xe-y += xe_bb.o \
> xe_mmio.o \
> xe_mocs.o \
> xe_module.o \
> + xe_oa.o \
> xe_pat.o \
> xe_pci.o \
> xe_pcode.o \
> diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
> index b6bcb6c3482e7..2c3dac6340f04 100644
> --- a/drivers/gpu/drm/xe/xe_device.c
> +++ b/drivers/gpu/drm/xe/xe_device.c
> @@ -25,6 +25,7 @@
> #include "xe_irq.h"
> #include "xe_mmio.h"
> #include "xe_module.h"
> +#include "xe_oa.h"
> #include "xe_pcode.h"
> #include "xe_pm.h"
> #include "xe_query.h"
> @@ -323,6 +324,10 @@ int xe_device_probe(struct xe_device *xe)
> goto err_irq_shutdown;
> }
>
> + err = xe_oa_init(xe);
> + if (err)
> + goto err_irq_shutdown;
> +
> err = xe_display_init(xe);
> if (err)
> goto err_irq_shutdown;
> @@ -333,6 +338,8 @@ int xe_device_probe(struct xe_device *xe)
>
> xe_display_register(xe);
>
> + xe_oa_register(xe);
> +
> xe_debugfs_register(xe);
>
> xe_pmu_register(&xe->pmu);
> @@ -363,10 +370,14 @@ static void xe_device_remove_display(struct xe_device *xe)
>
> void xe_device_remove(struct xe_device *xe)
> {
> + xe_oa_unregister(xe);
> +
> xe_device_remove_display(xe);
>
> xe_display_fini(xe);
>
> + xe_oa_fini(xe);
> +
> xe_irq_shutdown(xe);
> }
>
> diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
> index a82f28c6a3a01..8161407913607 100644
> --- a/drivers/gpu/drm/xe/xe_device_types.h
> +++ b/drivers/gpu/drm/xe/xe_device_types.h
> @@ -17,6 +17,7 @@
> #include "xe_platform_types.h"
> #include "xe_pmu.h"
> #include "xe_step_types.h"
> +#include "xe_oa.h"
>
> #if IS_ENABLED(CONFIG_DRM_XE_DISPLAY)
> #include "soc/intel_pch.h"
> @@ -365,6 +366,9 @@ struct xe_device {
> /** @pmu: performance monitoring unit */
> struct xe_pmu pmu;
>
> + /** @oa: oa perf counter subsystem */
> + struct xe_oa oa;
> +
> /* private: */
>
> #if IS_ENABLED(CONFIG_DRM_XE_DISPLAY)
> diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h
> index d4310be3e1e7c..dc700198f33f7 100644
> --- a/drivers/gpu/drm/xe/xe_gt_types.h
> +++ b/drivers/gpu/drm/xe/xe_gt_types.h
> @@ -13,6 +13,7 @@
> #include "xe_reg_sr_types.h"
> #include "xe_sa_types.h"
> #include "xe_uc_types.h"
> +#include "xe_oa.h"
>
> struct xe_exec_queue_ops;
> struct xe_migrate;
> @@ -347,6 +348,9 @@ struct xe_gt {
> /** @oob: bitmap with active OOB workaroudns */
> unsigned long *oob;
> } wa_active;
> +
> + /** @oa: oa perf counter subsystem per gt info */
> + struct xe_oa_gt oa;
> };
>
> #endif
> diff --git a/drivers/gpu/drm/xe/xe_hw_engine_types.h b/drivers/gpu/drm/xe/xe_hw_engine_types.h
> index cd4bc1412a3ff..c38674c827c91 100644
> --- a/drivers/gpu/drm/xe/xe_hw_engine_types.h
> +++ b/drivers/gpu/drm/xe/xe_hw_engine_types.h
> @@ -146,6 +146,8 @@ struct xe_hw_engine {
> enum xe_hw_engine_id engine_id;
> /** @eclass: pointer to per hw engine class interface */
> struct xe_hw_engine_class_intf *eclass;
> + /** @oa_group: oa unit for this hw engine */
> + struct xe_oa_group *oa_group;
> };
>
> /**
> diff --git a/drivers/gpu/drm/xe/xe_module.c b/drivers/gpu/drm/xe/xe_module.c
> index 7194595e7f312..5bf957b127f0f 100644
> --- a/drivers/gpu/drm/xe/xe_module.c
> +++ b/drivers/gpu/drm/xe/xe_module.c
> @@ -11,6 +11,7 @@
> #include "xe_drv.h"
> #include "xe_hw_fence.h"
> #include "xe_module.h"
> +#include "xe_oa.h"
> #include "xe_pci.h"
> #include "xe_pmu.h"
> #include "xe_sched_job.h"
> @@ -68,6 +69,10 @@ static const struct init_funcs init_funcs[] = {
> .init = xe_register_pci_driver,
> .exit = xe_unregister_pci_driver,
> },
> + {
> + .init = xe_oa_sysctl_register,
> + .exit = xe_oa_sysctl_unregister,
> + },
> };
>
> static int __init xe_init(void)
> diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c
> new file mode 100644
> index 0000000000000..fae067e73c027
> --- /dev/null
> +++ b/drivers/gpu/drm/xe/xe_oa.c
> @@ -0,0 +1,309 @@
> +// SPDX-License-Identifier: MIT
> +/*
> + * Copyright © 2023 Intel Corporation
> + */
> +
> +#include <linux/anon_inodes.h>
> +#include <linux/nospec.h>
> +#include <linux/sizes.h>
> +#include <linux/uuid.h>
> +
> +#include <drm/xe_drm.h>
> +#include <drm/drm_drv.h>
> +
> +#include "regs/xe_oa_regs.h"
> +#include "xe_gt.h"
> +#include "xe_device.h"
> +#include "xe_oa.h"
> +
> +static u32 xe_oa_stream_paranoid = true;
> +static int xe_oa_sample_rate_hard_limit;
> +static u32 xe_oa_max_sample_rate = 100000;
> +
> +static const struct xe_oa_format oa_formats[] = {
> + [XE_OA_FORMAT_C4_B8] = { 7, 64 },
> + [XE_OA_FORMAT_A12] = { 0, 64 },
> + [XE_OA_FORMAT_A12_B8_C8] = { 2, 128 },
> + [XE_OA_FORMAT_A32u40_A4u32_B8_C8] = { 5, 256 },
> + [XE_OAR_FORMAT_A32u40_A4u32_B8_C8] = { 5, 256 },
> + [XE_OA_FORMAT_A24u40_A14u32_B8_C8] = { 5, 256 },
> + [XE_OAM_FORMAT_MPEC8u64_B8_C8] = { 1, 192, TYPE_OAM, HDR_64_BIT },
> + [XE_OAM_FORMAT_MPEC8u32_B8_C8] = { 2, 128, TYPE_OAM, HDR_64_BIT },
> +};
> +
> +static struct ctl_table_header *sysctl_header;
> +
> +void xe_oa_register(struct xe_device *xe)
> +{
> + struct xe_oa *oa = &xe->oa;
> +
> + if (!oa->xe)
> + return;
> +
> + oa->metrics_kobj = kobject_create_and_add("metrics",
> + &xe->drm.primary->kdev->kobj);
> +}
> +
> +void xe_oa_unregister(struct xe_device *xe)
> +{
> + struct xe_oa *oa = &xe->oa;
> +
> + if (!oa->metrics_kobj)
> + return;
> +
> + kobject_put(oa->metrics_kobj);
> + oa->metrics_kobj = NULL;
> +}
> +
> +static u32 num_oa_groups_per_gt(struct xe_gt *gt)
> +{
> + return 1;
> +}
> +
> +static u32 __oam_engine_group(struct xe_hw_engine *hwe)
> +{
> + if (GRAPHICS_VERx100(gt_to_xe(hwe->gt)) >= 1270) {
> + /*
> + * There's 1 SAMEDIA gt and 1 OAM per SAMEDIA gt. All media slices
> + * within the gt use the same OAM. All MTL SKUs list 1 SA MEDIA.
> + */
> + drm_WARN_ON(&hwe->gt->tile->xe->drm,
> + hwe->gt->info.type != XE_GT_TYPE_MEDIA);
> +
> + return OA_GROUP_OAM_SAMEDIA_0;
> + }
> +
> + return OA_GROUP_INVALID;
> +}
> +
> +static u32 __oa_engine_group(struct xe_hw_engine *hwe)
> +{
> + switch (hwe->class) {
> + case XE_ENGINE_CLASS_RENDER:
> + return OA_GROUP_OAG;
> +
> + case XE_ENGINE_CLASS_VIDEO_DECODE:
> + case XE_ENGINE_CLASS_VIDEO_ENHANCE:
> + return __oam_engine_group(hwe);
> +
> + default:
> + return OA_GROUP_INVALID;
> + }
> +}
> +
> +static struct xe_oa_regs __oam_regs(u32 base)
> +{
> + return (struct xe_oa_regs) {
> + base,
> + GEN12_OAM_HEAD_POINTER(base),
> + GEN12_OAM_TAIL_POINTER(base),
> + GEN12_OAM_BUFFER(base),
> + GEN12_OAM_CONTEXT_CONTROL(base),
> + GEN12_OAM_CONTROL(base),
> + GEN12_OAM_DEBUG(base),
> + GEN12_OAM_STATUS(base),
> + GEN12_OAM_CONTROL_COUNTER_FORMAT_SHIFT,
> + };
> +}
> +
> +static struct xe_oa_regs __oag_regs(void)
> +{
> + return (struct xe_oa_regs) {
> + 0,
> + GEN12_OAG_OAHEADPTR,
> + GEN12_OAG_OATAILPTR,
> + GEN12_OAG_OABUFFER,
> + GEN12_OAG_OAGLBCTXCTRL,
> + GEN12_OAG_OACONTROL,
> + GEN12_OAG_OA_DEBUG,
> + GEN12_OAG_OASTATUS,
> + GEN12_OAG_OACONTROL_OA_COUNTER_FORMAT_SHIFT,
> + };
> +}
> +
> +static void xe_oa_init_groups(struct xe_gt *gt)
> +{
> + const u32 mtl_oa_base[] = {
> + [OA_GROUP_OAM_SAMEDIA_0] = 0x393000,
> + };
> + int i, num_groups = gt->oa.num_oa_groups;
> +
> + for (i = 0; i < num_groups; i++) {
> + struct xe_oa_group *g = &gt->oa.group[i];
> +
> + /* Fused off engines can result in a group with num_engines == 0 */
> + if (g->num_engines == 0)
> + continue;
> +
> + if (i == OA_GROUP_OAG && gt->info.type != XE_GT_TYPE_MEDIA) {
> + g->regs = __oag_regs();
> + g->type = TYPE_OAG;
> + } else if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1270) {
> + g->regs = __oam_regs(mtl_oa_base[i]);
> + g->type = TYPE_OAM;
> + }
> +
> + /* Set oa_unit_ids now to ensure ids remain contiguous. */
> + g->oa_unit_id = gt->tile->xe->oa.oa_unit_ids++;
> + }
> +}
> +
> +static int xe_oa_init_gt(struct xe_gt *gt)
> +{
> + u32 num_groups = num_oa_groups_per_gt(gt);
> + struct xe_hw_engine *hwe;
> + enum xe_hw_engine_id id;
> + struct xe_oa_group *g;
> +
> + g = kcalloc(num_groups, sizeof(*g), GFP_KERNEL);
> + if (!g)
> + return -ENOMEM;
> +
> + for_each_hw_engine(hwe, gt, id) {
> + u32 index = __oa_engine_group(hwe);
> +
> + hwe->oa_group = NULL;
> + if (index < num_groups) {
> + g[index].num_engines++;
> + hwe->oa_group = &g[index];
> + }
> + }
> +
> + gt->oa.num_oa_groups = num_groups;
> + gt->oa.group = g;
> +
> + xe_oa_init_groups(gt);
> +
> + return 0;
> +}
> +
> +static int xe_oa_init_engine_groups(struct xe_oa *oa)
> +{
> + struct xe_gt *gt;
> + int i, ret;
> +
> + for_each_gt(gt, oa->xe, i) {
> + ret = xe_oa_init_gt(gt);
> + if (ret)
> + return ret;
> + }
> +
> + return 0;
> +}
> +
> +static void oa_format_add(struct xe_oa *oa, enum drm_xe_oa_format format)
> +{
> + __set_bit(format, oa->format_mask);
> +}
> +
> +static void xe_oa_init_supported_formats(struct xe_oa *oa)
> +{
> + switch (oa->xe->info.platform) {
> + case XE_ALDERLAKE_S:
> + case XE_ALDERLAKE_P:
case XE_ALDERLAKE_N:
case XE_DG1:
case XE_TIGERLAKE:
case XE_ROCKETLAKE:
The platforms listed above are essentially the same from the OA register/format point of view.
> + oa_format_add(oa, XE_OA_FORMAT_A12);
> + oa_format_add(oa, XE_OA_FORMAT_A12_B8_C8);
> + oa_format_add(oa, XE_OA_FORMAT_A32u40_A4u32_B8_C8);
> + oa_format_add(oa, XE_OA_FORMAT_C4_B8);
> + break;
> +
> + case XE_DG2:
> + oa_format_add(oa, XE_OAR_FORMAT_A32u40_A4u32_B8_C8);
> + oa_format_add(oa, XE_OA_FORMAT_A24u40_A14u32_B8_C8);
> + break;
> +
> + case XE_METEORLAKE:
> + oa_format_add(oa, XE_OAR_FORMAT_A32u40_A4u32_B8_C8);
> + oa_format_add(oa, XE_OA_FORMAT_A24u40_A14u32_B8_C8);
> + oa_format_add(oa, XE_OAM_FORMAT_MPEC8u64_B8_C8);
> + oa_format_add(oa, XE_OAM_FORMAT_MPEC8u32_B8_C8);
> + break;
> +
> + default:
> + drm_err(&oa->xe->drm, "Unknown platform\n");
> + }
> +}
> +
> +int xe_oa_init(struct xe_device *xe)
> +{
> + struct xe_oa *oa = &xe->oa;
> + struct xe_gt *gt;
> + int i, ret;
> +
> + /* Support OA only with GuC submission and Gen12+ */
> + if (XE_WARN_ON(!xe_device_uc_enabled(xe)) || XE_WARN_ON(GRAPHICS_VER(xe) < 12))
> + return 0;
> +
> + oa->xe = xe;
> + oa->oa_formats = oa_formats;
> +
> + for_each_gt(gt, xe, i)
> + mutex_init(&gt->oa.lock);
> +
> + /* Choose a representative limit */
> + xe_oa_sample_rate_hard_limit = xe_root_mmio_gt(xe)->info.clock_freq / 2;
> +
> + mutex_init(&oa->metrics_lock);
> + idr_init_base(&oa->metrics_idr, 1);
> +
> + ret = xe_oa_init_engine_groups(oa);
> + if (ret) {
> + drm_err(&xe->drm, "OA initialization failed %d\n", ret);
> + return ret;
> + }
> +
> + xe_oa_init_supported_formats(oa);
> +
> + oa->xe = xe;
> + return 0;
> +}
> +
> +void xe_oa_fini(struct xe_device *xe)
> +{
> + struct xe_oa *oa = &xe->oa;
> + struct xe_gt *gt;
> + int i;
> +
> + if (!oa->xe)
> + return;
> +
> + for_each_gt(gt, xe, i)
> + kfree(gt->oa.group);
> +
> + idr_destroy(&oa->metrics_idr);
> +
> + oa->xe = NULL;
> +}
> +
> +static struct ctl_table oa_ctl_table[] = {
> + {
> + .procname = "perf_stream_paranoid",
> + .data = &xe_oa_stream_paranoid,
> + .maxlen = sizeof(xe_oa_stream_paranoid),
> + .mode = 0644,
> + .proc_handler = proc_dointvec_minmax,
> + .extra1 = SYSCTL_ZERO,
> + .extra2 = SYSCTL_ONE,
> + },
> + {
> + .procname = "oa_max_sample_rate",
> + .data = &xe_oa_max_sample_rate,
> + .maxlen = sizeof(xe_oa_max_sample_rate),
> + .mode = 0644,
> + .proc_handler = proc_dointvec_minmax,
> + .extra1 = SYSCTL_ZERO,
> + .extra2 = &xe_oa_sample_rate_hard_limit,
> + },
> + {}
> +};
> +
> +int xe_oa_sysctl_register(void)
> +{
> + sysctl_header = register_sysctl("dev/xe", oa_ctl_table);
> + return 0;
> +}
> +
> +void xe_oa_sysctl_unregister(void)
> +{
> + unregister_sysctl_table(sysctl_header);
> +}
> diff --git a/drivers/gpu/drm/xe/xe_oa.h b/drivers/gpu/drm/xe/xe_oa.h
> new file mode 100644
> index 0000000000000..ba4ba80fd34cb
> --- /dev/null
> +++ b/drivers/gpu/drm/xe/xe_oa.h
> @@ -0,0 +1,18 @@
> +/* SPDX-License-Identifier: MIT */
> +/*
> + * Copyright © 2023 Intel Corporation
> + */
> +
> +#ifndef _XE_OA_H_
> +#define _XE_OA_H_
> +
> +#include "xe_oa_types.h"
> +
> +int xe_oa_init(struct xe_device *xe);
> +void xe_oa_fini(struct xe_device *xe);
> +void xe_oa_register(struct xe_device *xe);
> +void xe_oa_unregister(struct xe_device *xe);
> +int xe_oa_sysctl_register(void);
> +void xe_oa_sysctl_unregister(void);
> +
> +#endif
More information about the Intel-xe
mailing list