[PATCH] drm/i915/pmu: Drop custom hotplug code

Lucas De Marchi lucas.demarchi at intel.com
Thu Jan 23 19:26:46 UTC 2025


On Thu, Jan 23, 2025 at 06:06:29PM +0000, Tvrtko Ursulin wrote:
>
>On 23/01/2025 16:27, Lucas De Marchi wrote:
>>On Thu, Jan 23, 2025 at 09:43:35AM +0000, Tvrtko Ursulin wrote:
>>>
>>>On 20/01/2025 22:57, Lucas De Marchi wrote:
>>>>On Mon, Jan 20, 2025 at 10:08:39AM -0500, Liang, Kan wrote:
>>>>>
>>>>>
>>>>>On 2025-01-16 5:24 p.m., Lucas De Marchi wrote:
>>>>>>Since commit 4ba4f1afb6a9 ("perf: Generic hotplug support 
>>>>>>for a PMU with
>>>>>>a scope"), there's generic support for system-wide counters and
>>>>>>integration with cpu hotplug. Set our scope to PERF_PMU_SCOPE_SYS_WIDE
>>>>>>instead of all the boilerplate code for handling hotplug.
>>>>>>
>>>>>>Cc: Kan Liang <kan.liang at linux.intel.com>
>>>>>>Cc: Peter Zijlstra (Intel) <peterz at infradead.org>
>>>>>>Cc: Vinay Belgaumkar <vinay.belgaumkar at intel.com>
>>>>>>Signed-off-by: Lucas De Marchi <lucas.demarchi at intel.com>
>>>>>>---
>>>>>> drivers/gpu/drm/i915/i915_module.c |   2 -
>>>>>> drivers/gpu/drm/i915/i915_pmu.c    | 114 
>>>>>>+----------------------------
>>>>>> drivers/gpu/drm/i915/i915_pmu.h    |  11 ---
>>>>>> 3 files changed, 1 insertion(+), 126 deletions(-)
>>>>>>
>>>>>>diff --git a/drivers/gpu/drm/i915/i915_module.c 
>>>>>>b/drivers/gpu/drm/i915/i915_module.c
>>>>>>index 7ed6d70389af9..7affe07f84f45 100644
>>>>>>--- a/drivers/gpu/drm/i915/i915_module.c
>>>>>>+++ b/drivers/gpu/drm/i915/i915_module.c
>>>>>>@@ -71,8 +71,6 @@ static const struct {
>>>>>>     { .init = i915_vma_resource_module_init,
>>>>>>       .exit = i915_vma_resource_module_exit },
>>>>>>     { .init = i915_mock_selftests },
>>>>>>-    { .init = i915_pmu_init,
>>>>>>-      .exit = i915_pmu_exit },
>>>>>>     { .init = i915_pci_register_driver,
>>>>>>       .exit = i915_pci_unregister_driver },
>>>>>>     { .init = i915_perf_sysctl_register,
>>>>>>diff --git a/drivers/gpu/drm/i915/i915_pmu.c 
>>>>>>b/drivers/gpu/drm/i915/i915_pmu.c
>>>>>>index e55db036be1bb..652964ef0643c 100644
>>>>>>--- a/drivers/gpu/drm/i915/i915_pmu.c
>>>>>>+++ b/drivers/gpu/drm/i915/i915_pmu.c
>>>>>>@@ -28,9 +28,6 @@
>>>>>>      BIT(I915_SAMPLE_WAIT) | \
>>>>>>      BIT(I915_SAMPLE_SEMA))
>>>>>>
>>>>>>-static cpumask_t i915_pmu_cpumask;
>>>>>>-static unsigned int i915_pmu_target_cpu = -1;
>>>>>>-
>>>>>> static struct i915_pmu *event_to_pmu(struct perf_event *event)
>>>>>> {
>>>>>>     return container_of(event->pmu, struct i915_pmu, base);
>>>>>>@@ -642,10 +639,6 @@ static int i915_pmu_event_init(struct 
>>>>>>perf_event *event)
>>>>>>     if (event->cpu < 0)
>>>>>>         return -EINVAL;
>>>>>>
>>>>>>-    /* only allow running on one cpu at a time */
>>>>>>-    if (!cpumask_test_cpu(event->cpu, &i915_pmu_cpumask))
>>>>>>-        return -EINVAL;
>>>>>>-
>>>>>>     if (is_engine_event(event))
>>>>>>         ret = engine_event_init(event);
>>>>>>     else
>>>>>>@@ -940,23 +933,6 @@ static ssize_t 
>>>>>>i915_pmu_event_show(struct device *dev,
>>>>>>     return sprintf(buf, "config=0x%lx\n", eattr->val);
>>>>>> }
>>>>>>
>>>>>>-static ssize_t cpumask_show(struct device *dev,
>>>>>>-                struct device_attribute *attr, char *buf)
>>>>>>-{
>>>>>>-    return cpumap_print_to_pagebuf(true, buf, &i915_pmu_cpumask);
>>>>>>-}
>>>>>>-
>>>>>>-static DEVICE_ATTR_RO(cpumask);
>>>>>>-
>>>>>>-static struct attribute *i915_cpumask_attrs[] = {
>>>>>>-    &dev_attr_cpumask.attr,
>>>>>>-    NULL,
>>>>>>-};
>>>>>>-
>>>>>>-static const struct attribute_group i915_pmu_cpumask_attr_group = {
>>>>>>-    .attrs = i915_cpumask_attrs,
>>>>>>-};
>>>>>>-
>>>>>> #define __event(__counter, __name, __unit) \
>>>>>> { \
>>>>>>     .counter = (__counter), \
>>>>>>@@ -1173,92 +1149,12 @@ static void 
>>>>>>free_event_attributes(struct i915_pmu *pmu)
>>>>>>     pmu->pmu_attr = NULL;
>>>>>> }
>>>>>>
>>>>>>-static int i915_pmu_cpu_online(unsigned int cpu, struct 
>>>>>>hlist_node *node)
>>>>>>-{
>>>>>>-    struct i915_pmu *pmu = hlist_entry_safe(node, 
>>>>>>typeof(*pmu), cpuhp.node);
>>>>>>-
>>>>>>-    /* Select the first online CPU as a designated reader. */
>>>>>>-    if (cpumask_empty(&i915_pmu_cpumask))
>>>>>>-        cpumask_set_cpu(cpu, &i915_pmu_cpumask);
>>>>>>-
>>>>>>-    return 0;
>>>>>>-}
>>>>>>-
>>>>>>-static int i915_pmu_cpu_offline(unsigned int cpu, struct 
>>>>>>hlist_node *node)
>>>>>>-{
>>>>>>-    struct i915_pmu *pmu = hlist_entry_safe(node, 
>>>>>>typeof(*pmu), cpuhp.node);
>>>>>>-    unsigned int target = i915_pmu_target_cpu;
>>>>>>-
>>>>>>-    /*
>>>>>>-     * Unregistering an instance generates a CPU offline 
>>>>>>event which we must
>>>>>>-     * ignore to avoid incorrectly modifying the shared 
>>>>>>i915_pmu_cpumask.
>>>>>>-     */
>>>>>>-    if (!pmu->registered)
>>>>>>-        return 0;
>>>>>>-
>>>>>>-    if (cpumask_test_and_clear_cpu(cpu, &i915_pmu_cpumask)) {
>>>>>>-        target = cpumask_any_but(topology_sibling_cpumask(cpu), cpu);
>>>>>>-
>>>>>
>>>>>I'm not familar with the i915 PMU, but it seems suggest a core scope
>>>>>PMU, not a system-wide scope.
>>>>
>>>>counter is in a complete separate device - it doesn't depend on core or
>>>>die or pkg - not sure why it cared about topology_sibling_cpumask here.
>>>>
>>>>Also, in my tests it always chose cpu0 that is the boot cpu and can't be
>>>>offlined. Looking at our CI it seems this entire code is not tested at
>>>>all: the only test that in theory would exercise this just skips since
>>>>cpu0 can't go offline - https://intel-gfx-ci.01.org/tree/drm-tip/shards-all.html?testfilter=hotplug
>>>
>>>s/not tested at all/not currently tested/
>>>
>>>commit e59e74dc48a309cb848ffc3d76a0d61aa6803c05
>>>Author: Thomas Gleixner <tglx at linutronix.de>
>>>Date:   Fri May 12 23:07:04 2023 +0200
>>>
>>>   x86/topology: Remove CPU0 hotplug option
>>>
>>>So test worked for ~6 years and then transitioned to skip. :shrug:
>>
>>right, indeed looking at the CI repo I see it there:
>>
>>$ git grep CONFIG_BOOTPARAM_HOTPLUG_CPU0 
>>kconfig/debug:CONFIG_BOOTPARAM_HOTPLUG_CPU0=y
>>kconfig/debug-kasan:CONFIG_BOOTPARAM_HOTPLUG_CPU0=y
>>
>>$ git log --oneline -G CONFIG_BOOTPARAM_HOTPLUG_CPU0 kconfig/debug
>>3553bf4 kconfig: Dump current kconfigs used by CI
>>
>>
>>I sent this to igt yesterday:
>>https://patchwork.freedesktop.org/patch/633357/?series=143874&rev=1
>>
>>I will reword that commit message to note that it was actually tested
>>before that kernel commit. The test worked because it only tried to 
>>toggle the
>>cpu0 and thus a migration succeeded to a sibling cpu, right?
>
>No, it was off-lining all CPUs one by one.

right, I missed the cpu++ at the end

>
>If topology_sibling_cpumask is not right then maybe that did not show 
>because of the one by one nature? I mean if the test was trying to 
>offline multiple CPUs ie. leave multiple off lines, maybe then it 
>would break?

yes, I guess so.

>
>>For the kernel commit message I can add a comment about the counter
>>being indeed a system one and the use of topology_sibling_cpumask()
>>being wrong here. With that, can you review this patch?
>
>I don't know about this new system wide vs something else stuff. It 
>will not interfere with the fact there can be multiple instances of 
>the same PMU driver, one per device?

no. Each of them will do a separate call to perf_pmu_register() with a
different name. The cpumask attr is created on each of them and hotplug
handled individually (from the perf_pmu pov they are separate pmu
drivers).

Lucas De Marchi

>
>I can review but cannot promise it will be quick.
>
>Regards,
>
>Tvrtko
>
>>>
>>>Regards,
>>>
>>>Tvrtko
>>>
>>>>>
>>>>>>-        /* Migrate events if there is a valid target */
>>>>>>-        if (target < nr_cpu_ids) {
>>>>>>-            cpumask_set_cpu(target, &i915_pmu_cpumask);
>>>>>>-            i915_pmu_target_cpu = target;
>>>>>>-        }
>>>>>>-    }
>>>>>>-
>>>>>>-    if (target < nr_cpu_ids && target != pmu->cpuhp.cpu) {
>>>>>>-        perf_pmu_migrate_context(&pmu->base, cpu, target);
>>>>>>-        pmu->cpuhp.cpu = target;
>>>>>>-    }
>>>>>>-
>>>>>>-    return 0;
>>>>>>-}
>>>>>>-
>>>>>>-static enum cpuhp_state cpuhp_state = CPUHP_INVALID;
>>>>>>-
>>>>>>-int i915_pmu_init(void)
>>>>>>-{
>>>>>>-    int ret;
>>>>>>-
>>>>>>-    ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
>>>>>>-                      "perf/x86/intel/i915:online",
>>>>>>-                      i915_pmu_cpu_online,
>>>>>>-                      i915_pmu_cpu_offline);
>>>>>>-    if (ret < 0)
>>>>>>-        pr_notice("Failed to setup cpuhp state for i915 PMU! (%d)\n",
>>>>>>-              ret);
>>>>>>-    else
>>>>>>-        cpuhp_state = ret;
>>>>>>-
>>>>>>-    return 0;
>>>>>>-}
>>>>>>-
>>>>>>-void i915_pmu_exit(void)
>>>>>>-{
>>>>>>-    if (cpuhp_state != CPUHP_INVALID)
>>>>>>-        cpuhp_remove_multi_state(cpuhp_state);
>>>>>>-}
>>>>>>-
>>>>>>-static int i915_pmu_register_cpuhp_state(struct i915_pmu *pmu)
>>>>>>-{
>>>>>>-    if (cpuhp_state == CPUHP_INVALID)
>>>>>>-        return -EINVAL;
>>>>>>-
>>>>>>-    return cpuhp_state_add_instance(cpuhp_state, &pmu->cpuhp.node);
>>>>>>-}
>>>>>>-
>>>>>>-static void i915_pmu_unregister_cpuhp_state(struct i915_pmu *pmu)
>>>>>>-{
>>>>>>-    cpuhp_state_remove_instance(cpuhp_state, &pmu->cpuhp.node);
>>>>>>-}
>>>>>>-
>>>>>> void i915_pmu_register(struct drm_i915_private *i915)
>>>>>> {
>>>>>>     struct i915_pmu *pmu = &i915->pmu;
>>>>>>     const struct attribute_group *attr_groups[] = {
>>>>>>         &i915_pmu_format_attr_group,
>>>>>>         &pmu->events_attr_group,
>>>>>>-        &i915_pmu_cpumask_attr_group,
>>>>>>         NULL
>>>>>>     };
>>>>>>     int ret = -ENOMEM;
>>>>>>@@ -1266,7 +1162,6 @@ void i915_pmu_register(struct 
>>>>>>drm_i915_private *i915)
>>>>>>     spin_lock_init(&pmu->lock);
>>>>>>     hrtimer_init(&pmu->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
>>>>>>     pmu->timer.function = i915_sample;
>>>>>>-    pmu->cpuhp.cpu = -1;
>>>>>>     init_rc6(pmu);
>>>>>>
>>>>>>     if (IS_DGFX(i915)) {
>>>>>>@@ -1295,6 +1190,7 @@ void i915_pmu_register(struct 
>>>>>>drm_i915_private *i915)
>>>>>>
>>>>>>     pmu->base.module    = THIS_MODULE;
>>>>>>     pmu->base.task_ctx_nr    = perf_invalid_context;
>>>>>>+    pmu->base.scope        = PERF_PMU_SCOPE_SYS_WIDE;
>>>>>>     pmu->base.event_init    = i915_pmu_event_init;
>>>>>>     pmu->base.add        = i915_pmu_event_add;
>>>>>>     pmu->base.del        = i915_pmu_event_del;
>>>>>>@@ -1307,16 +1203,10 @@ void i915_pmu_register(struct 
>>>>>>drm_i915_private *i915)
>>>>>>     if (ret)
>>>>>>         goto err_groups;
>>>>>>
>>>>>>-    ret = i915_pmu_register_cpuhp_state(pmu);
>>>>>>-    if (ret)
>>>>>>-        goto err_unreg;
>>>>>>-
>>>>>>     pmu->registered = true;
>>>>>>
>>>>>>     return;
>>>>>>
>>>>>>-err_unreg:
>>>>>>-    perf_pmu_unregister(&pmu->base);
>>>>>> err_groups:
>>>>>>     kfree(pmu->base.attr_groups);
>>>>>> err_attr:
>>>>>>@@ -1340,8 +1230,6 @@ void i915_pmu_unregister(struct 
>>>>>>drm_i915_private *i915)
>>>>>>
>>>>>>     hrtimer_cancel(&pmu->timer);
>>>>>>
>>>>>>-    i915_pmu_unregister_cpuhp_state(pmu);
>>>>>>-
>>>>>>     perf_pmu_unregister(&pmu->base);
>>>>>>     kfree(pmu->base.attr_groups);
>>>>>>     if (IS_DGFX(i915))
>>>>>>diff --git a/drivers/gpu/drm/i915/i915_pmu.h 
>>>>>>b/drivers/gpu/drm/i915/i915_pmu.h
>>>>>>index 8e66d63d0c9f9..53bce3d8bfbaf 100644
>>>>>>--- a/drivers/gpu/drm/i915/i915_pmu.h
>>>>>>+++ b/drivers/gpu/drm/i915/i915_pmu.h
>>>>>>@@ -56,13 +56,6 @@ struct i915_pmu_sample {
>>>>>> };
>>>>>>
>>>>>> struct i915_pmu {
>>>>>>-    /**
>>>>>>-     * @cpuhp: Struct used for CPU hotplug handling.
>>>>>>-     */
>>>>>>-    struct {
>>>>>>-        struct hlist_node node;
>>>>>>-        unsigned int cpu;
>>>>>>-    } cpuhp;
>>>>>>     /**
>>>>>>      * @base: PMU base.
>>>>>>      */
>>>>>>@@ -155,15 +148,11 @@ struct i915_pmu {
>>>>>> };
>>>>>>
>>>>>> #ifdef CONFIG_PERF_EVENTS
>>>>>>-int i915_pmu_init(void);
>>>>>>-void i915_pmu_exit(void);
>>>>>> void i915_pmu_register(struct drm_i915_private *i915);
>>>>>> void i915_pmu_unregister(struct drm_i915_private *i915);
>>>>>> void i915_pmu_gt_parked(struct intel_gt *gt);
>>>>>> void i915_pmu_gt_unparked(struct intel_gt *gt);
>>>>>> #else
>>>>>>-static inline int i915_pmu_init(void) { return 0; }
>>>>>>-static inline void i915_pmu_exit(void) {}
>>>>>> static inline void i915_pmu_register(struct 
>>>>>>drm_i915_private *i915) {}
>>>>>> static inline void i915_pmu_unregister(struct 
>>>>>>drm_i915_private *i915) {}
>>>>>> static inline void i915_pmu_gt_parked(struct intel_gt *gt) {}
>>>>>


More information about the Intel-gfx mailing list