[PATCH 1/6] drm/i915/display: add support for DMC wakelocks

Gustavo Sousa gustavo.sousa at intel.com
Tue Feb 20 16:45:06 UTC 2024


Quoting Luca Coelho (2024-02-07 07:30:02-03:00)
>In order to reduce the DC5->DC2 restore time, wakelocks have been
>introduced in DMC so the driver can tell it when registers and other
>memory areas are going to be accessed and keep their respective blocks
>awake.
>
>Implement this in the driver by adding the concept of DMC wakelocks.
>When the driver needs to access memory which lies inside pre-defined
>ranges, it will tell DMC to set the wakelock, access the memory, then
>wait for a while and clear the wakelock.
>
>The wakelock state is protected in the driver with spinlocks to
>prevent concurrency issues.
>

I think it would be good to have a "BSpec: 71583" trailer here.

>Signed-off-by: Luca Coelho <luciano.coelho at intel.com>
>---
> drivers/gpu/drm/i915/Makefile                 |   1 +
> drivers/gpu/drm/i915/display/intel_de.h       |  35 +++-
> drivers/gpu/drm/i915/display/intel_display.c  |   1 +
> .../gpu/drm/i915/display/intel_display_core.h |   4 +
> .../drm/i915/display/intel_display_driver.c   |   1 +
> drivers/gpu/drm/i915/display/intel_dmc_regs.h |  32 +++
> drivers/gpu/drm/i915/display/intel_dmc_wl.c   | 185 ++++++++++++++++++
> drivers/gpu/drm/i915/display/intel_dmc_wl.h   |  35 ++++
> drivers/gpu/drm/xe/Makefile                   |   1 +
> 9 files changed, 293 insertions(+), 2 deletions(-)
> create mode 100644 drivers/gpu/drm/i915/display/intel_dmc_wl.c
> create mode 100644 drivers/gpu/drm/i915/display/intel_dmc_wl.h
>
>diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
>index f7d13a7dcd63..05f0f54cc98e 100644
>--- a/drivers/gpu/drm/i915/Makefile
>+++ b/drivers/gpu/drm/i915/Makefile
>@@ -312,6 +312,7 @@ i915-y += \
>         display/intel_tc.o \
>         display/intel_vblank.o \
>         display/intel_vga.o \
>+        display/intel_dmc_wl.o \

This is in the wrong sorting position :-)

>         display/intel_wm.o \
>         display/skl_scaler.o \
>         display/skl_universal_plane.o \
>diff --git a/drivers/gpu/drm/i915/display/intel_de.h b/drivers/gpu/drm/i915/display/intel_de.h
>index 42552d8c151e..ef24748d522d 100644
>--- a/drivers/gpu/drm/i915/display/intel_de.h
>+++ b/drivers/gpu/drm/i915/display/intel_de.h
>@@ -42,16 +42,47 @@ intel_de_write(struct drm_i915_private *i915, i915_reg_t reg, u32 val)
> }
> 
> static inline u32
>-intel_de_rmw(struct drm_i915_private *i915, i915_reg_t reg, u32 clear, u32 set)
>+__intel_de_rmw_nowl(struct drm_i915_private *i915, i915_reg_t reg,
>+                    u32 clear, u32 set)
> {
>         return intel_uncore_rmw(&i915->uncore, reg, clear, set);
> }
> 
>+static inline u32
>+intel_de_rmw(struct drm_i915_private *i915, i915_reg_t reg, u32 clear, u32 set)
>+{
>+        u32 val;
>+
>+        intel_dmc_wl_get(i915, reg);
>+
>+        val = __intel_de_rmw_nowl(i915, reg, clear, set);
>+
>+        intel_dmc_wl_put(i915, reg);
>+
>+        return val;
>+}
>+
>+static inline int
>+__intel_wait_for_register_nowl(struct drm_i915_private *i915, i915_reg_t reg,
>+                               u32 mask, u32 value, unsigned int timeout)
>+{
>+        return intel_wait_for_register(&i915->uncore, reg, mask,
>+                                       value, timeout);
>+}
>+
> static inline int
> intel_de_wait_for_register(struct drm_i915_private *i915, i915_reg_t reg,
>                            u32 mask, u32 value, unsigned int timeout)
> {
>-        return intel_wait_for_register(&i915->uncore, reg, mask, value, timeout);
>+        int ret;
>+
>+        intel_dmc_wl_get(i915, reg);
>+
>+        ret = __intel_wait_for_register_nowl(i915, reg, mask, value, timeout);
>+
>+        intel_dmc_wl_put(i915, reg);
>+
>+        return ret;
> }
> 
> static inline int
>diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
>index c3a46272d3a3..f38146690298 100644
>--- a/drivers/gpu/drm/i915/display/intel_display.c
>+++ b/drivers/gpu/drm/i915/display/intel_display.c
>@@ -115,6 +115,7 @@
> #include "intel_vdsc_regs.h"
> #include "intel_vga.h"
> #include "intel_vrr.h"
>+#include "intel_dmc_wl.h"

Unused include here.

> #include "intel_wm.h"
> #include "skl_scaler.h"
> #include "skl_universal_plane.h"
>diff --git a/drivers/gpu/drm/i915/display/intel_display_core.h b/drivers/gpu/drm/i915/display/intel_display_core.h
>index a90f1aa201be..ec2c16744992 100644
>--- a/drivers/gpu/drm/i915/display/intel_display_core.h
>+++ b/drivers/gpu/drm/i915/display/intel_display_core.h
>@@ -26,6 +26,7 @@
> #include "intel_global_state.h"
> #include "intel_gmbus.h"
> #include "intel_opregion.h"
>+#include "intel_dmc_wl.h"
> #include "intel_wm_types.h"
> 
> struct task_struct;
>@@ -531,7 +532,10 @@ struct intel_display {
>         struct intel_overlay *overlay;
>         struct intel_display_params params;
>         struct intel_vbt_data vbt;
>+        struct intel_dmc_wl wl;
>         struct intel_wm wm;
>+
>+

Spurious blank lines.

> };
> 
> #endif /* __INTEL_DISPLAY_CORE_H__ */
>diff --git a/drivers/gpu/drm/i915/display/intel_display_driver.c b/drivers/gpu/drm/i915/display/intel_display_driver.c
>index ecf9cb74734b..491d1a38a313 100644
>--- a/drivers/gpu/drm/i915/display/intel_display_driver.c
>+++ b/drivers/gpu/drm/i915/display/intel_display_driver.c
>@@ -197,6 +197,7 @@ void intel_display_driver_early_probe(struct drm_i915_private *i915)
>         intel_dpll_init_clock_hook(i915);
>         intel_init_display_hooks(i915);
>         intel_fdi_init_hook(i915);
>+        intel_dmc_wl_init(i915);
> }
> 
> /* part #1: call before irq install */
>diff --git a/drivers/gpu/drm/i915/display/intel_dmc_regs.h b/drivers/gpu/drm/i915/display/intel_dmc_regs.h
>index 90d0dbb41cfe..ef8642ced911 100644
>--- a/drivers/gpu/drm/i915/display/intel_dmc_regs.h
>+++ b/drivers/gpu/drm/i915/display/intel_dmc_regs.h
>@@ -97,4 +97,36 @@
> #define TGL_DMC_DEBUG3                _MMIO(0x101090)
> #define DG1_DMC_DEBUG3                _MMIO(0x13415c)
> 
>+/**
>+ * DOC: DMC wakelock support

I'm not sure if there are documented rules about the placement of
documentation, but I'm used to seeing documentation text in the *.c file
where the implementation of the feature resides.

>+ *
>+ * The wakelock mechanism in DMC is used to make sure it doesn't enter
>+ * deeper power-saving modes in order to reduce the latency in
>+ * DC5->DC2 restore.

When I read the spec, I get the impression that the main
objective/use-case of DMC wakelocks is to provide the driver with a way
of controlling the exit and re-entry of DC states when programming
certain registers (as opposed to having the hardware do the
exit/re-entry for every register access). Is that right? Should we make
the statement above more specific, then?

By the way, do we have a way of measuring the performance improvements
brought by this new feature?

>+ *
>+ * The mechanism can be enabled and disabled by writing to the
>+ * DMC_WAKELOCK_CFG register.  There are also 13 control registers
>+ * that can be used to hold and release different wakelocks.  In the
>+ * current implementation, we only need one wakelock, so only
>+ * DMC_WAKELOCK1_CTL is used.  The other definitions are here for
>+ * potential future use.
>+ */
>+#define DMC_WAKELOCK_CFG        _MMIO(0x8F1B0)
>+#define  DMC_WAKELOCK_CFG_ENABLE REG_BIT(31)
>+#define DMC_WAKELOCK1_CTL        _MMIO(0x8F140)

Since we are only using DMC_WAKELOCK1_CTL, I think we can postpone the
definition of extra offsets to when (or if) we need them.

>+#define DMC_WAKELOCK2_CTL        _MMIO(0x8F144)
>+#define DMC_WAKELOCK3_CTL        _MMIO(0x8F148)
>+#define DMC_WAKELOCK4_CTL        _MMIO(0x8F14C)
>+#define DMC_WAKELOCK5_CTL        _MMIO(0x8F150)
>+#define DMC_WAKELOCK6_CTL        _MMIO(0x8F154)
>+#define DMC_WAKELOCK7_CTL        _MMIO(0x8F158)
>+#define DMC_WAKELOCK8_CTL        _MMIO(0x8F15C)
>+#define DMC_WAKELOCK9_CTL        _MMIO(0x8F160)
>+#define DMC_WAKELOCK10_CTL        _MMIO(0x8F164)
>+#define DMC_WAKELOCK11_CTL        _MMIO(0x8F168)
>+#define DMC_WAKELOCK12_CTL        _MMIO(0x8F16C)
>+#define DMC_WAKELOCK13_CTL        _MMIO(0x8F170)
>+#define  DMC_WAKELOCK_CTL_REQ         REG_BIT(31)
>+#define  DMC_WAKELOCK_CTL_ACK         REG_BIT(15)
>+
> #endif /* __INTEL_DMC_REGS_H__ */
>diff --git a/drivers/gpu/drm/i915/display/intel_dmc_wl.c b/drivers/gpu/drm/i915/display/intel_dmc_wl.c
>new file mode 100644
>index 000000000000..abe1ea92cf65
>--- /dev/null
>+++ b/drivers/gpu/drm/i915/display/intel_dmc_wl.c
>@@ -0,0 +1,185 @@
>+/* SPDX-License-Identifier: MIT */
>+/*
>+ * Copyright (C) 2023 Intel Corporation
>+ */
>+
>+#include <linux/kernel.h>
>+
>+#include "intel_de.h"
>+#include "intel_dmc_regs.h"
>+#include "intel_dmc_wl.h"
>+
>+#define DMC_WAKELOCK_CTL_TIMEOUT 100

The documented timeout value is 5ms. Have we observed longer times to
ack?

>+
>+static struct intel_dmc_wl_range lnl_wl_range[] = {
>+        { .start = 0x60000, .end = 0x7ffff },
>+};
>+
>+static void __intel_dmc_wl_release(struct drm_i915_private *i915)
>+{
>+        struct intel_dmc_wl *wl = &i915->display.wl;
>+
>+        WARN_ON(refcount_read(&wl->refcount));
>+
>+        queue_delayed_work(i915->unordered_wq, &wl->work,
>+                           msecs_to_jiffies(2000));


Is there any rationale behind this number? We are basically holding DC
"off" for about 2 seconds here. That said, I'm not sure what would be a
good number.

>+}
>+
>+static void intel_dmc_wl_work(struct work_struct *work)
>+{
>+        struct intel_dmc_wl *wl =
>+                container_of(work, struct intel_dmc_wl, work.work);
>+        struct drm_i915_private *i915 =
>+                container_of(wl, struct drm_i915_private, display.wl);
>+        unsigned long flags;
>+        u32 ctl_value;
>+
>+        spin_lock_irqsave(&wl->lock, flags);
>+
>+        ctl_value = __intel_de_rmw_nowl(i915, DMC_WAKELOCK1_CTL,
>+                                        DMC_WAKELOCK_CTL_REQ, 0);

Variable ctl_value is set but unused.

>+
>+        __intel_wait_for_register_nowl(i915,  DMC_WAKELOCK1_CTL,
>+                                       DMC_WAKELOCK_CTL_ACK, 0,
>+                                       DMC_WAKELOCK_CTL_TIMEOUT);

We should probably have a rate-limited warning if there is a timeout.
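
Something along these lines, maybe (untested sketch; assuming
drm_err_ratelimited() is an acceptable way of rate-limiting here, a plain
drm_warn() would also do):

        if (__intel_wait_for_register_nowl(i915, DMC_WAKELOCK1_CTL,
                                           DMC_WAKELOCK_CTL_ACK, 0,
                                           DMC_WAKELOCK_CTL_TIMEOUT))
                drm_err_ratelimited(&i915->drm,
                                    "timed out waiting for DMC wakelock release ack\n");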

>+
>+        spin_unlock_irqrestore(&wl->lock, flags);
>+}
>+
>+static bool intel_dmc_wl_check_range(u32 address)
>+{
>+        int i;
>+        bool wl_needed = false;
>+
>+        for (i = 0; i < ARRAY_SIZE(lnl_wl_range); i++) {
>+                if (address >= lnl_wl_range[i].start &&
>+                    address <= lnl_wl_range[i].end) {
>+                        wl_needed = true;
>+                        break;
>+                }
>+        }
>+
>+        return wl_needed;
>+}
>+
>+void intel_dmc_wl_init(struct drm_i915_private *i915)
>+{
>+        struct intel_dmc_wl *wl = &i915->display.wl;
>+        unsigned long flags;
>+
>+        spin_lock_init(&wl->lock);
>+
>+        spin_lock_irqsave(&wl->lock, flags);
>+        refcount_set(&wl->refcount, 0);
>+        spin_unlock_irqrestore(&wl->lock, flags);

I think we do not need the lock/unlock here, since this is the
initialization path.

>+}
>+
>+void intel_dmc_wl_enable(struct drm_i915_private *i915)
>+{
>+        struct intel_dmc_wl *wl = &i915->display.wl;
>+        unsigned long flags;
>+
>+        spin_lock_irqsave(&wl->lock, flags);
>+
>+        if (wl->enabled)
>+                goto out_unlock;
>+
>+        INIT_DELAYED_WORK(&wl->work, intel_dmc_wl_work);

Why do we need to initialize the work structure late (and possibly
repeatedly)? Is there anything keeping us from doing it in
intel_dmc_wl_init()?
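
I.e. something like this (untested), which would also make the
lock/unlock I mentioned above unnecessary:

        void intel_dmc_wl_init(struct drm_i915_private *i915)
        {
                struct intel_dmc_wl *wl = &i915->display.wl;

                /* Early probe, so no concurrency to worry about here. */
                INIT_DELAYED_WORK(&wl->work, intel_dmc_wl_work);
                spin_lock_init(&wl->lock);
                refcount_set(&wl->refcount, 0);
        }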

>+
>+        /*
>+         * Enable wakelock in DMC.  We shouldn't try to take the
>+         * wakelock, because we're just enabling it, so call the
>+         * non-locking version directly here.
>+         */
>+        __intel_de_rmw_nowl(i915, DMC_WAKELOCK_CFG, 0, DMC_WAKELOCK_CFG_ENABLE);
>+
>+        wl->enabled = true;
>+
>+out_unlock:
>+        spin_unlock_irqrestore(&wl->lock, flags);
>+        return;
>+}
>+
>+void intel_dmc_wl_disable(struct drm_i915_private *i915)
>+{
>+        struct intel_dmc_wl *wl = &i915->display.wl;
>+        unsigned long flags;
>+
>+        spin_lock_irqsave(&wl->lock, flags);
>+
>+        /* FIXME: should we cancel the work? */

Do we have a guarantee that there will be no outstanding wakelock
"refs" when this function is called?

 - If that is a guarantee, then, at this point, we might have that (i)
   the work is pending; or (ii) it has been scheduled and is or will be
   waiting on wl->lock; or (iii) the work is already done. One thing I
   think we could do in this case is to cancel_delayed_work() here and
   also have the work bail out if the wakelock has been disabled, i.e.
   !wl->enabled (see the sketch after this list).

 - Now, if we have no such guarantee, then I believe this is a bit more
   involved. We would need to wait for all refs to be released, but one
   issue is that we are holding wl->lock at this point, meaning that we
   need to think about how to properly solve this.
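
For the first case, this is roughly what I had in mind (untested;
cancel_delayed_work() doesn't sleep, so it should be fine under wl->lock,
and intel_dmc_wl_work() would need an out_unlock label for this):

        /* in intel_dmc_wl_disable(), with wl->lock held */
        cancel_delayed_work(&wl->work);

        /* in intel_dmc_wl_work(), right after taking wl->lock, in case
         * the work had already started and was waiting on the lock */
        if (!wl->enabled)
                goto out_unlock;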

>+
>+        if (!wl->enabled)
>+                goto out_unlock;
>+
>+        drm_info(&i915->drm, "DMC_WAKELOCK_CFG refcount %d\n",
>+                 refcount_read(&wl->refcount));
>+
>+        /* Disable wakelock in DMC */
>+        __intel_de_rmw_nowl(i915, DMC_WAKELOCK_CFG, DMC_WAKELOCK_CFG_ENABLE, 0);

Looks like we are only doing step (3) of the disabling sequence in this
function.

That said, I get the impression that intel_dmc_wl_disable() is in fact
supposed to be part of the "DC off" operation, which means that steps
(1) and (2) would already be in place at this point.

Furthermore, considering that we would wait for pending "refs" to be
done as suggested above, I believe step (4) would end up being
unnecessary.

>+
>+        drm_info(&i915->drm, "DMC_WAKELOCK_CFG read 0x%0x\n",
>+                 intel_de_read(i915, DMC_WAKELOCK_CFG));
>+
>+        refcount_set(&wl->refcount, 0);
>+        wl->enabled = false;
>+
>+out_unlock:
>+        spin_unlock_irqrestore(&wl->lock, flags);
>+        return;
>+}
>+
>+void intel_dmc_wl_get(struct drm_i915_private *i915, i915_reg_t reg)
>+{
>+        struct intel_dmc_wl *wl = &i915->display.wl;
>+        unsigned long flags;
>+        u32 ctl_value;
>+
>+        if (!intel_dmc_wl_check_range(reg.reg))
>+                return;
>+
>+        spin_lock_irqsave(&wl->lock, flags);
>+
>+        if (!wl->enabled)
>+                goto out_unlock;
>+
>+        if (refcount_inc_not_zero(&wl->refcount))
>+                goto out_unlock;
>+

At this point, refcount is zero, meaning that there might be a pending
"release work", which would cause an inconsistency if it gets executed
while there are outstanding wakelock "refs".

That's why I think we need to do 2 things: (1) cancel the work with
cancel_delayed_work() and (2) make intel_dmc_wl_work() bail out if
refcount is non-zero, since the worker might have already started and
be waiting on wl->lock while we are here.
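
I.e. something like this (untested):

        /* in intel_dmc_wl_get(), once we know refcount was zero */
        cancel_delayed_work(&wl->work);

        /* in intel_dmc_wl_work(), right after taking wl->lock: bail out
         * if a new reference was taken while the work was waiting on
         * the lock */
        if (refcount_read(&wl->refcount))
                goto out_unlock;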

>+        refcount_set(&wl->refcount, 1);
>+
>+        ctl_value = __intel_de_rmw_nowl(i915, DMC_WAKELOCK1_CTL, 0,
>+                                        DMC_WAKELOCK_CTL_REQ);

Variable ctl_value is set but unused.

>+
>+        __intel_wait_for_register_nowl(i915,  DMC_WAKELOCK1_CTL,
>+                                       DMC_WAKELOCK_CTL_ACK,
>+                                       DMC_WAKELOCK_CTL_ACK,
>+                                       DMC_WAKELOCK_CTL_TIMEOUT);

We should probably raise a rate-limited warning if there is a timeout
here.

>+
>+
>+out_unlock:
>+        spin_unlock_irqrestore(&wl->lock, flags);
>+}
>+
>+void intel_dmc_wl_put(struct drm_i915_private *i915, i915_reg_t reg)
>+{
>+        struct intel_dmc_wl *wl = &i915->display.wl;
>+        unsigned long flags;
>+
>+        if (!intel_dmc_wl_check_range(reg.reg))
>+                return;
>+
>+        spin_lock_irqsave(&wl->lock, flags);
>+
>+        if (!wl->enabled || !refcount_read(&wl->refcount))

Shouldn't we treat a zero refcount as a bug? Shouldn't we raise a
rate-limited warning in that case?
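
E.g., keeping the !wl->enabled check as it is and then something like
(untested):

        if (!refcount_read(&wl->refcount)) {
                drm_err_ratelimited(&i915->drm,
                                    "Tried to put wakelock with refcount already at zero\n");
                goto out_unlock;
        }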

>+                goto out_unlock;
>+
>+        if (refcount_dec_and_test(&wl->refcount)) {
>+                __intel_dmc_wl_release(i915);
>+
>+                goto out_unlock;
>+        }
>+
>+out_unlock:
>+        spin_unlock_irqrestore(&wl->lock, flags);
>+}
>diff --git a/drivers/gpu/drm/i915/display/intel_dmc_wl.h b/drivers/gpu/drm/i915/display/intel_dmc_wl.h
>new file mode 100644
>index 000000000000..3de6c8dc1c19
>--- /dev/null
>+++ b/drivers/gpu/drm/i915/display/intel_dmc_wl.h
>@@ -0,0 +1,35 @@
>+/* SPDX-License-Identifier: MIT */
>+/*
>+ * Copyright (C) 2023 Intel Corporation
>+ */
>+
>+#ifndef __INTEL_WAKELOCK_H__
>+#define __INTEL_WAKELOCK_H__
>+
>+#include <linux/types.h>
>+#include <linux/workqueue.h>
>+#include <linux/refcount.h>
>+
>+#include "i915_reg_defs.h"
>+
>+struct drm_i915_private;
>+
>+struct intel_dmc_wl {

I have the impression that we could have an intel_dmc_wl_types.h to
hold this struct definition, but I'm not sure if I'm being overzealous
here.

My rationale here is that we would be separating the struct bits from
the interface. The only things that really need to "know" the internals
of struct intel_dmc_wl are struct intel_display and the implementation
itself (i.e. intel_dmc_wl.c).
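
I.e., roughly something like this (just a sketch, and the file name is
only a suggestion):

        /* intel_dmc_wl_types.h */
        #ifndef __INTEL_DMC_WL_TYPES_H__
        #define __INTEL_DMC_WL_TYPES_H__

        #include <linux/refcount.h>
        #include <linux/spinlock.h>
        #include <linux/workqueue.h>

        struct intel_dmc_wl {
                spinlock_t lock;
                bool enabled;
                refcount_t refcount;
                struct delayed_work work;
        };

        #endif /* __INTEL_DMC_WL_TYPES_H__ */

...with intel_dmc_wl.h then including it and carrying only the function
declarations.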

>+        spinlock_t lock;
>+        bool enabled;
>+        refcount_t refcount;

Looks like we are protecting both enabled and refcount members with the
spin lock. Do we really need refcount to be a refcount_t? Would a simple
int suffice?

>+        struct delayed_work work;
>+};
>+
>+struct intel_dmc_wl_range {

I believe this struct definition belongs to intel_dmc_wl.c, since it is
not necessary elsewhere.

--
Gustavo Sousa

>+        u32 start;
>+        u32 end;
>+};
>+
>+void intel_dmc_wl_init(struct drm_i915_private *i915);
>+void intel_dmc_wl_enable(struct drm_i915_private *i915);
>+void intel_dmc_wl_disable(struct drm_i915_private *i915);
>+void intel_dmc_wl_get(struct drm_i915_private *i915, i915_reg_t reg);
>+void intel_dmc_wl_put(struct drm_i915_private *i915, i915_reg_t reg);
>+
>+#endif /* __INTEL_WAKELOCK_H__ */
>diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
>index a38849acc9fe..474ce58b5e3c 100644
>--- a/drivers/gpu/drm/xe/Makefile
>+++ b/drivers/gpu/drm/xe/Makefile
>@@ -281,6 +281,7 @@ xe-$(CONFIG_DRM_XE_DISPLAY) += \
>         i915-display/intel_vdsc.o \
>         i915-display/intel_vga.o \
>         i915-display/intel_vrr.o \
>+        i915-display/intel_dmc_wl.o \
>         i915-display/intel_wm.o \
>         i915-display/skl_scaler.o \
>         i915-display/skl_universal_plane.o \
>-- 
>2.39.2
>

