[Intel-gfx] [PATCH v3 7/8] drm/i915/mtl: Add support for PM DEMAND

Gustavo Sousa gustavo.sousa at intel.com
Fri May 12 20:43:22 UTC 2023


Quoting Govindapillai, Vinod (2023-05-11 20:24:55)
>Hello
>
>Thanks for the comments. Please see some inline replies.
>
>On Thu, 2023-04-27 at 17:24 -0300, Gustavo Sousa wrote:
>> Quoting Vinod Govindapillai (2023-04-27 12:00:15)
>> > From: Mika Kahola <mika.kahola at intel.com>
>> > 
>> > Display 14 introduces a new way to instruct the PUnit with the
>> > power and bandwidth requirements of the DE. Add the functionality
>> > to program the registers and handle waits using interrupts.
>> > The current wait time for timeouts is programmed to 10 msecs to
>> > factor in the worst-case scenarios. Changes are made to use REG_BIT
>> > for a register that we touched (GEN8_DE_MISC_IER).
>> > 
>> > Wa_14016740474 is added which applies to Xe_LPD+ display
>> > 
>> > v2: checkpatch warning fixes, simplify program pmdemand part
>> > 
>> > v3: update the dbufs and pipes values in the pmdemand register (stan)
>> >    Removed the macro usage in update_pmdemand_values()
>> > 
>> > Bspec: 66451, 64636, 64602, 64603
>> > Cc: Matt Atwood <matthew.s.atwood at intel.com>
>> > Cc: Matt Roper <matthew.d.roper at intel.com>
>> > Cc: Lucas De Marchi <lucas.demarchi at intel.com>
>> > Cc: Gustavo Sousa <gustavo.sousa at intel.com>
>> > Signed-off-by: José Roberto de Souza <jose.souza at intel.com>
>> > Signed-off-by: Radhakrishna Sripada <radhakrishna.sripada at intel.com>
>> > Signed-off-by: Gustavo Sousa <gustavo.sousa at intel.com>
>> > Signed-off-by: Mika Kahola <mika.kahola at intel.com>
>> > Signed-off-by: Vinod Govindapillai <vinod.govindapillai at intel.com>
>> > ---
>> > drivers/gpu/drm/i915/Makefile                 |   3 +-
>> > drivers/gpu/drm/i915/display/intel_display.c  |   7 +
>> > .../gpu/drm/i915/display/intel_display_core.h |   6 +
>> > .../drm/i915/display/intel_display_driver.c   |   7 +
>> > .../drm/i915/display/intel_display_power.c    |   8 +
>> > drivers/gpu/drm/i915/display/intel_pmdemand.c | 455 ++++++++++++++++++
>> > drivers/gpu/drm/i915/display/intel_pmdemand.h |  24 +
>> > drivers/gpu/drm/i915/i915_irq.c               |  21 +-
>> > drivers/gpu/drm/i915/i915_reg.h               |  36 +-
>> > 9 files changed, 562 insertions(+), 5 deletions(-)
>> > create mode 100644 drivers/gpu/drm/i915/display/intel_pmdemand.c
>> > create mode 100644 drivers/gpu/drm/i915/display/intel_pmdemand.h
>> > 
>> > diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
>> > index 9af76e376ca9..eb899fa86e51 100644
>> > --- a/drivers/gpu/drm/i915/Makefile
>> > +++ b/drivers/gpu/drm/i915/Makefile
>> > @@ -281,7 +281,8 @@ i915-y += \
>> >        display/i9xx_wm.o \
>> >        display/skl_scaler.o \
>> >        display/skl_universal_plane.o \
>> > -  display/skl_watermark.o
>> > +  display/skl_watermark.o \
>> > +  display/intel_pmdemand.o
>> > i915-$(CONFIG_ACPI) += \
>> >        display/intel_acpi.o \
>> >        display/intel_opregion.o
>> > diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
>> > index bf391a6cd8d6..f98e235fadc6 100644
>> > --- a/drivers/gpu/drm/i915/display/intel_display.c
>> > +++ b/drivers/gpu/drm/i915/display/intel_display.c
>> > @@ -99,6 +99,7 @@
>> > #include "intel_pcode.h"
>> > #include "intel_pipe_crc.h"
>> > #include "intel_plane_initial.h"
>> > +#include "intel_pmdemand.h"
>> > #include "intel_pps.h"
>> > #include "intel_psr.h"
>> > #include "intel_sdvo.h"
>> > @@ -6306,6 +6307,10 @@ int intel_atomic_check(struct drm_device *dev,
>> >                        return ret;
>> >        }
>> > 
>> > +  ret = intel_pmdemand_atomic_check(state);
>> > +  if (ret)
>> > +          goto fail;
>> > +
>> >        ret = intel_atomic_check_crtcs(state);
>> >        if (ret)
>> >                goto fail;
>> > @@ -6960,6 +6965,7 @@ static void intel_atomic_commit_tail(struct intel_atomic_state *state)
>> >        }
>> > 
>> >        intel_sagv_pre_plane_update(state);
>> > +  intel_pmdemand_pre_plane_update(state);
>> > 
>> >        /* Complete the events for pipes that have now been disabled */
>> >        for_each_new_intel_crtc_in_state(state, crtc, new_crtc_state, i) {
>> > @@ -7070,6 +7076,7 @@ static void intel_atomic_commit_tail(struct intel_atomic_state *state)
>> >                intel_verify_planes(state);
>> > 
>> >        intel_sagv_post_plane_update(state);
>> > +  intel_pmdemand_post_plane_update(state);
>> > 
>> >        drm_atomic_helper_commit_hw_done(&state->base);
>> > 
>> > diff --git a/drivers/gpu/drm/i915/display/intel_display_core.h b/drivers/gpu/drm/i915/display/intel_display_core.h
>> > index 9f66d734edf6..9471a052aa57 100644
>> > --- a/drivers/gpu/drm/i915/display/intel_display_core.h
>> > +++ b/drivers/gpu/drm/i915/display/intel_display_core.h
>> > @@ -345,6 +345,12 @@ struct intel_display {
>> >                struct intel_global_obj obj;
>> >        } dbuf;
>> > 
>> > +  struct {
>> > +          wait_queue_head_t waitqueue;
>> > +          struct mutex lock;
>> > +          struct intel_global_obj obj;
>> > +  } pmdemand;
>> > +
>> >        struct {
>> >                /*
>> >                 * dkl.phy_lock protects against concurrent access of the
>> > diff --git a/drivers/gpu/drm/i915/display/intel_display_driver.c b/drivers/gpu/drm/i915/display/intel_display_driver.c
>> > index 60ce10fc7205..79853d8c3240 100644
>> > --- a/drivers/gpu/drm/i915/display/intel_display_driver.c
>> > +++ b/drivers/gpu/drm/i915/display/intel_display_driver.c
>> > @@ -47,6 +47,7 @@
>> > #include "intel_opregion.h"
>> > #include "intel_overlay.h"
>> > #include "intel_plane_initial.h"
>> > +#include "intel_pmdemand.h"
>> > #include "intel_pps.h"
>> > #include "intel_quirks.h"
>> > #include "intel_vga.h"
>> > @@ -211,6 +212,8 @@ int intel_display_driver_probe_noirq(struct drm_i915_private *i915)
>> >        if (ret < 0)
>> >                goto cleanup_vga;
>> > 
>> > +  intel_pmdemand_init(i915);
>> > +
>> >        intel_power_domains_init_hw(i915, false);
>> > 
>> >        if (!HAS_DISPLAY(i915))
>> > @@ -240,6 +243,10 @@ int intel_display_driver_probe_noirq(struct drm_i915_private *i915)
>> >        if (ret)
>> >                goto cleanup_vga_client_pw_domain_dmc;
>> > 
>> > +  ret = intel_pmdemand_state_init(i915);
>> > +  if (ret)
>> > +          goto cleanup_vga_client_pw_domain_dmc;
>> > +
>> >        init_llist_head(&i915->display.atomic_helper.free_list);
>> >        INIT_WORK(&i915->display.atomic_helper.free_work,
>> >                  intel_atomic_helper_free_state_worker);
>> > diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c b/drivers/gpu/drm/i915/display/intel_display_power.c
>> > index 5150069f3f82..f5c5a486efbc 100644
>> > --- a/drivers/gpu/drm/i915/display/intel_display_power.c
>> > +++ b/drivers/gpu/drm/i915/display/intel_display_power.c
>> > @@ -20,6 +20,7 @@
>> > #include "intel_mchbar_regs.h"
>> > #include "intel_pch_refclk.h"
>> > #include "intel_pcode.h"
>> > +#include "intel_pmdemand.h"
>> > #include "intel_pps_regs.h"
>> > #include "intel_snps_phy.h"
>> > #include "skl_watermark.h"
>> > @@ -1085,6 +1086,10 @@ static void gen9_dbuf_enable(struct drm_i915_private *dev_priv)
>> >        dev_priv->display.dbuf.enabled_slices =
>> >                intel_enabled_dbuf_slices_mask(dev_priv);
>> > 
>> > +  if (DISPLAY_VER(dev_priv) >= 14)
>> > +          intel_program_dbuf_pmdemand(dev_priv, BIT(DBUF_S1) |
>> > +                                      dev_priv->display.dbuf.enabled_slices);
>> > +
>> >        /*
>> >         * Just power up at least 1 slice, we will
>> >         * figure out later which slices we have and what we need.
>> > @@ -1096,6 +1101,9 @@ static void gen9_dbuf_enable(struct drm_i915_private *dev_priv)
>> > static void gen9_dbuf_disable(struct drm_i915_private *dev_priv)
>> > {
>> >        gen9_dbuf_slices_update(dev_priv, 0);
>> > +
>> > +  if (DISPLAY_VER(dev_priv) >= 14)
>> > +          intel_program_dbuf_pmdemand(dev_priv, 0);
>> > }
>> > 
>> > static void gen12_dbuf_slices_config(struct drm_i915_private *dev_priv)
>> > diff --git a/drivers/gpu/drm/i915/display/intel_pmdemand.c b/drivers/gpu/drm/i915/display/intel_pmdemand.c
>> > new file mode 100644
>> > index 000000000000..df6429e7059d
>> > --- /dev/null
>> > +++ b/drivers/gpu/drm/i915/display/intel_pmdemand.c
>> > @@ -0,0 +1,455 @@
>> > +// SPDX-License-Identifier: MIT
>> > +/*
>> > + * Copyright © 2023 Intel Corporation
>> > + */
>> > +
>> > +#include <linux/bitops.h>
>> > +
>> > +#include "i915_drv.h"
>> > +#include "i915_reg.h"
>> > +#include "intel_bw.h"
>> > +#include "intel_cdclk.h"
>> > +#include "intel_cx0_phy.h"
>> > +#include "intel_de.h"
>> > +#include "intel_display.h"
>> > +#include "intel_display_trace.h"
>> > +#include "intel_pmdemand.h"
>> > +#include "skl_watermark.h"
>> > +
>> > +struct intel_pmdemand_state {
>> > +  struct intel_global_state base;
>> > +
>> > +  u16 qclk_gv_bw;
>> > +  u8 voltage_index;
>> > +  u8 qclk_gv_index;
>> > +  u8 active_pipes;
>> > +  u8 dbufs;
>> > +  u8 active_phys_plls_mask;
>> 
>> Is u8 enough for the mask? The enum phy shows 9 possible PHY_* members.
>> Also, I think having BUILD_BUG_ON() somewhere in this file to make sure
>> we have enough bits would be nice.
>Thanks. updated.
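
For reference, a minimal sketch of what such a compile-time check could look
like (assuming I915_MAX_PHYS from intel_display.h is the count of enum phy
values; the exact placement is just a suggestion):

int intel_pmdemand_state_init(struct drm_i915_private *i915)
{
	struct intel_pmdemand_state *pmdemand_state;

	/*
	 * active_phys_plls_mask stores one bit per PHY, so it must be
	 * wide enough for every enum phy value.
	 */
	BUILD_BUG_ON(BITS_PER_TYPE(pmdemand_state->active_phys_plls_mask) <
		     I915_MAX_PHYS);

	pmdemand_state = kzalloc(sizeof(*pmdemand_state), GFP_KERNEL);
	/* ... rest unchanged from the patch ... */
}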
>> 
>> > +  u16 cdclk_freq_mhz;
>> > +  u16 ddiclk_freq_mhz;
>> > +  u8 scalers;
>> > +};
>> > +
>> > +#define to_intel_pmdemand_state(x) container_of((x), struct intel_pmdemand_state, base)
>> > +
>> > +static struct intel_global_state *
>> > +intel_pmdemand_duplicate_state(struct intel_global_obj *obj)
>> > +{
>> > +  struct intel_pmdemand_state *pmdmnd_state;
>> > +
>> > +  pmdmnd_state = kmemdup(obj->state, sizeof(*pmdmnd_state), GFP_KERNEL);
>> > +  if (!pmdmnd_state)
>> > +          return NULL;
>> > +
>> > +  return &pmdmnd_state->base;
>> > +}
>> > +
>> > +static void intel_pmdemand_destroy_state(struct intel_global_obj *obj,
>> > +                                   struct intel_global_state *state)
>> > +{
>> > +  kfree(state);
>> > +}
>> > +
>> > +static const struct intel_global_state_funcs intel_pmdemand_funcs = {
>> > +  .atomic_duplicate_state = intel_pmdemand_duplicate_state,
>> > +  .atomic_destroy_state = intel_pmdemand_destroy_state,
>> > +};
>> > +
>> > +static struct intel_pmdemand_state *
>> > +intel_atomic_get_pmdemand_state(struct intel_atomic_state *state)
>> > +{
>> > +  struct drm_i915_private *i915 = to_i915(state->base.dev);
>> > +  struct intel_global_state *pmdemand_state;
>> > +
>> > +  pmdemand_state =
>> > +          intel_atomic_get_global_obj_state(state,
>> > +                                            &i915->display.pmdemand.obj);
>> > +  if (IS_ERR(pmdemand_state))
>> > +          return ERR_CAST(pmdemand_state);
>> > +
>> > +  return to_intel_pmdemand_state(pmdemand_state);
>> > +}
>> > +
>> > +static struct intel_pmdemand_state *
>> > +intel_atomic_get_old_pmdemand_state(struct intel_atomic_state *state)
>> > +{
>> > +  struct drm_i915_private *i915 = to_i915(state->base.dev);
>> > +  struct intel_global_state *pmdemand_state;
>> > +
>> > +  pmdemand_state = intel_atomic_get_old_global_obj_state(state, &i915->display.pmdemand.obj);
>> 
>> Wouldn't it be safer if we returned early here when pmdemand_state is
>> NULL?
>> 
>> I think to_intel_pmdemand_state(NULL) just happens to work (i.e. it
>> still returns NULL) because the "base" member is at the beginning of
>> the struct. However, we shouldn't rely on that IMO.
>
>Well. It is a valid point. But the base pointer is the first member for exactly the reason you
>pointed out. So, to prevent someone from accidentally moving that "base" from that position, I added
>a BUILD_BUG_ON() check. There are a few other state objects which lack such a check. I will address
>that as a separate patch.

Hm... Not sure I am totally convinced here. If we are enforcing (with
BUILD_BUG_ON()) that the base is the first member, then what is the point of
using container_of() instead of just a simple cast?

I believe one of the points of using container_of() is that it allows us *not*
to enforce a strict layout of the containing struct for things to work.

Now, if for some (yet unknown) reason we need to move the "base" member in the
future, it would be difficult to find all the places where the pointer could be
NULL but we relied on the assumption that "base" would always be at offset 0.

>
>> 
>> > +
>> > +  return to_intel_pmdemand_state(pmdemand_state);
>> > +}
>> > +
>> > +static struct intel_pmdemand_state *
>> > +intel_atomic_get_new_pmdemand_state(struct intel_atomic_state *state)
>> > +{
>> > +  struct drm_i915_private *i915 = to_i915(state->base.dev);
>> > +  struct intel_global_state *pmdemand_state;
>> > +
>> > +  pmdemand_state = intel_atomic_get_new_global_obj_state(state, &i915->display.pmdemand.obj);
>> 
>> Just as with intel_atomic_get_old_pmdemand_state(), shouldn't we return
>> early if pmdemand_state is NULL here?
>> 
>> > +
>> > +  return to_intel_pmdemand_state(pmdemand_state);
>> > +}
>> > +
>> > +int intel_pmdemand_state_init(struct drm_i915_private *i915)
>> > +{
>> > +  struct intel_pmdemand_state *pmdemand_state;
>> > +
>> > +  pmdemand_state = kzalloc(sizeof(*pmdemand_state), GFP_KERNEL);
>> > +  if (!pmdemand_state)
>> > +          return -ENOMEM;
>> > +
>> > +  intel_atomic_global_obj_init(i915, &i915->display.pmdemand.obj,
>> > +                               &pmdemand_state->base,
>> > +                               &intel_pmdemand_funcs);
>> > +
>> > +
>> > +  if (IS_MTL_DISPLAY_STEP(i915, STEP_A0, STEP_C0))
>> > +          /* Wa_14016740474 */
>> > +          intel_de_rmw(i915, XELPD_CHICKEN_DCPR_3, 0, DMD_RSP_TIMEOUT_DISABLE);
>> > +
>> > +  return 0;
>> > +}
>> > +
>> > +void intel_pmdemand_init(struct drm_i915_private *i915)
>> > +{
>> > +  mutex_init(&i915->display.pmdemand.lock);
>> > +  init_waitqueue_head(&i915->display.pmdemand.waitqueue);
>> > +}
>> 
>> The functions intel_pmdemand_state_init() and intel_pmdemand_init() are
>> both called from the same place. Furthermore,
>> intel_pmdemand_state_init() isn't only initializing the state, as the
>> Wa_14016740474 workaround is programmed there. Could we have only the
>> function intel_pmdemand_init() and incorporate what
>> intel_pmdemand_state_init() does in it?
>
>I tried that earlier and it wasn't possible, because intel_power_domains_init_hw() will call the
>pmdemand dbuf config. And I can't recall the other issue. But I will confirm this again and update,
>as I am not able to try this locally.

Well, regarding intel_power_domains_init_hw(), wouldn't the solution be to call
the single init function for PM Demand before that one is called? Am I missing
something?
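
I.e. roughly this ordering in intel_display_driver_probe_noirq() (a sketch
only, assuming a merged intel_pmdemand_init() that also does the state
allocation and the Wa_14016740474 programming, and therefore returns an error
code):

	/* Single PM Demand init before power domains touch the dbuf config. */
	ret = intel_pmdemand_init(i915);
	if (ret < 0)
		goto cleanup_vga;

	intel_power_domains_init_hw(i915, false);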

>> 
>> > +
>> > +static bool pmdemand_needs_update(struct intel_atomic_state *state)
>> > +{
>> > +  bool changed = false;
>> > +  struct intel_crtc *crtc;
>> > +  int i;
>> > +  const struct intel_bw_state *new_bw_state, *old_bw_state;
>> > +  const struct intel_cdclk_state *new_cdclk_state;
>> > +  const struct intel_crtc_state *new_crtc_state, *old_crtc_state;
>> > +  const struct intel_dbuf_state *new_dbuf_state, *old_dbuf_state;
>> > +
>> > +  for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state,
>> > +                                      new_crtc_state, i) {
>> > +          new_bw_state = intel_atomic_get_new_bw_state(state);
>> > +          old_bw_state = intel_atomic_get_old_bw_state(state);
>> > +
>> > +          new_dbuf_state = intel_atomic_get_new_dbuf_state(state);
>> > +          old_dbuf_state = intel_atomic_get_old_dbuf_state(state);
>> > +
>> > +          new_cdclk_state = intel_atomic_get_new_cdclk_state(state);
>> > +
>> > +          if ((new_bw_state && new_bw_state->qgv_point_peakbw !=
>> > +               old_bw_state->qgv_point_peakbw) ||
>> > +              (new_dbuf_state && new_dbuf_state->active_pipes !=
>> > +               old_dbuf_state->active_pipes) || new_cdclk_state)
>> > +                  changed = true;
>> > +
>> > +          /*
>> > +           * break needs to be removed, if some crtc_state dependent
>> > +           * parameters are added here
>> > +           */
>> > +          break;
>> > +  }
>> > +
>> > +  return changed;
>> > +}
>> > +
>> > +int intel_pmdemand_atomic_check(struct intel_atomic_state *state)
>> > +{
>> > +  struct drm_i915_private *i915 = to_i915(state->base.dev);
>> > +  int port_clock = 0;
>> > +  struct intel_crtc *crtc;
>> > +  struct intel_encoder *encoder;
>> > +  const struct intel_bw_state *new_bw_state;
>> > +  const struct intel_cdclk_state *new_cdclk_state;
>> > +  const struct intel_crtc_state *new_crtc_state, *old_crtc_state;
>> > +  const struct intel_dbuf_state *new_dbuf_state;
>> > +  struct intel_pmdemand_state *new_pmdemand_state;
>> > +  enum phy phy;
>> > +  int i, ret;
>> > +
>> > +  if (DISPLAY_VER(i915) < 14)
>> > +          return 0;
>> > +
>> > +  if (!pmdemand_needs_update(state))
>> > +          return 0;
>> > +
>> > +  new_pmdemand_state = intel_atomic_get_pmdemand_state(state);
>> > +  if (IS_ERR(new_pmdemand_state))
>> > +          return PTR_ERR(new_pmdemand_state);
>> > +
>> > +  ret = intel_atomic_lock_global_state(&new_pmdemand_state->base);
>> > +  if (ret)
>> > +          return ret;
>> > +
>> > +  /* Punit figures out the voltage index based on bandwidth*/
>> > +  new_bw_state = intel_atomic_get_bw_state(state);
>> > +  if (IS_ERR(new_bw_state))
>> > +          return PTR_ERR(new_bw_state);
>> > +
>> > +  /* firmware will calculate the qclk_gv_index, requirement is set to 0 */
>> > +  new_pmdemand_state->qclk_gv_index = 0;
>> > +  new_pmdemand_state->qclk_gv_bw =
>> > +          min_t(u16, new_bw_state->qgv_point_peakbw, 0xffff);
>> > +
>> > +  new_dbuf_state = intel_atomic_get_dbuf_state(state);
>> > +  if (IS_ERR(new_dbuf_state))
>> > +          return PTR_ERR(new_dbuf_state);
>> > +
>> > +  i = hweight8(new_dbuf_state->active_pipes);
>> > +  new_pmdemand_state->active_pipes = min(i, 3);
>> > +
>> > +  new_cdclk_state = intel_atomic_get_cdclk_state(state);
>> > +  if (IS_ERR(new_cdclk_state))
>> > +          return PTR_ERR(new_cdclk_state);
>> > +
>> > +  new_pmdemand_state->voltage_index =
>> > +          new_cdclk_state->logical.voltage_level;
>> > +  /* KHz to MHz */
>> > +  new_pmdemand_state->cdclk_freq_mhz =
>> > +          DIV_ROUND_UP(new_cdclk_state->logical.cdclk, 1000);
>> > +
>> > +  new_pmdemand_state->active_phys_plls_mask = 0;
>> > +
>> > +  for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state,
>> > +                                      new_crtc_state, i) {
>> > +          if (!new_crtc_state->hw.active)
>> > +                  continue;
>> > +
>> > +          encoder = intel_get_crtc_new_encoder(state, new_crtc_state);
>> > +          if (!encoder)
>> > +                  continue;
>> > +
>> > +          phy = intel_port_to_phy(i915, encoder->port);
>> > +
>> > +          if (intel_is_c10phy(i915, phy))
>> > +                  new_pmdemand_state->active_phys_plls_mask |= BIT(phy);
>> > +
>> > +          port_clock = max(port_clock, new_crtc_state->port_clock);
>> > +  }
>> 
>> As previously noted in https://patchwork.freedesktop.org/patch/530495 ,
>> I'm under the impression that this loop would not let us account for all
>> active crtcs, only those currently being touched by this atomic
>> transaction. Am I wrong to assume that
>> for_each_oldnew_intel_crtc_in_state() would only iterate over crtcs
>> touched by the atomic update?
>
>I checked intel_bw_check_data_rate(), which should be doing something similar to find out the data
>rate from each pipe and figure out the total data rate. So I thought this should be sufficient.

I took a quick look at intel_bw_check_data_rate()'s implementation and I think
it works there because struct intel_bw_state has arrays containing values for
each pipe, meaning that those not included in the atomic update would already
contain the current values from previous transactions.

For the number of active phy PLLs, I believe we can handle it similarly to how
it is done by intel_calc_active_pipes().

Now, for the maximum port_clock, maybe we could use an array approach like the
one in intel_bw_check_data_rate(), so we always have all the port_clock values
here.

>
>> 
>> > +
>> > +  /* To MHz */
>> > +  new_pmdemand_state->ddiclk_freq_mhz = DIV_ROUND_UP(port_clock, 1000);
>> > +
>> > +  /*
>> > +   * Setting scalers to max as it can not be calculated during flips and
>> > +   * fastsets without taking global states locks.
>> > +   */
>> > +  new_pmdemand_state->scalers = 7;
>> > +
>> > +  ret = intel_atomic_serialize_global_state(&new_pmdemand_state->base);
>> > +  if (ret)
>> > +          return ret;
>> > +
>> > +  return 0;
>> > +}
>> > +
>> > +static bool intel_pmdemand_check_prev_transaction(struct drm_i915_private *i915)
>> > +{
>> > +  return !((intel_de_read(i915, XELPDP_INITIATE_PMDEMAND_REQUEST(1)) &
>> > +            XELPDP_PMDEMAND_REQ_ENABLE) ||
>> > +          (intel_de_read(i915, GEN12_DCPR_STATUS_1) &
>> > +           XELPDP_PMDEMAND_INFLIGHT_STATUS));
>> > +}
>> > +
>> > +static bool intel_pmdemand_req_complete(struct drm_i915_private *i915)
>> > +{
>> > +  return !(intel_de_read(i915, XELPDP_INITIATE_PMDEMAND_REQUEST(1)) &
>> > +           XELPDP_PMDEMAND_REQ_ENABLE);
>> > +}
>> > +
>> > +static int intel_pmdemand_wait(struct drm_i915_private *i915)
>> > +{
>> > +  DEFINE_WAIT(wait);
>> > +  int ret;
>> > +  const unsigned int timeout_ms = 10;
>> > +
>> > +  ret = wait_event_timeout(i915->display.pmdemand.waitqueue,
>> > +                           intel_pmdemand_req_complete(i915),
>> > +                           msecs_to_jiffies_timeout(timeout_ms));
>> > +  if (ret == 0)
>> > +          drm_err(&i915->drm,
>> > +                  "timed out waiting for Punit PM Demand Response\n");
>> > +
>> > +  return ret;
>> > +}
>> > +
>> > +/* Required to be programmed during Display Init Sequences. */
>> > +void intel_program_dbuf_pmdemand(struct drm_i915_private *i915,
>> > +                           u8 dbuf_slices)
>> > +{
>> > +  u32 dbufs = min_t(u32, hweight8(dbuf_slices), 3);
>> > +
>> > +  mutex_lock(&i915->display.pmdemand.lock);
>> > +  if (drm_WARN_ON(&i915->drm,
>> > +                  !intel_pmdemand_check_prev_transaction(i915)))
>> > +          goto unlock;
>> > +
>> > +  intel_de_rmw(i915, XELPDP_INITIATE_PMDEMAND_REQUEST(0),
>> > +               XELPDP_PMDEMAND_DBUFS_MASK, XELPDP_PMDEMAND_DBUFS(dbufs));
>> > +  intel_de_rmw(i915, XELPDP_INITIATE_PMDEMAND_REQUEST(1), 0,
>> > +               XELPDP_PMDEMAND_REQ_ENABLE);
>> > +
>> > +  intel_pmdemand_wait(i915);
>> > +
>> > +unlock:
>> > +  mutex_unlock(&i915->display.pmdemand.lock);
>> > +}
>> > +
>> > +static void update_pmdemand_values(const struct intel_pmdemand_state *new,
>> > +                             const struct intel_pmdemand_state *old,
>> > +                             u32 *reg1, u32 *reg2)
>> > +{
>> > +  u32 plls, tmp;
>> > +
>> > +  /*
>> > +   * The pmdemand parameter updates happen in two steps: pre-plane and
>> > +   * post-plane updates. During the pre-plane update, as the DE might
>> > +   * still be handling some old operations, program the pmdemand
>> > +   * parameters with the higher of the old and new values to avoid
>> > +   * unwanted performance issues. Once things have settled, use the new
>> > +   * parameter values as part of the post-plane update.
>> > +   */
>> > +
>> > +  /* Set 1*/
>> > +  *reg1 &= ~XELPDP_PMDEMAND_QCLK_GV_BW_MASK;
>> > +  tmp = old ? max(old->qclk_gv_bw, new->qclk_gv_bw) : new->qclk_gv_bw;
>> > +  *reg1 |= XELPDP_PMDEMAND_QCLK_GV_BW(tmp);
>> > +
>> > +  *reg1 &= ~XELPDP_PMDEMAND_VOLTAGE_INDEX_MASK;
>> > +  tmp = old ? max(old->voltage_index, new->voltage_index) :
>> > +              new->voltage_index;
>> > +  *reg1 |= XELPDP_PMDEMAND_VOLTAGE_INDEX(tmp);
>> > +
>> > +  *reg1 &= ~XELPDP_PMDEMAND_QCLK_GV_INDEX_MASK;
>> > +  tmp = old ? max(old->qclk_gv_index, new->qclk_gv_index) :
>> > +              new->qclk_gv_index;
>> > +  *reg1 |= XELPDP_PMDEMAND_QCLK_GV_INDEX(tmp);
>> > +
>> > +  *reg1 &= ~XELPDP_PMDEMAND_PIPES_MASK;
>> > +  tmp = old ? max(old->active_pipes, new->active_pipes) :
>> > +              new->active_pipes;
>> > +  *reg1 |= XELPDP_PMDEMAND_PIPES(tmp);
>> > +
>> > +  *reg1 &= ~XELPDP_PMDEMAND_PHYS_MASK;
>> > +  plls = hweight32(new->active_phys_plls_mask);
>> > +  if (old)
>> > +          plls = max(plls, hweight32(old->active_phys_plls_mask));
>> > +  *reg1 |= XELPDP_PMDEMAND_PHYS(plls);
>> 
>> If plls > 7, we would potentially be programming this wrong (e.g. for
>> plls=8, we would be setting the field to 0).
>
>Thanks for pointing that out. Fixed.
>
>> 
>> > +
>> > +  /* Set 2*/
>> > +  *reg2 &= ~XELPDP_PMDEMAND_CDCLK_FREQ_MASK;
>> > +  tmp = old ? max(old->cdclk_freq_mhz, new->cdclk_freq_mhz) :
>> > +              new->cdclk_freq_mhz;
>> > +  *reg2 |= XELPDP_PMDEMAND_CDCLK_FREQ(tmp);
>> > +
>> > +  *reg2 &= ~XELPDP_PMDEMAND_DDICLK_FREQ_MASK;
>> > +  tmp = old ? max(old->ddiclk_freq_mhz, new->ddiclk_freq_mhz) :
>> > +              new->ddiclk_freq_mhz;
>> > +  *reg2 |= XELPDP_PMDEMAND_DDICLK_FREQ(tmp);
>> > +
>> > +  /* Hard code scalers to 7*/
>> 
>> I think this comment can be dropped: the hardcoding happens in
>> intel_pmdemand_atomic_check().
>
>Done
>> 
>> > +  *reg2 &= ~XELPDP_PMDEMAND_SCALERS_MASK;
>> > +  tmp = old ? max(old->scalers, new->scalers) : new->scalers;
>> > +  *reg2 |= XELPDP_PMDEMAND_SCALERS(tmp);
>> > +
>> > +  /*
>> > +   * Active_PLLs starts with 1 because of CDCLK PLL.
>> > +   * TODO: Missing to account genlock filter when it gets used.
>> > +   */
>> > +  *reg2 &= ~XELPDP_PMDEMAND_PLLS_MASK;
>> 
>> I think we are missing the ternary operator here to select the maximum
>> value for the pre-plane case.
>As per the above comments, it is just the plls from the earlier step + 1

Ah, got it. Thanks.

>
>> 
>> > +  *reg2 |= XELPDP_PMDEMAND_PLLS(plls + 1);
>> > +}
>> > +
>> > +static void intel_program_pmdemand(struct drm_i915_private *i915,
>> > +                             const struct intel_pmdemand_state *new,
>> > +                             const struct intel_pmdemand_state *old)
>> > +{
>> > +  bool changed = false;
>> > +  u32 reg1, mod_reg1;
>> > +  u32 reg2, mod_reg2;
>> > +
>> > +  mutex_lock(&i915->display.pmdemand.lock);
>> > +  if (drm_WARN_ON(&i915->drm,
>> > +                  !intel_pmdemand_check_prev_transaction(i915)))
>> > +          goto unlock;
>> 
>> According to the spec, we should wait and timeout after 10ms here.
>> 
>> > +
>> > +  reg1 = intel_de_read(i915, XELPDP_INITIATE_PMDEMAND_REQUEST(0));
>> > +  mod_reg1 = reg1;
>> > +
>> > +  reg2 = intel_de_read(i915, XELPDP_INITIATE_PMDEMAND_REQUEST(1));
>> > +  mod_reg2 = reg2;
>> > +
>> > +  update_pmdemand_values(new, old, &mod_reg1, &mod_reg2);
>> > +
>> > +  if (reg1 != mod_reg1) {
>> > +          intel_de_write(i915, XELPDP_INITIATE_PMDEMAND_REQUEST(0),
>> > +                         mod_reg1);
>> > +          changed = true;
>> > +  }
>> > +
>> > +  if (reg2 != mod_reg2) {
>> > +          intel_de_write(i915, XELPDP_INITIATE_PMDEMAND_REQUEST(1),
>> > +                         mod_reg2);
>> > +          changed = true;
>> > +  }
>> > +
>> > +  /* Initiate pm demand request only if register values are changed */
>> > +  if (changed) {
>> 
>> Nitpick: we could have
>> 
>>     if (!changed)
>>             goto unlock;
>> 
>> and dedent the block below.
>
>Done.
>> 
>> > +          drm_dbg_kms(&i915->drm,
>> > +                      "initate pmdemand request values: (0x%x 0x%x)\n",
>> > +                      mod_reg1, mod_reg2);
>> > +
>> > +          intel_de_rmw(i915, XELPDP_INITIATE_PMDEMAND_REQUEST(1), 0,
>> > +                       XELPDP_PMDEMAND_REQ_ENABLE);
>> > +
>> > +          intel_pmdemand_wait(i915);
>> > +  }
>> > +
>> > +unlock:
>> > +  mutex_unlock(&i915->display.pmdemand.lock);
>> > +}
>> > +
>> > +static bool
>> > +intel_pmdemand_state_changed(const struct intel_pmdemand_state *new,
>> > +                       const struct intel_pmdemand_state *old)
>> > +{
>> > +  return memcmp(&new->qclk_gv_bw, &old->qclk_gv_bw,
>> > +                sizeof(*new) - offsetof(typeof(*new), qclk_gv_bw)) != 0;
>> > +}
>> > +
>> > +void intel_pmdemand_pre_plane_update(struct intel_atomic_state *state)
>> > +{
>> > +  struct drm_i915_private *i915 = to_i915(state->base.dev);
>> > +  const struct intel_pmdemand_state *new_pmdmnd_state =
>> > +          intel_atomic_get_new_pmdemand_state(state);
>> > +  const struct intel_pmdemand_state *old_pmdmnd_state =
>> > +          intel_atomic_get_old_pmdemand_state(state);
>> > +
>> > +  if (DISPLAY_VER(i915) < 14)
>> > +          return;
>> > +
>> > +  if (!new_pmdmnd_state ||
>> > +      !intel_pmdemand_state_changed(new_pmdmnd_state, old_pmdmnd_state))
>> > +          return;
>> > +
>> > +  intel_program_pmdemand(i915, new_pmdmnd_state, old_pmdmnd_state);
>> > +}
>> > +
>> > +void intel_pmdemand_post_plane_update(struct intel_atomic_state *state)
>> > +{
>> > +  struct drm_i915_private *i915 = to_i915(state->base.dev);
>> > +  const struct intel_pmdemand_state *new_pmdmnd_state =
>> > +          intel_atomic_get_new_pmdemand_state(state);
>> > +  const struct intel_pmdemand_state *old_pmdmnd_state =
>> > +          intel_atomic_get_old_pmdemand_state(state);
>> > +
>> > +  if (DISPLAY_VER(i915) < 14)
>> > +          return;
>> > +
>> > +  if (!new_pmdmnd_state ||
>> > +      !intel_pmdemand_state_changed(new_pmdmnd_state, old_pmdmnd_state))
>> > +          return;
>> > +
>> > +  intel_program_pmdemand(i915, new_pmdmnd_state, NULL);
>> > +}
>> > diff --git a/drivers/gpu/drm/i915/display/intel_pmdemand.h b/drivers/gpu/drm/i915/display/intel_pmdemand.h
>> > new file mode 100644
>> > index 000000000000..0114f4e0225a
>> > --- /dev/null
>> > +++ b/drivers/gpu/drm/i915/display/intel_pmdemand.h
>> > @@ -0,0 +1,24 @@
>> > +/* SPDX-License-Identifier: MIT */
>> > +/*
>> > + * Copyright © 2023 Intel Corporation
>> > + */
>> > +
>> > +#ifndef __INTEL_PMDEMAND_H__
>> > +#define __INTEL_PMDEMAND_H__
>> > +
>> > +#include <linux/types.h>
>> > +
>> > +struct drm_i915_private;
>> > +struct intel_atomic_state;
>> > +struct intel_crtc_state;
>> > +struct intel_plane_state;
>> > +
>> > +void intel_pmdemand_init(struct drm_i915_private *i915);
>> > +int intel_pmdemand_state_init(struct drm_i915_private *i915);
>> > +void intel_program_dbuf_pmdemand(struct drm_i915_private *i915,
>> > +                           u8 dbuf_slices);
>> > +void intel_pmdemand_pre_plane_update(struct intel_atomic_state *state);
>> > +void intel_pmdemand_post_plane_update(struct intel_atomic_state *state);
>> > +int intel_pmdemand_atomic_check(struct intel_atomic_state *state);
>> > +
>> > +#endif /* __INTEL_PMDEMAND_H__ */
>> > diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
>> > index 2b94b8ca8ec9..907fa3aee179 100644
>> > --- a/drivers/gpu/drm/i915/i915_irq.c
>> > +++ b/drivers/gpu/drm/i915/i915_irq.c
>> > @@ -41,6 +41,7 @@
>> > #include "display/intel_fifo_underrun.h"
>> > #include "display/intel_hotplug.h"
>> > #include "display/intel_lpe_audio.h"
>> > +#include "display/intel_pmdemand.h"
>> > #include "display/intel_psr.h"
>> > #include "display/intel_psr_regs.h"
>> > 
>> > @@ -1986,12 +1987,25 @@ static u32 gen8_de_pipe_fault_mask(struct drm_i915_private *dev_priv)
>> >                return GEN8_DE_PIPE_IRQ_FAULT_ERRORS;
>> > }
>> > 
>> > +static void intel_pmdemand_irq_handler(struct drm_i915_private *dev_priv)
>> > +{
>> > +  wake_up_all(&dev_priv->display.pmdemand.waitqueue);
>> > +}
>> > +
>> > static void
>> > gen8_de_misc_irq_handler(struct drm_i915_private *dev_priv, u32 iir)
>> > {
>> >        bool found = false;
>> > 
>> > -  if (iir & GEN8_DE_MISC_GSE) {
>> > +  if (DISPLAY_VER(dev_priv) >= 14 &&
>> > +      (iir & (XELPDP_PMDEMAND_RSP | XELPDP_PMDEMAND_RSPTOUT_ERR))) {
>> > +          if (iir & XELPDP_PMDEMAND_RSPTOUT_ERR)
>> 
>> I think we should have the (iir & (XELPDP_PMDEMAND_RSP |
>> XELPDP_PMDEMAND_RSPTOUT_ERR)) part as a nested if statement here.
>> Otherwise, when the interrupt did not happen, we could end up checking
>> for GEN8_DE_MISC_GSE even when DISPLAY_VER(dev_priv) >= 14.
>> 
>> Even though we know that iir & GEN8_DE_MISC_GSE would be false in this
>> situation (because both XELPDP_PMDEMAND_RSPTOUT_ERR and GEN8_DE_MISC_GSE
>> map to the same bit), I think having that one checked only for previous
>> display engines would sound more correct semantically speaking.
>
>Thanks. updated.
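
For the record, roughly the shape I had in mind (sketch only):

	if (DISPLAY_VER(dev_priv) >= 14) {
		if (iir & (XELPDP_PMDEMAND_RSP | XELPDP_PMDEMAND_RSPTOUT_ERR)) {
			if (iir & XELPDP_PMDEMAND_RSPTOUT_ERR)
				drm_dbg(&dev_priv->drm,
					"Error waiting for Punit PM Demand Response\n");

			intel_pmdemand_irq_handler(dev_priv);
			found = true;
		}
	} else if (iir & GEN8_DE_MISC_GSE) {
		intel_opregion_asle_intr(dev_priv);
		found = true;
	}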
>
>> 
>> > +                  drm_dbg(&dev_priv->drm,
>> > +                          "Error waiting for Punit PM Demand Response\n");
>> > +
>> > +          intel_pmdemand_irq_handler(dev_priv);
>> > +          found = true;
>> > +  } else if (iir & GEN8_DE_MISC_GSE) {
>> >                intel_opregion_asle_intr(dev_priv);
>> >                found = true;
>> >        }
>> > @@ -3742,7 +3756,10 @@ static void gen8_de_irq_postinstall(struct drm_i915_private *dev_priv)
>> >        if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv))
>> >                de_port_masked |= BXT_DE_PORT_GMBUS;
>> > 
>> > -  if (DISPLAY_VER(dev_priv) >= 11) {
>> > +  if (DISPLAY_VER(dev_priv) >= 14)
>> > +          de_misc_masked |= XELPDP_PMDEMAND_RSPTOUT_ERR |
>> > +                            XELPDP_PMDEMAND_RSP;
>> > +  else if (DISPLAY_VER(dev_priv) >= 11) {
>> >                enum port port;
>> > 
>> >                if (intel_bios_is_dsi_present(dev_priv, &port))
>> > diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
>> > index dde6e91055bd..60c007aea1ce 100644
>> > --- a/drivers/gpu/drm/i915/i915_reg.h
>> > +++ b/drivers/gpu/drm/i915/i915_reg.h
>> > @@ -4426,8 +4426,10 @@
>> > #define GEN8_DE_MISC_IMR _MMIO(0x44464)
>> > #define GEN8_DE_MISC_IIR _MMIO(0x44468)
>> > #define GEN8_DE_MISC_IER _MMIO(0x4446c)
>> > -#define  GEN8_DE_MISC_GSE         (1 << 27)
>> > -#define  GEN8_DE_EDP_PSR          (1 << 19)
>> > +#define  XELPDP_PMDEMAND_RSPTOUT_ERR      REG_BIT(27)
>> > +#define  GEN8_DE_MISC_GSE         REG_BIT(27)
>> > +#define  GEN8_DE_EDP_PSR          REG_BIT(19)
>> > +#define  XELPDP_PMDEMAND_RSP              REG_BIT(3)
>> > 
>> > #define GEN8_PCU_ISR _MMIO(0x444e0)
>> > #define GEN8_PCU_IMR _MMIO(0x444e4)
>> > @@ -4512,6 +4514,33 @@
>> > #define  XELPDP_DP_ALT_HPD_LONG_DETECT         REG_BIT(1)
>> > #define  XELPDP_DP_ALT_HPD_SHORT_DETECT                REG_BIT(0)
>> > 
>> > +#define XELPDP_INITIATE_PMDEMAND_REQUEST(dword)           _MMIO(0x45230 + 4 * (dword))
>> > +#define  XELPDP_PMDEMAND_QCLK_GV_BW_MASK          REG_GENMASK(31, 16)
>> > +#define  XELPDP_PMDEMAND_QCLK_GV_BW(x)                    REG_FIELD_PREP(XELPDP_PMDEMAND_QCLK_GV_BW_MASK, x)
>> > +#define  XELPDP_PMDEMAND_VOLTAGE_INDEX_MASK               REG_GENMASK(14, 12)
>> > +#define  XELPDP_PMDEMAND_VOLTAGE_INDEX(x)                 REG_FIELD_PREP(XELPDP_PMDEMAND_VOLTAGE_INDEX_MASK, x)
>> > +#define  XELPDP_PMDEMAND_QCLK_GV_INDEX_MASK               REG_GENMASK(11, 8)
>> > +#define  XELPDP_PMDEMAND_QCLK_GV_INDEX(x)                 REG_FIELD_PREP(XELPDP_PMDEMAND_QCLK_GV_INDEX_MASK, x)
>> > +#define  XELPDP_PMDEMAND_PIPES_MASK                       REG_GENMASK(7, 6)
>> > +#define  XELPDP_PMDEMAND_PIPES(x)                 REG_FIELD_PREP(XELPDP_PMDEMAND_PIPES_MASK, x)
>> > +#define  XELPDP_PMDEMAND_DBUFS_MASK                       REG_GENMASK(5, 4)
>> > +#define  XELPDP_PMDEMAND_DBUFS(x)                 REG_FIELD_PREP(XELPDP_PMDEMAND_DBUFS_MASK, x)
>> > +#define  XELPDP_PMDEMAND_PHYS_MASK                        REG_GENMASK(2, 0)
>> > +#define  XELPDP_PMDEMAND_PHYS(x)                  REG_FIELD_PREP(XELPDP_PMDEMAND_PHYS_MASK, x)
>> > +
>> > +#define  XELPDP_PMDEMAND_REQ_ENABLE                       REG_BIT(31)
>> > +#define  XELPDP_PMDEMAND_CDCLK_FREQ_MASK          REG_GENMASK(30, 20)
>> > +#define  XELPDP_PMDEMAND_CDCLK_FREQ(x)                    REG_FIELD_PREP(XELPDP_PMDEMAND_CDCLK_FREQ_MASK, x)
>> > +#define  XELPDP_PMDEMAND_DDICLK_FREQ_MASK         REG_GENMASK(18, 8)
>> > +#define  XELPDP_PMDEMAND_DDICLK_FREQ(x)                   REG_FIELD_PREP(XELPDP_PMDEMAND_DDICLK_FREQ_MASK, x)
>> > +#define  XELPDP_PMDEMAND_SCALERS_MASK                     REG_GENMASK(6, 4)
>> > +#define  XELPDP_PMDEMAND_SCALERS(x)                       REG_FIELD_PREP(XELPDP_PMDEMAND_SCALERS_MASK, x)
>> > +#define  XELPDP_PMDEMAND_PLLS_MASK                        REG_GENMASK(2, 0)
>> > +#define  XELPDP_PMDEMAND_PLLS(x)                  REG_FIELD_PREP(XELPDP_PMDEMAND_PLLS_MASK, x)
>> > +
>> > +#define GEN12_DCPR_STATUS_1                               _MMIO(0x46440)
>> > +#define  XELPDP_PMDEMAND_INFLIGHT_STATUS          REG_BIT(26)
>> > +
>> > #define ILK_DISPLAY_CHICKEN2   _MMIO(0x42004)
>> > /* Required on all Ironlake and Sandybridge according to the B-Spec. */
>> > #define   ILK_ELPIN_409_SELECT REG_BIT(25)
>> > @@ -4671,6 +4700,9 @@
>> > #define   DCPR_SEND_RESP_IMM                   REG_BIT(25)
>> > #define   DCPR_CLEAR_MEMSTAT_DIS               REG_BIT(24)
>> > 
>> > +#define XELPD_CHICKEN_DCPR_3                      _MMIO(0x46438)
>> > +#define   DMD_RSP_TIMEOUT_DISABLE         REG_BIT(19)
>> > +
>> > #define SKL_DFSM                       _MMIO(0x51000)
>> > #define   SKL_DFSM_DISPLAY_PM_DISABLE  (1 << 27)
>> > #define   SKL_DFSM_DISPLAY_HDCP_DISABLE        (1 << 25)
>> > -- 
>> > 2.34.1
>> > 
>

