[Intel-gfx] [PATCH v4] drm/i915: Restore GT performance in headless mode with DMC loaded
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Wed Nov 29 15:21:23 UTC 2017
On 29/11/2017 15:06, Imre Deak wrote:
> On Wed, Nov 29, 2017 at 02:30:30PM +0000, Tvrtko Ursulin wrote:
>> From: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
>>
>> It seems that the DMC likes to transition between the DC states a lot when
>> there are no connected displays (no active power domains) during command
>> submission.
>>
>> This activity on DC states has a negative impact on the performance of the
>> chip with huge latencies observed in the interrupt handlers and elsewhere.
>> Simple tests like igt/gem_latency -n 0 are slowed down by a factor of
>> eight.
>>
>> Work around it by introducing a new power domain named,
>> POWER_DOMAIN_GT_IRQ, associtated with the "DC off" power well, which is
>> held for the duration of command submission activity.
>>
>> v2:
>> * Add commit text as comment in i915_gem_mark_busy. (Chris Wilson)
>> * Protect macro body with braces. (Jani Nikula)
>>
>> v3:
>> * Add dedicated power domain for clarity. (Chris, Imre)
>> * Commit message and comment text updates.
>> * Apply to all big-core GEN9 parts apart for Skylake which is pending DMC
>> firmware release.
>>
>> v4:
>> * Power domain should be inner to device runtime pm. (Chris)
>> * Simplify NEEDS_CSR_GT_PERF_WA macro. (Chris)
>> * Handle async DMC loading by moving the GT_IRQ power domain logic into
>> intel_runtime_pm. (Daniel, Chris)
>> * Include small core GEN9 as well. (Imre)
>>
>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
>> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=100572
>> Testcase: igt/gem_exec_nop/headless
>> Cc: Imre Deak <imre.deak at intel.com>
>> Acked-by: Chris Wilson <chris at chris-wilson.co.uk> (v2)
>> Cc: Chris Wilson <chris at chris-wilson.co.uk>
>> Cc: Dmitry Rogozhkin <dmitry.v.rogozhkin at intel.com>
>> ---
>> drivers/gpu/drm/i915/i915_drv.h | 5 ++++
>> drivers/gpu/drm/i915/i915_gem.c | 2 ++
>> drivers/gpu/drm/i915/i915_gem_request.c | 14 +++++++++++
>> drivers/gpu/drm/i915/intel_csr.c | 29 +++++++++++++---------
>> drivers/gpu/drm/i915/intel_runtime_pm.c | 44 +++++++++++++++++++++++++++------
>> 5 files changed, 76 insertions(+), 18 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
>> index bddd65839f60..37b3da4fd0d4 100644
>> --- a/drivers/gpu/drm/i915/i915_drv.h
>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>> @@ -398,6 +398,7 @@ enum intel_display_power_domain {
>> POWER_DOMAIN_AUX_D,
>> POWER_DOMAIN_GMBUS,
>> POWER_DOMAIN_MODESET,
>> + POWER_DOMAIN_GT_IRQ,
>> POWER_DOMAIN_INIT,
>>
>> POWER_DOMAIN_NUM,
>> @@ -3285,6 +3286,10 @@ intel_info(const struct drm_i915_private *dev_priv)
>> #define GT_FREQUENCY_MULTIPLIER 50
>> #define GEN9_FREQ_SCALER 3
>>
>> +#define NEEDS_CSR_GT_PERF_WA(dev_priv) \
>> + ((dev_priv)->csr.dmc_payload && \
>> + IS_GEN9(dev_priv) && !IS_SKYLAKE(dev_priv))
>> +
>> #include "i915_trace.h"
>>
>> static inline bool intel_vtd_active(void)
>> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
>> index 354b0546a191..a6f522e2d1a1 100644
>> --- a/drivers/gpu/drm/i915/i915_gem.c
>> +++ b/drivers/gpu/drm/i915/i915_gem.c
>> @@ -3381,6 +3381,8 @@ i915_gem_idle_work_handler(struct work_struct *work)
>>
>> if (INTEL_GEN(dev_priv) >= 6)
>> gen6_rps_idle(dev_priv);
>> +
>> + intel_display_power_put(dev_priv, POWER_DOMAIN_GT_IRQ);
>> intel_runtime_pm_put(dev_priv);
>> out_unlock:
>> mutex_unlock(&dev_priv->drm.struct_mutex);
>> diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
>> index a90bdd26571f..c28a4ceb016d 100644
>> --- a/drivers/gpu/drm/i915/i915_gem_request.c
>> +++ b/drivers/gpu/drm/i915/i915_gem_request.c
>> @@ -252,6 +252,20 @@ static void mark_busy(struct drm_i915_private *i915)
>> GEM_BUG_ON(!i915->gt.active_requests);
>>
>> intel_runtime_pm_get_noresume(i915);
>> +
>> + /*
>> + * It seems that the DMC likes to transition between the DC states a lot
>> + * when there are no connected displays (no active power domains) during
>> + * command submission.
>> + *
>> + * This activity has negative impact on the performance of the chip with
>> + * huge latencies observed in the interrupt handler and elsewhere.
>> + *
>> + * Work around it by grabbing a GT IRQ power domain whilst there is any
>> + * GT activity, preventing any DC state transitions.
>> + */
>> + intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ);
>> +
>> i915->gt.awake = true;
>>
>> intel_enable_gt_powersave(i915);
>> diff --git a/drivers/gpu/drm/i915/intel_csr.c b/drivers/gpu/drm/i915/intel_csr.c
>> index 07e4f7bc4412..8b188e9f283b 100644
>> --- a/drivers/gpu/drm/i915/intel_csr.c
>> +++ b/drivers/gpu/drm/i915/intel_csr.c
>> @@ -403,24 +403,33 @@ static uint32_t *parse_csr_fw(struct drm_i915_private *dev_priv,
>>
>> static void csr_load_work_fn(struct work_struct *work)
>> {
>> - struct drm_i915_private *dev_priv;
>> - struct intel_csr *csr;
>> + struct drm_i915_private *dev_priv =
>> + container_of(work, typeof(*dev_priv), csr.work);
>> + struct intel_csr *csr = &dev_priv->csr;
>> const struct firmware *fw = NULL;
>> + u32 *dmc_payload;
>>
>> - dev_priv = container_of(work, typeof(*dev_priv), csr.work);
>> - csr = &dev_priv->csr;
>> + request_firmware(&fw, csr->fw_path, &dev_priv->drm.pdev->dev);
>>
>> - request_firmware(&fw, dev_priv->csr.fw_path, &dev_priv->drm.pdev->dev);
>> - if (fw)
>> - dev_priv->csr.dmc_payload = parse_csr_fw(dev_priv, fw);
>> + if (fw) {
>> + dmc_payload = parse_csr_fw(dev_priv, fw);
>> + release_firmware(fw);
>> + } else {
>> + dmc_payload = NULL;
>> + }
>>
>> - if (dev_priv->csr.dmc_payload) {
>> + /* Lock to document memory barrier towards NEEDS_CSR_GT_PERF_WA. */
>> + mutex_lock(&dev_priv->power_domains.lock);
>> + csr->dmc_payload = dmc_payload;
>> + mutex_unlock(&dev_priv->power_domains.lock);
>> +
>> + if (csr->dmc_payload) {
>> intel_csr_load_program(dev_priv);
>>
>> intel_display_power_put(dev_priv, POWER_DOMAIN_INIT);
>>
>> DRM_INFO("Finished loading DMC firmware %s (v%u.%u)\n",
>> - dev_priv->csr.fw_path,
>> + csr->fw_path,
>> CSR_VERSION_MAJOR(csr->version),
>> CSR_VERSION_MINOR(csr->version));
>> } else {
>> @@ -431,8 +440,6 @@ static void csr_load_work_fn(struct work_struct *work)
>> dev_notice(dev_priv->drm.dev, "DMC firmware homepage: %s",
>> INTEL_UC_FIRMWARE_URL);
>> }
>> -
>> - release_firmware(fw);
>> }
>>
>> /**
>> diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c
>> index 8315499452dc..915124a2e945 100644
>> --- a/drivers/gpu/drm/i915/intel_runtime_pm.c
>> +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c
>> @@ -130,6 +130,8 @@ intel_display_power_domain_str(enum intel_display_power_domain domain)
>> return "INIT";
>> case POWER_DOMAIN_MODESET:
>> return "MODESET";
>> + case POWER_DOMAIN_GT_IRQ:
>> + return "GT_IRQ";
>> default:
>> MISSING_CASE(domain);
>> return "?";
>> @@ -1448,6 +1450,9 @@ __intel_display_power_get_domain(struct drm_i915_private *dev_priv,
>> struct i915_power_domains *power_domains = &dev_priv->power_domains;
>> struct i915_power_well *power_well;
>>
>> + if (domain == POWER_DOMAIN_GT_IRQ && !NEEDS_CSR_GT_PERF_WA(dev_priv))
>> + return;
>> +
>> for_each_power_domain_well(dev_priv, power_well, BIT_ULL(domain))
>> intel_power_well_get(dev_priv, power_well);
>>
>> @@ -1518,6 +1523,19 @@ bool intel_display_power_get_if_enabled(struct drm_i915_private *dev_priv,
>> return is_enabled;
>> }
>>
>> +static void
>> +__intel_display_power_put_domain(struct drm_i915_private *dev_priv,
>> + enum intel_display_power_domain domain)
>> +{
>> + struct i915_power_domains *power_domains = &dev_priv->power_domains;
>> + struct i915_power_well *power_well;
>> +
>> + power_domains->domain_use_count[domain]--;
>> +
>> + for_each_power_domain_well_rev(dev_priv, power_well, BIT_ULL(domain))
>> + intel_power_well_put(dev_priv, power_well);
>> +}
>> +
>> /**
>> * intel_display_power_put - release a power domain reference
>> * @dev_priv: i915 device instance
>> @@ -1530,21 +1548,30 @@ bool intel_display_power_get_if_enabled(struct drm_i915_private *dev_priv,
>> void intel_display_power_put(struct drm_i915_private *dev_priv,
>> enum intel_display_power_domain domain)
>> {
>> - struct i915_power_domains *power_domains;
>> - struct i915_power_well *power_well;
>> -
>> - power_domains = &dev_priv->power_domains;
>> + struct i915_power_domains *power_domains = &dev_priv->power_domains;
>>
>> mutex_lock(&power_domains->lock);
>>
>> + if (domain == POWER_DOMAIN_GT_IRQ) {
>> + /*
>> + * To handle asynchronous DMC loading we have to be careful to
>> + * keep the use count on POWER_DOMAIN_GT_IRQ balanced.
>> + *
>> + * If there was GT activity before the DMC was loaded, we will
>> + * have skipped obtaining the power domain so must not decrement
>> + * it now.
>> + */
>> + if (!power_domains->domain_use_count[domain])
>> + goto out;
>> + }
>
> Is it possible to have any GT activity before
> intel_power_domains_init_hw() / intel_csr_ucode_init()? I couldn't spot
> anything at least. Calling into the power well code before those isn't
> correct in any case. If so, the INIT power ref taken in
> intel_csr_ucode_init() makes sure the above scenario can't happen and
> then we don't need dmc_payload check in NEEDS_CSR_GT_PERF_WA() either.
intel_csr_ucode_init schedules a worker to actually load the firmware so
unless I am missing some synchronisation point it can happen.
Simpler solution would be to only base the NEEDS_CSR_GT_PERF_WA on
HAS_CSR, but then if someone removes the firmware deliberately we lose
power keeping this power well up needlessly, no?
Regards,
Tvrtko
>
>> +
>> WARN(!power_domains->domain_use_count[domain],
>> "Use count on domain %s is already zero\n",
>> intel_display_power_domain_str(domain));
>> - power_domains->domain_use_count[domain]--;
>>
>> - for_each_power_domain_well_rev(dev_priv, power_well, BIT_ULL(domain))
>> - intel_power_well_put(dev_priv, power_well);
>> + __intel_display_power_put_domain(dev_priv, domain);
>>
>> +out:
>> mutex_unlock(&power_domains->lock);
>>
>> intel_runtime_pm_put(dev_priv);
>> @@ -1705,6 +1732,7 @@ void intel_display_power_put(struct drm_i915_private *dev_priv,
>> BIT_ULL(POWER_DOMAIN_INIT))
>> #define SKL_DISPLAY_DC_OFF_POWER_DOMAINS ( \
>> SKL_DISPLAY_POWERWELL_2_POWER_DOMAINS | \
>> + BIT_ULL(POWER_DOMAIN_GT_IRQ) | \
>> BIT_ULL(POWER_DOMAIN_MODESET) | \
>> BIT_ULL(POWER_DOMAIN_AUX_A) | \
>> BIT_ULL(POWER_DOMAIN_INIT))
>> @@ -1727,6 +1755,7 @@ void intel_display_power_put(struct drm_i915_private *dev_priv,
>> BIT_ULL(POWER_DOMAIN_INIT))
>> #define BXT_DISPLAY_DC_OFF_POWER_DOMAINS ( \
>> BXT_DISPLAY_POWERWELL_2_POWER_DOMAINS | \
>> + BIT_ULL(POWER_DOMAIN_GT_IRQ) | \
>> BIT_ULL(POWER_DOMAIN_MODESET) | \
>> BIT_ULL(POWER_DOMAIN_AUX_A) | \
>> BIT_ULL(POWER_DOMAIN_INIT))
>> @@ -1785,6 +1814,7 @@ void intel_display_power_put(struct drm_i915_private *dev_priv,
>> BIT_ULL(POWER_DOMAIN_INIT))
>> #define GLK_DISPLAY_DC_OFF_POWER_DOMAINS ( \
>> GLK_DISPLAY_POWERWELL_2_POWER_DOMAINS | \
>> + BIT_ULL(POWER_DOMAIN_GT_IRQ) | \
>> BIT_ULL(POWER_DOMAIN_MODESET) | \
>> BIT_ULL(POWER_DOMAIN_AUX_A) | \
>> BIT_ULL(POWER_DOMAIN_INIT))
>> --
>> 2.14.1
>>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
>
More information about the Intel-gfx
mailing list