[Intel-gfx] [PATCH 5/6] drm/i915: dynamically allocate forcewake domains
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Wed Jun 19 14:22:17 UTC 2019
On 19/06/2019 00:06, Daniele Ceraolo Spurio wrote:
> On 6/18/19 2:23 AM, Tvrtko Ursulin wrote:
>> On 17/06/2019 19:09, Daniele Ceraolo Spurio wrote:
>>> We'd like to introduce a display uncore with no forcewake domains, so
>>> let's avoid wasting memory and be ready to allocate only what we need.
>>> Even without multiple uncore, we still don't need all the domains on all
>>> gens.
>>
>> Looks good in principle, only a few conversation points below.
>>
>>>
>>> Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio at intel.com>
>>> ---
>>> drivers/gpu/drm/i915/intel_uncore.c | 155 ++++++++++++++++++----------
>>> drivers/gpu/drm/i915/intel_uncore.h | 13 +--
>>> 2 files changed, 102 insertions(+), 66 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/intel_uncore.c
>>> b/drivers/gpu/drm/i915/intel_uncore.c
>>> index c0f5567ee096..7f311827c3ef 100644
>>> --- a/drivers/gpu/drm/i915/intel_uncore.c
>>> +++ b/drivers/gpu/drm/i915/intel_uncore.c
>>> @@ -344,7 +344,7 @@ intel_uncore_fw_release_timer(struct hrtimer *timer)
>>> {
>>> struct intel_uncore_forcewake_domain *domain =
>>> container_of(timer, struct
>>> intel_uncore_forcewake_domain, timer);
>>> - struct intel_uncore *uncore = forcewake_domain_to_uncore(domain);
>>> + struct intel_uncore *uncore = domain->uncore;
>>> unsigned long irqflags;
>>> assert_rpm_device_not_suspended(uncore->rpm);
>>> @@ -1291,23 +1291,23 @@ do { \
>>> (uncore)->funcs.read_fw_domains = x##_reg_read_fw_domains; \
>>> } while (0)
>>> -static void fw_domain_init(struct intel_uncore *uncore,
>>> +static int fw_domain_init(struct intel_uncore *uncore,
>>> enum forcewake_domain_id domain_id,
>>> i915_reg_t reg_set,
>>> i915_reg_t reg_ack)
>>> {
>>> struct intel_uncore_forcewake_domain *d;
>>> - if (WARN_ON(domain_id >= FW_DOMAIN_ID_COUNT))
>>> - return;
>>> -
>>> - d = &uncore->fw_domain[domain_id];
>>> + GEM_BUG_ON(domain_id >= FW_DOMAIN_ID_COUNT);
>>> - WARN_ON(d->wake_count);
>>
>> I wonder what was this for.
>>
>> Do you want to add some protection against double init of the same
>> domain?
>>
>
> just a GEM_BUG_ON maybe? It shouldn't really ever happen unless we're
> moving something around.
Yes, sounds right.
>
>>> + d = kzalloc(sizeof(*d), GFP_KERNEL);
>>> + if (!d)
>>> + return -ENOMEM;
>>> WARN_ON(!i915_mmio_reg_valid(reg_set));
>>> WARN_ON(!i915_mmio_reg_valid(reg_ack));
>>> + d->uncore = uncore;
>>> d->wake_count = 0;
>>> d->reg_set = uncore->regs + i915_mmio_reg_offset(reg_set);
>>> d->reg_ack = uncore->regs + i915_mmio_reg_offset(reg_ack);
>>> @@ -1324,7 +1324,6 @@ static void fw_domain_init(struct intel_uncore
>>> *uncore,
>>> BUILD_BUG_ON(FORCEWAKE_MEDIA_VEBOX0 != (1 <<
>>> FW_DOMAIN_ID_MEDIA_VEBOX0));
>>> BUILD_BUG_ON(FORCEWAKE_MEDIA_VEBOX1 != (1 <<
>>> FW_DOMAIN_ID_MEDIA_VEBOX1));
>>> -
>>> d->mask = BIT(domain_id);
>>> hrtimer_init(&d->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
>>> @@ -1333,6 +1332,10 @@ static void fw_domain_init(struct intel_uncore
>>> *uncore,
>>> uncore->fw_domains |= BIT(domain_id);
>>> fw_domain_reset(d);
>>> +
>>> + uncore->fw_domain[domain_id] = d;
>>> +
>>> + return 0;
>>> }
>>> static void fw_domain_fini(struct intel_uncore *uncore,
>>> @@ -1340,77 +1343,92 @@ static void fw_domain_fini(struct
>>> intel_uncore *uncore,
>>> {
>>> struct intel_uncore_forcewake_domain *d;
>>> - if (WARN_ON(domain_id >= FW_DOMAIN_ID_COUNT))
>>> - return;
>>> + GEM_BUG_ON(domain_id >= FW_DOMAIN_ID_COUNT);
>>> - d = &uncore->fw_domain[domain_id];
>>> + d = fetch_and_zero(&uncore->fw_domain[domain_id]);
>>
>> Maybe early if !d here and function flow just carries on?
>>
>
> will do.
>
>>> + uncore->fw_domains &= ~BIT(domain_id);
>>> - WARN_ON(d->wake_count);
>>> - WARN_ON(hrtimer_cancel(&d->timer));
>>> - memset(d, 0, sizeof(*d));
>>> + if (d) {
>>> + WARN_ON(d->wake_count);
>>> + WARN_ON(hrtimer_cancel(&d->timer));
>>> + kfree(d);
>>> + }
>>> +}
>>> - uncore->fw_domains &= ~BIT(domain_id);
>>> +static void intel_uncore_fw_domains_fini(struct intel_uncore *uncore)
>>> +{
>>> + struct intel_uncore_forcewake_domain *d;
>>> + int tmp;
>>> +
>>> + for_each_fw_domain(d, uncore, tmp)
>>> + fw_domain_fini(uncore, d->id);
>>> }
>>> -static void intel_uncore_fw_domains_init(struct intel_uncore *uncore)
>>> +static int intel_uncore_fw_domains_init(struct intel_uncore *uncore)
>>> {
>>> struct drm_i915_private *i915 = uncore->i915;
>>> + int ret;
>>> GEM_BUG_ON(!intel_uncore_has_forcewake(uncore));
>>> +#define __fw_domain_init(id__, set__, ack__) \
>>> + ret = fw_domain_init(uncore, (id__), (set__), (ack__)); \
>>> + if (ret) \
>>> + goto out_clean;
>>
>> Hidden control flow is slightly objectionable but I don't offer any
>> nice alternatives so I guess will have to pass. Or maybe accumulate
>> the error (err |= fw_domain_init(..)) as you go and then cleanup at
>> the end if any failed?
>
> I'd prefer to avoid accumulating the error since it'd just cause us to
> having to unroll more domains when we could've stopped early.
Although unrolling happens close to never-ever, so it isn't a practical
concern...
>
>>
>> On the other hand the idea slightly conflicts with my other suggestion
>> to rename existing fw_domain_init to __fw_domain_init and call the
>> macro fw_domain_init and avoid all the churn below.
>>
>
> I'll pick this suggestion among the 2, unless there is another
> suggestion on how to avoid the hidden goto.
... but okay, the hidden goto is also only a concern if someone adds new init
steps before the fw_domain_init steps, oblivious to the fact that cleanup is
also needed. Perhaps the out_clean label would be enough of a clue for that case.
>
>>> +
>>> if (INTEL_GEN(i915) >= 11) {
>>> int i;
>>> - uncore->funcs.force_wake_get =
>>> - fw_domains_get_with_fallback;
>>> + uncore->funcs.force_wake_get = fw_domains_get_with_fallback;
>>> uncore->funcs.force_wake_put = fw_domains_put;
>>> - fw_domain_init(uncore, FW_DOMAIN_ID_RENDER,
>>> - FORCEWAKE_RENDER_GEN9,
>>> - FORCEWAKE_ACK_RENDER_GEN9);
>>> - fw_domain_init(uncore, FW_DOMAIN_ID_BLITTER,
>>> - FORCEWAKE_BLITTER_GEN9,
>>> - FORCEWAKE_ACK_BLITTER_GEN9);
>>> + __fw_domain_init(FW_DOMAIN_ID_RENDER,
>>> + FORCEWAKE_RENDER_GEN9,
>>> + FORCEWAKE_ACK_RENDER_GEN9);
>>> + __fw_domain_init(FW_DOMAIN_ID_BLITTER,
>>> + FORCEWAKE_BLITTER_GEN9,
>>> + FORCEWAKE_ACK_BLITTER_GEN9);
>>> +
>>> for (i = 0; i < I915_MAX_VCS; i++) {
>>> if (!HAS_ENGINE(i915, _VCS(i)))
>>> continue;
>>> - fw_domain_init(uncore, FW_DOMAIN_ID_MEDIA_VDBOX0 + i,
>>> - FORCEWAKE_MEDIA_VDBOX_GEN11(i),
>>> - FORCEWAKE_ACK_MEDIA_VDBOX_GEN11(i));
>>> + __fw_domain_init(FW_DOMAIN_ID_MEDIA_VDBOX0 + i,
>>> + FORCEWAKE_MEDIA_VDBOX_GEN11(i),
>>> + FORCEWAKE_ACK_MEDIA_VDBOX_GEN11(i));
>>> }
>>> for (i = 0; i < I915_MAX_VECS; i++) {
>>> if (!HAS_ENGINE(i915, _VECS(i)))
>>> continue;
>>> - fw_domain_init(uncore, FW_DOMAIN_ID_MEDIA_VEBOX0 + i,
>>> - FORCEWAKE_MEDIA_VEBOX_GEN11(i),
>>> - FORCEWAKE_ACK_MEDIA_VEBOX_GEN11(i));
>>> + __fw_domain_init(FW_DOMAIN_ID_MEDIA_VEBOX0 + i,
>>> + FORCEWAKE_MEDIA_VEBOX_GEN11(i),
>>> + FORCEWAKE_ACK_MEDIA_VEBOX_GEN11(i));
>>> }
>>> } else if (IS_GEN_RANGE(i915, 9, 10)) {
>>> - uncore->funcs.force_wake_get =
>>> - fw_domains_get_with_fallback;
>>> + uncore->funcs.force_wake_get = fw_domains_get_with_fallback;
>>> uncore->funcs.force_wake_put = fw_domains_put;
>>> - fw_domain_init(uncore, FW_DOMAIN_ID_RENDER,
>>> - FORCEWAKE_RENDER_GEN9,
>>> - FORCEWAKE_ACK_RENDER_GEN9);
>>> - fw_domain_init(uncore, FW_DOMAIN_ID_BLITTER,
>>> - FORCEWAKE_BLITTER_GEN9,
>>> - FORCEWAKE_ACK_BLITTER_GEN9);
>>> - fw_domain_init(uncore, FW_DOMAIN_ID_MEDIA,
>>> - FORCEWAKE_MEDIA_GEN9, FORCEWAKE_ACK_MEDIA_GEN9);
>>> + __fw_domain_init(FW_DOMAIN_ID_RENDER,
>>> + FORCEWAKE_RENDER_GEN9,
>>> + FORCEWAKE_ACK_RENDER_GEN9);
>>> + __fw_domain_init(FW_DOMAIN_ID_BLITTER,
>>> + FORCEWAKE_BLITTER_GEN9,
>>> + FORCEWAKE_ACK_BLITTER_GEN9);
>>> + __fw_domain_init(FW_DOMAIN_ID_MEDIA,
>>> + FORCEWAKE_MEDIA_GEN9,
>>> + FORCEWAKE_ACK_MEDIA_GEN9);
>>> } else if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) {
>>> uncore->funcs.force_wake_get = fw_domains_get;
>>> uncore->funcs.force_wake_put = fw_domains_put;
>>> - fw_domain_init(uncore, FW_DOMAIN_ID_RENDER,
>>> - FORCEWAKE_VLV, FORCEWAKE_ACK_VLV);
>>> - fw_domain_init(uncore, FW_DOMAIN_ID_MEDIA,
>>> - FORCEWAKE_MEDIA_VLV, FORCEWAKE_ACK_MEDIA_VLV);
>>> + __fw_domain_init(FW_DOMAIN_ID_RENDER,
>>> + FORCEWAKE_VLV, FORCEWAKE_ACK_VLV);
>>> + __fw_domain_init(FW_DOMAIN_ID_MEDIA,
>>> + FORCEWAKE_MEDIA_VLV, FORCEWAKE_ACK_MEDIA_VLV);
>>> } else if (IS_HASWELL(i915) || IS_BROADWELL(i915)) {
>>> uncore->funcs.force_wake_get =
>>> fw_domains_get_with_thread_status;
>>> uncore->funcs.force_wake_put = fw_domains_put;
>>> - fw_domain_init(uncore, FW_DOMAIN_ID_RENDER,
>>> - FORCEWAKE_MT, FORCEWAKE_ACK_HSW);
>>> + __fw_domain_init(FW_DOMAIN_ID_RENDER,
>>> + FORCEWAKE_MT, FORCEWAKE_ACK_HSW);
>>> } else if (IS_IVYBRIDGE(i915)) {
>>> u32 ecobus;
>>> @@ -1437,8 +1455,8 @@ static void intel_uncore_fw_domains_init(struct
>>> intel_uncore *uncore)
>>> __raw_uncore_write32(uncore, FORCEWAKE, 0);
>>> __raw_posting_read(uncore, ECOBUS);
>>> - fw_domain_init(uncore, FW_DOMAIN_ID_RENDER,
>>> - FORCEWAKE_MT, FORCEWAKE_MT_ACK);
>>> + __fw_domain_init(FW_DOMAIN_ID_RENDER,
>>> + FORCEWAKE_MT, FORCEWAKE_MT_ACK);
>>> spin_lock_irq(&uncore->lock);
>>> fw_domains_get_with_thread_status(uncore, FORCEWAKE_RENDER);
>>> @@ -1449,19 +1467,27 @@ static void
>>> intel_uncore_fw_domains_init(struct intel_uncore *uncore)
>>> if (!(ecobus & FORCEWAKE_MT_ENABLE)) {
>>> DRM_INFO("No MT forcewake available on Ivybridge, this
>>> can result in issues\n");
>>> DRM_INFO("when using vblank-synced partial screen
>>> updates.\n");
>>> - fw_domain_init(uncore, FW_DOMAIN_ID_RENDER,
>>> - FORCEWAKE, FORCEWAKE_ACK);
>>> + __fw_domain_init(FW_DOMAIN_ID_RENDER,
>>> + FORCEWAKE, FORCEWAKE_ACK);
>>> }
>>> } else if (IS_GEN(i915, 6)) {
>>> uncore->funcs.force_wake_get =
>>> fw_domains_get_with_thread_status;
>>> uncore->funcs.force_wake_put = fw_domains_put;
>>> - fw_domain_init(uncore, FW_DOMAIN_ID_RENDER,
>>> - FORCEWAKE, FORCEWAKE_ACK);
>>> + __fw_domain_init(FW_DOMAIN_ID_RENDER,
>>> + FORCEWAKE, FORCEWAKE_ACK);
>>> }
>>> +#undef __fw_domain_init
>>> +
>>> /* All future platforms are expected to require complex power
>>> gating */
>>> WARN_ON(uncore->fw_domains == 0);
>>> +
>>> + return 0;
>>> +
>>> +out_clean:
>>> + intel_uncore_fw_domains_fini(uncore);
>>> + return ret;
>>> }
>>> #define ASSIGN_FW_DOMAINS_TABLE(uncore, d) \
>>> @@ -1562,13 +1588,17 @@ static void uncore_raw_init(struct
>>> intel_uncore *uncore)
>>> }
>>> }
>>> -static void uncore_forcewake_init(struct intel_uncore *uncore)
>>> +static int uncore_forcewake_init(struct intel_uncore *uncore)
>>> {
>>> struct drm_i915_private *i915 = uncore->i915;
>>> + int ret;
>>> GEM_BUG_ON(!intel_uncore_has_forcewake(uncore));
>>> - intel_uncore_fw_domains_init(uncore);
>>> + ret = intel_uncore_fw_domains_init(uncore);
>>> + if (ret)
>>> + return ret;
>>> +
>>> forcewake_early_sanitize(uncore, 0);
>>> if (IS_GEN_RANGE(i915, 6, 7)) {
>>> @@ -1601,6 +1631,8 @@ static void uncore_forcewake_init(struct
>>> intel_uncore *uncore)
>>> uncore->pmic_bus_access_nb.notifier_call =
>>> i915_pmic_bus_access_notifier;
>>> iosf_mbi_register_pmic_bus_access_notifier(&uncore->pmic_bus_access_nb);
>>> +
>>> + return 0;
>>> }
>>> int intel_uncore_init_mmio(struct intel_uncore *uncore)
>>> @@ -1619,10 +1651,13 @@ int intel_uncore_init_mmio(struct
>>> intel_uncore *uncore)
>>> uncore->unclaimed_mmio_check = 1;
>>> - if (!intel_uncore_has_forcewake(uncore))
>>> + if (!intel_uncore_has_forcewake(uncore)) {
>>> uncore_raw_init(uncore);
>>> - else
>>> - uncore_forcewake_init(uncore);
>>> + } else {
>>> + ret = uncore_forcewake_init(uncore);
>>> + if (ret)
>>> + goto out_mmio_cleanup;
>>> + }
>>> /* make sure fw funcs are set if and only if we have fw*/
>>> GEM_BUG_ON(intel_uncore_has_forcewake(uncore) !=
>>> !!uncore->funcs.force_wake_get);
>>> @@ -1644,6 +1679,11 @@ int intel_uncore_init_mmio(struct intel_uncore
>>> *uncore)
>>> DRM_DEBUG("unclaimed mmio detected on uncore init,
>>> clearing\n");
>>> return 0;
>>> +
>>> +out_mmio_cleanup:
>>> + uncore_mmio_cleanup(uncore);
>>> +
>>> + return ret;
>>> }
>>> /*
>>> @@ -1689,6 +1729,7 @@ void intel_uncore_fini_mmio(struct intel_uncore
>>> *uncore)
>>> iosf_mbi_unregister_pmic_bus_access_notifier_unlocked(
>>> &uncore->pmic_bus_access_nb);
>>> intel_uncore_forcewake_reset(uncore);
>>> + intel_uncore_fw_domains_fini(uncore);
>>> iosf_mbi_punit_release();
>>> }
>>> diff --git a/drivers/gpu/drm/i915/intel_uncore.h
>>> b/drivers/gpu/drm/i915/intel_uncore.h
>>> index 879252735bba..bbff281b880d 100644
>>> --- a/drivers/gpu/drm/i915/intel_uncore.h
>>> +++ b/drivers/gpu/drm/i915/intel_uncore.h
>>> @@ -126,6 +126,7 @@ struct intel_uncore {
>>> enum forcewake_domains fw_domains_saved; /* user domains saved
>>> for S3 */
>>> struct intel_uncore_forcewake_domain {
>>> + struct intel_uncore *uncore;
>>> enum forcewake_domain_id id;
>>> enum forcewake_domains mask;
>>> unsigned int wake_count;
>>> @@ -133,7 +134,7 @@ struct intel_uncore {
>>> struct hrtimer timer;
>>> u32 __iomem *reg_set;
>>> u32 __iomem *reg_ack;
>>> - } fw_domain[FW_DOMAIN_ID_COUNT];
>>> + } *fw_domain[FW_DOMAIN_ID_COUNT];
>>
>> The array itself could also be made dynamic, no? To much hassle for
>> very little gain?
>>
>
> With the current approach we don't know how many domains we have in
> advance, so we'd have to either add a table for that or realloc the
> array as we increase the number of domains. Both options don't seem
> worth the hassle IMO.
Agreed. It would also need changes in lookup so it looks even less
appealing than it did to me yesterday. :)
Regards,
Tvrtko
>
> Thanks,
> Daniele
>
>>> struct {
>>> unsigned int count;
>>> @@ -147,18 +148,12 @@ struct intel_uncore {
>>> /* Iterate over initialised fw domains */
>>> #define for_each_fw_domain_masked(domain__, mask__, uncore__, tmp__) \
>>> - for (tmp__ = (mask__); \
>>> - tmp__ ? (domain__ =
>>> &(uncore__)->fw_domain[__mask_next_bit(tmp__)]), 1 : 0;)
>>> + for (tmp__ = (mask__); tmp__ ;) \
>>> + for_each_if(domain__ =
>>> (uncore__)->fw_domain[__mask_next_bit(tmp__)])
>>> #define for_each_fw_domain(domain__, uncore__, tmp__) \
>>> for_each_fw_domain_masked(domain__, (uncore__)->fw_domains,
>>> uncore__, tmp__)
>>> -static inline struct intel_uncore *
>>> -forcewake_domain_to_uncore(const struct
>>> intel_uncore_forcewake_domain *d)
>>> -{
>>> - return container_of(d, struct intel_uncore, fw_domain[d->id]);
>>> -}
>>> -
>>> static inline bool
>>> intel_uncore_has_forcewake(const struct intel_uncore *uncore)
>>> {
>>>
>>
>> Regards,
>>
>> Tvrtko
>
More information about the Intel-gfx
mailing list