[Intel-gfx] [PATCH 5/6] drm/i915: dynamically allocate forcewake domains

Tvrtko Ursulin tvrtko.ursulin at linux.intel.com
Wed Jun 19 14:22:17 UTC 2019


On 19/06/2019 00:06, Daniele Ceraolo Spurio wrote:
> On 6/18/19 2:23 AM, Tvrtko Ursulin wrote:
>> On 17/06/2019 19:09, Daniele Ceraolo Spurio wrote:
>>> We'd like to introduce a display uncore with no forcewake domains, so
>>> let's avoid wasting memory and be ready to allocate only what we need.
>>> Even without multiple uncore, we still don't need all the domains on all
>>> gens.
>>
>> Looks good in principle, only a few conversation points below.
>>
>>>
>>> Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio at intel.com>
>>> ---
>>>   drivers/gpu/drm/i915/intel_uncore.c | 155 ++++++++++++++++++----------
>>>   drivers/gpu/drm/i915/intel_uncore.h |  13 +--
>>>   2 files changed, 102 insertions(+), 66 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/intel_uncore.c 
>>> b/drivers/gpu/drm/i915/intel_uncore.c
>>> index c0f5567ee096..7f311827c3ef 100644
>>> --- a/drivers/gpu/drm/i915/intel_uncore.c
>>> +++ b/drivers/gpu/drm/i915/intel_uncore.c
>>> @@ -344,7 +344,7 @@ intel_uncore_fw_release_timer(struct hrtimer *timer)
>>>   {
>>>       struct intel_uncore_forcewake_domain *domain =
>>>              container_of(timer, struct 
>>> intel_uncore_forcewake_domain, timer);
>>> -    struct intel_uncore *uncore = forcewake_domain_to_uncore(domain);
>>> +    struct intel_uncore *uncore = domain->uncore;
>>>       unsigned long irqflags;
>>>       assert_rpm_device_not_suspended(uncore->rpm);
>>> @@ -1291,23 +1291,23 @@ do { \
>>>       (uncore)->funcs.read_fw_domains = x##_reg_read_fw_domains; \
>>>   } while (0)
>>> -static void fw_domain_init(struct intel_uncore *uncore,
>>> +static int fw_domain_init(struct intel_uncore *uncore,
>>>                  enum forcewake_domain_id domain_id,
>>>                  i915_reg_t reg_set,
>>>                  i915_reg_t reg_ack)
>>>   {
>>>       struct intel_uncore_forcewake_domain *d;
>>> -    if (WARN_ON(domain_id >= FW_DOMAIN_ID_COUNT))
>>> -        return;
>>> -
>>> -    d = &uncore->fw_domain[domain_id];
>>> +    GEM_BUG_ON(domain_id >= FW_DOMAIN_ID_COUNT);
>>> -    WARN_ON(d->wake_count);
>>
>> I wonder what this was for.
>>
>> Do you want to add some protection against double init of the same 
>> domain?
>>
> 
> just a GEM_BUG_ON maybe? It shouldn't really ever happen unless we're 
> moving something around.

Yes, sounds right.

> 
>>> +    d = kzalloc(sizeof(*d), GFP_KERNEL);
>>> +    if (!d)
>>> +        return -ENOMEM;
>>>       WARN_ON(!i915_mmio_reg_valid(reg_set));
>>>       WARN_ON(!i915_mmio_reg_valid(reg_ack));
>>> +    d->uncore = uncore;
>>>       d->wake_count = 0;
>>>       d->reg_set = uncore->regs + i915_mmio_reg_offset(reg_set);
>>>       d->reg_ack = uncore->regs + i915_mmio_reg_offset(reg_ack);
>>> @@ -1324,7 +1324,6 @@ static void fw_domain_init(struct intel_uncore 
>>> *uncore,
>>>       BUILD_BUG_ON(FORCEWAKE_MEDIA_VEBOX0 != (1 << 
>>> FW_DOMAIN_ID_MEDIA_VEBOX0));
>>>       BUILD_BUG_ON(FORCEWAKE_MEDIA_VEBOX1 != (1 << 
>>> FW_DOMAIN_ID_MEDIA_VEBOX1));
>>> -
>>>       d->mask = BIT(domain_id);
>>>       hrtimer_init(&d->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
>>> @@ -1333,6 +1332,10 @@ static void fw_domain_init(struct intel_uncore 
>>> *uncore,
>>>       uncore->fw_domains |= BIT(domain_id);
>>>       fw_domain_reset(d);
>>> +
>>> +    uncore->fw_domain[domain_id] = d;
>>> +
>>> +    return 0;
>>>   }
>>>   static void fw_domain_fini(struct intel_uncore *uncore,
>>> @@ -1340,77 +1343,92 @@ static void fw_domain_fini(struct 
>>> intel_uncore *uncore,
>>>   {
>>>       struct intel_uncore_forcewake_domain *d;
>>> -    if (WARN_ON(domain_id >= FW_DOMAIN_ID_COUNT))
>>> -        return;
>>> +    GEM_BUG_ON(domain_id >= FW_DOMAIN_ID_COUNT);
>>> -    d = &uncore->fw_domain[domain_id];
>>> +    d = fetch_and_zero(&uncore->fw_domain[domain_id]);
>>
>> Maybe an early return if !d here and the function flow just carries on?
>>
> 
> will do.
> 
>>> +    uncore->fw_domains &= ~BIT(domain_id);
>>> -    WARN_ON(d->wake_count);
>>> -    WARN_ON(hrtimer_cancel(&d->timer));
>>> -    memset(d, 0, sizeof(*d));
>>> +    if (d) {
>>> +        WARN_ON(d->wake_count);
>>> +        WARN_ON(hrtimer_cancel(&d->timer));
>>> +        kfree(d);
>>> +    }
>>> +}
>>> -    uncore->fw_domains &= ~BIT(domain_id);
>>> +static void intel_uncore_fw_domains_fini(struct intel_uncore *uncore)
>>> +{
>>> +    struct intel_uncore_forcewake_domain *d;
>>> +    int tmp;
>>> +
>>> +    for_each_fw_domain(d, uncore, tmp)
>>> +        fw_domain_fini(uncore, d->id);
>>>   }
>>> -static void intel_uncore_fw_domains_init(struct intel_uncore *uncore)
>>> +static int intel_uncore_fw_domains_init(struct intel_uncore *uncore)
>>>   {
>>>       struct drm_i915_private *i915 = uncore->i915;
>>> +    int ret;
>>>       GEM_BUG_ON(!intel_uncore_has_forcewake(uncore));
>>> +#define __fw_domain_init(id__, set__, ack__) \
>>> +    ret = fw_domain_init(uncore, (id__), (set__), (ack__)); \
>>> +    if (ret) \
>>> +        goto out_clean;
>>
>> Hidden control flow is slightly objectionable but I don't offer any 
>> nice alternatives so I guess will have to pass. Or maybe accumulate 
>> the error (err |= fw_domain_init(..)) as you go and then cleanup at 
>> the end if any failed?
> 
> I'd prefer to avoid accumulating the error since it'd just cause us to 
> have to unroll more domains when we could've stopped early.

Although unrolling happens close to never-ever so it isn't a practical 
concern..

> 
>>
>> On the other hand the idea slightly conflicts with my other suggestion 
>> to rename existing fw_domain_init to __fw_domain_init and call the 
>> macro fw_domain_init and avoid all the churn below.
>>
> 
> I'll pick this suggestion among the 2, unless there is another 
> suggestion on how to avoid the hidden goto.

.. but okay, hidden goto is also only a concern if someone adds new init 
steps before the fw_domain_init steps, oblivious that cleanup is also 
needed. Perhaps the out_clean label would be enough of a clue for that case.

> 
>>> +
>>>       if (INTEL_GEN(i915) >= 11) {
>>>           int i;
>>> -        uncore->funcs.force_wake_get =
>>> -            fw_domains_get_with_fallback;
>>> +        uncore->funcs.force_wake_get = fw_domains_get_with_fallback;
>>>           uncore->funcs.force_wake_put = fw_domains_put;
>>> -        fw_domain_init(uncore, FW_DOMAIN_ID_RENDER,
>>> -                   FORCEWAKE_RENDER_GEN9,
>>> -                   FORCEWAKE_ACK_RENDER_GEN9);
>>> -        fw_domain_init(uncore, FW_DOMAIN_ID_BLITTER,
>>> -                   FORCEWAKE_BLITTER_GEN9,
>>> -                   FORCEWAKE_ACK_BLITTER_GEN9);
>>> +        __fw_domain_init(FW_DOMAIN_ID_RENDER,
>>> +                 FORCEWAKE_RENDER_GEN9,
>>> +                 FORCEWAKE_ACK_RENDER_GEN9);
>>> +        __fw_domain_init(FW_DOMAIN_ID_BLITTER,
>>> +                 FORCEWAKE_BLITTER_GEN9,
>>> +                 FORCEWAKE_ACK_BLITTER_GEN9);
>>> +
>>>           for (i = 0; i < I915_MAX_VCS; i++) {
>>>               if (!HAS_ENGINE(i915, _VCS(i)))
>>>                   continue;
>>> -            fw_domain_init(uncore, FW_DOMAIN_ID_MEDIA_VDBOX0 + i,
>>> -                       FORCEWAKE_MEDIA_VDBOX_GEN11(i),
>>> -                       FORCEWAKE_ACK_MEDIA_VDBOX_GEN11(i));
>>> +            __fw_domain_init(FW_DOMAIN_ID_MEDIA_VDBOX0 + i,
>>> +                     FORCEWAKE_MEDIA_VDBOX_GEN11(i),
>>> +                     FORCEWAKE_ACK_MEDIA_VDBOX_GEN11(i));
>>>           }
>>>           for (i = 0; i < I915_MAX_VECS; i++) {
>>>               if (!HAS_ENGINE(i915, _VECS(i)))
>>>                   continue;
>>> -            fw_domain_init(uncore, FW_DOMAIN_ID_MEDIA_VEBOX0 + i,
>>> -                       FORCEWAKE_MEDIA_VEBOX_GEN11(i),
>>> -                       FORCEWAKE_ACK_MEDIA_VEBOX_GEN11(i));
>>> +            __fw_domain_init(FW_DOMAIN_ID_MEDIA_VEBOX0 + i,
>>> +                     FORCEWAKE_MEDIA_VEBOX_GEN11(i),
>>> +                     FORCEWAKE_ACK_MEDIA_VEBOX_GEN11(i));
>>>           }
>>>       } else if (IS_GEN_RANGE(i915, 9, 10)) {
>>> -        uncore->funcs.force_wake_get =
>>> -            fw_domains_get_with_fallback;
>>> +        uncore->funcs.force_wake_get = fw_domains_get_with_fallback;
>>>           uncore->funcs.force_wake_put = fw_domains_put;
>>> -        fw_domain_init(uncore, FW_DOMAIN_ID_RENDER,
>>> -                   FORCEWAKE_RENDER_GEN9,
>>> -                   FORCEWAKE_ACK_RENDER_GEN9);
>>> -        fw_domain_init(uncore, FW_DOMAIN_ID_BLITTER,
>>> -                   FORCEWAKE_BLITTER_GEN9,
>>> -                   FORCEWAKE_ACK_BLITTER_GEN9);
>>> -        fw_domain_init(uncore, FW_DOMAIN_ID_MEDIA,
>>> -                   FORCEWAKE_MEDIA_GEN9, FORCEWAKE_ACK_MEDIA_GEN9);
>>> +        __fw_domain_init(FW_DOMAIN_ID_RENDER,
>>> +                 FORCEWAKE_RENDER_GEN9,
>>> +                 FORCEWAKE_ACK_RENDER_GEN9);
>>> +        __fw_domain_init(FW_DOMAIN_ID_BLITTER,
>>> +                 FORCEWAKE_BLITTER_GEN9,
>>> +                 FORCEWAKE_ACK_BLITTER_GEN9);
>>> +        __fw_domain_init(FW_DOMAIN_ID_MEDIA,
>>> +                 FORCEWAKE_MEDIA_GEN9,
>>> +                 FORCEWAKE_ACK_MEDIA_GEN9);
>>>       } else if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) {
>>>           uncore->funcs.force_wake_get = fw_domains_get;
>>>           uncore->funcs.force_wake_put = fw_domains_put;
>>> -        fw_domain_init(uncore, FW_DOMAIN_ID_RENDER,
>>> -                   FORCEWAKE_VLV, FORCEWAKE_ACK_VLV);
>>> -        fw_domain_init(uncore, FW_DOMAIN_ID_MEDIA,
>>> -                   FORCEWAKE_MEDIA_VLV, FORCEWAKE_ACK_MEDIA_VLV);
>>> +        __fw_domain_init(FW_DOMAIN_ID_RENDER,
>>> +                 FORCEWAKE_VLV, FORCEWAKE_ACK_VLV);
>>> +        __fw_domain_init(FW_DOMAIN_ID_MEDIA,
>>> +                 FORCEWAKE_MEDIA_VLV, FORCEWAKE_ACK_MEDIA_VLV);
>>>       } else if (IS_HASWELL(i915) || IS_BROADWELL(i915)) {
>>>           uncore->funcs.force_wake_get =
>>>               fw_domains_get_with_thread_status;
>>>           uncore->funcs.force_wake_put = fw_domains_put;
>>> -        fw_domain_init(uncore, FW_DOMAIN_ID_RENDER,
>>> -                   FORCEWAKE_MT, FORCEWAKE_ACK_HSW);
>>> +        __fw_domain_init(FW_DOMAIN_ID_RENDER,
>>> +                 FORCEWAKE_MT, FORCEWAKE_ACK_HSW);
>>>       } else if (IS_IVYBRIDGE(i915)) {
>>>           u32 ecobus;
>>> @@ -1437,8 +1455,8 @@ static void intel_uncore_fw_domains_init(struct 
>>> intel_uncore *uncore)
>>>           __raw_uncore_write32(uncore, FORCEWAKE, 0);
>>>           __raw_posting_read(uncore, ECOBUS);
>>> -        fw_domain_init(uncore, FW_DOMAIN_ID_RENDER,
>>> -                   FORCEWAKE_MT, FORCEWAKE_MT_ACK);
>>> +        __fw_domain_init(FW_DOMAIN_ID_RENDER,
>>> +                 FORCEWAKE_MT, FORCEWAKE_MT_ACK);
>>>           spin_lock_irq(&uncore->lock);
>>>           fw_domains_get_with_thread_status(uncore, FORCEWAKE_RENDER);
>>> @@ -1449,19 +1467,27 @@ static void 
>>> intel_uncore_fw_domains_init(struct intel_uncore *uncore)
>>>           if (!(ecobus & FORCEWAKE_MT_ENABLE)) {
>>>               DRM_INFO("No MT forcewake available on Ivybridge, this 
>>> can result in issues\n");
>>>               DRM_INFO("when using vblank-synced partial screen 
>>> updates.\n");
>>> -            fw_domain_init(uncore, FW_DOMAIN_ID_RENDER,
>>> -                       FORCEWAKE, FORCEWAKE_ACK);
>>> +            __fw_domain_init(FW_DOMAIN_ID_RENDER,
>>> +                     FORCEWAKE, FORCEWAKE_ACK);
>>>           }
>>>       } else if (IS_GEN(i915, 6)) {
>>>           uncore->funcs.force_wake_get =
>>>               fw_domains_get_with_thread_status;
>>>           uncore->funcs.force_wake_put = fw_domains_put;
>>> -        fw_domain_init(uncore, FW_DOMAIN_ID_RENDER,
>>> -                   FORCEWAKE, FORCEWAKE_ACK);
>>> +        __fw_domain_init(FW_DOMAIN_ID_RENDER,
>>> +                 FORCEWAKE, FORCEWAKE_ACK);
>>>       }
>>> +#undef __fw_domain_init
>>> +
>>>       /* All future platforms are expected to require complex power 
>>> gating */
>>>       WARN_ON(uncore->fw_domains == 0);
>>> +
>>> +    return 0;
>>> +
>>> +out_clean:
>>> +    intel_uncore_fw_domains_fini(uncore);
>>> +    return ret;
>>>   }
>>>   #define ASSIGN_FW_DOMAINS_TABLE(uncore, d) \
>>> @@ -1562,13 +1588,17 @@ static void uncore_raw_init(struct 
>>> intel_uncore *uncore)
>>>       }
>>>   }
>>> -static void uncore_forcewake_init(struct intel_uncore *uncore)
>>> +static int uncore_forcewake_init(struct intel_uncore *uncore)
>>>   {
>>>       struct drm_i915_private *i915 = uncore->i915;
>>> +    int ret;
>>>       GEM_BUG_ON(!intel_uncore_has_forcewake(uncore));
>>> -    intel_uncore_fw_domains_init(uncore);
>>> +    ret = intel_uncore_fw_domains_init(uncore);
>>> +    if (ret)
>>> +        return ret;
>>> +
>>>       forcewake_early_sanitize(uncore, 0);
>>>       if (IS_GEN_RANGE(i915, 6, 7)) {
>>> @@ -1601,6 +1631,8 @@ static void uncore_forcewake_init(struct 
>>> intel_uncore *uncore)
>>>       uncore->pmic_bus_access_nb.notifier_call = 
>>> i915_pmic_bus_access_notifier;
>>> iosf_mbi_register_pmic_bus_access_notifier(&uncore->pmic_bus_access_nb);
>>> +
>>> +    return 0;
>>>   }
>>>   int intel_uncore_init_mmio(struct intel_uncore *uncore)
>>> @@ -1619,10 +1651,13 @@ int intel_uncore_init_mmio(struct 
>>> intel_uncore *uncore)
>>>       uncore->unclaimed_mmio_check = 1;
>>> -    if (!intel_uncore_has_forcewake(uncore))
>>> +    if (!intel_uncore_has_forcewake(uncore)) {
>>>           uncore_raw_init(uncore);
>>> -    else
>>> -        uncore_forcewake_init(uncore);
>>> +    } else {
>>> +        ret = uncore_forcewake_init(uncore);
>>> +        if (ret)
>>> +            goto out_mmio_cleanup;
>>> +    }
>>>       /* make sure fw funcs are set if and only if we have fw*/
>>>       GEM_BUG_ON(intel_uncore_has_forcewake(uncore) != 
>>> !!uncore->funcs.force_wake_get);
>>> @@ -1644,6 +1679,11 @@ int intel_uncore_init_mmio(struct intel_uncore 
>>> *uncore)
>>>           DRM_DEBUG("unclaimed mmio detected on uncore init, 
>>> clearing\n");
>>>       return 0;
>>> +
>>> +out_mmio_cleanup:
>>> +    uncore_mmio_cleanup(uncore);
>>> +
>>> +    return ret;
>>>   }
>>>   /*
>>> @@ -1689,6 +1729,7 @@ void intel_uncore_fini_mmio(struct intel_uncore 
>>> *uncore)
>>>           iosf_mbi_unregister_pmic_bus_access_notifier_unlocked(
>>>               &uncore->pmic_bus_access_nb);
>>>           intel_uncore_forcewake_reset(uncore);
>>> +        intel_uncore_fw_domains_fini(uncore);
>>>           iosf_mbi_punit_release();
>>>       }
>>> diff --git a/drivers/gpu/drm/i915/intel_uncore.h 
>>> b/drivers/gpu/drm/i915/intel_uncore.h
>>> index 879252735bba..bbff281b880d 100644
>>> --- a/drivers/gpu/drm/i915/intel_uncore.h
>>> +++ b/drivers/gpu/drm/i915/intel_uncore.h
>>> @@ -126,6 +126,7 @@ struct intel_uncore {
>>>       enum forcewake_domains fw_domains_saved; /* user domains saved 
>>> for S3 */
>>>       struct intel_uncore_forcewake_domain {
>>> +        struct intel_uncore *uncore;
>>>           enum forcewake_domain_id id;
>>>           enum forcewake_domains mask;
>>>           unsigned int wake_count;
>>> @@ -133,7 +134,7 @@ struct intel_uncore {
>>>           struct hrtimer timer;
>>>           u32 __iomem *reg_set;
>>>           u32 __iomem *reg_ack;
>>> -    } fw_domain[FW_DOMAIN_ID_COUNT];
>>> +    } *fw_domain[FW_DOMAIN_ID_COUNT];
>>
>> The array itself could also be made dynamic, no? Too much hassle for 
>> very little gain?
>>
> 
> With the current approach we don't know how many domains we have in 
> advance, so we'd have to either add a table for that or realloc the 
> array as we increase the number of domains. Both options don't seem 
> worth the hassle IMO.

Agreed. It would also need changes in lookup so it looks even less 
appealing than it did to me yesterday. :)

Regards,

Tvrtko


> 
> Thanks,
> Daniele
> 
>>>       struct {
>>>           unsigned int count;
>>> @@ -147,18 +148,12 @@ struct intel_uncore {
>>>   /* Iterate over initialised fw domains */
>>>   #define for_each_fw_domain_masked(domain__, mask__, uncore__, tmp__) \
>>> -    for (tmp__ = (mask__); \
>>> -         tmp__ ? (domain__ = 
>>> &(uncore__)->fw_domain[__mask_next_bit(tmp__)]), 1 : 0;)
>>> +    for (tmp__ = (mask__); tmp__ ;) \
>>> +        for_each_if(domain__ = 
>>> (uncore__)->fw_domain[__mask_next_bit(tmp__)])
>>>   #define for_each_fw_domain(domain__, uncore__, tmp__) \
>>>       for_each_fw_domain_masked(domain__, (uncore__)->fw_domains, 
>>> uncore__, tmp__)
>>> -static inline struct intel_uncore *
>>> -forcewake_domain_to_uncore(const struct 
>>> intel_uncore_forcewake_domain *d)
>>> -{
>>> -    return container_of(d, struct intel_uncore, fw_domain[d->id]);
>>> -}
>>> -
>>>   static inline bool
>>>   intel_uncore_has_forcewake(const struct intel_uncore *uncore)
>>>   {
>>>
>>
>> Regards,
>>
>> Tvrtko
> 


More information about the Intel-gfx mailing list