[Intel-gfx] [PATCH 5/6] drm/i915: dynamically allocate forcewake domains

Daniele Ceraolo Spurio daniele.ceraolospurio at intel.com
Tue Jun 18 23:06:39 UTC 2019



On 6/18/19 2:23 AM, Tvrtko Ursulin wrote:
> 
> On 17/06/2019 19:09, Daniele Ceraolo Spurio wrote:
>> We'd like to introduce a display uncore with no forcewake domains, so
>> let's avoid wasting memory and be ready to allocate only what we need.
>> Even without multiple uncore, we still don't need all the domains on all
>> gens.
> 
> Looks good in principle, only a few conversation points below.
> 
>>
>> Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio at intel.com>
>> ---
>>   drivers/gpu/drm/i915/intel_uncore.c | 155 ++++++++++++++++++----------
>>   drivers/gpu/drm/i915/intel_uncore.h |  13 +--
>>   2 files changed, 102 insertions(+), 66 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/intel_uncore.c 
>> b/drivers/gpu/drm/i915/intel_uncore.c
>> index c0f5567ee096..7f311827c3ef 100644
>> --- a/drivers/gpu/drm/i915/intel_uncore.c
>> +++ b/drivers/gpu/drm/i915/intel_uncore.c
>> @@ -344,7 +344,7 @@ intel_uncore_fw_release_timer(struct hrtimer *timer)
>>   {
>>       struct intel_uncore_forcewake_domain *domain =
>>              container_of(timer, struct intel_uncore_forcewake_domain, 
>> timer);
>> -    struct intel_uncore *uncore = forcewake_domain_to_uncore(domain);
>> +    struct intel_uncore *uncore = domain->uncore;
>>       unsigned long irqflags;
>>       assert_rpm_device_not_suspended(uncore->rpm);
>> @@ -1291,23 +1291,23 @@ do { \
>>       (uncore)->funcs.read_fw_domains = x##_reg_read_fw_domains; \
>>   } while (0)
>> -static void fw_domain_init(struct intel_uncore *uncore,
>> +static int fw_domain_init(struct intel_uncore *uncore,
>>                  enum forcewake_domain_id domain_id,
>>                  i915_reg_t reg_set,
>>                  i915_reg_t reg_ack)
>>   {
>>       struct intel_uncore_forcewake_domain *d;
>> -    if (WARN_ON(domain_id >= FW_DOMAIN_ID_COUNT))
>> -        return;
>> -
>> -    d = &uncore->fw_domain[domain_id];
>> +    GEM_BUG_ON(domain_id >= FW_DOMAIN_ID_COUNT);
>> -    WARN_ON(d->wake_count);
> 
> I wonder what was this for.
> 
> Do you want to add some protection against double init of the same domain?
> 

just a GEM_BUG_ON maybe? It shouldn't really ever happen unless we're 
moving something around.

>> +    d = kzalloc(sizeof(*d), GFP_KERNEL);
>> +    if (!d)
>> +        return -ENOMEM;
>>       WARN_ON(!i915_mmio_reg_valid(reg_set));
>>       WARN_ON(!i915_mmio_reg_valid(reg_ack));
>> +    d->uncore = uncore;
>>       d->wake_count = 0;
>>       d->reg_set = uncore->regs + i915_mmio_reg_offset(reg_set);
>>       d->reg_ack = uncore->regs + i915_mmio_reg_offset(reg_ack);
>> @@ -1324,7 +1324,6 @@ static void fw_domain_init(struct intel_uncore 
>> *uncore,
>>       BUILD_BUG_ON(FORCEWAKE_MEDIA_VEBOX0 != (1 << 
>> FW_DOMAIN_ID_MEDIA_VEBOX0));
>>       BUILD_BUG_ON(FORCEWAKE_MEDIA_VEBOX1 != (1 << 
>> FW_DOMAIN_ID_MEDIA_VEBOX1));
>> -
>>       d->mask = BIT(domain_id);
>>       hrtimer_init(&d->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
>> @@ -1333,6 +1332,10 @@ static void fw_domain_init(struct intel_uncore 
>> *uncore,
>>       uncore->fw_domains |= BIT(domain_id);
>>       fw_domain_reset(d);
>> +
>> +    uncore->fw_domain[domain_id] = d;
>> +
>> +    return 0;
>>   }
>>   static void fw_domain_fini(struct intel_uncore *uncore,
>> @@ -1340,77 +1343,92 @@ static void fw_domain_fini(struct intel_uncore 
>> *uncore,
>>   {
>>       struct intel_uncore_forcewake_domain *d;
>> -    if (WARN_ON(domain_id >= FW_DOMAIN_ID_COUNT))
>> -        return;
>> +    GEM_BUG_ON(domain_id >= FW_DOMAIN_ID_COUNT);
>> -    d = &uncore->fw_domain[domain_id];
>> +    d = fetch_and_zero(&uncore->fw_domain[domain_id]);
> 
> Maybe early if !d here and function flow just carries on?
> 

will do.

>> +    uncore->fw_domains &= ~BIT(domain_id);
>> -    WARN_ON(d->wake_count);
>> -    WARN_ON(hrtimer_cancel(&d->timer));
>> -    memset(d, 0, sizeof(*d));
>> +    if (d) {
>> +        WARN_ON(d->wake_count);
>> +        WARN_ON(hrtimer_cancel(&d->timer));
>> +        kfree(d);
>> +    }
>> +}
>> -    uncore->fw_domains &= ~BIT(domain_id);
>> +static void intel_uncore_fw_domains_fini(struct intel_uncore *uncore)
>> +{
>> +    struct intel_uncore_forcewake_domain *d;
>> +    int tmp;
>> +
>> +    for_each_fw_domain(d, uncore, tmp)
>> +        fw_domain_fini(uncore, d->id);
>>   }
>> -static void intel_uncore_fw_domains_init(struct intel_uncore *uncore)
>> +static int intel_uncore_fw_domains_init(struct intel_uncore *uncore)
>>   {
>>       struct drm_i915_private *i915 = uncore->i915;
>> +    int ret;
>>       GEM_BUG_ON(!intel_uncore_has_forcewake(uncore));
>> +#define __fw_domain_init(id__, set__, ack__) \
>> +    ret = fw_domain_init(uncore, (id__), (set__), (ack__)); \
>> +    if (ret) \
>> +        goto out_clean;
> 
> Hidden control flow is slightly objectionable but I don't offer any nice 
> alternatives so I guess will have to pass. Or maybe accumulate the error 
> (err |= fw_domain_init(..)) as you go and then cleanup at the end if any 
> failed?

I'd prefer to avoid accumulating the error since it'd just cause us to 
have to unroll more domains when we could've stopped early.

> 
> On the other hand the idea slightly conflicts with my other suggestion 
> to rename existing fw_domain_init to __fw_domain_init and call the macro 
> fw_domain_init and avoid all the churn below.
> 

I'll pick this suggestion of the two, unless there is another 
suggestion on how to avoid the hidden goto.

>> +
>>       if (INTEL_GEN(i915) >= 11) {
>>           int i;
>> -        uncore->funcs.force_wake_get =
>> -            fw_domains_get_with_fallback;
>> +        uncore->funcs.force_wake_get = fw_domains_get_with_fallback;
>>           uncore->funcs.force_wake_put = fw_domains_put;
>> -        fw_domain_init(uncore, FW_DOMAIN_ID_RENDER,
>> -                   FORCEWAKE_RENDER_GEN9,
>> -                   FORCEWAKE_ACK_RENDER_GEN9);
>> -        fw_domain_init(uncore, FW_DOMAIN_ID_BLITTER,
>> -                   FORCEWAKE_BLITTER_GEN9,
>> -                   FORCEWAKE_ACK_BLITTER_GEN9);
>> +        __fw_domain_init(FW_DOMAIN_ID_RENDER,
>> +                 FORCEWAKE_RENDER_GEN9,
>> +                 FORCEWAKE_ACK_RENDER_GEN9);
>> +        __fw_domain_init(FW_DOMAIN_ID_BLITTER,
>> +                 FORCEWAKE_BLITTER_GEN9,
>> +                 FORCEWAKE_ACK_BLITTER_GEN9);
>> +
>>           for (i = 0; i < I915_MAX_VCS; i++) {
>>               if (!HAS_ENGINE(i915, _VCS(i)))
>>                   continue;
>> -            fw_domain_init(uncore, FW_DOMAIN_ID_MEDIA_VDBOX0 + i,
>> -                       FORCEWAKE_MEDIA_VDBOX_GEN11(i),
>> -                       FORCEWAKE_ACK_MEDIA_VDBOX_GEN11(i));
>> +            __fw_domain_init(FW_DOMAIN_ID_MEDIA_VDBOX0 + i,
>> +                     FORCEWAKE_MEDIA_VDBOX_GEN11(i),
>> +                     FORCEWAKE_ACK_MEDIA_VDBOX_GEN11(i));
>>           }
>>           for (i = 0; i < I915_MAX_VECS; i++) {
>>               if (!HAS_ENGINE(i915, _VECS(i)))
>>                   continue;
>> -            fw_domain_init(uncore, FW_DOMAIN_ID_MEDIA_VEBOX0 + i,
>> -                       FORCEWAKE_MEDIA_VEBOX_GEN11(i),
>> -                       FORCEWAKE_ACK_MEDIA_VEBOX_GEN11(i));
>> +            __fw_domain_init(FW_DOMAIN_ID_MEDIA_VEBOX0 + i,
>> +                     FORCEWAKE_MEDIA_VEBOX_GEN11(i),
>> +                     FORCEWAKE_ACK_MEDIA_VEBOX_GEN11(i));
>>           }
>>       } else if (IS_GEN_RANGE(i915, 9, 10)) {
>> -        uncore->funcs.force_wake_get =
>> -            fw_domains_get_with_fallback;
>> +        uncore->funcs.force_wake_get = fw_domains_get_with_fallback;
>>           uncore->funcs.force_wake_put = fw_domains_put;
>> -        fw_domain_init(uncore, FW_DOMAIN_ID_RENDER,
>> -                   FORCEWAKE_RENDER_GEN9,
>> -                   FORCEWAKE_ACK_RENDER_GEN9);
>> -        fw_domain_init(uncore, FW_DOMAIN_ID_BLITTER,
>> -                   FORCEWAKE_BLITTER_GEN9,
>> -                   FORCEWAKE_ACK_BLITTER_GEN9);
>> -        fw_domain_init(uncore, FW_DOMAIN_ID_MEDIA,
>> -                   FORCEWAKE_MEDIA_GEN9, FORCEWAKE_ACK_MEDIA_GEN9);
>> +        __fw_domain_init(FW_DOMAIN_ID_RENDER,
>> +                 FORCEWAKE_RENDER_GEN9,
>> +                 FORCEWAKE_ACK_RENDER_GEN9);
>> +        __fw_domain_init(FW_DOMAIN_ID_BLITTER,
>> +                 FORCEWAKE_BLITTER_GEN9,
>> +                 FORCEWAKE_ACK_BLITTER_GEN9);
>> +        __fw_domain_init(FW_DOMAIN_ID_MEDIA,
>> +                 FORCEWAKE_MEDIA_GEN9,
>> +                 FORCEWAKE_ACK_MEDIA_GEN9);
>>       } else if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) {
>>           uncore->funcs.force_wake_get = fw_domains_get;
>>           uncore->funcs.force_wake_put = fw_domains_put;
>> -        fw_domain_init(uncore, FW_DOMAIN_ID_RENDER,
>> -                   FORCEWAKE_VLV, FORCEWAKE_ACK_VLV);
>> -        fw_domain_init(uncore, FW_DOMAIN_ID_MEDIA,
>> -                   FORCEWAKE_MEDIA_VLV, FORCEWAKE_ACK_MEDIA_VLV);
>> +        __fw_domain_init(FW_DOMAIN_ID_RENDER,
>> +                 FORCEWAKE_VLV, FORCEWAKE_ACK_VLV);
>> +        __fw_domain_init(FW_DOMAIN_ID_MEDIA,
>> +                 FORCEWAKE_MEDIA_VLV, FORCEWAKE_ACK_MEDIA_VLV);
>>       } else if (IS_HASWELL(i915) || IS_BROADWELL(i915)) {
>>           uncore->funcs.force_wake_get =
>>               fw_domains_get_with_thread_status;
>>           uncore->funcs.force_wake_put = fw_domains_put;
>> -        fw_domain_init(uncore, FW_DOMAIN_ID_RENDER,
>> -                   FORCEWAKE_MT, FORCEWAKE_ACK_HSW);
>> +        __fw_domain_init(FW_DOMAIN_ID_RENDER,
>> +                 FORCEWAKE_MT, FORCEWAKE_ACK_HSW);
>>       } else if (IS_IVYBRIDGE(i915)) {
>>           u32 ecobus;
>> @@ -1437,8 +1455,8 @@ static void intel_uncore_fw_domains_init(struct 
>> intel_uncore *uncore)
>>           __raw_uncore_write32(uncore, FORCEWAKE, 0);
>>           __raw_posting_read(uncore, ECOBUS);
>> -        fw_domain_init(uncore, FW_DOMAIN_ID_RENDER,
>> -                   FORCEWAKE_MT, FORCEWAKE_MT_ACK);
>> +        __fw_domain_init(FW_DOMAIN_ID_RENDER,
>> +                 FORCEWAKE_MT, FORCEWAKE_MT_ACK);
>>           spin_lock_irq(&uncore->lock);
>>           fw_domains_get_with_thread_status(uncore, FORCEWAKE_RENDER);
>> @@ -1449,19 +1467,27 @@ static void 
>> intel_uncore_fw_domains_init(struct intel_uncore *uncore)
>>           if (!(ecobus & FORCEWAKE_MT_ENABLE)) {
>>               DRM_INFO("No MT forcewake available on Ivybridge, this 
>> can result in issues\n");
>>               DRM_INFO("when using vblank-synced partial screen 
>> updates.\n");
>> -            fw_domain_init(uncore, FW_DOMAIN_ID_RENDER,
>> -                       FORCEWAKE, FORCEWAKE_ACK);
>> +            __fw_domain_init(FW_DOMAIN_ID_RENDER,
>> +                     FORCEWAKE, FORCEWAKE_ACK);
>>           }
>>       } else if (IS_GEN(i915, 6)) {
>>           uncore->funcs.force_wake_get =
>>               fw_domains_get_with_thread_status;
>>           uncore->funcs.force_wake_put = fw_domains_put;
>> -        fw_domain_init(uncore, FW_DOMAIN_ID_RENDER,
>> -                   FORCEWAKE, FORCEWAKE_ACK);
>> +        __fw_domain_init(FW_DOMAIN_ID_RENDER,
>> +                 FORCEWAKE, FORCEWAKE_ACK);
>>       }
>> +#undef __fw_domain_init
>> +
>>       /* All future platforms are expected to require complex power 
>> gating */
>>       WARN_ON(uncore->fw_domains == 0);
>> +
>> +    return 0;
>> +
>> +out_clean:
>> +    intel_uncore_fw_domains_fini(uncore);
>> +    return ret;
>>   }
>>   #define ASSIGN_FW_DOMAINS_TABLE(uncore, d) \
>> @@ -1562,13 +1588,17 @@ static void uncore_raw_init(struct 
>> intel_uncore *uncore)
>>       }
>>   }
>> -static void uncore_forcewake_init(struct intel_uncore *uncore)
>> +static int uncore_forcewake_init(struct intel_uncore *uncore)
>>   {
>>       struct drm_i915_private *i915 = uncore->i915;
>> +    int ret;
>>       GEM_BUG_ON(!intel_uncore_has_forcewake(uncore));
>> -    intel_uncore_fw_domains_init(uncore);
>> +    ret = intel_uncore_fw_domains_init(uncore);
>> +    if (ret)
>> +        return ret;
>> +
>>       forcewake_early_sanitize(uncore, 0);
>>       if (IS_GEN_RANGE(i915, 6, 7)) {
>> @@ -1601,6 +1631,8 @@ static void uncore_forcewake_init(struct 
>> intel_uncore *uncore)
>>       uncore->pmic_bus_access_nb.notifier_call = 
>> i915_pmic_bus_access_notifier;
>>       
>> iosf_mbi_register_pmic_bus_access_notifier(&uncore->pmic_bus_access_nb);
>> +
>> +    return 0;
>>   }
>>   int intel_uncore_init_mmio(struct intel_uncore *uncore)
>> @@ -1619,10 +1651,13 @@ int intel_uncore_init_mmio(struct intel_uncore 
>> *uncore)
>>       uncore->unclaimed_mmio_check = 1;
>> -    if (!intel_uncore_has_forcewake(uncore))
>> +    if (!intel_uncore_has_forcewake(uncore)) {
>>           uncore_raw_init(uncore);
>> -    else
>> -        uncore_forcewake_init(uncore);
>> +    } else {
>> +        ret = uncore_forcewake_init(uncore);
>> +        if (ret)
>> +            goto out_mmio_cleanup;
>> +    }
>>       /* make sure fw funcs are set if and only if we have fw*/
>>       GEM_BUG_ON(intel_uncore_has_forcewake(uncore) != 
>> !!uncore->funcs.force_wake_get);
>> @@ -1644,6 +1679,11 @@ int intel_uncore_init_mmio(struct intel_uncore 
>> *uncore)
>>           DRM_DEBUG("unclaimed mmio detected on uncore init, 
>> clearing\n");
>>       return 0;
>> +
>> +out_mmio_cleanup:
>> +    uncore_mmio_cleanup(uncore);
>> +
>> +    return ret;
>>   }
>>   /*
>> @@ -1689,6 +1729,7 @@ void intel_uncore_fini_mmio(struct intel_uncore 
>> *uncore)
>>           iosf_mbi_unregister_pmic_bus_access_notifier_unlocked(
>>               &uncore->pmic_bus_access_nb);
>>           intel_uncore_forcewake_reset(uncore);
>> +        intel_uncore_fw_domains_fini(uncore);
>>           iosf_mbi_punit_release();
>>       }
>> diff --git a/drivers/gpu/drm/i915/intel_uncore.h 
>> b/drivers/gpu/drm/i915/intel_uncore.h
>> index 879252735bba..bbff281b880d 100644
>> --- a/drivers/gpu/drm/i915/intel_uncore.h
>> +++ b/drivers/gpu/drm/i915/intel_uncore.h
>> @@ -126,6 +126,7 @@ struct intel_uncore {
>>       enum forcewake_domains fw_domains_saved; /* user domains saved 
>> for S3 */
>>       struct intel_uncore_forcewake_domain {
>> +        struct intel_uncore *uncore;
>>           enum forcewake_domain_id id;
>>           enum forcewake_domains mask;
>>           unsigned int wake_count;
>> @@ -133,7 +134,7 @@ struct intel_uncore {
>>           struct hrtimer timer;
>>           u32 __iomem *reg_set;
>>           u32 __iomem *reg_ack;
>> -    } fw_domain[FW_DOMAIN_ID_COUNT];
>> +    } *fw_domain[FW_DOMAIN_ID_COUNT];
> 
> The array itself could also be made dynamic, no? Too much hassle for very 
> little gain?
> 

With the current approach we don't know how many domains we have in 
advance, so we'd have to either add a table for that or realloc the 
array as we increase the number of domains. Neither option seems 
worth the hassle IMO.

Thanks,
Daniele

>>       struct {
>>           unsigned int count;
>> @@ -147,18 +148,12 @@ struct intel_uncore {
>>   /* Iterate over initialised fw domains */
>>   #define for_each_fw_domain_masked(domain__, mask__, uncore__, tmp__) \
>> -    for (tmp__ = (mask__); \
>> -         tmp__ ? (domain__ = 
>> &(uncore__)->fw_domain[__mask_next_bit(tmp__)]), 1 : 0;)
>> +    for (tmp__ = (mask__); tmp__ ;) \
>> +        for_each_if(domain__ = 
>> (uncore__)->fw_domain[__mask_next_bit(tmp__)])
>>   #define for_each_fw_domain(domain__, uncore__, tmp__) \
>>       for_each_fw_domain_masked(domain__, (uncore__)->fw_domains, 
>> uncore__, tmp__)
>> -static inline struct intel_uncore *
>> -forcewake_domain_to_uncore(const struct intel_uncore_forcewake_domain 
>> *d)
>> -{
>> -    return container_of(d, struct intel_uncore, fw_domain[d->id]);
>> -}
>> -
>>   static inline bool
>>   intel_uncore_has_forcewake(const struct intel_uncore *uncore)
>>   {
>>
> 
> Regards,
> 
> Tvrtko


More information about the Intel-gfx mailing list