[PATCH 1/3] drm/xe/guc: move KLV helpers to common file

Fri Feb 28 22:21:29 UTC 2025

On 2/28/2025 1:47 PM, Michal Wajdeczko wrote:
>
> On 28.02.2025 00:59, Daniele Ceraolo Spurio wrote:
>>
>> On 2/27/2025 3:29 PM, Michal Wajdeczko wrote:
>>> On 27.02.2025 02:05, Daniele Ceraolo Spurio wrote:
>>>> The GuC uses the same format for all its KLVs, so having the functions
>>>> to set them in a common file will allow us to re-use it for different
>>>> types of KLVs and not just the WAs in ADS.
>>>> The next patch will make use of these functions for a new H2G command.
>>>>
>>>> Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio at intel.com>
>>>> Cc: John Harrison <John.C.Harrison at Intel.com>
>>>> ---
>>>>    drivers/gpu/drm/xe/xe_guc_ads.c         | 89 ++++++-------------------
>>>>    drivers/gpu/drm/xe/xe_guc_klv_helpers.c | 62 +++++++++++++++++
>>>>    drivers/gpu/drm/xe/xe_guc_klv_helpers.h |  7 ++
>>>>    3 files changed, 89 insertions(+), 69 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/
>>>> xe_guc_ads.c
>>>> index fab259adc380..ed3304a11812 100644
>>>> --- a/drivers/gpu/drm/xe/xe_guc_ads.c
>>>> +++ b/drivers/gpu/drm/xe/xe_guc_ads.c
>>>> @@ -22,6 +22,7 @@
>>>>    #include "xe_guc.h"
>>>>    #include "xe_guc_capture.h"
>>>>    #include "xe_guc_ct.h"
>>>> +#include "xe_guc_klv_helpers.h"
>>>>    #include "xe_hw_engine.h"
>>>>    #include "xe_lrc.h"
>>>>    #include "xe_map.h"
>>>> @@ -283,56 +284,6 @@ static size_t calculate_golden_lrc_size(struct
>>>> xe_guc_ads *ads)
>>>>        return total_size;
>>>>    }
>>>>    -static void guc_waklv_enable_one_word(struct xe_guc_ads *ads,
>>>> -                      enum xe_guc_klv_ids klv_id,
>>>> -                      u32 value,
>>>> -                      u32 *offset, u32 *remain)
>>>> -{
>>>> -    u32 size;
>>>> -    u32 klv_entry[] = {
>>>> -        /* 16:16 key/length */
>>>> -        FIELD_PREP(GUC_KLV_0_KEY, klv_id) |
>>>> -        FIELD_PREP(GUC_KLV_0_LEN, 1),
>>>> -        value,
>>>> -        /* 1 dword data */
>>>> -    };
>>>> -
>>>> -    size = sizeof(klv_entry);
>>>> -
>>>> -    if (*remain < size) {
>>>> -        drm_warn(&ads_to_xe(ads)->drm,
>>>> -             "w/a klv buffer too small to add klv id %d\n", klv_id);
>>>> -    } else {
>>>> -        xe_map_memcpy_to(ads_to_xe(ads), ads_to_map(ads), *offset,
>>>> -                 klv_entry, size);
>>>> -        *offset += size;
>>>> -        *remain -= size;
>>>> -    }
>>>> -}
>>>> -
>>>> -static void guc_waklv_enable_simple(struct xe_guc_ads *ads,
>>>> -                    enum xe_guc_klv_ids klv_id, u32 *offset, u32
>>>> *remain)
>>>> -{
>>>> -    u32 klv_entry[] = {
>>>> -        /* 16:16 key/length */
>>>> -        FIELD_PREP(GUC_KLV_0_KEY, klv_id) |
>>>> -        FIELD_PREP(GUC_KLV_0_LEN, 0),
>>>> -        /* 0 dwords data */
>>>> -    };
>>>> -    u32 size;
>>>> -
>>>> -    size = sizeof(klv_entry);
>>>> -
>>>> -    if (xe_gt_WARN(ads_to_gt(ads), *remain < size,
>>>> -               "w/a klv buffer too small to add klv id %d\n", klv_id))
>>>> -        return;
>>>> -
>>>> -    xe_map_memcpy_to(ads_to_xe(ads), ads_to_map(ads), *offset,
>>>> -             klv_entry, size);
>>>> -    *offset += size;
>>>> -    *remain -= size;
>>>> -}
>>>> -
>>>>    static void guc_waklv_init(struct xe_guc_ads *ads)
>>>>    {
>>>>        struct xe_gt *gt = ads_to_gt(ads);
>>>> @@ -343,22 +294,22 @@ static void guc_waklv_init(struct xe_guc_ads *ads)
>>>>        remain = guc_ads_waklv_size(ads);
>>>>          if (XE_WA(gt, 14019882105))
>>>> -        guc_waklv_enable_simple(ads,
>>>> -
>>>> GUC_WORKAROUND_KLV_BLOCK_INTERRUPTS_WHEN_MGSR_BLOCKED,
>>>> -                    &offset, &remain);
>>>> +        xe_guc_klv_enable_simple(ads_to_guc(ads), ads_to_map(ads),
>>>> +
>>>> GUC_WORKAROUND_KLV_BLOCK_INTERRUPTS_WHEN_MGSR_BLOCKED,
>>>> +                     &offset, &remain);
>>>>        if (XE_WA(gt, 18024947630))
>>>> -        guc_waklv_enable_simple(ads,
>>>> -                    GUC_WORKAROUND_KLV_ID_GAM_PFQ_SHADOW_TAIL_POLLING,
>>>> -                    &offset, &remain);
>>>> +        xe_guc_klv_enable_simple(ads_to_guc(ads), ads_to_map(ads),
>>>> +                     GUC_WORKAROUND_KLV_ID_GAM_PFQ_SHADOW_TAIL_POLLING,
>>>> +                     &offset, &remain);
>>>>        if (XE_WA(gt, 16022287689))
>>>> -        guc_waklv_enable_simple(ads,
>>>> -
>>>> GUC_WORKAROUND_KLV_ID_DISABLE_MTP_DURING_ASYNC_COMPUTE,
>>>> -                    &offset, &remain);
>>>> +        xe_guc_klv_enable_simple(ads_to_guc(ads), ads_to_map(ads),
>>>> +
>>>> GUC_WORKAROUND_KLV_ID_DISABLE_MTP_DURING_ASYNC_COMPUTE,
>>>> +                     &offset, &remain);
>>>>          if (XE_WA(gt, 14022866841))
>>>> -        guc_waklv_enable_simple(ads,
>>>> -                    GUC_WA_KLV_WAKE_POWER_DOMAINS_FOR_OUTBOUND_MMIO,
>>>> -                    &offset, &remain);
>>>> +        xe_guc_klv_enable_simple(ads_to_guc(ads), ads_to_map(ads),
>>>> +                     GUC_WA_KLV_WAKE_POWER_DOMAINS_FOR_OUTBOUND_MMIO,
>>>> +                     &offset, &remain);
>>>>          /*
>>>>         * On RC6 exit, GuC will write register 0xB04 with the default
>>>> value provided. As of now,
>>>> @@ -366,15 +317,15 @@ static void guc_waklv_init(struct xe_guc_ads *ads)
>>>>         * future, so GuC depends on KMD to send it the correct value.
>>>>         */
>>>>        if (XE_WA(gt, 13011645652))
>>>> -        guc_waklv_enable_one_word(ads,
>>>> -
>>>> GUC_WA_KLV_NP_RD_WRITE_TO_CLEAR_RCSM_AT_CGP_LATE_RESTORE,
>>>> -                      0xC40,
>>>> -                      &offset, &remain);
>>>> +        xe_guc_klv_enable_one_word(ads_to_guc(ads), ads_to_map(ads),
>>>> +
>>>> GUC_WA_KLV_NP_RD_WRITE_TO_CLEAR_RCSM_AT_CGP_LATE_RESTORE,
>>>> +                       0xC40,
>>>> +                       &offset, &remain);
>>>>          if (XE_WA(gt, 14022293748) || XE_WA(gt, 22019794406))
>>>> -        guc_waklv_enable_simple(ads,
>>>> -
>>>> GUC_WORKAROUND_KLV_ID_BACK_TO_BACK_RCS_ENGINE_RESET,
>>>> -                    &offset, &remain);
>>>> +        xe_guc_klv_enable_simple(ads_to_guc(ads), ads_to_map(ads),
>>>> +
>>>> GUC_WORKAROUND_KLV_ID_BACK_TO_BACK_RCS_ENGINE_RESET,
>>>> +                     &offset, &remain);
>>>>          size = guc_ads_waklv_size(ads) - remain;
>>>>        if (!size)
>>>> diff --git a/drivers/gpu/drm/xe/xe_guc_klv_helpers.c b/drivers/gpu/
>>>> drm/xe/xe_guc_klv_helpers.c
>>>> index 146a6eda9e06..50eab2086ee3 100644
>>>> --- a/drivers/gpu/drm/xe/xe_guc_klv_helpers.c
>>>> +++ b/drivers/gpu/drm/xe/xe_guc_klv_helpers.c
>>>> @@ -7,8 +7,11 @@
>>>>    #include <drm/drm_print.h>
>>>>      #include "abi/guc_klvs_abi.h"
>>>> +#include "xe_gt_printk.h"
>>>> +#include "xe_guc.h"
>>>>    #include "xe_guc_klv_helpers.h"
>>>>    #include "xe_guc_klv_thresholds_set.h"
>>>> +#include "xe_map.h"
>>>>      #define make_u64(hi, lo) ((u64)((u64)(u32)(hi) << 32 | (u32)(lo)))
>>>>    @@ -146,3 +149,62 @@ int xe_guc_klv_count(const u32 *klvs, u32
>>>> num_dwords)
>>>>          return num_dwords ? -ENODATA : num_klvs;
>>>>    }
>>>> +
>>>> +static void emit_klv(struct xe_guc *guc, struct iosys_map *map, u32
>>>> *klv_entry,
>>>> +             u32 size, u32 *offset, u32 *remain)
>>> s/klv_entry/klv
>>>
>>> s/size/num_dwords
>>>
>>> KLVs are always u32 aligned
>>> 'size/offset/remain' all should be in dwords
>> That would mean having to convert back and forth between u8 and u32:
>>
>> - allocation is in u8 aligned
>> - params to this function would be u32 aligned
> but for generic KLV functions it doesn't make sense to pass u8 sizes
> and we should aim to define function signatures that enforce proper logic
>
>> - xe_map_memcpy_to below needs u8 alignment, so convert back
>> - the GuC expects a u8 aligned size, so need to convert remain back to u8
> that looks like a mistake done in this action definition, since all
> initial actions that introduced KLV concept (like UPDATE_VGT_POLICY,
> UPDATE_VF_CFG) were defined to operate on dwords as size

This is an ADS entry and all ADS blocks are offset/size. The new H2G 
command (in next patch) is indeed dword aligned, but it still seems 
simpler to just have a single "offset / 4" that multiple conversions 
back and forth.

>
>> Just using u8 everywhere seems simpler instead of converting back and
>> forth.
>>
>>> this will also match rest of functions in this file
>>>
>>>> +{
>>>> +    if (xe_gt_WARN(guc_to_gt(guc), *remain < size,
>>> do we need xe_gt_WARN in production?
>>> maybe xe_gt_assert will suffice?
>> I've kept this the same as what was in the original function. Not sure
>> why it was xe_gt_WARN instead of assert.
>>
>>>> +               "klv buffer too small to add klv id 0x%04x\n",
>>> s/klv/KLV
>>>
>>>> +               FIELD_GET(GUC_KLV_0_KEY, klv_entry[0])))
>>>> +        return;
>>>> +
>>>> +    xe_map_memcpy_to(guc_to_xe(guc), map, *offset, klv_entry, size);
>>>> +    *offset += size;
>>>> +    *remain -= size;
>>>> +}
>>>> +
>>>> +/**
>>>> + * xe_guc_klv_enable_simple - Emits a KLV with no data to an iosys
>>>> mapped buffer
>>>> + * @guc: the guc structure
>>>> + * @map: the iosys_map to write to
>>>> + * @klv_id: the KLV to enable
>>> key ? there is no Len nor Value
>>>
>>> this will follow other functions in this file
>> ok, will rename.
>>
>>>> + * @offset: pointer to a variable holding the offset to write to
>>>> + * @remain: pointer to a variable holding the remaining writing
>>>> space available
>>>> + *
>>>> + * The function checks if there is enough space to emit a KLV with
>>>> no data and
>>>> + * if so writes it to memory. After the write, the offset and remain
>>>> variables
>>>> + * are respectively increased and decreased of the written size to
>>>> be ready for
>>>> + * the next emission.
>>>> + */
>>>> +void xe_guc_klv_enable_simple(struct xe_guc *guc, struct iosys_map
>>>> *map,
>>>> +                  u16 klv_id, u32 *offset, u32 *remain)
>>>> +{
>>>> +    u32 klv_entry = PREP_GUC_KLV(klv_id, 0);
>>>      u32 klv[] = { PREP_GUC_KLV(key, 0) };
>>>
>>>      emit(... ARRAY_SIZE(klv)
>> Why? this function is specifically targeted to writing a single u32, why
>> write it as a u32[1]?
>>
>>>> +
>>>> +    emit_klv(guc, map, &klv_entry, sizeof(klv_entry), offset, remain);
>>>> +}
>>>> +
>>>> +/**
>>>> + * xe_guc_klv_enable_one_word - Emits a KLV with 1 DW data to an
>>>> iosys mapped buffer
>>>> + * @guc: the guc structure
>>>> + * @map: the iosys_map to write to
>>>> + * @klv_id: the KLV to enable
>>> s/klv_id/key
>>>
>>>> + * @value: the data associated with the KLV
>>>> + * @offset: pointer to a variable holding the offset to write to
>>>> + * @remain: pointer to a variable holding the remaining writing
>>>> space available
>>>> + *
>>>> + * The function checks if there is enough space to emit a KLV with 1
>>>> DW data and
>>>> + * if so writes it to memory. After the write, the offset and remain
>>>> variables
>>>> + * are respectively increased and decreased of the written size to
>>>> be ready for
>>>> + * the next emission.
>>>> + */
>>>> +void xe_guc_klv_enable_one_word(struct xe_guc *guc, struct iosys_map
>>>> *map,
>>>> +                u16 klv_id, u32 value, u32 *offset, u32 *remain)
>>>> +{
>>>> +    u32 klv_entry[] = {
>>>> +        PREP_GUC_KLV(klv_id, 1),
>>>> +        value,
>>>> +    };
>>>> +
>>>> +    emit_klv(guc, map, klv_entry, sizeof(klv_entry), offset, remain);
>>>> +}
>>>> diff --git a/drivers/gpu/drm/xe/xe_guc_klv_helpers.h b/drivers/gpu/
>>>> drm/xe/xe_guc_klv_helpers.h
>>>> index c676d21c173b..231f7c4b0947 100644
>>>> --- a/drivers/gpu/drm/xe/xe_guc_klv_helpers.h
>>>> +++ b/drivers/gpu/drm/xe/xe_guc_klv_helpers.h
>>>> @@ -10,6 +10,8 @@
>>>>    #include <linux/types.h>
>>>>      struct drm_printer;
>>>> +struct iosys_map;
>>>> +struct xe_guc;
>>>>      const char *xe_guc_klv_key_to_string(u16 key);
>>>>    @@ -61,4 +63,9 @@ int xe_guc_klv_count(const u32 *klvs, u32
>>>> num_dwords);
>>>>    #define PREP_GUC_KLV_TAG(TAG) \
>>>>        PREP_GUC_KLV_CONST(MAKE_GUC_KLV_KEY(TAG), MAKE_GUC_KLV_LEN(TAG))
>>>>    +void xe_guc_klv_enable_simple(struct xe_guc *guc, struct iosys_map
>>>> *map,
>>>> +                  u16 klv_id, u32 *offset, u32 *remain);
>>>> +void xe_guc_klv_enable_one_word(struct xe_guc *guc, struct iosys_map
>>>> *map,
>>>> +                u16 klv_id, u32 value, u32 *offset, u32 *remain);
>>> hmm, "enable" does not fit here, maybe:
>>>
>>> xe_guc_klv_emit_empty / no_data / no_value
>>> xe_guc_klv_emit_dword
>> emit_nodata should work.
>>
>>> and those should be inlines that use:
>>>
>>> void xe_guc_klv_emit_simple(
>>>      struct xe_guc *guc,
>>>      struct iosys_map *map,
>>>      u16 key, u16 len, const u32 *klv,
>>>      u32 *offset, u32 *remain);
>>>
>>> like:
>>>
>>> inline xe_guc_klv_emit_simple_empty(.. key ..)
>>> {
>>>      xe_guc_klv_emit_simple(.. key, 0, NULL, ..);
>>> }
>>>
>>>
>>> inline xe_guc_klv_emit_simple_dword(.. key, value ..)
>>> {
>>>      xe_guc_klv_emit_simple(.. key, 1, &value, ..);
>>> }
>> This seems to just make things harder. xe_guc_klv_emit_simple() would
>> need to handle the case where we have data differently from the case
>> where we don't, potentially with 2 separate memcpys (one always done for
> this isn't on the hot path so I would prefer avoid code duplication over
> perf optimization

This isn't about a perf optimization, it is about keeping the code 
simple. The only duplication is between:

u32 klv_entry = PREP_GUC_KLV(klv_id, 0);

and

u32 klv_entry[] = {
         PREP_GUC_KLV(klv_id, 1),
         value,
};

Which is minimal, but it keeps the common code much simpler as it 
doesn't have to handle optional parameters.

>
>> the key and one optional for the data). I really don't see the benefit
>> when the current approach still makes most of the code common in
>> emit_klv() and only has separate arrays. Also, that wouldn't be a
>> "simple" emission anymore since it'd handle all the possible cases, so
>> the naming also wouldn't fit.
> but the goal is to have helper function to 'emit KLVs', so the 'simple'
> here means help in building and emit of the proper KLV-header based on
> the provided key/len params, followed by emit of value (as dwords)

The "simple" here was in reference to the fact that there was no data, 
as compared against the emit_dword.

>
> and while your current case is only KLV with 0 or 1 dword, nothing
> prevents us from implementing generic helper, and satisfying your need
> with trivial wrappers for 0/1 case

I'll give it a try and see how it looks, but it really seems like we're 
over-engineering this by introducing an helper that can handle cases 
that we don't have.

Daniele

>
>> Daniele
>>
>>>> +
>>>>    #endif