[PATCH] drm/xe/guc: Stop reading masks from device memory when populating the ADS
John Harrison
john.c.harrison at intel.com
Thu Feb 13 22:50:54 UTC 2025
On 2/13/2025 13:40, Daniele Ceraolo Spurio wrote:
> If the device memory is corrupted during the suspend/resume flow, the
> masks might end up being random values and using them might lead us to
> try to set values for engines that do not exist, which in turn might
> lead to invalid memory accesses.
Which is all bad and should be fixed, but that is not the only problem
we would get from a corrupted ADS blob. E.g. GuC itself is going to have
exactly the same problem given that this is where it gets its engine
masks from.
Is it worth adding some kind of check on the memory being valid? Add a
magic word somewhere unused and check that it is still correct? And if
not, fail the re-init with a meaningful error message.
>
> Given that the driver does know which engines are available, we can just
> calculate the masks instead of reading them out of memory.
>
> Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio at intel.com>
> Cc: John Harrison <John.C.Harrison at Intel.com>
> ---
> drivers/gpu/drm/xe/xe_guc_ads.c | 36 ++++++++++++---------------------
> 1 file changed, 13 insertions(+), 23 deletions(-)
>
> diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c
> index fab259adc380..10e2ab5791b7 100644
> --- a/drivers/gpu/drm/xe/xe_guc_ads.c
> +++ b/drivers/gpu/drm/xe/xe_guc_ads.c
> @@ -129,9 +129,6 @@ struct __guc_ads_blob {
> #define info_map_write(xe_, map_, field_, val_) \
> xe_map_wr_field(xe_, map_, 0, struct guc_gt_system_info, field_, val_)
>
> -#define info_map_read(xe_, map_, field_) \
> - xe_map_rd_field(xe_, map_, 0, struct guc_gt_system_info, field_)
> -
> static size_t guc_ads_regset_size(struct xe_guc_ads *ads)
> {
> struct xe_device *xe = ads_to_xe(ads);
> @@ -493,13 +490,12 @@ static void fill_engine_enable_masks(struct xe_gt *gt,
> static void guc_prep_golden_lrc_null(struct xe_guc_ads *ads)
> {
> struct xe_device *xe = ads_to_xe(ads);
> - struct iosys_map info_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads),
> - offsetof(struct __guc_ads_blob, system_info));
> - u8 guc_class;
> + u8 class;
>
> - for (guc_class = 0; guc_class <= GUC_MAX_ENGINE_CLASSES; ++guc_class) {
> - if (!info_map_read(xe, &info_map,
> - engine_enabled_masks[guc_class]))
> + for (class = 0; class < XE_ENGINE_CLASS_MAX; ++class) {
> + u8 guc_class = xe_engine_class_to_guc_class(class);
> +
> + if (!engine_enable_mask(ads_to_gt(ads), class))
> continue;
>
> ads_blob_write(ads, ads.eng_state_size[guc_class],
> @@ -546,25 +542,24 @@ static void guc_mapping_table_init(struct xe_gt *gt,
> static u32 guc_get_capture_engine_mask(struct xe_gt *gt, struct iosys_map *info_map,
> enum guc_capture_list_class_type capture_class)
> {
> - struct xe_device *xe = gt_to_xe(gt);
> u32 mask;
>
> switch (capture_class) {
> case GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE:
> - mask = info_map_read(xe, info_map, engine_enabled_masks[GUC_RENDER_CLASS]);
> - mask |= info_map_read(xe, info_map, engine_enabled_masks[GUC_COMPUTE_CLASS]);
> + mask = engine_enable_mask(gt, XE_ENGINE_CLASS_RENDER);
> + mask |= engine_enable_mask(gt, XE_ENGINE_CLASS_COMPUTE);
Seems odd to have '; mask |=' rather than just '|'.
Not a blocker, but given you are changing this anyway, might as well
clean it up?
John.
> break;
> case GUC_CAPTURE_LIST_CLASS_VIDEO:
> - mask = info_map_read(xe, info_map, engine_enabled_masks[GUC_VIDEO_CLASS]);
> + mask = engine_enable_mask(gt, XE_ENGINE_CLASS_VIDEO_DECODE);
> break;
> case GUC_CAPTURE_LIST_CLASS_VIDEOENHANCE:
> - mask = info_map_read(xe, info_map, engine_enabled_masks[GUC_VIDEOENHANCE_CLASS]);
> + mask = engine_enable_mask(gt, XE_ENGINE_CLASS_VIDEO_ENHANCE);
> break;
> case GUC_CAPTURE_LIST_CLASS_BLITTER:
> - mask = info_map_read(xe, info_map, engine_enabled_masks[GUC_BLITTER_CLASS]);
> + mask = engine_enable_mask(gt, XE_ENGINE_CLASS_COPY);
> break;
> case GUC_CAPTURE_LIST_CLASS_GSC_OTHER:
> - mask = info_map_read(xe, info_map, engine_enabled_masks[GUC_GSC_OTHER_CLASS]);
> + mask = engine_enable_mask(gt, XE_ENGINE_CLASS_OTHER);
> break;
> default:
> mask = 0;
> @@ -907,8 +902,6 @@ static void guc_populate_golden_lrc(struct xe_guc_ads *ads)
> {
> struct xe_device *xe = ads_to_xe(ads);
> struct xe_gt *gt = ads_to_gt(ads);
> - struct iosys_map info_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads),
> - offsetof(struct __guc_ads_blob, system_info));
> size_t total_size = 0, alloc_size, real_size;
> u32 addr_ggtt, offset;
> int class;
> @@ -917,12 +910,9 @@ static void guc_populate_golden_lrc(struct xe_guc_ads *ads)
> addr_ggtt = xe_bo_ggtt_addr(ads->bo) + offset;
>
> for (class = 0; class < XE_ENGINE_CLASS_MAX; ++class) {
> - u8 guc_class;
> + u8 guc_class = xe_engine_class_to_guc_class(class);
>
> - guc_class = xe_engine_class_to_guc_class(class);
> -
> - if (!info_map_read(xe, &info_map,
> - engine_enabled_masks[guc_class]))
> + if (!engine_enable_mask(gt, class))
> continue;
>
> xe_gt_assert(gt, gt->default_lrc[class]);
More information about the Intel-xe
mailing list