[PATCH v3 01/13] drm/xe/hw_engine_group: Introduce xe_hw_engine_group

Matt Roper matthew.d.roper at intel.com
Wed Jul 31 22:35:07 UTC 2024


On Wed, Jul 31, 2024 at 04:21:35PM +0200, Francois Dugast wrote:
> A xe_hw_engine_group is a group of hw engines. Two hw engines belong to
> the same xe_hw_engine_group if one hw engine cannot make progress while
> the other is stuck on a page fault.
> 
> Typically, hw engines of the same group share some resources such as EUs.
> This depends on the hardware configuration of the platforms.

How do we determine what the shared resources are for a given platform?
Are there bspec references that we should be checking to figure out what
does/doesn't need to be grouped on a given platform?

The EUs shared by the RCS+CCS engines are the obvious ones, but is it
really true that BCS engines can't make progress while another engine of
the same type is faulting?  Wouldn't that be a huge problem (since a
fault on one BCS engine usually requires that we utilize a different BCS
engine for the migration operations to resolve the fault)?  Or am I not
understanding what the groupings represent?

For VCS/VECS, is the shared resource we're concerned about the SFC?  If
so, then I'd expect there to be a separate group per SCMI / media slice
since each one has its own dedicated SFC (and sometimes some of the
engines aren't even tied to the SFC at all).  Throwing all the media
engines across all the SCMIs into a single group seems like it would be
overkill unless there's some other resource that we're concerned about,
so it would be good to have some more details in the commit message here.


Matt

> 
> v2: Move to own files, improve error handling (Matt Brost)
> 
> Signed-off-by: Francois Dugast <francois.dugast at intel.com>
> ---
>  drivers/gpu/drm/xe/Makefile                   |  1 +
>  drivers/gpu/drm/xe/xe_hw_engine.c             |  4 +
>  drivers/gpu/drm/xe/xe_hw_engine_group.c       | 99 +++++++++++++++++++
>  drivers/gpu/drm/xe/xe_hw_engine_group.h       | 16 +++
>  drivers/gpu/drm/xe/xe_hw_engine_group_types.h | 48 +++++++++
>  drivers/gpu/drm/xe/xe_hw_engine_types.h       |  2 +
>  6 files changed, 170 insertions(+)
>  create mode 100644 drivers/gpu/drm/xe/xe_hw_engine_group.c
>  create mode 100644 drivers/gpu/drm/xe/xe_hw_engine_group.h
>  create mode 100644 drivers/gpu/drm/xe/xe_hw_engine_group_types.h
> 
> diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
> index 1ff9602a52f6..b67ace7ed204 100644
> --- a/drivers/gpu/drm/xe/Makefile
> +++ b/drivers/gpu/drm/xe/Makefile
> @@ -69,6 +69,7 @@ xe-y += xe_bb.o \
>  	xe_heci_gsc.o \
>  	xe_hw_engine.o \
>  	xe_hw_engine_class_sysfs.o \
> +	xe_hw_engine_group.o \
>  	xe_hw_fence.o \
>  	xe_huc.o \
>  	xe_huc_debugfs.o \
> diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c
> index 00ace5fcc284..f62992d0497a 100644
> --- a/drivers/gpu/drm/xe/xe_hw_engine.c
> +++ b/drivers/gpu/drm/xe/xe_hw_engine.c
> @@ -23,6 +23,7 @@
>  #include "xe_gt_printk.h"
>  #include "xe_gt_mcr.h"
>  #include "xe_gt_topology.h"
> +#include "xe_hw_engine_group.h"
>  #include "xe_hw_fence.h"
>  #include "xe_irq.h"
>  #include "xe_lrc.h"
> @@ -764,6 +765,9 @@ int xe_hw_engines_init(struct xe_gt *gt)
>  	}
>  
>  	hw_engine_setup_logical_mapping(gt);
> +	err = xe_hw_engine_setup_groups(gt);
> +	if (err)
> +		return err;
>  
>  	return 0;
>  }
> diff --git a/drivers/gpu/drm/xe/xe_hw_engine_group.c b/drivers/gpu/drm/xe/xe_hw_engine_group.c
> new file mode 100644
> index 000000000000..7fd10f0780ea
> --- /dev/null
> +++ b/drivers/gpu/drm/xe/xe_hw_engine_group.c
> @@ -0,0 +1,99 @@
> +// SPDX-License-Identifier: MIT
> +/*
> + * Copyright © 2024 Intel Corporation
> + */
> +
> +#include <drm/drm_managed.h>
> +
> +#include "xe_device.h"
> +#include "xe_gt.h"
> +#include "xe_hw_engine_group.h"
> +
> +static void
> +hw_engine_group_free(struct drm_device *drm, void *arg)
> +{
> +	struct xe_hw_engine_group *group = arg;
> +
> +	kfree(group);
> +}
> +
> +static struct xe_hw_engine_group *
> +hw_engine_group_alloc(struct xe_device *xe)
> +{
> +	struct xe_hw_engine_group *group;
> +	int err;
> +
> +	group = kzalloc(sizeof(*group), GFP_KERNEL);
> +	if (!group)
> +		return ERR_PTR(-ENOMEM);
> +
> +	init_rwsem(&group->mode_sem);
> +	INIT_LIST_HEAD(&group->exec_queue_list);
> +
> +	err = drmm_add_action_or_reset(&xe->drm, hw_engine_group_free, group);
> +	if (err)
> +		return ERR_PTR(err);
> +
> +	return group;
> +}
> +
> +/**
> + * xe_hw_engine_setup_groups() - Setup the hw engine groups for the gt
> + * @gt: The gt for which groups are set up
> + *
> + * Return: 0 on success, negative error code on error.
> + */
> +int xe_hw_engine_setup_groups(struct xe_gt *gt)
> +{
> +	struct xe_hw_engine *hwe;
> +	enum xe_hw_engine_id id;
> +	struct xe_hw_engine_group *group_rcs_ccs, *group_bcs, *group_vcs_vecs;
> +	struct xe_device *xe = gt_to_xe(gt);
> +	int err;
> +
> +	group_rcs_ccs = hw_engine_group_alloc(xe);
> +	if (IS_ERR(group_rcs_ccs)) {
> +		err = PTR_ERR(group_rcs_ccs);
> +		goto err_group_rcs_ccs;
> +	}
> +
> +	group_bcs = hw_engine_group_alloc(xe);
> +	if (IS_ERR(group_bcs)) {
> +		err = PTR_ERR(group_bcs);
> +		goto err_group_bcs;
> +	}
> +
> +	group_vcs_vecs = hw_engine_group_alloc(xe);
> +	if (IS_ERR(group_vcs_vecs)) {
> +		err = PTR_ERR(group_vcs_vecs);
> +		goto err_group_vcs_vecs;
> +	}
> +
> +	for_each_hw_engine(hwe, gt, id) {
> +		switch (hwe->class) {
> +		case XE_ENGINE_CLASS_COPY:
> +			hwe->hw_engine_group = group_bcs;
> +			break;
> +		case XE_ENGINE_CLASS_RENDER:
> +		case XE_ENGINE_CLASS_COMPUTE:
> +			hwe->hw_engine_group = group_rcs_ccs;
> +			break;
> +		case XE_ENGINE_CLASS_VIDEO_DECODE:
> +		case XE_ENGINE_CLASS_VIDEO_ENHANCE:
> +			hwe->hw_engine_group = group_vcs_vecs;
> +			break;
> +		default:
> +		}
> +	}
> +
> +	return 0;
> +
> +err_group_vcs_vecs:
> +	kfree(group_vcs_vecs);
> +err_group_bcs:
> +	kfree(group_bcs);
> +err_group_rcs_ccs:
> +	kfree(group_rcs_ccs);
> +
> +	return err;
> +}
> diff --git a/drivers/gpu/drm/xe/xe_hw_engine_group.h b/drivers/gpu/drm/xe/xe_hw_engine_group.h
> new file mode 100644
> index 000000000000..c2648f87f7ef
> --- /dev/null
> +++ b/drivers/gpu/drm/xe/xe_hw_engine_group.h
> @@ -0,0 +1,16 @@
> +/* SPDX-License-Identifier: MIT */
> +/*
> + * Copyright © 2024 Intel Corporation
> + */
> +
> +#ifndef _XE_HW_ENGINE_GROUP_H_
> +#define _XE_HW_ENGINE_GROUP_H_
> +
> +#include "xe_hw_engine_group_types.h"
> +
> +struct drm_device;
> +struct xe_gt;
> +
> +int xe_hw_engine_setup_groups(struct xe_gt *gt);
> +
> +#endif
> diff --git a/drivers/gpu/drm/xe/xe_hw_engine_group_types.h b/drivers/gpu/drm/xe/xe_hw_engine_group_types.h
> new file mode 100644
> index 000000000000..b828d85d24cb
> --- /dev/null
> +++ b/drivers/gpu/drm/xe/xe_hw_engine_group_types.h
> @@ -0,0 +1,48 @@
> +/* SPDX-License-Identifier: MIT */
> +/*
> + * Copyright © 2024 Intel Corporation
> + */
> +
> +#ifndef _XE_HW_ENGINE_GROUP_TYPES_H_
> +#define _XE_HW_ENGINE_GROUP_TYPES_H_
> +
> +#include "xe_force_wake_types.h"
> +#include "xe_lrc_types.h"
> +#include "xe_reg_sr_types.h"
> +
> +/**
> + * enum xe_hw_engine_group_execution_mode - possible execution modes of a hw
> + * engine group
> + */
> +enum xe_hw_engine_group_execution_mode {
> +	EXEC_MODE_LR,
> +	EXEC_MODE_DMA_FENCE,
> +};
> +
> +/**
> + * struct xe_hw_engine_group - Hardware engine group
> + *
> + * hw engines belong to the same group if they share hardware resources in a way
> + * that prevents them from making progress when one is stuck on a page fault.
> + */
> +struct xe_hw_engine_group {
> +	/**
> +	 * @exec_queue_list: list of exec queues attached to this
> +	 * xe_hw_engine_group
> +	 */
> +	struct list_head exec_queue_list;
> +	/** @resume_work: worker to resume faulting LR exec queues */
> +	struct work_struct resume_work;
> +	/** @resume_wq: workqueue to resume faulting LR exec queues */
> +	struct workqueue_struct *resume_wq;
> +	/**
> +	 * @mode_sem: used to protect this group's hardware resources and ensure
> +	 * mutual exclusion between execution only in faulting LR mode and
> +	 * execution only in DMA_FENCE mode
> +	 */
> +	struct rw_semaphore mode_sem;
> +	/** @cur_mode: current execution mode of this hw engine group */
> +	enum xe_hw_engine_group_execution_mode cur_mode;
> +};
> +
> +#endif
> diff --git a/drivers/gpu/drm/xe/xe_hw_engine_types.h b/drivers/gpu/drm/xe/xe_hw_engine_types.h
> index 70e6434f150d..39f24012d0f4 100644
> --- a/drivers/gpu/drm/xe/xe_hw_engine_types.h
> +++ b/drivers/gpu/drm/xe/xe_hw_engine_types.h
> @@ -150,6 +150,8 @@ struct xe_hw_engine {
>  	struct xe_hw_engine_class_intf *eclass;
>  	/** @oa_unit: oa unit for this hw engine */
>  	struct xe_oa_unit *oa_unit;
> +	/** @hw_engine_group: the group of hw engines this one belongs to */
> +	struct xe_hw_engine_group *hw_engine_group;
>  };
>  
>  /**
> -- 
> 2.43.0
> 

-- 
Matt Roper
Graphics Software Engineer
Linux GPU Platform Enablement
Intel Corporation


More information about the Intel-xe mailing list