[Intel-xe] [PATCH v3 3/3] drm/xe/pvc: Use fast copy engines as migrate engine on PVC

Niranjana Vishwanathapura niranjana.vishwanathapura at intel.com
Thu Aug 24 03:49:30 UTC 2023


On Wed, Aug 23, 2023 at 04:40:40AM -0700, Niranjana Vishwanathapura wrote:
>Some copy hardware engine instances are faster than others on PVC.
>Use a virtual engine built from these fast instances plus the reserved
>instance as the migrate engine on PVC. The idea is that if a fast instance
>is available it will be used, and the throughput of kernel copies, clears,
>and pagefault servicing will be higher.
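
(For anyone skimming the patch: the "virtual engine" here is just an exec
queue created with a multi-bit logical mask and width 1, so the backend can
place each migrate job on whichever engine in the mask happens to be free.
A rough illustration only; the logical instance numbers below are purely
hypothetical:

	/* Hypothetical: fast (odd) copy instances 1/3/5/7 plus the reserved
	 * USM instance 8; the backend may run each job on any of them.
	 */
	u32 logical_mask = BIT(1) | BIT(3) | BIT(5) | BIT(7) | BIT(8);
	struct xe_exec_queue *q;

	/* width = 1: one engine per job, load-balanced across the mask */
	q = xe_exec_queue_create(xe, vm, logical_mask, 1, hwe,
				 EXEC_QUEUE_FLAG_KERNEL);

The actual mask construction is in xe_migrate_usm_logical_mask() below.)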
>
>v2: Use OOB WA, use all copy engines if no WA is required
>
>Signed-off-by: Matthew Brost <matthew.brost at intel.com>
>Signed-off-by: Niranjana Vishwanathapura <niranjana.vishwanathapura at intel.com>
>---
> drivers/gpu/drm/xe/xe_migrate.c    | 39 +++++++++++++++++++++++++++---
> drivers/gpu/drm/xe/xe_wa_oob.rules |  1 +
> 2 files changed, 37 insertions(+), 3 deletions(-)
>
>diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
>index 956a96b38346..60f4e0a60b71 100644
>--- a/drivers/gpu/drm/xe/xe_migrate.c
>+++ b/drivers/gpu/drm/xe/xe_migrate.c
>@@ -12,6 +12,7 @@
> #include <drm/ttm/ttm_tt.h>
> #include <drm/xe_drm.h>
>
>+#include "generated/xe_wa_oob.h"
> #include "regs/xe_gpu_commands.h"
> #include "tests/xe_test.h"
> #include "xe_bb.h"
>@@ -29,6 +30,7 @@
> #include "xe_sync.h"
> #include "xe_trace.h"
> #include "xe_vm.h"
>+#include "xe_wa.h"
>
> /**
>  * struct xe_migrate - migrate context.
>@@ -298,6 +300,36 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
> 	return 0;
> }
>
>+/**
>+ * xe_migrate_usm_logical_mask - Return logical mask for usm migrate engine.
>+ * @gt: gt instance
>+ *
>+ * Per workaround 16017236439, odd-instance hardware copy engines are
>+ * faster than even-instance ones.
>+ * This function returns a mask covering all of the faster copy engines plus
>+ * the reserved copy engine, to be used as the logical mask for the migrate
>+ * engine. Including the reserved copy engine is required to avoid deadlocks
>+ * where migrate jobs servicing faults get stuck behind the job that faulted.
>+ */
>+u32 xe_migrate_usm_logical_mask(struct xe_gt *gt)
>+{
>+	u32 logical_mask = 0;
>+	struct xe_hw_engine *hwe;
>+	enum xe_hw_engine_id id;
>+
>+	for_each_hw_engine(hwe, gt, id) {
>+		if (hwe->class != XE_ENGINE_CLASS_COPY)
>+			continue;
>+
>+		if (!XE_WA(gt, 16017236439) ||
>+		    xe_gt_is_usm_hwe(gt, hwe) || hwe->instance & 1)
>+			logical_mask |= BIT(hwe->logical_instance);
>+	}
>+
>+	pr_info("NNV %s logical_mask 0x%x\n", __func__, logical_mask);
>+	return logical_mask;
>+}

I need to make this function static and remove the pr_info print statement.
I will do that in the next revision, along with addressing any other comments.
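
For reference, a minimal sketch of what I have in mind for the next revision
(same logic as above, just made static and with the debug print dropped,
assuming no other changes come out of review):

static u32 xe_migrate_usm_logical_mask(struct xe_gt *gt)
{
	u32 logical_mask = 0;
	struct xe_hw_engine *hwe;
	enum xe_hw_engine_id id;

	for_each_hw_engine(hwe, gt, id) {
		if (hwe->class != XE_ENGINE_CLASS_COPY)
			continue;

		/* Without the WA all copy engines qualify; with it, only the
		 * reserved USM instance and the faster odd instances do.
		 */
		if (!XE_WA(gt, 16017236439) ||
		    xe_gt_is_usm_hwe(gt, hwe) || hwe->instance & 1)
			logical_mask |= BIT(hwe->logical_instance);
	}

	return logical_mask;
}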

Niranjana

>+
> /**
>  * xe_migrate_init() - Initialize a migrate context
>  * @tile: Back-pointer to the tile we're initializing for.
>@@ -338,11 +370,12 @@ struct xe_migrate *xe_migrate_init(struct xe_tile *tile)
> 							   XE_ENGINE_CLASS_COPY,
> 							   primary_gt->usm.reserved_bcs_instance,
> 							   false);
>-		if (!hwe)
>+		u32 logical_mask = xe_migrate_usm_logical_mask(primary_gt);
>+
>+		if (!hwe || !logical_mask)
> 			return ERR_PTR(-EINVAL);
>
>-		m->q = xe_exec_queue_create(xe, vm,
>-					    BIT(hwe->logical_instance), 1,
>+		m->q = xe_exec_queue_create(xe, vm, logical_mask, 1,
> 					    hwe, EXEC_QUEUE_FLAG_KERNEL);
> 	} else {
> 		m->q = xe_exec_queue_create_class(xe, primary_gt, vm,
>diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules
>index ea90dcc933b5..599e67169dae 100644
>--- a/drivers/gpu/drm/xe/xe_wa_oob.rules
>+++ b/drivers/gpu/drm/xe/xe_wa_oob.rules
>@@ -17,3 +17,4 @@
> 1409600907	GRAPHICS_VERSION_RANGE(1200, 1250)
> 14016763929	SUBPLATFORM(DG2, G10)
> 		SUBPLATFORM(DG2, G12)
>+16017236439	PLATFORM(PVC)
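
(Aside on the OOB plumbing, mostly from memory, so treat the exact names as
an assumption: the rules file is compiled into generated/xe_wa_oob.h at build
time, and this new entry is what makes the XE_WA(gt, 16017236439) check above
evaluate to true on PVC. Roughly:

	/* Sketch only: XE_WA() boils down to a bitmap test against the GT's
	 * out-of-band workaround state populated from xe_wa_oob.rules.
	 */
	#define XE_WA(gt__, id__) \
		test_bit(XE_WA_OOB_##id__, (gt__)->wa_active.oob)

so no explicit platform check is needed at the call site.)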
>-- 
>2.21.0.rc0.32.g243a4c7e27
>

