[PATCH V1 7/9] accel: amd-ai-engine: Add support for AIEML devices

Gregory Williams gregory.williams at amd.com
Wed Jul 2 15:56:28 UTC 2025


Add driver support for AIEML generation devices. The following modules
are enabled:
- Get tile type from location (support for new memory tile type)
- Clock state tracking and request and release of tiles

Signed-off-by: Gregory Williams <gregory.williams at amd.com>
---
 drivers/accel/amd-ai-engine/Makefile          |   1 +
 drivers/accel/amd-ai-engine/ai-engine-aieml.c | 210 ++++++++++++++++++
 drivers/accel/amd-ai-engine/ai-engine-dev.c   |   2 +
 .../accel/amd-ai-engine/ai-engine-internal.h  |   2 +
 4 files changed, 215 insertions(+)
 create mode 100644 drivers/accel/amd-ai-engine/ai-engine-aieml.c

diff --git a/drivers/accel/amd-ai-engine/Makefile b/drivers/accel/amd-ai-engine/Makefile
index 9a830f7432d2..66cbce4705ea 100644
--- a/drivers/accel/amd-ai-engine/Makefile
+++ b/drivers/accel/amd-ai-engine/Makefile
@@ -6,6 +6,7 @@ obj-$(CONFIG_DRM_ACCEL_AMDAIE)	+= amd-aie.o
 
 amd-aie-$(CONFIG_DRM_ACCEL_AMDAIE) := \
 	ai-engine-aie.o		\
+	ai-engine-aieml.o	\
 	ai-engine-aperture.o	\
 	ai-engine-clock.o	\
 	ai-engine-dev.o		\
diff --git a/drivers/accel/amd-ai-engine/ai-engine-aieml.c b/drivers/accel/amd-ai-engine/ai-engine-aieml.c
new file mode 100644
index 000000000000..328688942a6a
--- /dev/null
+++ b/drivers/accel/amd-ai-engine/ai-engine-aieml.c
@@ -0,0 +1,210 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * AMD AI Engine driver AIEML device specific implementation
+ *
+ * Copyright(C) 2025 Advanced Micro Devices, Inc. All rights reserved.
+ */
+
+#include <linux/amd-ai-engine.h>
+#include <linux/bitmap.h>
+#include <linux/device.h>
+#include <linux/firmware/xlnx-zynqmp.h>
+#include <linux/io.h>
+
+#include "ai-engine-internal.h"
+
+/*
+ * Register addressing shifts; aieml_device_init() stores them in the
+ * array_shift, col_shift and row_shift fields of the device.
+ */
+#define AIEML_ARRAY_SHIFT	32U
+#define AIEML_COL_SHIFT		25U
+#define AIEML_ROW_SHIFT		20U
+
+/*
+ * NOTE(review): NUM_TYPES_OF_MEM, the NUM_MODS_* counts and
+ * AIEML_SHIMPL_COLRESET_CTRL_MASK are not referenced anywhere in this file;
+ * confirm they are needed by this patch.
+ */
+#define NUM_TYPES_OF_MEM	3U
+
+#define NUM_MODS_CORE_TILE	2U
+#define NUM_MODS_MEM_TILE	1U
+#define NUM_MODS_SHIMPL_TILE	1U
+
+/*
+ * Register offsets
+ *
+ * AIEML_SHIMPL_COLCLOCK_CTRL_REGOFF is the register read per column by
+ * aieml_scan_part_clocks().
+ */
+#define AIEML_SHIMPL_COLCLOCK_CTRL_REGOFF		0x000fff20U
+
+/*
+ * Register masks
+ *
+ * AIEML_SHIMPL_COLCLOCK_CTRL_MASK selects the bits tested by
+ * aieml_scan_part_clocks() to decide whether a column is clocked.
+ */
+#define AIEML_SHIMPL_COLRESET_CTRL_MASK			GENMASK(1, 0)
+#define AIEML_SHIMPL_COLCLOCK_CTRL_MASK			GENMASK(1, 0)
+
+/*
+ * aieml_get_tile_type() - classify a tile by its location
+ * @adev: AI engine device, supplies the number of memory tile rows
+ * @loc: tile location to classify
+ *
+ * Row 0 holds the shim tiles: within every group of four columns the first
+ * two are reported as SHIMPL and the remaining two as SHIMNOC. Rows 1 up to
+ * and including the memory tile row count are memory tiles, and every row
+ * above that is a regular tile.
+ *
+ * Return: one of the AIE_TILE_TYPE_* values.
+ */
+static u32 aieml_get_tile_type(struct aie_device *adev,
+			       struct aie_location *loc)
+{
+	u8 mem_rows = adev->ttype_attr[AIE_TILE_TYPE_MEMORY].num_rows;
+
+	/* Shim row: the tile flavour depends only on the column */
+	if (loc->row == 0)
+		return ((loc->col % 4) < 2) ? AIE_TILE_TYPE_SHIMPL :
+					      AIE_TILE_TYPE_SHIMNOC;
+
+	/* Non-shim rows: anything past the memory rows is a regular tile */
+	if (loc->row > mem_rows)
+		return AIE_TILE_TYPE_TILE;
+
+	return AIE_TILE_TYPE_MEMORY;
+}
+
+/* aieml_scan_part_clocks() - scan clocks of a partition
+ * @apart: AI engine partition
+ *
+ * Rebuilds the cores_clk_state bitmap of @apart from hardware: for every
+ * column of the partition the column clock control register of the shim
+ * (row 0) tile is read, and if any bit of AIEML_SHIMPL_COLCLOCK_CTRL_MASK is
+ * set, all (range->size.row - 1) bits of that column are set. The result is
+ * then copied into the tiles_inuse bitmap.
+ *
+ * Return: 0 for success, negative value for errors.
+ */
+static int aieml_scan_part_clocks(struct aie_partition *apart)
+{
+	struct aie_aperture *aperture = apart->aperture;
+	struct aie_range *range = &apart->range;
+	struct aie_device *adev = apart->adev;
+	struct aie_location loc;
+	int ret;
+
+	/* Clear the bitmap of cores and memories clock state */
+	aie_resource_put_region(&apart->cores_clk_state, 0,
+				apart->cores_clk_state.total);
+
+	/*
+	 * The column clock control register lives in the shim tile, i.e. in
+	 * row 0. loc is passed by value to aie_cal_regoff(), so the row must
+	 * be initialized before the loop; only the column varies below.
+	 */
+	loc.row = 0;
+
+	/*
+	 * In aieml if clock buffer on shim tile is enabled, the clock for all
+	 * tiles in the same column is enabled.
+	 */
+	for (loc.col = range->start.col;
+	     loc.col < range->start.col + range->size.col;
+	     loc.col++) {
+		void __iomem *va;
+		u32 val, nbitpos;
+
+		/* First bit of this column, relative to the partition start */
+		nbitpos = (loc.col - range->start.col) * (range->size.row - 1);
+
+		va = aperture->base +
+		     aie_cal_regoff(adev, loc,
+				    AIEML_SHIMPL_COLCLOCK_CTRL_REGOFF);
+		val = readl(va);
+
+		/* Column clock buffer disabled: leave the column bits clear */
+		if (!(val & AIEML_SHIMPL_COLCLOCK_CTRL_MASK))
+			continue;
+
+		ret = aie_resource_set(&apart->cores_clk_state, nbitpos,
+				       range->size.row - 1);
+		if (ret) {
+			dev_err(aperture->dev,
+				"failed to set clock state bitmaps for column %u",
+				loc.col);
+			return ret;
+		}
+	}
+	/*
+	 * Set the tiles in use bitmap.
+	 * In case of scanning, tiles which are powered on are considered as
+	 * tiles in use.
+	 */
+	bitmap_copy(apart->tiles_inuse.bitmap, apart->cores_clk_state.bitmap,
+		    apart->tiles_inuse.total);
+
+	return 0;
+}
+
+/* aieml_set_part_clocks() - set clocks of a partition
+ * @apart: AI engine partition
+ *
+ * Walks every column of the partition. If any tile above the first row of the
+ * partition is marked in the tiles_inuse bitmap, the column clock buffer is
+ * enabled through zynqmp_pm_aie_operation() and the whole column is marked in
+ * both tiles_inuse and cores_clk_state. Otherwise the column clock buffer is
+ * disabled and the column is cleared in both bitmaps.
+ *
+ * Processing stops at the first failure; columns handled before the failure
+ * keep their new state.
+ *
+ * Return: 0 for success, negative value for errors.
+ */
+static int aieml_set_part_clocks(struct aie_partition *apart)
+{
+	struct aie_aperture *aperture = apart->aperture;
+	struct aie_range *range = &apart->range;
+	u32 node_id = apart->adev->pm_node_id;
+	struct aie_location loc;
+	int ret;
+
+	for (loc.col = range->start.col;
+	     loc.col < range->start.col + range->size.col;
+	     loc.col++) {
+		u32 startbit, col_inuse = 0;
+
+		/* First bit of this column, relative to the partition start */
+		startbit = (loc.col - range->start.col) * (range->size.row - 1);
+
+		for (loc.row = range->start.row + 1;
+		     loc.row < range->start.row + range->size.row;
+		     loc.row++) {
+			/*
+			 * Bit index within the column is relative to the
+			 * partition's first row, like the column index above.
+			 */
+			u32 nbitpos = startbit +
+				      (loc.row - range->start.row) - 1;
+
+			if (aie_resource_testbit(&apart->tiles_inuse, nbitpos)) {
+				col_inuse = 1;
+				break;
+			}
+		}
+
+		if (col_inuse) {
+			ret = zynqmp_pm_aie_operation(node_id, loc.col,
+						      1,
+						      XILINX_AIE_OPS_ENB_COL_CLK_BUFF);
+			if (ret < 0) {
+				dev_err(aperture->dev,
+					"failed to enable clock for column: %d",
+					loc.col);
+				return ret;
+			}
+
+			/*
+			 * Check the two bitmap updates one after the other so
+			 * that a single, valid error code is returned.
+			 */
+			ret = aie_resource_set(&apart->tiles_inuse,
+					       startbit, range->size.row - 1);
+			if (!ret)
+				ret = aie_resource_set(&apart->cores_clk_state,
+						       startbit,
+						       range->size.row - 1);
+			if (ret) {
+				dev_err(aperture->dev,
+					"failed to set bitmaps for column: %d",
+					loc.col);
+				return ret;
+			}
+		} else {
+			ret = zynqmp_pm_aie_operation(node_id, loc.col,
+						      1,
+						      XILINX_AIE_OPS_DIS_COL_CLK_BUFF);
+			if (ret < 0) {
+				dev_err(aperture->dev,
+					"failed to disable clock for column: %d",
+					loc.col);
+				return ret;
+			}
+
+			/* Same sequencing as above: never mix two error codes */
+			ret = aie_resource_clear(&apart->tiles_inuse,
+						 startbit, range->size.row - 1);
+			if (!ret)
+				ret = aie_resource_clear(&apart->cores_clk_state,
+							 startbit,
+							 range->size.row - 1);
+			if (ret) {
+				dev_err(aperture->dev,
+					"failed to clear bitmaps for column: %d",
+					loc.col);
+				return ret;
+			}
+		}
+	}
+
+	return 0;
+}
+
+/* AIEML tile operations; installed as adev->ops by aieml_device_init() */
+static const struct aie_tile_operations aieml_ops = {
+	.get_tile_type = aieml_get_tile_type,
+	.scan_part_clocks = aieml_scan_part_clocks,
+	.set_part_clocks = aieml_set_part_clocks,
+};
+
+/**
+ * aieml_device_init() - Set up the AIEML specific parts of an AI engine device
+ * @adev: AI engine device
+ *
+ * Installs the AIEML tile operations in @adev and fills in the register
+ * addressing related array shift, column shift and row shift with the AIEML
+ * values.
+ */
+void aieml_device_init(struct aie_device *adev)
+{
+	/* Device operations */
+	adev->ops = &aieml_ops;
+
+	/* Register addressing shifts */
+	adev->row_shift = AIEML_ROW_SHIFT;
+	adev->col_shift = AIEML_COL_SHIFT;
+	adev->array_shift = AIEML_ARRAY_SHIFT;
+}
diff --git a/drivers/accel/amd-ai-engine/ai-engine-dev.c b/drivers/accel/amd-ai-engine/ai-engine-dev.c
index ba28257cbd04..f713d38ff8c3 100644
--- a/drivers/accel/amd-ai-engine/ai-engine-dev.c
+++ b/drivers/accel/amd-ai-engine/ai-engine-dev.c
@@ -154,6 +154,8 @@ static int amd_ai_engine_probe(struct platform_device *pdev)
 	adev->dev_gen = aie_gen;
 	if (aie_gen == AIE_DEVICE_GEN_AIE) {
 		aie_device_init(adev);
+	} else if (aie_gen == AIE_DEVICE_GEN_AIEML) {
+		aieml_device_init(adev);
 	} else {
 		dev_err(&pdev->dev, "Invalid device generation");
 		return -EINVAL;
diff --git a/drivers/accel/amd-ai-engine/ai-engine-internal.h b/drivers/accel/amd-ai-engine/ai-engine-internal.h
index 495d56d5f993..31a45575cc43 100644
--- a/drivers/accel/amd-ai-engine/ai-engine-internal.h
+++ b/drivers/accel/amd-ai-engine/ai-engine-internal.h
@@ -19,6 +19,7 @@
 #include <linux/platform_device.h>
 
 #define AIE_DEVICE_GEN_AIE	1U
+#define AIE_DEVICE_GEN_AIEML	2U
 
 #define KBYTES(n)		((n) * SZ_1K)
 
@@ -246,6 +247,7 @@ static inline u32 aie_cal_regoff(struct aie_device *adev,
 }
 
 void aie_device_init(struct aie_device *adev);
+void aieml_device_init(struct aie_device *adev);
 struct aie_partition *
 aie_aperture_request_part(struct aie_aperture *aperture,
 			  struct aie_partition_req *req);
-- 
2.34.1



More information about the dri-devel mailing list