[PATCH V1 7/9] accel: amd-ai-engine: Add support for AIEML devices
Gregory Williams
gregory.williams at amd.com
Wed Jul 2 15:56:28 UTC 2025
Adds driver support for AIEML generation devices. The following modules
are enabled:
- Get tile type from location (support for new memory tile type)
- Clock state tracking and request and release of tiles
Signed-off-by: Gregory Williams <gregory.williams at amd.com>
---
drivers/accel/amd-ai-engine/Makefile | 1 +
drivers/accel/amd-ai-engine/ai-engine-aieml.c | 210 ++++++++++++++++++
drivers/accel/amd-ai-engine/ai-engine-dev.c | 2 +
.../accel/amd-ai-engine/ai-engine-internal.h | 2 +
4 files changed, 215 insertions(+)
create mode 100644 drivers/accel/amd-ai-engine/ai-engine-aieml.c
diff --git a/drivers/accel/amd-ai-engine/Makefile b/drivers/accel/amd-ai-engine/Makefile
index 9a830f7432d2..66cbce4705ea 100644
--- a/drivers/accel/amd-ai-engine/Makefile
+++ b/drivers/accel/amd-ai-engine/Makefile
@@ -6,6 +6,7 @@ obj-$(CONFIG_DRM_ACCEL_AMDAIE) += amd-aie.o
amd-aie-$(CONFIG_DRM_ACCEL_AMDAIE) := \
ai-engine-aie.o \
+ ai-engine-aieml.o \
ai-engine-aperture.o \
ai-engine-clock.o \
ai-engine-dev.o \
diff --git a/drivers/accel/amd-ai-engine/ai-engine-aieml.c b/drivers/accel/amd-ai-engine/ai-engine-aieml.c
new file mode 100644
index 000000000000..328688942a6a
--- /dev/null
+++ b/drivers/accel/amd-ai-engine/ai-engine-aieml.c
@@ -0,0 +1,210 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * AMD AI Engine driver AIEML device specific implementation
+ *
+ * Copyright(C) 2025 Advanced Micro Devices, Inc. All rights reserved.
+ */
+
+#include <linux/amd-ai-engine.h>
+#include <linux/bitmap.h>
+#include <linux/device.h>
+#include <linux/firmware/xlnx-zynqmp.h>
+#include <linux/io.h>
+
+#include "ai-engine-internal.h"
+
+#define AIEML_ARRAY_SHIFT 32U
+#define AIEML_COL_SHIFT 25U
+#define AIEML_ROW_SHIFT 20U
+
+#define NUM_TYPES_OF_MEM 3U
+
+#define NUM_MODS_CORE_TILE 2U
+#define NUM_MODS_MEM_TILE 1U
+#define NUM_MODS_SHIMPL_TILE 1U
+
+/*
+ * Register offsets
+ */
+#define AIEML_SHIMPL_COLCLOCK_CTRL_REGOFF 0x000fff20U
+
+/*
+ * Register masks
+ */
+#define AIEML_SHIMPL_COLRESET_CTRL_MASK GENMASK(1, 0)
+#define AIEML_SHIMPL_COLCLOCK_CTRL_MASK GENMASK(1, 0)
+
+/* aieml_get_tile_type() - get the type of the tile at a location
+ * @adev: AI engine device
+ * @loc: tile location
+ *
+ * Row 0 holds the shim tiles: columns whose index modulo 4 is 0 or 1 are
+ * SHIMPL tiles, the others are SHIMNOC tiles. The next num_rows rows of the
+ * memory tile type attribute are memory tiles, everything above is a
+ * regular tile.
+ *
+ * Return: one of the AIE_TILE_TYPE_* values.
+ */
+static u32 aieml_get_tile_type(struct aie_device *adev,
+			       struct aie_location *loc)
+{
+	u8 mem_rows = adev->ttype_attr[AIE_TILE_TYPE_MEMORY].num_rows;
+
+	if (loc->row == 0) {
+		if ((loc->col % 4) < 2)
+			return AIE_TILE_TYPE_SHIMPL;
+
+		return AIE_TILE_TYPE_SHIMNOC;
+	}
+
+	if (loc->row <= mem_rows)
+		return AIE_TILE_TYPE_MEMORY;
+
+	return AIE_TILE_TYPE_TILE;
+}
+
+/* aieml_scan_part_clocks() - scan clocks of a partition
+ * @apart: AI engine partition
+ *
+ * Reads the column clock control register of every column of the partition
+ * and rebuilds the cores_clk_state bitmap from it: when the clock control
+ * bits of a column are set, all bits of that column are set in the bitmap.
+ * The tiles_inuse bitmap is then overwritten with a copy of the clock state.
+ *
+ * Return: 0 for success, negative value for errors.
+ */
+static int aieml_scan_part_clocks(struct aie_partition *apart)
+{
+	struct aie_aperture *aperture = apart->aperture;
+	struct aie_range *range = &apart->range;
+	struct aie_device *adev = apart->adev;
+	struct aie_location loc;
+	int ret;
+
+	/* Clear the bitmap of cores and memories clock state */
+	aie_resource_put_region(&apart->cores_clk_state, 0,
+				apart->cores_clk_state.total);
+
+	/*
+	 * The column clock control register is a shim tile register and
+	 * aieml_get_tile_type() places the shim tiles in row 0. The row must
+	 * be set explicitly: loc is passed by value to aie_cal_regoff(), so
+	 * an uninitialized row would yield an arbitrary register offset.
+	 */
+	loc.row = 0;
+
+	/*
+	 * In aieml if clock buffer on shim tile is enabled, the clock for all
+	 * tiles in the same column is enabled.
+	 */
+	for (loc.col = range->start.col;
+	     loc.col < range->start.col + range->size.col;
+	     loc.col++) {
+		void __iomem *va;
+		u32 val, nbitpos;
+
+		/* Each column owns (size.row - 1) consecutive bits */
+		nbitpos = (loc.col - range->start.col) * (range->size.row - 1);
+
+		va = aperture->base +
+		     aie_cal_regoff(adev, loc,
+				    AIEML_SHIMPL_COLCLOCK_CTRL_REGOFF);
+		val = readl(va);
+
+		if (!(val & AIEML_SHIMPL_COLCLOCK_CTRL_MASK))
+			continue;
+
+		ret = aie_resource_set(&apart->cores_clk_state, nbitpos,
+				       range->size.row - 1);
+		if (ret) {
+			dev_err(aperture->dev,
+				"failed to set clock state bitmaps for column %u",
+				loc.col);
+			return ret;
+		}
+	}
+	/*
+	 * Set the tiles in use bitmap.
+	 * In case of scanning, tiles which are powered on are considered as
+	 * tiles in use.
+	 */
+	bitmap_copy(apart->tiles_inuse.bitmap, apart->cores_clk_state.bitmap,
+		    apart->tiles_inuse.total);
+
+	return 0;
+}
+
+/* aieml_set_part_clocks() - set clocks of a partition
+ * @apart: AI engine partition
+ *
+ * For every column of the partition, checks whether any tile above the first
+ * row of the partition is marked in the tiles_inuse bitmap. If so, the column
+ * clock buffer is enabled through the firmware and the whole column is marked
+ * in both the tiles_inuse and cores_clk_state bitmaps. Otherwise the column
+ * clock buffer is disabled and the column is cleared in both bitmaps.
+ *
+ * Return: 0 for success, negative value for errors.
+ */
+static int aieml_set_part_clocks(struct aie_partition *apart)
+{
+	struct aie_aperture *aperture = apart->aperture;
+	struct aie_range *range = &apart->range;
+	u32 node_id = apart->adev->pm_node_id;
+	/* Each column owns one bit per row except the partition's first row */
+	u32 nbits = range->size.row - 1;
+	struct aie_location loc;
+	int ret;
+
+	for (loc.col = range->start.col;
+	     loc.col < range->start.col + range->size.col;
+	     loc.col++) {
+		u32 startbit, col_inuse = 0;
+
+		startbit = (loc.col - range->start.col) * nbits;
+
+		for (loc.row = range->start.row + 1;
+		     loc.row < range->start.row + range->size.row;
+		     loc.row++) {
+			/*
+			 * The bitmap is indexed relative to the partition, so
+			 * the row offset must be taken from the start row.
+			 */
+			u32 nbitpos = startbit +
+				      (loc.row - range->start.row) - 1;
+
+			if (aie_resource_testbit(&apart->tiles_inuse, nbitpos)) {
+				col_inuse = 1;
+				break;
+			}
+		}
+
+		if (col_inuse) {
+			ret = zynqmp_pm_aie_operation(node_id, loc.col,
+						      1,
+						      XILINX_AIE_OPS_ENB_COL_CLK_BUFF);
+			if (ret < 0) {
+				dev_err(aperture->dev,
+					"failed to enable clock for column: %u",
+					loc.col);
+				return ret;
+			}
+
+			/*
+			 * Check each bitmap update on its own: OR-ing two
+			 * negative error codes would produce a meaningless
+			 * return value.
+			 */
+			ret = aie_resource_set(&apart->tiles_inuse,
+					       startbit, nbits);
+			if (!ret)
+				ret = aie_resource_set(&apart->cores_clk_state,
+						       startbit, nbits);
+			if (ret) {
+				dev_err(aperture->dev,
+					"failed to set bitmaps for column: %u",
+					loc.col);
+				return ret;
+			}
+		} else {
+			ret = zynqmp_pm_aie_operation(node_id, loc.col,
+						      1,
+						      XILINX_AIE_OPS_DIS_COL_CLK_BUFF);
+			if (ret < 0) {
+				dev_err(aperture->dev,
+					"failed to disable clock for column: %u",
+					loc.col);
+				return ret;
+			}
+
+			ret = aie_resource_clear(&apart->tiles_inuse,
+						 startbit, nbits);
+			if (!ret)
+				ret = aie_resource_clear(&apart->cores_clk_state,
+							 startbit, nbits);
+			if (ret) {
+				dev_err(aperture->dev,
+					"failed to clear bitmaps for column: %u",
+					loc.col);
+				return ret;
+			}
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Tile operations for AIEML generation devices. aieml_device_init() installs
+ * this table as the ops of the AI engine device.
+ */
+static const struct aie_tile_operations aieml_ops = {
+ .get_tile_type = aieml_get_tile_type,
+ .scan_part_clocks = aieml_scan_part_clocks,
+ .set_part_clocks = aieml_set_part_clocks,
+};
+
+/**
+ * aieml_device_init() - Set up the AIEML specific parts of an AI engine device
+ * @adev: AI engine device
+ *
+ * This function initializes the device version specific elements of the AI
+ * engine device structure: the AIEML tile operations and the array, column
+ * and row shifts used for register addressing.
+ */
+void aieml_device_init(struct aie_device *adev)
+{
+	adev->ops = &aieml_ops;
+
+	adev->row_shift = AIEML_ROW_SHIFT;
+	adev->col_shift = AIEML_COL_SHIFT;
+	adev->array_shift = AIEML_ARRAY_SHIFT;
+}
diff --git a/drivers/accel/amd-ai-engine/ai-engine-dev.c b/drivers/accel/amd-ai-engine/ai-engine-dev.c
index ba28257cbd04..f713d38ff8c3 100644
--- a/drivers/accel/amd-ai-engine/ai-engine-dev.c
+++ b/drivers/accel/amd-ai-engine/ai-engine-dev.c
@@ -154,6 +154,8 @@ static int amd_ai_engine_probe(struct platform_device *pdev)
adev->dev_gen = aie_gen;
if (aie_gen == AIE_DEVICE_GEN_AIE) {
aie_device_init(adev);
+ } else if (aie_gen == AIE_DEVICE_GEN_AIEML) {
+ aieml_device_init(adev);
} else {
dev_err(&pdev->dev, "Invalid device generation");
return -EINVAL;
diff --git a/drivers/accel/amd-ai-engine/ai-engine-internal.h b/drivers/accel/amd-ai-engine/ai-engine-internal.h
index 495d56d5f993..31a45575cc43 100644
--- a/drivers/accel/amd-ai-engine/ai-engine-internal.h
+++ b/drivers/accel/amd-ai-engine/ai-engine-internal.h
@@ -19,6 +19,7 @@
#include <linux/platform_device.h>
#define AIE_DEVICE_GEN_AIE 1U
+#define AIE_DEVICE_GEN_AIEML 2U
#define KBYTES(n) ((n) * SZ_1K)
@@ -246,6 +247,7 @@ static inline u32 aie_cal_regoff(struct aie_device *adev,
}
void aie_device_init(struct aie_device *adev);
+void aieml_device_init(struct aie_device *adev);
struct aie_partition *
aie_aperture_request_part(struct aie_aperture *aperture,
struct aie_partition_req *req);
--
2.34.1
More information about the dri-devel
mailing list