[PATCH V1 8/9] accel: amd-ai-engine: Create tile memory information

Gregory Williams gregory.williams at amd.com
Wed Jul 2 15:56:29 UTC 2025


Create a tile memory information structure that stores the size and
offset of each memory type in a partition: tile data memory and program
memory for AIE and AIEML, plus memory tile memory for AIEML.

Signed-off-by: Gregory Williams <gregory.williams at amd.com>
---
 drivers/accel/amd-ai-engine/ai-engine-aie.c   | 39 +++++++++
 drivers/accel/amd-ai-engine/ai-engine-aieml.c | 47 ++++++++++
 .../accel/amd-ai-engine/ai-engine-internal.h  | 85 +++++++++++++------
 drivers/accel/amd-ai-engine/ai-engine-part.c  | 45 ++++++++++
 4 files changed, 192 insertions(+), 24 deletions(-)

diff --git a/drivers/accel/amd-ai-engine/ai-engine-aie.c b/drivers/accel/amd-ai-engine/ai-engine-aie.c
index 5e3cb44a16c8..056db0b7be0e 100644
--- a/drivers/accel/amd-ai-engine/ai-engine-aie.c
+++ b/drivers/accel/amd-ai-engine/ai-engine-aie.c
@@ -16,6 +16,8 @@
 #define AIE_COL_SHIFT		23U
 #define AIE_ROW_SHIFT		18U
 
+#define NUM_TYPES_OF_MEM	2U
+
 /*
  * Register offsets
  */
@@ -41,6 +43,42 @@ static u32 aie_get_tile_type(struct aie_device *adev, struct aie_location *loc)
 	return AIE_TILE_TYPE_SHIMNOC;
 }
 
+static unsigned int aie_get_mem_info(struct aie_device *adev,
+				     struct aie_range *range,
+				     struct aie_part_mem *pmem)
+{
+	struct aie_mem *data_mem, *prog_mem;
+	u8 first_row, row_count;
+
+	/* Range covers the SHIM row only, so it holds no memories */
+	if (range->start.row + range->size.row <= 1)
+		return 0;
+
+	/* Without an array to fill, only report the number of memory types */
+	if (!pmem)
+		return NUM_TYPES_OF_MEM;
+
+	data_mem = &pmem[0].mem;
+	prog_mem = &pmem[1].mem;
+	first_row = adev->ttype_attr[AIE_TILE_TYPE_TILE].start_row;
+	row_count = adev->ttype_attr[AIE_TILE_TYPE_TILE].num_rows;
+
+	/* Tile data memory: start from @range, then use the tile type rows */
+	data_mem->range = *range;
+	data_mem->range.start.row = first_row;
+	data_mem->range.size.row = row_count;
+	data_mem->offset = 0;
+	data_mem->size = KBYTES(32);
+
+	/* Program memory: same tiles, different offset and size */
+	prog_mem->range = *range;
+	prog_mem->range.start.row = first_row;
+	prog_mem->range.size.row = row_count;
+	prog_mem->offset = 0x20000;
+	prog_mem->size = KBYTES(16);
+	return NUM_TYPES_OF_MEM;
+}
+
 /* aie_scan_part_clocks() - scan clocks of a partition
  * @apart: AI engine partition
  *
@@ -258,6 +296,7 @@ static int aie_set_part_clocks(struct aie_partition *apart)
 }
 static const struct aie_tile_operations aie_ops = {
 	.get_tile_type = aie_get_tile_type,
+	.get_mem_info = aie_get_mem_info,
 	.scan_part_clocks = aie_scan_part_clocks,
 	.set_part_clocks = aie_set_part_clocks,
 };
diff --git a/drivers/accel/amd-ai-engine/ai-engine-aieml.c b/drivers/accel/amd-ai-engine/ai-engine-aieml.c
index 328688942a6a..7730609ff7c0 100644
--- a/drivers/accel/amd-ai-engine/ai-engine-aieml.c
+++ b/drivers/accel/amd-ai-engine/ai-engine-aieml.c
@@ -50,6 +50,52 @@ static u32 aieml_get_tile_type(struct aie_device *adev,
 	return AIE_TILE_TYPE_SHIMNOC;
 }
 
+/*
+ * Report the AIEML memory types within @range: [0] tile data memory,
+ * [1] program memory, [2] memory tile memory. If @pmem is NULL only the
+ * count is returned; a SHIM-row-only range has no memories and returns 0.
+ */
+static unsigned int aieml_get_mem_info(struct aie_device *adev,
+				       struct aie_range *range,
+				       struct aie_part_mem *pmem)
+{
+	const unsigned int num_mem_types = 3U; /* entries pmem[0..2] below */
+	u8 start_row, num_rows;
+	unsigned int i;
+
+	/* SHIM row only, no memories in this range */
+	if (range->start.row + range->size.row <= 1)
+		return 0;
+
+	if (!pmem)
+		return num_mem_types;
+
+	for (i = 0; i < num_mem_types; i++)
+		memcpy(&pmem[i].mem.range, range, sizeof(*range));
+
+	start_row = adev->ttype_attr[AIE_TILE_TYPE_TILE].start_row;
+	num_rows = adev->ttype_attr[AIE_TILE_TYPE_TILE].num_rows;
+	/* Setup tile data memory information */
+	pmem[0].mem.offset = 0;
+	pmem[0].mem.size = KBYTES(64);
+	pmem[0].mem.range.start.row = start_row;
+	pmem[0].mem.range.size.row = num_rows;
+	/* Setup program memory information */
+	pmem[1].mem.offset = 0x20000;
+	pmem[1].mem.size = KBYTES(16);
+	pmem[1].mem.range.start.row = start_row;
+	pmem[1].mem.range.size.row = num_rows;
+
+	start_row = adev->ttype_attr[AIE_TILE_TYPE_MEMORY].start_row;
+	num_rows = adev->ttype_attr[AIE_TILE_TYPE_MEMORY].num_rows;
+	/* Setup memory tile memory information */
+	pmem[2].mem.offset = 0;
+	pmem[2].mem.size = KBYTES(512);
+	pmem[2].mem.range.start.row = start_row;
+	pmem[2].mem.range.size.row = num_rows;
+	return num_mem_types;
+}
+
 /* aieml_scan_part_clocks() - scan clocks of a partition
  * @apart: AI engine partition
  *
@@ -188,6 +234,7 @@ static int aieml_set_part_clocks(struct aie_partition *apart)
 
 static const struct aie_tile_operations aieml_ops = {
 	.get_tile_type = aieml_get_tile_type,
+	.get_mem_info = aieml_get_mem_info,
 	.scan_part_clocks = aieml_scan_part_clocks,
 	.set_part_clocks = aieml_set_part_clocks,
 };
diff --git a/drivers/accel/amd-ai-engine/ai-engine-internal.h b/drivers/accel/amd-ai-engine/ai-engine-internal.h
index 31a45575cc43..13a39c4e3331 100644
--- a/drivers/accel/amd-ai-engine/ai-engine-internal.h
+++ b/drivers/accel/amd-ai-engine/ai-engine-internal.h
@@ -68,30 +68,6 @@ struct aie_device;
 struct aie_partition;
 struct aie_aperture;
 
-/**
- * struct aie_tile_operations - AI engine device operations
- * @get_tile_type: get type of tile based on tile operation
- * @scan_part_clocks: scan partition modules to check whether the modules are
- *		      clock gated or not, and update the soft clock states
- *		      structure. It is required to be called when the partition
- *		      is requested so that the driver knows which modules are
- *		      clock gated when the partition is requested. This function
- *		      expects the caller to apply partition lock before calling
- *		      this function.
- * @set_part_clocks: set partition modules clocks gate registers based on the
- *		     partition clock states bitmap. This function expects the
- *		     caller to apply partition lock before calling this
- *		     function. The caller function will need to set the bitmap
- *		     on which tiles are required to be clocked on.
- * Different AI engine device version has its own device
- * operation.
- */
-struct aie_tile_operations {
-	u32 (*get_tile_type)(struct aie_device *adev, struct aie_location *loc);
-	int (*scan_part_clocks)(struct aie_partition *apart);
-	int (*set_part_clocks)(struct aie_partition *apart);
-};
-
 /**
  * struct aie_resource - AI engine resource structure
  * @bitmap: resource bitmap
@@ -112,6 +88,37 @@ struct aie_range {
 	struct aie_location size;
 };
 
+/**
+ * struct aie_mem - AIE memory information
+ * @range: range of tiles of the memory
+ * @offset: register offset within a tile of the memory
+ * @size: size of the memory in one tile
+ */
+struct aie_mem {
+	struct aie_range range;
+	__kernel_size_t offset;
+	__kernel_size_t size;
+};
+
+/**
+ * struct aie_part_mem - AI engine partition memory information structure
+ * @apart: AI engine partition
+ * @mem: memory information of a type of memory
+ * @size: total size of this type of memory across the partition
+ *
+ * This structure keeps the information of one type of memory in a
+ * partition. The memory information is stored in the @mem member.
+ * The following information is kept:
+ *  * memory start address offset within a tile
+ *  * memory size within a tile
+ *  * which tiles contain this type of memory
+ */
+struct aie_part_mem {
+	struct aie_partition *apart;
+	struct aie_mem mem;
+	size_t size;
+};
+
 /**
  * struct aie_tile_attr - AI engine device tile type attributes
  * @start_row: start row
@@ -126,6 +133,34 @@ struct aie_tile_attr {
 	const enum aie_module_type *mods;
 };
 
+/**
+ * struct aie_tile_operations - AI engine device operations
+ * @get_tile_type: get type of tile based on tile location
+ * @get_mem_info: get information on the memory types present in a tile range
+ * @scan_part_clocks: scan partition modules to check whether the modules are
+ *		      clock gated or not, and update the soft clock states
+ *		      structure. It is required to be called when the partition
+ *		      is requested so that the driver knows which modules are
+ *		      clock gated when the partition is requested. This function
+ *		      expects the caller to apply partition lock before calling
+ *		      this function.
+ * @set_part_clocks: set partition modules clocks gate registers based on the
+ *		     partition clock states bitmap. This function expects the
+ *		     caller to apply partition lock before calling this
+ *		     function. The caller function will need to set the bitmap
+ *		     on which tiles are required to be clocked on.
+ * Each AI engine device version has its own set of device
+ * operations.
+ */
+struct aie_tile_operations {
+	u32 (*get_tile_type)(struct aie_device *adev, struct aie_location *loc);
+	unsigned int (*get_mem_info)(struct aie_device *adev,
+				     struct aie_range *range,
+				     struct aie_part_mem *pmem);
+	int (*scan_part_clocks)(struct aie_partition *apart);
+	int (*set_part_clocks)(struct aie_partition *apart);
+};
+
 /**
  * struct aie_device - AI engine device structure
  * @apertures: list of apertures
@@ -188,6 +223,7 @@ struct aie_aperture {
  * @range: range of partition
  * @cores_clk_state: bitmap to indicate the power state of core and mem tiles
  * @tiles_inuse: bitmap to indicate if a tile is in use
+ * @pmems: array of memory information, one entry per partition memory type
  * @mlock: protection for AI engine partition operations
  * @freq_req: required frequency
  */
@@ -198,6 +234,7 @@ struct aie_partition {
 	struct aie_range range;
 	struct aie_resource cores_clk_state;
 	struct aie_resource tiles_inuse;
+	struct aie_part_mem *pmems;
 	struct mutex mlock; /* protection for AI engine partition operations */
 	u64 freq_req;
 };
diff --git a/drivers/accel/amd-ai-engine/ai-engine-part.c b/drivers/accel/amd-ai-engine/ai-engine-part.c
index 83099cb60161..878597eff202 100644
--- a/drivers/accel/amd-ai-engine/ai-engine-part.c
+++ b/drivers/accel/amd-ai-engine/ai-engine-part.c
@@ -12,6 +12,44 @@
 
 #include "ai-engine-internal.h"
 
+/**
+ * aie_part_create_mems_info() - allocate and populate the memory types
+ *				 information array of an AI engine partition
+ * @apart: AI engine partition
+ *
+ * Return: 0 for success, negative value for failure
+ *
+ * Asks the device operations how many memory types the partition range
+ * holds, allocates @apart->pmems with that many entries and fills them in.
+ */
+static int aie_part_create_mems_info(struct aie_partition *apart)
+{
+	struct aie_device *adev = apart->adev;
+	struct aie_part_mem *pmem;
+	unsigned int count, idx;
+
+	/* A NULL array makes get_mem_info() report only the number of types */
+	count = adev->ops->get_mem_info(adev, &apart->range, NULL);
+	if (!count)
+		return 0;
+
+	apart->pmems = devm_kcalloc(apart->aperture->dev, count,
+				    sizeof(struct aie_part_mem), GFP_KERNEL);
+	if (!apart->pmems)
+		return -ENOMEM;
+
+	/* Second call fills in the range, offset and per-tile size per type */
+	adev->ops->get_mem_info(adev, &apart->range, apart->pmems);
+	for (idx = 0; idx < count; idx++) {
+		pmem = &apart->pmems[idx];
+		pmem->apart = apart;
+		/* Total size: per-tile size times columns times rows */
+		pmem->size = pmem->mem.size * pmem->mem.range.size.col *
+			     pmem->mem.range.size.row;
+	}
+	return 0;
+}
+
 /**
  * aie_part_release() - release an AI engine partition instance
  * @apart: AI engine partition device
@@ -29,6 +67,7 @@ void aie_part_release(struct aie_partition *apart)
 	aie_resource_uninitialize(&apart->cores_clk_state);
 	aie_resource_uninitialize(&apart->tiles_inuse);
 	list_del(&apart->node);
+	devm_kfree(aperture->dev, apart->pmems);
 	devm_kfree(aperture->dev, apart);
 	mutex_unlock(&aperture->mlock);
 }
@@ -64,6 +103,12 @@ struct aie_partition *aie_part_create(struct aie_aperture *aperture,
 	apart->range.start.row = aperture->range.start.row;
 	apart->range.size.row = aperture->range.size.row;
 
+	ret = aie_part_create_mems_info(apart);
+	if (ret) {
+		dev_err(aperture->dev, "failed to create tile memory information.");
+		return ERR_PTR(ret);
+	}
+
 	/* SHIM row always enabled so it is not needed in the bitmap */
 	num_tiles = apart->range.size.col * (apart->range.size.row - 1);
 	ret = aie_resource_initialize(&apart->cores_clk_state, num_tiles);
-- 
2.34.1



More information about the dri-devel mailing list