[PATCH] drm/amdkfd: Update L1 and add L2/3 cache information
Mike Li
Tianxinmike.Li at amd.com
Mon Mar 29 16:33:27 UTC 2021
The L1 cache information has been updated and the L2/L3
information has been added. The changes have been made
for Vega10 and newer ASICs. There are no changes
for the older ASICs before Vega10.
BUG: SWDEV-260249
Signed-off-by: Mike Li <Tianxinmike.Li at amd.com>
---
drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 749 ++++++++++++++++++++++++--
1 file changed, 699 insertions(+), 50 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
index c60e82697385..eb30324393a8 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
@@ -55,7 +55,7 @@ struct kfd_gpu_cache_info {
uint32_t cache_level;
uint32_t flags;
/* Indicates how many Compute Units share this cache
- * Value = 1 indicates the cache is not shared
+ * within a SA. Value = 1 indicates the cache is not shared
*/
uint32_t num_cu_shared;
};
@@ -69,7 +69,6 @@ static struct kfd_gpu_cache_info kaveri_cache_info[] = {
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
.num_cu_shared = 1,
-
},
{
/* Scalar L1 Instruction Cache (in SQC module) per bank */
@@ -126,9 +125,6 @@ static struct kfd_gpu_cache_info carrizo_cache_info[] = {
/* TODO: Add L2 Cache information */
};
-/* NOTE: In future if more information is added to struct kfd_gpu_cache_info
- * the following ASICs may need a separate table.
- */
#define hawaii_cache_info kaveri_cache_info
#define tonga_cache_info carrizo_cache_info
#define fiji_cache_info carrizo_cache_info
@@ -136,13 +132,562 @@ static struct kfd_gpu_cache_info carrizo_cache_info[] = {
#define polaris11_cache_info carrizo_cache_info
#define polaris12_cache_info carrizo_cache_info
#define vegam_cache_info carrizo_cache_info
-/* TODO - check & update Vega10 cache details */
-#define vega10_cache_info carrizo_cache_info
-#define raven_cache_info carrizo_cache_info
-#define renoir_cache_info carrizo_cache_info
-/* TODO - check & update Navi10 cache details */
-#define navi10_cache_info carrizo_cache_info
-#define vangogh_cache_info carrizo_cache_info
+
+/* NOTE: L1 cache information has been updated and L2/L3
+ * cache information has been added for Vega10 and
+ * newer ASICs. The unit for cache_size is KiB.
+ * In future, check & update cache details
+ * for every new ASIC is required.
+ */
+
+static struct kfd_gpu_cache_info vega10_cache_info[] = {
+ {
+ /* TCP L1 Cache per CU */
+ .cache_size = 16,
+ .cache_level = 1,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 1,
+ },
+ {
+ /* Scalar L1 Instruction Cache per SQC */
+ .cache_size = 32,
+ .cache_level = 1,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_INST_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 3,
+ },
+ {
+ /* Scalar L1 Data Cache per SQC */
+ .cache_size = 16,
+ .cache_level = 1,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 3,
+ },
+ {
+ /* L2 Data Cache per GPU (Total Tex Cache) */
+ .cache_size = 4096,
+ .cache_level = 2,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 16,
+ },
+};
+
+static struct kfd_gpu_cache_info raven_cache_info[] = {
+ {
+ /* TCP L1 Cache per CU */
+ .cache_size = 16,
+ .cache_level = 1,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 1,
+ },
+ {
+ /* Scalar L1 Instruction Cache per SQC */
+ .cache_size = 32,
+ .cache_level = 1,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_INST_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 3,
+ },
+ {
+ /* Scalar L1 Data Cache per SQC */
+ .cache_size = 16,
+ .cache_level = 1,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 3,
+ },
+ {
+ /* L2 Data Cache per GPU (Total Tex Cache) */
+ .cache_size = 1024,
+ .cache_level = 2,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 11,
+ },
+};
+
+static struct kfd_gpu_cache_info renoir_cache_info[] = {
+ {
+ /* TCP L1 Cache per CU */
+ .cache_size = 16,
+ .cache_level = 1,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 1,
+ },
+ {
+ /* Scalar L1 Instruction Cache per SQC */
+ .cache_size = 32,
+ .cache_level = 1,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_INST_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 3,
+ },
+ {
+ /* Scalar L1 Data Cache per SQC */
+ .cache_size = 16,
+ .cache_level = 1,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 3,
+ },
+ {
+ /* L2 Data Cache per GPU (Total Tex Cache) */
+ .cache_size = 1024,
+ .cache_level = 2,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 8,
+ },
+};
+
+static struct kfd_gpu_cache_info vega12_cache_info[] = {
+ {
+ /* TCP L1 Cache per CU */
+ .cache_size = 16,
+ .cache_level = 1,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 1,
+ },
+ {
+ /* Scalar L1 Instruction Cache per SQC */
+ .cache_size = 32,
+ .cache_level = 1,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_INST_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 3,
+ },
+ {
+ /* Scalar L1 Data Cache per SQC */
+ .cache_size = 16,
+ .cache_level = 1,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 3,
+ },
+ {
+ /* L2 Data Cache per GPU (Total Tex Cache) */
+ .cache_size = 2048,
+ .cache_level = 2,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 5,
+ },
+};
+
+static struct kfd_gpu_cache_info vega20_cache_info[] = {
+ {
+ /* TCP L1 Cache per CU */
+ .cache_size = 16,
+ .cache_level = 1,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 1,
+ },
+ {
+ /* Scalar L1 Instruction Cache per SQC */
+ .cache_size = 32,
+ .cache_level = 1,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_INST_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 3,
+ },
+ {
+ /* Scalar L1 Data Cache per SQC */
+ .cache_size = 16,
+ .cache_level = 1,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 3,
+ },
+ {
+ /* L2 Data Cache per GPU (Total Tex Cache) */
+ .cache_size = 8192,
+ .cache_level = 2,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 16,
+ },
+};
+
+static struct kfd_gpu_cache_info aldebaran_cache_info[] = {
+ {
+ /* TCP L1 Cache per CU */
+ .cache_size = 16,
+ .cache_level = 1,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 1,
+ },
+ {
+ /* Scalar L1 Instruction Cache per SQC */
+ .cache_size = 32,
+ .cache_level = 1,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_INST_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 2,
+ },
+ {
+ /* Scalar L1 Data Cache per SQC */
+ .cache_size = 16,
+ .cache_level = 1,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 2,
+ },
+ {
+ /* L2 Data Cache per GPU (Total Tex Cache) */
+ .cache_size = 8192,
+ .cache_level = 2,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 14,
+ },
+};
+
+static struct kfd_gpu_cache_info navi10_cache_info[] = {
+ {
+ /* TCP L1 Cache per CU */
+ .cache_size = 16,
+ .cache_level = 1,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 1,
+ },
+ {
+ /* Scalar L1 Instruction Cache per SQC */
+ .cache_size = 32,
+ .cache_level = 1,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_INST_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 2,
+ },
+ {
+ /* Scalar L1 Data Cache per SQC */
+ .cache_size = 16,
+ .cache_level = 1,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 2,
+ },
+ {
+ /* GL1 Data Cache per SA */
+ .cache_size = 128,
+ .cache_level = 1,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 10,
+ },
+ {
+ /* L2 Data Cache per GPU (Total Tex Cache) */
+ .cache_size = 4096,
+ .cache_level = 2,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 10,
+ },
+};
+
+static struct kfd_gpu_cache_info vangogh_cache_info[] = {
+ {
+ /* TCP L1 Cache per CU */
+ .cache_size = 16,
+ .cache_level = 1,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 1,
+ },
+ {
+ /* Scalar L1 Instruction Cache per SQC */
+ .cache_size = 32,
+ .cache_level = 1,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_INST_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 2,
+ },
+ {
+ /* Scalar L1 Data Cache per SQC */
+ .cache_size = 16,
+ .cache_level = 1,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 2,
+ },
+ {
+ /* GL1 Data Cache per SA */
+ .cache_size = 128,
+ .cache_level = 1,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 8,
+ },
+ {
+ /* L2 Data Cache per GPU (Total Tex Cache) */
+ .cache_size = 1024,
+ .cache_level = 2,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 8,
+ },
+};
+
+static struct kfd_gpu_cache_info navi14_cache_info[] = {
+ {
+ /* TCP L1 Cache per CU */
+ .cache_size = 16,
+ .cache_level = 1,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 1,
+ },
+ {
+ /* Scalar L1 Instruction Cache per SQC */
+ .cache_size = 32,
+ .cache_level = 1,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_INST_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 2,
+ },
+ {
+ /* Scalar L1 Data Cache per SQC */
+ .cache_size = 16,
+ .cache_level = 1,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 2,
+ },
+ {
+ /* GL1 Data Cache per SA */
+ .cache_size = 128,
+ .cache_level = 1,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 12,
+ },
+ {
+ /* L2 Data Cache per GPU (Total Tex Cache) */
+ .cache_size = 2048,
+ .cache_level = 2,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 12,
+ },
+};
+
+static struct kfd_gpu_cache_info sienna_cichlid_cache_info[] = {
+ {
+ /* TCP L1 Cache per CU */
+ .cache_size = 16,
+ .cache_level = 1,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 1,
+ },
+ {
+ /* Scalar L1 Instruction Cache per SQC */
+ .cache_size = 32,
+ .cache_level = 1,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_INST_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 2,
+ },
+ {
+ /* Scalar L1 Data Cache per SQC */
+ .cache_size = 16,
+ .cache_level = 1,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 2,
+ },
+ {
+ /* GL1 Data Cache per SA */
+ .cache_size = 128,
+ .cache_level = 1,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 10,
+ },
+ {
+ /* L2 Data Cache per GPU (Total Tex Cache) */
+ .cache_size = 4096,
+ .cache_level = 2,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 10,
+ },
+ {
+ /* L3 Data Cache per GPU */
+ .cache_size = 128*1024,
+ .cache_level = 3,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 10,
+ },
+};
+
+static struct kfd_gpu_cache_info navy_flounder_cache_info[] = {
+ {
+ /* TCP L1 Cache per CU */
+ .cache_size = 16,
+ .cache_level = 1,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 1,
+ },
+ {
+ /* Scalar L1 Instruction Cache per SQC */
+ .cache_size = 32,
+ .cache_level = 1,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_INST_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 2,
+ },
+ {
+ /* Scalar L1 Data Cache per SQC */
+ .cache_size = 16,
+ .cache_level = 1,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 2,
+ },
+ {
+ /* GL1 Data Cache per SA */
+ .cache_size = 128,
+ .cache_level = 1,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 10,
+ },
+ {
+ /* L2 Data Cache per GPU (Total Tex Cache) */
+ .cache_size = 3072,
+ .cache_level = 2,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 10,
+ },
+ {
+ /* L3 Data Cache per GPU */
+ .cache_size = 96*1024,
+ .cache_level = 3,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 10,
+ },
+};
+
+static struct kfd_gpu_cache_info dimgrey_cavefish_cache_info[] = {
+ {
+ /* TCP L1 Cache per CU */
+ .cache_size = 16,
+ .cache_level = 1,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 1,
+ },
+ {
+ /* Scalar L1 Instruction Cache per SQC */
+ .cache_size = 32,
+ .cache_level = 1,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_INST_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 2,
+ },
+ {
+ /* Scalar L1 Data Cache per SQC */
+ .cache_size = 16,
+ .cache_level = 1,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 2,
+ },
+ {
+ /* GL1 Data Cache per SA */
+ .cache_size = 128,
+ .cache_level = 1,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 8,
+ },
+ {
+ /* L2 Data Cache per GPU (Total Tex Cache) */
+ .cache_size = 2048,
+ .cache_level = 2,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 8,
+ },
+ {
+ /* L3 Data Cache per GPU */
+ .cache_size = 32*1024,
+ .cache_level = 3,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 8,
+ },
+};
static void kfd_populated_cu_info_cpu(struct kfd_topology_device *dev,
struct crat_subtype_computeunit *cu)
@@ -544,7 +1089,7 @@ int kfd_parse_crat_table(void *crat_image, struct list_head *device_list,
}
/* Helper function. See kfd_fill_gpu_cache_info for parameter description */
-static int fill_in_pcache(struct crat_subtype_cache *pcache,
+static int fill_in_l1_pcache(struct crat_subtype_cache *pcache,
struct kfd_gpu_cache_info *pcache_info,
struct kfd_cu_info *cu_info,
int mem_available,
@@ -597,6 +1142,70 @@ static int fill_in_pcache(struct crat_subtype_cache *pcache,
return 1;
}
+/* Helper function. See kfd_fill_gpu_cache_info for parameter description */
+static int fill_in_l2_l3_pcache(struct crat_subtype_cache *pcache,
+ struct kfd_gpu_cache_info *pcache_info,
+ struct kfd_cu_info *cu_info,
+ int mem_available,
+ int cache_type, unsigned int cu_processor_id)
+{
+ unsigned int cu_sibling_map_mask;
+ int first_active_cu;
+ int i, j, k;
+
+ /* First check if enough memory is available */
+ if (sizeof(struct crat_subtype_cache) > mem_available)
+ return -ENOMEM;
+
+ cu_sibling_map_mask = cu_info->cu_bitmap[0][0];
+ cu_sibling_map_mask &=
+ ((1 << pcache_info[cache_type].num_cu_shared) - 1);
+ first_active_cu = ffs(cu_sibling_map_mask);
+
+ /* CU could be inactive. In case of shared cache find the first active
+ * CU. and incase of non-shared cache check if the CU is inactive. If
+ * inactive active skip it
+ */
+ if (first_active_cu) {
+ memset(pcache, 0, sizeof(struct crat_subtype_cache));
+ pcache->type = CRAT_SUBTYPE_CACHE_AFFINITY;
+ pcache->length = sizeof(struct crat_subtype_cache);
+ pcache->flags = pcache_info[cache_type].flags;
+ pcache->processor_id_low = cu_processor_id
+ + (first_active_cu - 1);
+ pcache->cache_level = pcache_info[cache_type].cache_level;
+ pcache->cache_size = pcache_info[cache_type].cache_size;
+
+ /* Sibling map is w.r.t processor_id_low, so shift out
+ * inactive CU
+ */
+ cu_sibling_map_mask =
+ cu_sibling_map_mask >> (first_active_cu - 1);
+ k = 0;
+ for (i = 0; i < cu_info->num_shader_engines; i++) {
+ for (j = 0; j < cu_info->num_shader_arrays_per_engine;
+ j++) {
+ pcache->sibling_map[k] =
+ (uint8_t)(cu_sibling_map_mask & 0xFF);
+ pcache->sibling_map[k+1] =
+ (uint8_t)((cu_sibling_map_mask >> 8) & 0xFF);
+ pcache->sibling_map[k+2] =
+ (uint8_t)((cu_sibling_map_mask >> 16) & 0xFF);
+ pcache->sibling_map[k+3] =
+ (uint8_t)((cu_sibling_map_mask >> 24) & 0xFF);
+ k += 4;
+ cu_sibling_map_mask =
+ cu_info->cu_bitmap[i % 4][j + i / 4];
+ cu_sibling_map_mask &= (
+ (1 << pcache_info[cache_type].num_cu_shared)
+ - 1);
+ }
+ }
+ return 0;
+ }
+ return 1;
+}
+
/* kfd_fill_gpu_cache_info - Fill GPU cache info using kfd_gpu_cache_info
* tables
*
@@ -624,6 +1233,7 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev,
int mem_available = available_size;
unsigned int cu_processor_id;
int ret;
+ unsigned int num_cu_shared;
switch (kdev->device_info->asic_family) {
case CHIP_KAVERI:
@@ -663,12 +1273,21 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev,
num_of_cache_types = ARRAY_SIZE(vegam_cache_info);
break;
case CHIP_VEGA10:
+ pcache_info = vega10_cache_info;
+ num_of_cache_types = ARRAY_SIZE(vega10_cache_info);
+ break;
case CHIP_VEGA12:
+ pcache_info = vega12_cache_info;
+ num_of_cache_types = ARRAY_SIZE(vega12_cache_info);
+ break;
case CHIP_VEGA20:
case CHIP_ARCTURUS:
+ pcache_info = vega20_cache_info;
+ num_of_cache_types = ARRAY_SIZE(vega20_cache_info);
+ break;
case CHIP_ALDEBARAN:
- pcache_info = vega10_cache_info;
- num_of_cache_types = ARRAY_SIZE(vega10_cache_info);
+ pcache_info = aldebaran_cache_info;
+ num_of_cache_types = ARRAY_SIZE(aldebaran_cache_info);
break;
case CHIP_RAVEN:
pcache_info = raven_cache_info;
@@ -680,12 +1299,24 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev,
break;
case CHIP_NAVI10:
case CHIP_NAVI12:
+ pcache_info = navi10_cache_info;
+ num_of_cache_types = ARRAY_SIZE(navi10_cache_info);
+ break;
case CHIP_NAVI14:
+ pcache_info = navi14_cache_info;
+ num_of_cache_types = ARRAY_SIZE(navi14_cache_info);
+ break;
case CHIP_SIENNA_CICHLID:
+ pcache_info = sienna_cichlid_cache_info;
+ num_of_cache_types = ARRAY_SIZE(sienna_cichlid_cache_info);
+ break;
case CHIP_NAVY_FLOUNDER:
+ pcache_info = navy_flounder_cache_info;
+ num_of_cache_types = ARRAY_SIZE(navy_flounder_cache_info);
+ break;
case CHIP_DIMGREY_CAVEFISH:
- pcache_info = navi10_cache_info;
- num_of_cache_types = ARRAY_SIZE(navi10_cache_info);
+ pcache_info = dimgrey_cavefish_cache_info;
+ num_of_cache_types = ARRAY_SIZE(dimgrey_cavefish_cache_info);
break;
case CHIP_VANGOGH:
pcache_info = vangogh_cache_info;
@@ -709,40 +1340,58 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev,
*/
for (ct = 0; ct < num_of_cache_types; ct++) {
- cu_processor_id = gpu_processor_id;
- for (i = 0; i < cu_info->num_shader_engines; i++) {
- for (j = 0; j < cu_info->num_shader_arrays_per_engine;
- j++) {
- for (k = 0; k < cu_info->num_cu_per_sh;
- k += pcache_info[ct].num_cu_shared) {
-
- ret = fill_in_pcache(pcache,
- pcache_info,
- cu_info,
- mem_available,
- cu_info->cu_bitmap[i % 4][j + i / 4],
- ct,
- cu_processor_id,
- k);
-
- if (ret < 0)
- break;
-
- if (!ret) {
- pcache++;
- (*num_of_entries)++;
- mem_available -=
- sizeof(*pcache);
- (*size_filled) +=
- sizeof(*pcache);
- }
-
- /* Move to next CU block */
- cu_processor_id +=
- pcache_info[ct].num_cu_shared;
- }
- }
+ cu_processor_id = gpu_processor_id;
+ if (pcache_info[ct].cache_level == 1) {
+ for (i = 0; i < cu_info->num_shader_engines; i++) {
+ for (j = 0; j < cu_info->num_shader_arrays_per_engine; j++) {
+ for (k = 0; k < cu_info->num_cu_per_sh;
+ k += pcache_info[ct].num_cu_shared) {
+ ret = fill_in_l1_pcache(pcache,
+ pcache_info,
+ cu_info,
+ mem_available,
+ cu_info->cu_bitmap[i % 4][j + i / 4],
+ ct,
+ cu_processor_id,
+ k);
+
+ if (ret < 0)
+ break;
+
+ if (!ret) {
+ pcache++;
+ (*num_of_entries)++;
+ mem_available -= sizeof(*pcache);
+ (*size_filled) += sizeof(*pcache);
+ }
+
+ /* Move to next CU block */
+ num_cu_shared = ((k + pcache_info[ct].num_cu_shared) <=
+ cu_info->num_cu_per_sh) ?
+ pcache_info[ct].num_cu_shared :
+ (cu_info->num_cu_per_sh - k);
+ cu_processor_id += num_cu_shared;
}
+ }
+ }
+ } else {
+ ret = fill_in_l2_l3_pcache(pcache,
+ pcache_info,
+ cu_info,
+ mem_available,
+ ct,
+ cu_processor_id);
+
+ if (ret < 0)
+ break;
+
+ if (!ret) {
+ pcache++;
+ (*num_of_entries)++;
+ mem_available -= sizeof(*pcache);
+ (*size_filled) += sizeof(*pcache);
+ }
+ }
}
pr_debug("Added [%d] GPU cache entries\n", *num_of_entries);
--
2.25.1
More information about the amd-gfx
mailing list