[PATCH 2/2] drm/amdkfd: Fix the warning of array-index-out-of-bounds
Felix Kuehling
felix.kuehling at amd.com
Mon Oct 24 21:06:53 UTC 2022
On 2022-10-24 07:26, Ma Jun wrote:
> For some GPUs with more CUs, the original sibling_map[32]
> in struct crat_subtype_cache is not large enough
> to hold the cache information when creating the VCRAT table,
> so fill the cache info into struct kfd_cache_properties_ext
> directly to fix the problem.
>
> At the same time, a new directory
> "/sys/class/kfd/kfd/topology/nodes/*nodes_num*/caches_ext"
> is created to expose the cache information.
Is this necessary because existing user mode cannot handle the larger
sibling map? If so, you'll also need to update the Thunk to parse the
information from caches_ext. Do you have a link to that patch?
If user mode can handle a larger sibling map in the existing sysfs
interface, we should not create a new one.
Another alternative is to add more lines to the existing cache info
sysfs entries. Older user mode will probably ignore the ones it doesn't
know about.
Regards,
Felix
>
> The original directory "cache" is reserved for GPUs that use a real CRAT
> table.
>
> Signed-off-by: Ma Jun <Jun.Ma2 at amd.com>
> ---
> drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 1229 +-------------------
> drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 1246 ++++++++++++++++++++-
> drivers/gpu/drm/amd/amdkfd/kfd_topology.h | 21 +
> 3 files changed, 1261 insertions(+), 1235 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
> index 4857ec5b9f46..e6928c60338e 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
> @@ -30,799 +30,6 @@
> #include "amdgpu.h"
> #include "amdgpu_amdkfd.h"
>
> -/* Static table to describe GPU Cache information */
> -struct kfd_gpu_cache_info {
> - uint32_t cache_size;
> - uint32_t cache_level;
> - uint32_t flags;
> - /* Indicates how many Compute Units share this cache
> - * within a SA. Value = 1 indicates the cache is not shared
> - */
> - uint32_t num_cu_shared;
> -};
> -
> -static struct kfd_gpu_cache_info kaveri_cache_info[] = {
> - {
> - /* TCP L1 Cache per CU */
> - .cache_size = 16,
> - .cache_level = 1,
> - .flags = (CRAT_CACHE_FLAGS_ENABLED |
> - CRAT_CACHE_FLAGS_DATA_CACHE |
> - CRAT_CACHE_FLAGS_SIMD_CACHE),
> - .num_cu_shared = 1,
> - },
> - {
> - /* Scalar L1 Instruction Cache (in SQC module) per bank */
> - .cache_size = 16,
> - .cache_level = 1,
> - .flags = (CRAT_CACHE_FLAGS_ENABLED |
> - CRAT_CACHE_FLAGS_INST_CACHE |
> - CRAT_CACHE_FLAGS_SIMD_CACHE),
> - .num_cu_shared = 2,
> - },
> - {
> - /* Scalar L1 Data Cache (in SQC module) per bank */
> - .cache_size = 8,
> - .cache_level = 1,
> - .flags = (CRAT_CACHE_FLAGS_ENABLED |
> - CRAT_CACHE_FLAGS_DATA_CACHE |
> - CRAT_CACHE_FLAGS_SIMD_CACHE),
> - .num_cu_shared = 2,
> - },
> -
> - /* TODO: Add L2 Cache information */
> -};
> -
> -
> -static struct kfd_gpu_cache_info carrizo_cache_info[] = {
> - {
> - /* TCP L1 Cache per CU */
> - .cache_size = 16,
> - .cache_level = 1,
> - .flags = (CRAT_CACHE_FLAGS_ENABLED |
> - CRAT_CACHE_FLAGS_DATA_CACHE |
> - CRAT_CACHE_FLAGS_SIMD_CACHE),
> - .num_cu_shared = 1,
> - },
> - {
> - /* Scalar L1 Instruction Cache (in SQC module) per bank */
> - .cache_size = 8,
> - .cache_level = 1,
> - .flags = (CRAT_CACHE_FLAGS_ENABLED |
> - CRAT_CACHE_FLAGS_INST_CACHE |
> - CRAT_CACHE_FLAGS_SIMD_CACHE),
> - .num_cu_shared = 4,
> - },
> - {
> - /* Scalar L1 Data Cache (in SQC module) per bank. */
> - .cache_size = 4,
> - .cache_level = 1,
> - .flags = (CRAT_CACHE_FLAGS_ENABLED |
> - CRAT_CACHE_FLAGS_DATA_CACHE |
> - CRAT_CACHE_FLAGS_SIMD_CACHE),
> - .num_cu_shared = 4,
> - },
> -
> - /* TODO: Add L2 Cache information */
> -};
> -
> -#define hawaii_cache_info kaveri_cache_info
> -#define tonga_cache_info carrizo_cache_info
> -#define fiji_cache_info carrizo_cache_info
> -#define polaris10_cache_info carrizo_cache_info
> -#define polaris11_cache_info carrizo_cache_info
> -#define polaris12_cache_info carrizo_cache_info
> -#define vegam_cache_info carrizo_cache_info
> -
> -/* NOTE: L1 cache information has been updated and L2/L3
> - * cache information has been added for Vega10 and
> - * newer ASICs. The unit for cache_size is KiB.
> - * In future, check & update cache details
> - * for every new ASIC is required.
> - */
> -
> -static struct kfd_gpu_cache_info vega10_cache_info[] = {
> - {
> - /* TCP L1 Cache per CU */
> - .cache_size = 16,
> - .cache_level = 1,
> - .flags = (CRAT_CACHE_FLAGS_ENABLED |
> - CRAT_CACHE_FLAGS_DATA_CACHE |
> - CRAT_CACHE_FLAGS_SIMD_CACHE),
> - .num_cu_shared = 1,
> - },
> - {
> - /* Scalar L1 Instruction Cache per SQC */
> - .cache_size = 32,
> - .cache_level = 1,
> - .flags = (CRAT_CACHE_FLAGS_ENABLED |
> - CRAT_CACHE_FLAGS_INST_CACHE |
> - CRAT_CACHE_FLAGS_SIMD_CACHE),
> - .num_cu_shared = 3,
> - },
> - {
> - /* Scalar L1 Data Cache per SQC */
> - .cache_size = 16,
> - .cache_level = 1,
> - .flags = (CRAT_CACHE_FLAGS_ENABLED |
> - CRAT_CACHE_FLAGS_DATA_CACHE |
> - CRAT_CACHE_FLAGS_SIMD_CACHE),
> - .num_cu_shared = 3,
> - },
> - {
> - /* L2 Data Cache per GPU (Total Tex Cache) */
> - .cache_size = 4096,
> - .cache_level = 2,
> - .flags = (CRAT_CACHE_FLAGS_ENABLED |
> - CRAT_CACHE_FLAGS_DATA_CACHE |
> - CRAT_CACHE_FLAGS_SIMD_CACHE),
> - .num_cu_shared = 16,
> - },
> -};
> -
> -static struct kfd_gpu_cache_info raven_cache_info[] = {
> - {
> - /* TCP L1 Cache per CU */
> - .cache_size = 16,
> - .cache_level = 1,
> - .flags = (CRAT_CACHE_FLAGS_ENABLED |
> - CRAT_CACHE_FLAGS_DATA_CACHE |
> - CRAT_CACHE_FLAGS_SIMD_CACHE),
> - .num_cu_shared = 1,
> - },
> - {
> - /* Scalar L1 Instruction Cache per SQC */
> - .cache_size = 32,
> - .cache_level = 1,
> - .flags = (CRAT_CACHE_FLAGS_ENABLED |
> - CRAT_CACHE_FLAGS_INST_CACHE |
> - CRAT_CACHE_FLAGS_SIMD_CACHE),
> - .num_cu_shared = 3,
> - },
> - {
> - /* Scalar L1 Data Cache per SQC */
> - .cache_size = 16,
> - .cache_level = 1,
> - .flags = (CRAT_CACHE_FLAGS_ENABLED |
> - CRAT_CACHE_FLAGS_DATA_CACHE |
> - CRAT_CACHE_FLAGS_SIMD_CACHE),
> - .num_cu_shared = 3,
> - },
> - {
> - /* L2 Data Cache per GPU (Total Tex Cache) */
> - .cache_size = 1024,
> - .cache_level = 2,
> - .flags = (CRAT_CACHE_FLAGS_ENABLED |
> - CRAT_CACHE_FLAGS_DATA_CACHE |
> - CRAT_CACHE_FLAGS_SIMD_CACHE),
> - .num_cu_shared = 11,
> - },
> -};
> -
> -static struct kfd_gpu_cache_info renoir_cache_info[] = {
> - {
> - /* TCP L1 Cache per CU */
> - .cache_size = 16,
> - .cache_level = 1,
> - .flags = (CRAT_CACHE_FLAGS_ENABLED |
> - CRAT_CACHE_FLAGS_DATA_CACHE |
> - CRAT_CACHE_FLAGS_SIMD_CACHE),
> - .num_cu_shared = 1,
> - },
> - {
> - /* Scalar L1 Instruction Cache per SQC */
> - .cache_size = 32,
> - .cache_level = 1,
> - .flags = (CRAT_CACHE_FLAGS_ENABLED |
> - CRAT_CACHE_FLAGS_INST_CACHE |
> - CRAT_CACHE_FLAGS_SIMD_CACHE),
> - .num_cu_shared = 3,
> - },
> - {
> - /* Scalar L1 Data Cache per SQC */
> - .cache_size = 16,
> - .cache_level = 1,
> - .flags = (CRAT_CACHE_FLAGS_ENABLED |
> - CRAT_CACHE_FLAGS_DATA_CACHE |
> - CRAT_CACHE_FLAGS_SIMD_CACHE),
> - .num_cu_shared = 3,
> - },
> - {
> - /* L2 Data Cache per GPU (Total Tex Cache) */
> - .cache_size = 1024,
> - .cache_level = 2,
> - .flags = (CRAT_CACHE_FLAGS_ENABLED |
> - CRAT_CACHE_FLAGS_DATA_CACHE |
> - CRAT_CACHE_FLAGS_SIMD_CACHE),
> - .num_cu_shared = 8,
> - },
> -};
> -
> -static struct kfd_gpu_cache_info vega12_cache_info[] = {
> - {
> - /* TCP L1 Cache per CU */
> - .cache_size = 16,
> - .cache_level = 1,
> - .flags = (CRAT_CACHE_FLAGS_ENABLED |
> - CRAT_CACHE_FLAGS_DATA_CACHE |
> - CRAT_CACHE_FLAGS_SIMD_CACHE),
> - .num_cu_shared = 1,
> - },
> - {
> - /* Scalar L1 Instruction Cache per SQC */
> - .cache_size = 32,
> - .cache_level = 1,
> - .flags = (CRAT_CACHE_FLAGS_ENABLED |
> - CRAT_CACHE_FLAGS_INST_CACHE |
> - CRAT_CACHE_FLAGS_SIMD_CACHE),
> - .num_cu_shared = 3,
> - },
> - {
> - /* Scalar L1 Data Cache per SQC */
> - .cache_size = 16,
> - .cache_level = 1,
> - .flags = (CRAT_CACHE_FLAGS_ENABLED |
> - CRAT_CACHE_FLAGS_DATA_CACHE |
> - CRAT_CACHE_FLAGS_SIMD_CACHE),
> - .num_cu_shared = 3,
> - },
> - {
> - /* L2 Data Cache per GPU (Total Tex Cache) */
> - .cache_size = 2048,
> - .cache_level = 2,
> - .flags = (CRAT_CACHE_FLAGS_ENABLED |
> - CRAT_CACHE_FLAGS_DATA_CACHE |
> - CRAT_CACHE_FLAGS_SIMD_CACHE),
> - .num_cu_shared = 5,
> - },
> -};
> -
> -static struct kfd_gpu_cache_info vega20_cache_info[] = {
> - {
> - /* TCP L1 Cache per CU */
> - .cache_size = 16,
> - .cache_level = 1,
> - .flags = (CRAT_CACHE_FLAGS_ENABLED |
> - CRAT_CACHE_FLAGS_DATA_CACHE |
> - CRAT_CACHE_FLAGS_SIMD_CACHE),
> - .num_cu_shared = 1,
> - },
> - {
> - /* Scalar L1 Instruction Cache per SQC */
> - .cache_size = 32,
> - .cache_level = 1,
> - .flags = (CRAT_CACHE_FLAGS_ENABLED |
> - CRAT_CACHE_FLAGS_INST_CACHE |
> - CRAT_CACHE_FLAGS_SIMD_CACHE),
> - .num_cu_shared = 3,
> - },
> - {
> - /* Scalar L1 Data Cache per SQC */
> - .cache_size = 16,
> - .cache_level = 1,
> - .flags = (CRAT_CACHE_FLAGS_ENABLED |
> - CRAT_CACHE_FLAGS_DATA_CACHE |
> - CRAT_CACHE_FLAGS_SIMD_CACHE),
> - .num_cu_shared = 3,
> - },
> - {
> - /* L2 Data Cache per GPU (Total Tex Cache) */
> - .cache_size = 8192,
> - .cache_level = 2,
> - .flags = (CRAT_CACHE_FLAGS_ENABLED |
> - CRAT_CACHE_FLAGS_DATA_CACHE |
> - CRAT_CACHE_FLAGS_SIMD_CACHE),
> - .num_cu_shared = 16,
> - },
> -};
> -
> -static struct kfd_gpu_cache_info aldebaran_cache_info[] = {
> - {
> - /* TCP L1 Cache per CU */
> - .cache_size = 16,
> - .cache_level = 1,
> - .flags = (CRAT_CACHE_FLAGS_ENABLED |
> - CRAT_CACHE_FLAGS_DATA_CACHE |
> - CRAT_CACHE_FLAGS_SIMD_CACHE),
> - .num_cu_shared = 1,
> - },
> - {
> - /* Scalar L1 Instruction Cache per SQC */
> - .cache_size = 32,
> - .cache_level = 1,
> - .flags = (CRAT_CACHE_FLAGS_ENABLED |
> - CRAT_CACHE_FLAGS_INST_CACHE |
> - CRAT_CACHE_FLAGS_SIMD_CACHE),
> - .num_cu_shared = 2,
> - },
> - {
> - /* Scalar L1 Data Cache per SQC */
> - .cache_size = 16,
> - .cache_level = 1,
> - .flags = (CRAT_CACHE_FLAGS_ENABLED |
> - CRAT_CACHE_FLAGS_DATA_CACHE |
> - CRAT_CACHE_FLAGS_SIMD_CACHE),
> - .num_cu_shared = 2,
> - },
> - {
> - /* L2 Data Cache per GPU (Total Tex Cache) */
> - .cache_size = 8192,
> - .cache_level = 2,
> - .flags = (CRAT_CACHE_FLAGS_ENABLED |
> - CRAT_CACHE_FLAGS_DATA_CACHE |
> - CRAT_CACHE_FLAGS_SIMD_CACHE),
> - .num_cu_shared = 14,
> - },
> -};
> -
> -static struct kfd_gpu_cache_info navi10_cache_info[] = {
> - {
> - /* TCP L1 Cache per CU */
> - .cache_size = 16,
> - .cache_level = 1,
> - .flags = (CRAT_CACHE_FLAGS_ENABLED |
> - CRAT_CACHE_FLAGS_DATA_CACHE |
> - CRAT_CACHE_FLAGS_SIMD_CACHE),
> - .num_cu_shared = 1,
> - },
> - {
> - /* Scalar L1 Instruction Cache per SQC */
> - .cache_size = 32,
> - .cache_level = 1,
> - .flags = (CRAT_CACHE_FLAGS_ENABLED |
> - CRAT_CACHE_FLAGS_INST_CACHE |
> - CRAT_CACHE_FLAGS_SIMD_CACHE),
> - .num_cu_shared = 2,
> - },
> - {
> - /* Scalar L1 Data Cache per SQC */
> - .cache_size = 16,
> - .cache_level = 1,
> - .flags = (CRAT_CACHE_FLAGS_ENABLED |
> - CRAT_CACHE_FLAGS_DATA_CACHE |
> - CRAT_CACHE_FLAGS_SIMD_CACHE),
> - .num_cu_shared = 2,
> - },
> - {
> - /* GL1 Data Cache per SA */
> - .cache_size = 128,
> - .cache_level = 1,
> - .flags = (CRAT_CACHE_FLAGS_ENABLED |
> - CRAT_CACHE_FLAGS_DATA_CACHE |
> - CRAT_CACHE_FLAGS_SIMD_CACHE),
> - .num_cu_shared = 10,
> - },
> - {
> - /* L2 Data Cache per GPU (Total Tex Cache) */
> - .cache_size = 4096,
> - .cache_level = 2,
> - .flags = (CRAT_CACHE_FLAGS_ENABLED |
> - CRAT_CACHE_FLAGS_DATA_CACHE |
> - CRAT_CACHE_FLAGS_SIMD_CACHE),
> - .num_cu_shared = 10,
> - },
> -};
> -
> -static struct kfd_gpu_cache_info vangogh_cache_info[] = {
> - {
> - /* TCP L1 Cache per CU */
> - .cache_size = 16,
> - .cache_level = 1,
> - .flags = (CRAT_CACHE_FLAGS_ENABLED |
> - CRAT_CACHE_FLAGS_DATA_CACHE |
> - CRAT_CACHE_FLAGS_SIMD_CACHE),
> - .num_cu_shared = 1,
> - },
> - {
> - /* Scalar L1 Instruction Cache per SQC */
> - .cache_size = 32,
> - .cache_level = 1,
> - .flags = (CRAT_CACHE_FLAGS_ENABLED |
> - CRAT_CACHE_FLAGS_INST_CACHE |
> - CRAT_CACHE_FLAGS_SIMD_CACHE),
> - .num_cu_shared = 2,
> - },
> - {
> - /* Scalar L1 Data Cache per SQC */
> - .cache_size = 16,
> - .cache_level = 1,
> - .flags = (CRAT_CACHE_FLAGS_ENABLED |
> - CRAT_CACHE_FLAGS_DATA_CACHE |
> - CRAT_CACHE_FLAGS_SIMD_CACHE),
> - .num_cu_shared = 2,
> - },
> - {
> - /* GL1 Data Cache per SA */
> - .cache_size = 128,
> - .cache_level = 1,
> - .flags = (CRAT_CACHE_FLAGS_ENABLED |
> - CRAT_CACHE_FLAGS_DATA_CACHE |
> - CRAT_CACHE_FLAGS_SIMD_CACHE),
> - .num_cu_shared = 8,
> - },
> - {
> - /* L2 Data Cache per GPU (Total Tex Cache) */
> - .cache_size = 1024,
> - .cache_level = 2,
> - .flags = (CRAT_CACHE_FLAGS_ENABLED |
> - CRAT_CACHE_FLAGS_DATA_CACHE |
> - CRAT_CACHE_FLAGS_SIMD_CACHE),
> - .num_cu_shared = 8,
> - },
> -};
> -
> -static struct kfd_gpu_cache_info navi14_cache_info[] = {
> - {
> - /* TCP L1 Cache per CU */
> - .cache_size = 16,
> - .cache_level = 1,
> - .flags = (CRAT_CACHE_FLAGS_ENABLED |
> - CRAT_CACHE_FLAGS_DATA_CACHE |
> - CRAT_CACHE_FLAGS_SIMD_CACHE),
> - .num_cu_shared = 1,
> - },
> - {
> - /* Scalar L1 Instruction Cache per SQC */
> - .cache_size = 32,
> - .cache_level = 1,
> - .flags = (CRAT_CACHE_FLAGS_ENABLED |
> - CRAT_CACHE_FLAGS_INST_CACHE |
> - CRAT_CACHE_FLAGS_SIMD_CACHE),
> - .num_cu_shared = 2,
> - },
> - {
> - /* Scalar L1 Data Cache per SQC */
> - .cache_size = 16,
> - .cache_level = 1,
> - .flags = (CRAT_CACHE_FLAGS_ENABLED |
> - CRAT_CACHE_FLAGS_DATA_CACHE |
> - CRAT_CACHE_FLAGS_SIMD_CACHE),
> - .num_cu_shared = 2,
> - },
> - {
> - /* GL1 Data Cache per SA */
> - .cache_size = 128,
> - .cache_level = 1,
> - .flags = (CRAT_CACHE_FLAGS_ENABLED |
> - CRAT_CACHE_FLAGS_DATA_CACHE |
> - CRAT_CACHE_FLAGS_SIMD_CACHE),
> - .num_cu_shared = 12,
> - },
> - {
> - /* L2 Data Cache per GPU (Total Tex Cache) */
> - .cache_size = 2048,
> - .cache_level = 2,
> - .flags = (CRAT_CACHE_FLAGS_ENABLED |
> - CRAT_CACHE_FLAGS_DATA_CACHE |
> - CRAT_CACHE_FLAGS_SIMD_CACHE),
> - .num_cu_shared = 12,
> - },
> -};
> -
> -static struct kfd_gpu_cache_info sienna_cichlid_cache_info[] = {
> - {
> - /* TCP L1 Cache per CU */
> - .cache_size = 16,
> - .cache_level = 1,
> - .flags = (CRAT_CACHE_FLAGS_ENABLED |
> - CRAT_CACHE_FLAGS_DATA_CACHE |
> - CRAT_CACHE_FLAGS_SIMD_CACHE),
> - .num_cu_shared = 1,
> - },
> - {
> - /* Scalar L1 Instruction Cache per SQC */
> - .cache_size = 32,
> - .cache_level = 1,
> - .flags = (CRAT_CACHE_FLAGS_ENABLED |
> - CRAT_CACHE_FLAGS_INST_CACHE |
> - CRAT_CACHE_FLAGS_SIMD_CACHE),
> - .num_cu_shared = 2,
> - },
> - {
> - /* Scalar L1 Data Cache per SQC */
> - .cache_size = 16,
> - .cache_level = 1,
> - .flags = (CRAT_CACHE_FLAGS_ENABLED |
> - CRAT_CACHE_FLAGS_DATA_CACHE |
> - CRAT_CACHE_FLAGS_SIMD_CACHE),
> - .num_cu_shared = 2,
> - },
> - {
> - /* GL1 Data Cache per SA */
> - .cache_size = 128,
> - .cache_level = 1,
> - .flags = (CRAT_CACHE_FLAGS_ENABLED |
> - CRAT_CACHE_FLAGS_DATA_CACHE |
> - CRAT_CACHE_FLAGS_SIMD_CACHE),
> - .num_cu_shared = 10,
> - },
> - {
> - /* L2 Data Cache per GPU (Total Tex Cache) */
> - .cache_size = 4096,
> - .cache_level = 2,
> - .flags = (CRAT_CACHE_FLAGS_ENABLED |
> - CRAT_CACHE_FLAGS_DATA_CACHE |
> - CRAT_CACHE_FLAGS_SIMD_CACHE),
> - .num_cu_shared = 10,
> - },
> - {
> - /* L3 Data Cache per GPU */
> - .cache_size = 128*1024,
> - .cache_level = 3,
> - .flags = (CRAT_CACHE_FLAGS_ENABLED |
> - CRAT_CACHE_FLAGS_DATA_CACHE |
> - CRAT_CACHE_FLAGS_SIMD_CACHE),
> - .num_cu_shared = 10,
> - },
> -};
> -
> -static struct kfd_gpu_cache_info navy_flounder_cache_info[] = {
> - {
> - /* TCP L1 Cache per CU */
> - .cache_size = 16,
> - .cache_level = 1,
> - .flags = (CRAT_CACHE_FLAGS_ENABLED |
> - CRAT_CACHE_FLAGS_DATA_CACHE |
> - CRAT_CACHE_FLAGS_SIMD_CACHE),
> - .num_cu_shared = 1,
> - },
> - {
> - /* Scalar L1 Instruction Cache per SQC */
> - .cache_size = 32,
> - .cache_level = 1,
> - .flags = (CRAT_CACHE_FLAGS_ENABLED |
> - CRAT_CACHE_FLAGS_INST_CACHE |
> - CRAT_CACHE_FLAGS_SIMD_CACHE),
> - .num_cu_shared = 2,
> - },
> - {
> - /* Scalar L1 Data Cache per SQC */
> - .cache_size = 16,
> - .cache_level = 1,
> - .flags = (CRAT_CACHE_FLAGS_ENABLED |
> - CRAT_CACHE_FLAGS_DATA_CACHE |
> - CRAT_CACHE_FLAGS_SIMD_CACHE),
> - .num_cu_shared = 2,
> - },
> - {
> - /* GL1 Data Cache per SA */
> - .cache_size = 128,
> - .cache_level = 1,
> - .flags = (CRAT_CACHE_FLAGS_ENABLED |
> - CRAT_CACHE_FLAGS_DATA_CACHE |
> - CRAT_CACHE_FLAGS_SIMD_CACHE),
> - .num_cu_shared = 10,
> - },
> - {
> - /* L2 Data Cache per GPU (Total Tex Cache) */
> - .cache_size = 3072,
> - .cache_level = 2,
> - .flags = (CRAT_CACHE_FLAGS_ENABLED |
> - CRAT_CACHE_FLAGS_DATA_CACHE |
> - CRAT_CACHE_FLAGS_SIMD_CACHE),
> - .num_cu_shared = 10,
> - },
> - {
> - /* L3 Data Cache per GPU */
> - .cache_size = 96*1024,
> - .cache_level = 3,
> - .flags = (CRAT_CACHE_FLAGS_ENABLED |
> - CRAT_CACHE_FLAGS_DATA_CACHE |
> - CRAT_CACHE_FLAGS_SIMD_CACHE),
> - .num_cu_shared = 10,
> - },
> -};
> -
> -static struct kfd_gpu_cache_info dimgrey_cavefish_cache_info[] = {
> - {
> - /* TCP L1 Cache per CU */
> - .cache_size = 16,
> - .cache_level = 1,
> - .flags = (CRAT_CACHE_FLAGS_ENABLED |
> - CRAT_CACHE_FLAGS_DATA_CACHE |
> - CRAT_CACHE_FLAGS_SIMD_CACHE),
> - .num_cu_shared = 1,
> - },
> - {
> - /* Scalar L1 Instruction Cache per SQC */
> - .cache_size = 32,
> - .cache_level = 1,
> - .flags = (CRAT_CACHE_FLAGS_ENABLED |
> - CRAT_CACHE_FLAGS_INST_CACHE |
> - CRAT_CACHE_FLAGS_SIMD_CACHE),
> - .num_cu_shared = 2,
> - },
> - {
> - /* Scalar L1 Data Cache per SQC */
> - .cache_size = 16,
> - .cache_level = 1,
> - .flags = (CRAT_CACHE_FLAGS_ENABLED |
> - CRAT_CACHE_FLAGS_DATA_CACHE |
> - CRAT_CACHE_FLAGS_SIMD_CACHE),
> - .num_cu_shared = 2,
> - },
> - {
> - /* GL1 Data Cache per SA */
> - .cache_size = 128,
> - .cache_level = 1,
> - .flags = (CRAT_CACHE_FLAGS_ENABLED |
> - CRAT_CACHE_FLAGS_DATA_CACHE |
> - CRAT_CACHE_FLAGS_SIMD_CACHE),
> - .num_cu_shared = 8,
> - },
> - {
> - /* L2 Data Cache per GPU (Total Tex Cache) */
> - .cache_size = 2048,
> - .cache_level = 2,
> - .flags = (CRAT_CACHE_FLAGS_ENABLED |
> - CRAT_CACHE_FLAGS_DATA_CACHE |
> - CRAT_CACHE_FLAGS_SIMD_CACHE),
> - .num_cu_shared = 8,
> - },
> - {
> - /* L3 Data Cache per GPU */
> - .cache_size = 32*1024,
> - .cache_level = 3,
> - .flags = (CRAT_CACHE_FLAGS_ENABLED |
> - CRAT_CACHE_FLAGS_DATA_CACHE |
> - CRAT_CACHE_FLAGS_SIMD_CACHE),
> - .num_cu_shared = 8,
> - },
> -};
> -
> -static struct kfd_gpu_cache_info beige_goby_cache_info[] = {
> - {
> - /* TCP L1 Cache per CU */
> - .cache_size = 16,
> - .cache_level = 1,
> - .flags = (CRAT_CACHE_FLAGS_ENABLED |
> - CRAT_CACHE_FLAGS_DATA_CACHE |
> - CRAT_CACHE_FLAGS_SIMD_CACHE),
> - .num_cu_shared = 1,
> - },
> - {
> - /* Scalar L1 Instruction Cache per SQC */
> - .cache_size = 32,
> - .cache_level = 1,
> - .flags = (CRAT_CACHE_FLAGS_ENABLED |
> - CRAT_CACHE_FLAGS_INST_CACHE |
> - CRAT_CACHE_FLAGS_SIMD_CACHE),
> - .num_cu_shared = 2,
> - },
> - {
> - /* Scalar L1 Data Cache per SQC */
> - .cache_size = 16,
> - .cache_level = 1,
> - .flags = (CRAT_CACHE_FLAGS_ENABLED |
> - CRAT_CACHE_FLAGS_DATA_CACHE |
> - CRAT_CACHE_FLAGS_SIMD_CACHE),
> - .num_cu_shared = 2,
> - },
> - {
> - /* GL1 Data Cache per SA */
> - .cache_size = 128,
> - .cache_level = 1,
> - .flags = (CRAT_CACHE_FLAGS_ENABLED |
> - CRAT_CACHE_FLAGS_DATA_CACHE |
> - CRAT_CACHE_FLAGS_SIMD_CACHE),
> - .num_cu_shared = 8,
> - },
> - {
> - /* L2 Data Cache per GPU (Total Tex Cache) */
> - .cache_size = 1024,
> - .cache_level = 2,
> - .flags = (CRAT_CACHE_FLAGS_ENABLED |
> - CRAT_CACHE_FLAGS_DATA_CACHE |
> - CRAT_CACHE_FLAGS_SIMD_CACHE),
> - .num_cu_shared = 8,
> - },
> - {
> - /* L3 Data Cache per GPU */
> - .cache_size = 16*1024,
> - .cache_level = 3,
> - .flags = (CRAT_CACHE_FLAGS_ENABLED |
> - CRAT_CACHE_FLAGS_DATA_CACHE |
> - CRAT_CACHE_FLAGS_SIMD_CACHE),
> - .num_cu_shared = 8,
> - },
> -};
> -
> -static struct kfd_gpu_cache_info yellow_carp_cache_info[] = {
> - {
> - /* TCP L1 Cache per CU */
> - .cache_size = 16,
> - .cache_level = 1,
> - .flags = (CRAT_CACHE_FLAGS_ENABLED |
> - CRAT_CACHE_FLAGS_DATA_CACHE |
> - CRAT_CACHE_FLAGS_SIMD_CACHE),
> - .num_cu_shared = 1,
> - },
> - {
> - /* Scalar L1 Instruction Cache per SQC */
> - .cache_size = 32,
> - .cache_level = 1,
> - .flags = (CRAT_CACHE_FLAGS_ENABLED |
> - CRAT_CACHE_FLAGS_INST_CACHE |
> - CRAT_CACHE_FLAGS_SIMD_CACHE),
> - .num_cu_shared = 2,
> - },
> - {
> - /* Scalar L1 Data Cache per SQC */
> - .cache_size = 16,
> - .cache_level = 1,
> - .flags = (CRAT_CACHE_FLAGS_ENABLED |
> - CRAT_CACHE_FLAGS_DATA_CACHE |
> - CRAT_CACHE_FLAGS_SIMD_CACHE),
> - .num_cu_shared = 2,
> - },
> - {
> - /* GL1 Data Cache per SA */
> - .cache_size = 128,
> - .cache_level = 1,
> - .flags = (CRAT_CACHE_FLAGS_ENABLED |
> - CRAT_CACHE_FLAGS_DATA_CACHE |
> - CRAT_CACHE_FLAGS_SIMD_CACHE),
> - .num_cu_shared = 6,
> - },
> - {
> - /* L2 Data Cache per GPU (Total Tex Cache) */
> - .cache_size = 2048,
> - .cache_level = 2,
> - .flags = (CRAT_CACHE_FLAGS_ENABLED |
> - CRAT_CACHE_FLAGS_DATA_CACHE |
> - CRAT_CACHE_FLAGS_SIMD_CACHE),
> - .num_cu_shared = 6,
> - },
> -};
> -
> -static struct kfd_gpu_cache_info gc_10_3_6_cache_info[] = {
> - {
> - /* TCP L1 Cache per CU */
> - .cache_size = 16,
> - .cache_level = 1,
> - .flags = (CRAT_CACHE_FLAGS_ENABLED |
> - CRAT_CACHE_FLAGS_DATA_CACHE |
> - CRAT_CACHE_FLAGS_SIMD_CACHE),
> - .num_cu_shared = 1,
> - },
> - {
> - /* Scalar L1 Instruction Cache per SQC */
> - .cache_size = 32,
> - .cache_level = 1,
> - .flags = (CRAT_CACHE_FLAGS_ENABLED |
> - CRAT_CACHE_FLAGS_INST_CACHE |
> - CRAT_CACHE_FLAGS_SIMD_CACHE),
> - .num_cu_shared = 2,
> - },
> - {
> - /* Scalar L1 Data Cache per SQC */
> - .cache_size = 16,
> - .cache_level = 1,
> - .flags = (CRAT_CACHE_FLAGS_ENABLED |
> - CRAT_CACHE_FLAGS_DATA_CACHE |
> - CRAT_CACHE_FLAGS_SIMD_CACHE),
> - .num_cu_shared = 2,
> - },
> - {
> - /* GL1 Data Cache per SA */
> - .cache_size = 128,
> - .cache_level = 1,
> - .flags = (CRAT_CACHE_FLAGS_ENABLED |
> - CRAT_CACHE_FLAGS_DATA_CACHE |
> - CRAT_CACHE_FLAGS_SIMD_CACHE),
> - .num_cu_shared = 2,
> - },
> - {
> - /* L2 Data Cache per GPU (Total Tex Cache) */
> - .cache_size = 256,
> - .cache_level = 2,
> - .flags = (CRAT_CACHE_FLAGS_ENABLED |
> - CRAT_CACHE_FLAGS_DATA_CACHE |
> - CRAT_CACHE_FLAGS_SIMD_CACHE),
> - .num_cu_shared = 2,
> - },
> -};
> -
> static void kfd_populated_cu_info_cpu(struct kfd_topology_device *dev,
> struct crat_subtype_computeunit *cu)
> {
> @@ -1223,419 +430,6 @@ int kfd_parse_crat_table(void *crat_image, struct list_head *device_list,
> return ret;
> }
>
> -/* Helper function. See kfd_fill_gpu_cache_info for parameter description */
> -static int fill_in_l1_pcache(struct crat_subtype_cache *pcache,
> - struct kfd_gpu_cache_info *pcache_info,
> - struct kfd_cu_info *cu_info,
> - int mem_available,
> - int cu_bitmask,
> - int cache_type, unsigned int cu_processor_id,
> - int cu_block)
> -{
> - unsigned int cu_sibling_map_mask;
> - int first_active_cu;
> -
> - /* First check if enough memory is available */
> - if (sizeof(struct crat_subtype_cache) > mem_available)
> - return -ENOMEM;
> -
> - cu_sibling_map_mask = cu_bitmask;
> - cu_sibling_map_mask >>= cu_block;
> - cu_sibling_map_mask &=
> - ((1 << pcache_info[cache_type].num_cu_shared) - 1);
> - first_active_cu = ffs(cu_sibling_map_mask);
> -
> - /* CU could be inactive. In case of shared cache find the first active
> - * CU. and incase of non-shared cache check if the CU is inactive. If
> - * inactive active skip it
> - */
> - if (first_active_cu) {
> - memset(pcache, 0, sizeof(struct crat_subtype_cache));
> - pcache->type = CRAT_SUBTYPE_CACHE_AFFINITY;
> - pcache->length = sizeof(struct crat_subtype_cache);
> - pcache->flags = pcache_info[cache_type].flags;
> - pcache->processor_id_low = cu_processor_id
> - + (first_active_cu - 1);
> - pcache->cache_level = pcache_info[cache_type].cache_level;
> - pcache->cache_size = pcache_info[cache_type].cache_size;
> -
> - /* Sibling map is w.r.t processor_id_low, so shift out
> - * inactive CU
> - */
> - cu_sibling_map_mask =
> - cu_sibling_map_mask >> (first_active_cu - 1);
> -
> - pcache->sibling_map[0] = (uint8_t)(cu_sibling_map_mask & 0xFF);
> - pcache->sibling_map[1] =
> - (uint8_t)((cu_sibling_map_mask >> 8) & 0xFF);
> - pcache->sibling_map[2] =
> - (uint8_t)((cu_sibling_map_mask >> 16) & 0xFF);
> - pcache->sibling_map[3] =
> - (uint8_t)((cu_sibling_map_mask >> 24) & 0xFF);
> - return 0;
> - }
> - return 1;
> -}
> -
> -/* Helper function. See kfd_fill_gpu_cache_info for parameter description */
> -static int fill_in_l2_l3_pcache(struct crat_subtype_cache *pcache,
> - struct kfd_gpu_cache_info *pcache_info,
> - struct kfd_cu_info *cu_info,
> - int mem_available,
> - int cache_type, unsigned int cu_processor_id)
> -{
> - unsigned int cu_sibling_map_mask;
> - int first_active_cu;
> - int i, j, k;
> -
> - /* First check if enough memory is available */
> - if (sizeof(struct crat_subtype_cache) > mem_available)
> - return -ENOMEM;
> -
> - cu_sibling_map_mask = cu_info->cu_bitmap[0][0];
> - cu_sibling_map_mask &=
> - ((1 << pcache_info[cache_type].num_cu_shared) - 1);
> - first_active_cu = ffs(cu_sibling_map_mask);
> -
> - /* CU could be inactive. In case of shared cache find the first active
> - * CU. and incase of non-shared cache check if the CU is inactive. If
> - * inactive active skip it
> - */
> - if (first_active_cu) {
> - memset(pcache, 0, sizeof(struct crat_subtype_cache));
> - pcache->type = CRAT_SUBTYPE_CACHE_AFFINITY;
> - pcache->length = sizeof(struct crat_subtype_cache);
> - pcache->flags = pcache_info[cache_type].flags;
> - pcache->processor_id_low = cu_processor_id
> - + (first_active_cu - 1);
> - pcache->cache_level = pcache_info[cache_type].cache_level;
> - pcache->cache_size = pcache_info[cache_type].cache_size;
> -
> - /* Sibling map is w.r.t processor_id_low, so shift out
> - * inactive CU
> - */
> - cu_sibling_map_mask =
> - cu_sibling_map_mask >> (first_active_cu - 1);
> - k = 0;
> - for (i = 0; i < cu_info->num_shader_engines; i++) {
> - for (j = 0; j < cu_info->num_shader_arrays_per_engine;
> - j++) {
> - pcache->sibling_map[k] =
> - (uint8_t)(cu_sibling_map_mask & 0xFF);
> - pcache->sibling_map[k+1] =
> - (uint8_t)((cu_sibling_map_mask >> 8) & 0xFF);
> - pcache->sibling_map[k+2] =
> - (uint8_t)((cu_sibling_map_mask >> 16) & 0xFF);
> - pcache->sibling_map[k+3] =
> - (uint8_t)((cu_sibling_map_mask >> 24) & 0xFF);
> - k += 4;
> - cu_sibling_map_mask =
> - cu_info->cu_bitmap[i % 4][j + i / 4];
> - cu_sibling_map_mask &= (
> - (1 << pcache_info[cache_type].num_cu_shared)
> - - 1);
> - }
> - }
> - return 0;
> - }
> - return 1;
> -}
> -
> -#define KFD_MAX_CACHE_TYPES 6
> -
> -static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev,
> - struct kfd_gpu_cache_info *pcache_info)
> -{
> - struct amdgpu_device *adev = kdev->adev;
> - int i = 0;
> -
> - /* TCP L1 Cache per CU */
> - if (adev->gfx.config.gc_tcp_l1_size) {
> - pcache_info[i].cache_size = adev->gfx.config.gc_tcp_l1_size;
> - pcache_info[i].cache_level = 1;
> - pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
> - CRAT_CACHE_FLAGS_DATA_CACHE |
> - CRAT_CACHE_FLAGS_SIMD_CACHE);
> - pcache_info[0].num_cu_shared = adev->gfx.config.gc_num_tcp_per_wpg / 2;
> - i++;
> - }
> - /* Scalar L1 Instruction Cache per SQC */
> - if (adev->gfx.config.gc_l1_instruction_cache_size_per_sqc) {
> - pcache_info[i].cache_size =
> - adev->gfx.config.gc_l1_instruction_cache_size_per_sqc;
> - pcache_info[i].cache_level = 1;
> - pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
> - CRAT_CACHE_FLAGS_INST_CACHE |
> - CRAT_CACHE_FLAGS_SIMD_CACHE);
> - pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_sqc_per_wgp * 2;
> - i++;
> - }
> - /* Scalar L1 Data Cache per SQC */
> - if (adev->gfx.config.gc_l1_data_cache_size_per_sqc) {
> - pcache_info[i].cache_size = adev->gfx.config.gc_l1_data_cache_size_per_sqc;
> - pcache_info[i].cache_level = 1;
> - pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
> - CRAT_CACHE_FLAGS_DATA_CACHE |
> - CRAT_CACHE_FLAGS_SIMD_CACHE);
> - pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_sqc_per_wgp * 2;
> - i++;
> - }
> - /* GL1 Data Cache per SA */
> - if (adev->gfx.config.gc_gl1c_per_sa &&
> - adev->gfx.config.gc_gl1c_size_per_instance) {
> - pcache_info[i].cache_size = adev->gfx.config.gc_gl1c_per_sa *
> - adev->gfx.config.gc_gl1c_size_per_instance;
> - pcache_info[i].cache_level = 1;
> - pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
> - CRAT_CACHE_FLAGS_DATA_CACHE |
> - CRAT_CACHE_FLAGS_SIMD_CACHE);
> - pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh;
> - i++;
> - }
> - /* L2 Data Cache per GPU (Total Tex Cache) */
> - if (adev->gfx.config.gc_gl2c_per_gpu) {
> - pcache_info[i].cache_size = adev->gfx.config.gc_gl2c_per_gpu;
> - pcache_info[i].cache_level = 2;
> - pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
> - CRAT_CACHE_FLAGS_DATA_CACHE |
> - CRAT_CACHE_FLAGS_SIMD_CACHE);
> - pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh;
> - i++;
> - }
> - /* L3 Data Cache per GPU */
> - if (adev->gmc.mall_size) {
> - pcache_info[i].cache_size = adev->gmc.mall_size / 1024;
> - pcache_info[i].cache_level = 3;
> - pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
> - CRAT_CACHE_FLAGS_DATA_CACHE |
> - CRAT_CACHE_FLAGS_SIMD_CACHE);
> - pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh;
> - i++;
> - }
> - return i;
> -}
> -
> -/* kfd_fill_gpu_cache_info - Fill GPU cache info using kfd_gpu_cache_info
> - * tables
> - *
> - * @kdev - [IN] GPU device
> - * @gpu_processor_id - [IN] GPU processor ID to which these caches
> - * associate
> - * @available_size - [IN] Amount of memory available in pcache
> - * @cu_info - [IN] Compute Unit info obtained from KGD
> - * @pcache - [OUT] memory into which cache data is to be filled in.
> - * @size_filled - [OUT] amount of data used up in pcache.
> - * @num_of_entries - [OUT] number of caches added
> - */
> -static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev,
> - int gpu_processor_id,
> - int available_size,
> - struct kfd_cu_info *cu_info,
> - struct crat_subtype_cache *pcache,
> - int *size_filled,
> - int *num_of_entries)
> -{
> - struct kfd_gpu_cache_info *pcache_info;
> - struct kfd_gpu_cache_info cache_info[KFD_MAX_CACHE_TYPES];
> - int num_of_cache_types = 0;
> - int i, j, k;
> - int ct = 0;
> - int mem_available = available_size;
> - unsigned int cu_processor_id;
> - int ret;
> - unsigned int num_cu_shared;
> -
> - switch (kdev->adev->asic_type) {
> - case CHIP_KAVERI:
> - pcache_info = kaveri_cache_info;
> - num_of_cache_types = ARRAY_SIZE(kaveri_cache_info);
> - break;
> - case CHIP_HAWAII:
> - pcache_info = hawaii_cache_info;
> - num_of_cache_types = ARRAY_SIZE(hawaii_cache_info);
> - break;
> - case CHIP_CARRIZO:
> - pcache_info = carrizo_cache_info;
> - num_of_cache_types = ARRAY_SIZE(carrizo_cache_info);
> - break;
> - case CHIP_TONGA:
> - pcache_info = tonga_cache_info;
> - num_of_cache_types = ARRAY_SIZE(tonga_cache_info);
> - break;
> - case CHIP_FIJI:
> - pcache_info = fiji_cache_info;
> - num_of_cache_types = ARRAY_SIZE(fiji_cache_info);
> - break;
> - case CHIP_POLARIS10:
> - pcache_info = polaris10_cache_info;
> - num_of_cache_types = ARRAY_SIZE(polaris10_cache_info);
> - break;
> - case CHIP_POLARIS11:
> - pcache_info = polaris11_cache_info;
> - num_of_cache_types = ARRAY_SIZE(polaris11_cache_info);
> - break;
> - case CHIP_POLARIS12:
> - pcache_info = polaris12_cache_info;
> - num_of_cache_types = ARRAY_SIZE(polaris12_cache_info);
> - break;
> - case CHIP_VEGAM:
> - pcache_info = vegam_cache_info;
> - num_of_cache_types = ARRAY_SIZE(vegam_cache_info);
> - break;
> - default:
> - switch (KFD_GC_VERSION(kdev)) {
> - case IP_VERSION(9, 0, 1):
> - pcache_info = vega10_cache_info;
> - num_of_cache_types = ARRAY_SIZE(vega10_cache_info);
> - break;
> - case IP_VERSION(9, 2, 1):
> - pcache_info = vega12_cache_info;
> - num_of_cache_types = ARRAY_SIZE(vega12_cache_info);
> - break;
> - case IP_VERSION(9, 4, 0):
> - case IP_VERSION(9, 4, 1):
> - pcache_info = vega20_cache_info;
> - num_of_cache_types = ARRAY_SIZE(vega20_cache_info);
> - break;
> - case IP_VERSION(9, 4, 2):
> - pcache_info = aldebaran_cache_info;
> - num_of_cache_types = ARRAY_SIZE(aldebaran_cache_info);
> - break;
> - case IP_VERSION(9, 1, 0):
> - case IP_VERSION(9, 2, 2):
> - pcache_info = raven_cache_info;
> - num_of_cache_types = ARRAY_SIZE(raven_cache_info);
> - break;
> - case IP_VERSION(9, 3, 0):
> - pcache_info = renoir_cache_info;
> - num_of_cache_types = ARRAY_SIZE(renoir_cache_info);
> - break;
> - case IP_VERSION(10, 1, 10):
> - case IP_VERSION(10, 1, 2):
> - case IP_VERSION(10, 1, 3):
> - case IP_VERSION(10, 1, 4):
> - pcache_info = navi10_cache_info;
> - num_of_cache_types = ARRAY_SIZE(navi10_cache_info);
> - break;
> - case IP_VERSION(10, 1, 1):
> - pcache_info = navi14_cache_info;
> - num_of_cache_types = ARRAY_SIZE(navi14_cache_info);
> - break;
> - case IP_VERSION(10, 3, 0):
> - pcache_info = sienna_cichlid_cache_info;
> - num_of_cache_types = ARRAY_SIZE(sienna_cichlid_cache_info);
> - break;
> - case IP_VERSION(10, 3, 2):
> - pcache_info = navy_flounder_cache_info;
> - num_of_cache_types = ARRAY_SIZE(navy_flounder_cache_info);
> - break;
> - case IP_VERSION(10, 3, 4):
> - pcache_info = dimgrey_cavefish_cache_info;
> - num_of_cache_types = ARRAY_SIZE(dimgrey_cavefish_cache_info);
> - break;
> - case IP_VERSION(10, 3, 1):
> - pcache_info = vangogh_cache_info;
> - num_of_cache_types = ARRAY_SIZE(vangogh_cache_info);
> - break;
> - case IP_VERSION(10, 3, 5):
> - pcache_info = beige_goby_cache_info;
> - num_of_cache_types = ARRAY_SIZE(beige_goby_cache_info);
> - break;
> - case IP_VERSION(10, 3, 3):
> - case IP_VERSION(10, 3, 7): /* TODO: Double check these on production silicon */
> - pcache_info = yellow_carp_cache_info;
> - num_of_cache_types = ARRAY_SIZE(yellow_carp_cache_info);
> - break;
> - case IP_VERSION(10, 3, 6):
> - pcache_info = gc_10_3_6_cache_info;
> - num_of_cache_types = ARRAY_SIZE(gc_10_3_6_cache_info);
> - break;
> - case IP_VERSION(11, 0, 0):
> - case IP_VERSION(11, 0, 1):
> - case IP_VERSION(11, 0, 2):
> - case IP_VERSION(11, 0, 3):
> - pcache_info = cache_info;
> - num_of_cache_types =
> - kfd_fill_gpu_cache_info_from_gfx_config(kdev, pcache_info);
> - break;
> - default:
> - return -EINVAL;
> - }
> - }
> -
> - *size_filled = 0;
> - *num_of_entries = 0;
> -
> - /* For each type of cache listed in the kfd_gpu_cache_info table,
> - * go through all available Compute Units.
> - * The [i,j,k] loop will
> - * if kfd_gpu_cache_info.num_cu_shared = 1
> - * will parse through all available CU
> - * If (kfd_gpu_cache_info.num_cu_shared != 1)
> - * then it will consider only one CU from
> - * the shared unit
> - */
> -
> - for (ct = 0; ct < num_of_cache_types; ct++) {
> - cu_processor_id = gpu_processor_id;
> - if (pcache_info[ct].cache_level == 1) {
> - for (i = 0; i < cu_info->num_shader_engines; i++) {
> - for (j = 0; j < cu_info->num_shader_arrays_per_engine; j++) {
> - for (k = 0; k < cu_info->num_cu_per_sh;
> - k += pcache_info[ct].num_cu_shared) {
> - ret = fill_in_l1_pcache(pcache,
> - pcache_info,
> - cu_info,
> - mem_available,
> - cu_info->cu_bitmap[i % 4][j + i / 4],
> - ct,
> - cu_processor_id,
> - k);
> -
> - if (ret < 0)
> - break;
> -
> - if (!ret) {
> - pcache++;
> - (*num_of_entries)++;
> - mem_available -= sizeof(*pcache);
> - (*size_filled) += sizeof(*pcache);
> - }
> -
> - /* Move to next CU block */
> - num_cu_shared = ((k + pcache_info[ct].num_cu_shared) <=
> - cu_info->num_cu_per_sh) ?
> - pcache_info[ct].num_cu_shared :
> - (cu_info->num_cu_per_sh - k);
> - cu_processor_id += num_cu_shared;
> - }
> - }
> - }
> - } else {
> - ret = fill_in_l2_l3_pcache(pcache,
> - pcache_info,
> - cu_info,
> - mem_available,
> - ct,
> - cu_processor_id);
> -
> - if (ret < 0)
> - break;
> -
> - if (!ret) {
> - pcache++;
> - (*num_of_entries)++;
> - mem_available -= sizeof(*pcache);
> - (*size_filled) += sizeof(*pcache);
> - }
> - }
> - }
> -
> - pr_debug("Added [%d] GPU cache entries\n", *num_of_entries);
> -
> - return 0;
> -}
> -
> static bool kfd_ignore_crat(void)
> {
> bool ret;
> @@ -2203,8 +997,6 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
> struct crat_subtype_computeunit *cu;
> struct kfd_cu_info cu_info;
> int avail_size = *size;
> - int num_of_cache_entries = 0;
> - int cache_mem_filled = 0;
> uint32_t nid = 0;
> int ret = 0;
>
> @@ -2304,31 +1096,12 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
> crat_table->length += sizeof(struct crat_subtype_memory);
> crat_table->total_entries++;
>
> - /* TODO: Fill in cache information. This information is NOT readily
> - * available in KGD
> - */
> - sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
> - sub_type_hdr->length);
> - ret = kfd_fill_gpu_cache_info(kdev, cu->processor_id_low,
> - avail_size,
> - &cu_info,
> - (struct crat_subtype_cache *)sub_type_hdr,
> - &cache_mem_filled,
> - &num_of_cache_entries);
> -
> - if (ret < 0)
> - return ret;
> -
> - crat_table->length += cache_mem_filled;
> - crat_table->total_entries += num_of_cache_entries;
> - avail_size -= cache_mem_filled;
> -
> /* Fill in Subtype: IO_LINKS
> * Only direct links are added here which is Link from GPU to
> * its NUMA node. Indirect links are added by userspace.
> */
> sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
> - cache_mem_filled);
> + sub_type_hdr->length);
> ret = kfd_fill_gpu_direct_io_link_to_cpu(&avail_size, kdev,
> (struct crat_subtype_iolink *)sub_type_hdr, proximity_domain);
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
> index e0680d265a66..97e88c35be01 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
> @@ -50,6 +50,747 @@ static struct kfd_system_properties sys_props;
> static DECLARE_RWSEM(topology_lock);
> static uint32_t topology_crat_proximity_domain;
>
> +/* Static table to describe GPU Cache information */
> +struct kfd_gpu_cache_info {
> + uint32_t cache_size;
> + uint32_t cache_level;
> + uint32_t flags;
> + /* Indicates how many Compute Units share this cache
> + * within a SA. Value = 1 indicates the cache is not shared
> + */
> + uint32_t num_cu_shared;
> +};
> +
> +static struct kfd_gpu_cache_info kaveri_cache_info[] = {
> + {
> + /* TCP L1 Cache per CU */
> + .cache_size = 16,
> + .cache_level = 1,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_DATA_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 1,
> + },
> + {
> + /* Scalar L1 Instruction Cache (in SQC module) per bank */
> + .cache_size = 16,
> + .cache_level = 1,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_INST_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 2,
> + },
> + {
> + /* Scalar L1 Data Cache (in SQC module) per bank */
> + .cache_size = 8,
> + .cache_level = 1,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_DATA_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 2,
> + },
> +
> + /* TODO: Add L2 Cache information */
> +};
> +
> +static struct kfd_gpu_cache_info carrizo_cache_info[] = {
> + {
> + /* TCP L1 Cache per CU */
> + .cache_size = 16,
> + .cache_level = 1,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_DATA_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 1,
> + },
> + {
> + /* Scalar L1 Instruction Cache (in SQC module) per bank */
> + .cache_size = 8,
> + .cache_level = 1,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_INST_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 4,
> + },
> + {
> + /* Scalar L1 Data Cache (in SQC module) per bank. */
> + .cache_size = 4,
> + .cache_level = 1,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_DATA_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 4,
> + },
> +
> + /* TODO: Add L2 Cache information */
> +};
> +
> +#define hawaii_cache_info kaveri_cache_info
> +#define tonga_cache_info carrizo_cache_info
> +#define fiji_cache_info carrizo_cache_info
> +#define polaris10_cache_info carrizo_cache_info
> +#define polaris11_cache_info carrizo_cache_info
> +#define polaris12_cache_info carrizo_cache_info
> +#define vegam_cache_info carrizo_cache_info
> +
> +/* NOTE: L1 cache information has been updated and L2/L3
> + * cache information has been added for Vega10 and
> + * newer ASICs. The unit for cache_size is KiB.
> + * In future, cache details must be checked and
> + * updated for every new ASIC.
> + */
> +static struct kfd_gpu_cache_info vega10_cache_info[] = {
> + {
> + /* TCP L1 Cache per CU */
> + .cache_size = 16,
> + .cache_level = 1,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_DATA_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 1,
> + },
> + {
> + /* Scalar L1 Instruction Cache per SQC */
> + .cache_size = 32,
> + .cache_level = 1,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_INST_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 3,
> + },
> + {
> + /* Scalar L1 Data Cache per SQC */
> + .cache_size = 16,
> + .cache_level = 1,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_DATA_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 3,
> + },
> + {
> + /* L2 Data Cache per GPU (Total Tex Cache) */
> + .cache_size = 4096,
> + .cache_level = 2,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_DATA_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 16,
> + },
> +};
> +static struct kfd_gpu_cache_info raven_cache_info[] = {
> + {
> + /* TCP L1 Cache per CU */
> + .cache_size = 16,
> + .cache_level = 1,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_DATA_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 1,
> + },
> + {
> + /* Scalar L1 Instruction Cache per SQC */
> + .cache_size = 32,
> + .cache_level = 1,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_INST_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 3,
> + },
> + {
> + /* Scalar L1 Data Cache per SQC */
> + .cache_size = 16,
> + .cache_level = 1,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_DATA_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 3,
> + },
> + {
> + /* L2 Data Cache per GPU (Total Tex Cache) */
> + .cache_size = 1024,
> + .cache_level = 2,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_DATA_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 11,
> + },
> +};
> +
> +static struct kfd_gpu_cache_info renoir_cache_info[] = {
> + {
> + /* TCP L1 Cache per CU */
> + .cache_size = 16,
> + .cache_level = 1,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_DATA_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 1,
> + },
> + {
> + /* Scalar L1 Instruction Cache per SQC */
> + .cache_size = 32,
> + .cache_level = 1,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_INST_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 3,
> + },
> + {
> + /* Scalar L1 Data Cache per SQC */
> + .cache_size = 16,
> + .cache_level = 1,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_DATA_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 3,
> + },
> + {
> + /* L2 Data Cache per GPU (Total Tex Cache) */
> + .cache_size = 1024,
> + .cache_level = 2,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_DATA_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 8,
> + },
> +};
> +
> +static struct kfd_gpu_cache_info vega12_cache_info[] = {
> + {
> + /* TCP L1 Cache per CU */
> + .cache_size = 16,
> + .cache_level = 1,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_DATA_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 1,
> + },
> + {
> + /* Scalar L1 Instruction Cache per SQC */
> + .cache_size = 32,
> + .cache_level = 1,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_INST_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 3,
> + },
> + {
> + /* Scalar L1 Data Cache per SQC */
> + .cache_size = 16,
> + .cache_level = 1,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_DATA_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 3,
> + },
> + {
> + /* L2 Data Cache per GPU (Total Tex Cache) */
> + .cache_size = 2048,
> + .cache_level = 2,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_DATA_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 5,
> + },
> +};
> +
> +static struct kfd_gpu_cache_info vega20_cache_info[] = {
> + {
> + /* TCP L1 Cache per CU */
> + .cache_size = 16,
> + .cache_level = 1,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_DATA_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 1,
> + },
> + {
> + /* Scalar L1 Instruction Cache per SQC */
> + .cache_size = 32,
> + .cache_level = 1,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_INST_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 3,
> + },
> + {
> + /* Scalar L1 Data Cache per SQC */
> + .cache_size = 16,
> + .cache_level = 1,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_DATA_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 3,
> + },
> + {
> + /* L2 Data Cache per GPU (Total Tex Cache) */
> + .cache_size = 8192,
> + .cache_level = 2,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_DATA_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 16,
> + },
> +};
> +
> +static struct kfd_gpu_cache_info aldebaran_cache_info[] = {
> + {
> + /* TCP L1 Cache per CU */
> + .cache_size = 16,
> + .cache_level = 1,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_DATA_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 1,
> + },
> + {
> + /* Scalar L1 Instruction Cache per SQC */
> + .cache_size = 32,
> + .cache_level = 1,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_INST_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 2,
> + },
> + {
> + /* Scalar L1 Data Cache per SQC */
> + .cache_size = 16,
> + .cache_level = 1,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_DATA_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 2,
> + },
> + {
> + /* L2 Data Cache per GPU (Total Tex Cache) */
> + .cache_size = 8192,
> + .cache_level = 2,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_DATA_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 14,
> + },
> +};
> +
> +static struct kfd_gpu_cache_info navi10_cache_info[] = {
> + {
> + /* TCP L1 Cache per CU */
> + .cache_size = 16,
> + .cache_level = 1,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_DATA_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 1,
> + },
> + {
> + /* Scalar L1 Instruction Cache per SQC */
> + .cache_size = 32,
> + .cache_level = 1,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_INST_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 2,
> + },
> + {
> + /* Scalar L1 Data Cache per SQC */
> + .cache_size = 16,
> + .cache_level = 1,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_DATA_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 2,
> + },
> + {
> + /* GL1 Data Cache per SA */
> + .cache_size = 128,
> + .cache_level = 1,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_DATA_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 10,
> + },
> + {
> + /* L2 Data Cache per GPU (Total Tex Cache) */
> + .cache_size = 4096,
> + .cache_level = 2,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_DATA_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 10,
> + },
> +};
> +
> +static struct kfd_gpu_cache_info vangogh_cache_info[] = {
> + {
> + /* TCP L1 Cache per CU */
> + .cache_size = 16,
> + .cache_level = 1,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_DATA_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 1,
> + },
> + {
> + /* Scalar L1 Instruction Cache per SQC */
> + .cache_size = 32,
> + .cache_level = 1,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_INST_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 2,
> + },
> + {
> + /* Scalar L1 Data Cache per SQC */
> + .cache_size = 16,
> + .cache_level = 1,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_DATA_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 2,
> + },
> + {
> + /* GL1 Data Cache per SA */
> + .cache_size = 128,
> + .cache_level = 1,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_DATA_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 8,
> + },
> + {
> + /* L2 Data Cache per GPU (Total Tex Cache) */
> + .cache_size = 1024,
> + .cache_level = 2,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_DATA_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 8,
> + },
> +};
> +
> +static struct kfd_gpu_cache_info navi14_cache_info[] = {
> + {
> + /* TCP L1 Cache per CU */
> + .cache_size = 16,
> + .cache_level = 1,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_DATA_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 1,
> + },
> + {
> + /* Scalar L1 Instruction Cache per SQC */
> + .cache_size = 32,
> + .cache_level = 1,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_INST_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 2,
> + },
> + {
> + /* Scalar L1 Data Cache per SQC */
> + .cache_size = 16,
> + .cache_level = 1,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_DATA_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 2,
> + },
> + {
> + /* GL1 Data Cache per SA */
> + .cache_size = 128,
> + .cache_level = 1,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_DATA_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 12,
> + },
> + {
> + /* L2 Data Cache per GPU (Total Tex Cache) */
> + .cache_size = 2048,
> + .cache_level = 2,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_DATA_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 12,
> + },
> +};
> +
> +static struct kfd_gpu_cache_info sienna_cichlid_cache_info[] = {
> + {
> + /* TCP L1 Cache per CU */
> + .cache_size = 16,
> + .cache_level = 1,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_DATA_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 1,
> + },
> + {
> + /* Scalar L1 Instruction Cache per SQC */
> + .cache_size = 32,
> + .cache_level = 1,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_INST_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 2,
> + },
> + {
> + /* Scalar L1 Data Cache per SQC */
> + .cache_size = 16,
> + .cache_level = 1,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_DATA_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 2,
> + },
> + {
> + /* GL1 Data Cache per SA */
> + .cache_size = 128,
> + .cache_level = 1,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_DATA_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 10,
> + },
> + {
> + /* L2 Data Cache per GPU (Total Tex Cache) */
> + .cache_size = 4096,
> + .cache_level = 2,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_DATA_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 10,
> + },
> + {
> + /* L3 Data Cache per GPU */
> + .cache_size = 128*1024,
> + .cache_level = 3,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_DATA_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 10,
> + },
> +};
> +
> +static struct kfd_gpu_cache_info navy_flounder_cache_info[] = {
> + {
> + /* TCP L1 Cache per CU */
> + .cache_size = 16,
> + .cache_level = 1,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_DATA_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 1,
> + },
> + {
> + /* Scalar L1 Instruction Cache per SQC */
> + .cache_size = 32,
> + .cache_level = 1,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_INST_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 2,
> + },
> + {
> + /* Scalar L1 Data Cache per SQC */
> + .cache_size = 16,
> + .cache_level = 1,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_DATA_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 2,
> + },
> + {
> + /* GL1 Data Cache per SA */
> + .cache_size = 128,
> + .cache_level = 1,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_DATA_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 10,
> + },
> + {
> + /* L2 Data Cache per GPU (Total Tex Cache) */
> + .cache_size = 3072,
> + .cache_level = 2,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_DATA_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 10,
> + },
> + {
> + /* L3 Data Cache per GPU */
> + .cache_size = 96*1024,
> + .cache_level = 3,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_DATA_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 10,
> + },
> +};
> +
> +static struct kfd_gpu_cache_info dimgrey_cavefish_cache_info[] = {
> + {
> + /* TCP L1 Cache per CU */
> + .cache_size = 16,
> + .cache_level = 1,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_DATA_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 1,
> + },
> + {
> + /* Scalar L1 Instruction Cache per SQC */
> + .cache_size = 32,
> + .cache_level = 1,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_INST_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 2,
> + },
> + {
> + /* Scalar L1 Data Cache per SQC */
> + .cache_size = 16,
> + .cache_level = 1,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_DATA_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 2,
> + },
> + {
> + /* GL1 Data Cache per SA */
> + .cache_size = 128,
> + .cache_level = 1,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_DATA_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 8,
> + },
> + {
> + /* L2 Data Cache per GPU (Total Tex Cache) */
> + .cache_size = 2048,
> + .cache_level = 2,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_DATA_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 8,
> + },
> + {
> + /* L3 Data Cache per GPU */
> + .cache_size = 32*1024,
> + .cache_level = 3,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_DATA_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 8,
> + },
> +};
> +
> +static struct kfd_gpu_cache_info beige_goby_cache_info[] = {
> + {
> + /* TCP L1 Cache per CU */
> + .cache_size = 16,
> + .cache_level = 1,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_DATA_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 1,
> + },
> + {
> + /* Scalar L1 Instruction Cache per SQC */
> + .cache_size = 32,
> + .cache_level = 1,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_INST_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 2,
> + },
> + {
> + /* Scalar L1 Data Cache per SQC */
> + .cache_size = 16,
> + .cache_level = 1,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_DATA_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 2,
> + },
> + {
> + /* GL1 Data Cache per SA */
> + .cache_size = 128,
> + .cache_level = 1,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_DATA_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 8,
> + },
> + {
> + /* L2 Data Cache per GPU (Total Tex Cache) */
> + .cache_size = 1024,
> + .cache_level = 2,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_DATA_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 8,
> + },
> + {
> + /* L3 Data Cache per GPU */
> + .cache_size = 16*1024,
> + .cache_level = 3,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_DATA_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 8,
> + },
> +};
> +static struct kfd_gpu_cache_info yellow_carp_cache_info[] = {
> + {
> + /* TCP L1 Cache per CU */
> + .cache_size = 16,
> + .cache_level = 1,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_DATA_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 1,
> + },
> + {
> + /* Scalar L1 Instruction Cache per SQC */
> + .cache_size = 32,
> + .cache_level = 1,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_INST_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 2,
> + },
> + {
> + /* Scalar L1 Data Cache per SQC */
> + .cache_size = 16,
> + .cache_level = 1,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_DATA_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 2,
> + },
> + {
> + /* GL1 Data Cache per SA */
> + .cache_size = 128,
> + .cache_level = 1,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_DATA_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 6,
> + },
> + {
> + /* L2 Data Cache per GPU (Total Tex Cache) */
> + .cache_size = 2048,
> + .cache_level = 2,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_DATA_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 6,
> + },
> +};
> +
> struct kfd_topology_device *kfd_topology_device_by_proximity_domain_no_lock(
> uint32_t proximity_domain)
> {
> @@ -149,6 +890,7 @@ static void kfd_release_topology_device(struct kfd_topology_device *dev)
> {
> struct kfd_mem_properties *mem;
> struct kfd_cache_properties *cache;
> + struct kfd_cache_properties_ext *cache_ext;
> struct kfd_iolink_properties *iolink;
> struct kfd_iolink_properties *p2plink;
> #ifdef HAVE_AMD_IOMMU_PC_SUPPORTED
> @@ -171,6 +913,13 @@ static void kfd_release_topology_device(struct kfd_topology_device *dev)
> kfree(cache);
> }
>
> + while (dev->cache_props_ext.next != &dev->cache_props_ext) {
> + cache_ext = container_of(dev->cache_props_ext.next,
> + struct kfd_cache_properties_ext, list);
> + list_del(&cache_ext->list);
> + kfree(cache_ext);
> + }
> +
> while (dev->io_link_props.next != &dev->io_link_props) {
> iolink = container_of(dev->io_link_props.next,
> struct kfd_iolink_properties, list);
> @@ -227,6 +976,7 @@ struct kfd_topology_device *kfd_create_topology_device(
>
> INIT_LIST_HEAD(&dev->mem_props);
> INIT_LIST_HEAD(&dev->cache_props);
> + INIT_LIST_HEAD(&dev->cache_props_ext);
> INIT_LIST_HEAD(&dev->io_link_props);
> INIT_LIST_HEAD(&dev->p2p_link_props);
> #ifdef HAVE_AMD_IOMMU_PC_SUPPORTED
> @@ -387,7 +1137,6 @@ static ssize_t kfd_cache_show(struct kobject *kobj, struct attribute *attr,
>
> /* Making sure that the buffer is an empty string */
> buffer[0] = 0;
> -
> cache = container_of(attr, struct kfd_cache_properties, attr);
> if (cache->gpu && kfd_devcgroup_check_permission(cache->gpu))
> return -EPERM;
> @@ -423,6 +1172,50 @@ static struct kobj_type cache_type = {
> .sysfs_ops = &cache_ops,
> };
>
> +static ssize_t kfd_cache_ext_show(struct kobject *kobj, struct attribute *attr,
> + char *buffer)
> +{
> + int offs = 0;
> + uint32_t i, j;
> + struct kfd_cache_properties_ext *cache;
> +
> + /* Making sure that the buffer is an empty string */
> + buffer[0] = 0;
> + cache = container_of(attr, struct kfd_cache_properties_ext, attr);
> + if (cache->gpu && kfd_devcgroup_check_permission(cache->gpu))
> + return -EPERM;
> + sysfs_show_32bit_prop(buffer, offs, "processor_id_low",
> + cache->processor_id_low);
> + sysfs_show_32bit_prop(buffer, offs, "level", cache->cache_level);
> + sysfs_show_32bit_prop(buffer, offs, "size", cache->cache_size);
> + sysfs_show_32bit_prop(buffer, offs, "cache_line_size",
> + cache->cacheline_size);
> + sysfs_show_32bit_prop(buffer, offs, "cache_lines_per_tag",
> + cache->cachelines_per_tag);
> + sysfs_show_32bit_prop(buffer, offs, "association", cache->cache_assoc);
> + sysfs_show_32bit_prop(buffer, offs, "latency", cache->cache_latency);
> + sysfs_show_32bit_prop(buffer, offs, "type", cache->cache_type);
> + offs += snprintf(buffer+offs, PAGE_SIZE-offs, "sibling_map ");
> + for (i = 0; i < CRAT_SIBLINGMAP_SIZE; i++)
> + for (j = 0; j < sizeof(cache->sibling_map[0])*8; j++)
> + /* Check each bit */
> + offs += snprintf(buffer+offs, PAGE_SIZE-offs, "%d,",
> + (cache->sibling_map[i] >> j) & 1);
> +
> + /* Replace the last "," with end of line */
> + buffer[offs-1] = '\n';
> + return offs;
> +}
> +
> +static const struct sysfs_ops cache_ext_ops = {
> + .show = kfd_cache_ext_show,
> +};
> +
> +static struct kobj_type cache_ext_type = {
> + .release = kfd_topology_kobj_release,
> + .sysfs_ops = &cache_ext_ops,
> +};
> +
> #ifdef HAVE_AMD_IOMMU_PC_SUPPORTED
> /****** Sysfs of Performance Counters ******/
>
> @@ -610,6 +1403,7 @@ static void kfd_remove_sysfs_node_entry(struct kfd_topology_device *dev)
> struct kfd_iolink_properties *p2plink;
> struct kfd_iolink_properties *iolink;
> struct kfd_cache_properties *cache;
> + struct kfd_cache_properties_ext *cache_ext;
> struct kfd_mem_properties *mem;
> #ifdef HAVE_AMD_IOMMU_PC_SUPPORTED
> struct kfd_perf_properties *perf;
> @@ -663,6 +1457,18 @@ static void kfd_remove_sysfs_node_entry(struct kfd_topology_device *dev)
> dev->kobj_cache = NULL;
> }
>
> + if (dev->kobj_cache_ext) {
> + list_for_each_entry(cache_ext, &dev->cache_props_ext, list)
> + if (cache_ext->kobj) {
> + kfd_remove_sysfs_file(cache_ext->kobj,
> + &cache_ext->attr);
> + cache_ext->kobj = NULL;
> + }
> + kobject_del(dev->kobj_cache_ext);
> + kobject_put(dev->kobj_cache_ext);
> + dev->kobj_cache_ext = NULL;
> + }
> +
> if (dev->kobj_mem) {
> list_for_each_entry(mem, &dev->mem_props, list)
> if (mem->kobj) {
> @@ -707,6 +1513,7 @@ static int kfd_build_sysfs_node_entry(struct kfd_topology_device *dev,
> struct kfd_iolink_properties *p2plink;
> struct kfd_iolink_properties *iolink;
> struct kfd_cache_properties *cache;
> + struct kfd_cache_properties_ext *cache_ext;
> struct kfd_mem_properties *mem;
> #ifdef HAVE_AMD_IOMMU_PC_SUPPORTED
> struct kfd_perf_properties *perf;
> @@ -741,6 +1548,10 @@ static int kfd_build_sysfs_node_entry(struct kfd_topology_device *dev,
> if (!dev->kobj_cache)
> return -ENOMEM;
>
> + dev->kobj_cache_ext = kobject_create_and_add("caches_ext", dev->kobj_node);
> + if (!dev->kobj_cache_ext)
> + return -ENOMEM;
> +
> dev->kobj_iolink = kobject_create_and_add("io_links", dev->kobj_node);
> if (!dev->kobj_iolink)
> return -ENOMEM;
> @@ -830,6 +1641,28 @@ static int kfd_build_sysfs_node_entry(struct kfd_topology_device *dev,
> i++;
> }
>
> + i = 0;
> + list_for_each_entry(cache_ext, &dev->cache_props_ext, list) {
> + cache_ext->kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL);
> + if (!cache_ext->kobj)
> + return -ENOMEM;
> + ret = kobject_init_and_add(cache_ext->kobj, &cache_ext_type,
> + dev->kobj_cache_ext, "%d", i);
> + if (ret < 0) {
> + kobject_put(cache_ext->kobj);
> + return ret;
> + }
> +
> + cache_ext->attr.name = "properties";
> + cache_ext->attr.mode = KFD_SYSFS_FILE_MODE;
> + sysfs_attr_init(&cache_ext->attr);
> + ret = sysfs_create_file(cache_ext->kobj, &cache_ext->attr);
> + if (ret < 0)
> + return ret;
> + i++;
> + }
> +
> +
> i = 0;
> list_for_each_entry(iolink, &dev->io_link_props, list) {
> iolink->kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL);
> @@ -1268,6 +2101,7 @@ static struct kfd_topology_device *kfd_assign_gpu(struct kfd_dev *gpu)
> struct kfd_topology_device *out_dev = NULL;
> struct kfd_mem_properties *mem;
> struct kfd_cache_properties *cache;
> + struct kfd_cache_properties_ext *cache_ext;
> struct kfd_iolink_properties *iolink;
> struct kfd_iolink_properties *p2plink;
>
> @@ -1288,6 +2122,8 @@ static struct kfd_topology_device *kfd_assign_gpu(struct kfd_dev *gpu)
> mem->gpu = dev->gpu;
> list_for_each_entry(cache, &dev->cache_props, list)
> cache->gpu = dev->gpu;
> + list_for_each_entry(cache_ext, &dev->cache_props_ext, list)
> + cache_ext->gpu = dev->gpu;
> list_for_each_entry(iolink, &dev->io_link_props, list)
> iolink->gpu = dev->gpu;
> list_for_each_entry(p2plink, &dev->p2p_link_props, list)
> @@ -1721,6 +2557,397 @@ static void kfd_topology_set_capabilities(struct kfd_topology_device *dev)
> dev->node_props.capability |= HSA_CAP_TRAP_DEBUG_FIRMWARE_SUPPORTED;
> }
>
> +/* Helper function. See kfd_fill_gpu_cache_info for parameter description */
> +static int fill_in_l1_pcache(struct kfd_cache_properties_ext **props_ext,
> + struct kfd_gpu_cache_info *pcache_info,
> + struct kfd_cu_info *cu_info,
> + int cu_bitmask,
> + int cache_type, unsigned int cu_processor_id,
> + int cu_block)
> +{
> + unsigned int cu_sibling_map_mask;
> + int first_active_cu;
> + struct kfd_cache_properties_ext *pcache = NULL;
> +
> + cu_sibling_map_mask = cu_bitmask;
> + cu_sibling_map_mask >>= cu_block;
> + cu_sibling_map_mask &= ((1 << pcache_info[cache_type].num_cu_shared) - 1);
> + first_active_cu = ffs(cu_sibling_map_mask);
> +
> + /* A CU could be inactive. In the case of a shared cache, find the first
> + * active CU; in the case of a non-shared cache, check whether the CU is
> + * inactive. If it is inactive, skip it.
> + */
> + if (first_active_cu) {
> + pcache = kfd_alloc_struct(pcache);
> + if (!pcache)
> + return -ENOMEM;
> +
> + memset(pcache, 0, sizeof(struct kfd_cache_properties_ext));
> + pcache->processor_id_low = cu_processor_id + (first_active_cu - 1);
> + pcache->cache_level = pcache_info[cache_type].cache_level;
> + pcache->cache_size = pcache_info[cache_type].cache_size;
> +
> + if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_DATA_CACHE)
> + pcache->cache_type |= HSA_CACHE_TYPE_DATA;
> + if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_INST_CACHE)
> + pcache->cache_type |= HSA_CACHE_TYPE_INSTRUCTION;
> + if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_CPU_CACHE)
> + pcache->cache_type |= HSA_CACHE_TYPE_CPU;
> + if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_SIMD_CACHE)
> + pcache->cache_type |= HSA_CACHE_TYPE_HSACU;
> +
> + /* Sibling map is w.r.t processor_id_low, so shift out
> + * inactive CU
> + */
> + cu_sibling_map_mask =
> + cu_sibling_map_mask >> (first_active_cu - 1);
> +
> + pcache->sibling_map[0] = (uint8_t)(cu_sibling_map_mask & 0xFF);
> + pcache->sibling_map[1] =
> + (uint8_t)((cu_sibling_map_mask >> 8) & 0xFF);
> + pcache->sibling_map[2] =
> + (uint8_t)((cu_sibling_map_mask >> 16) & 0xFF);
> + pcache->sibling_map[3] =
> + (uint8_t)((cu_sibling_map_mask >> 24) & 0xFF);
> +
> + *props_ext = pcache;
> +
> + return 0;
> + }
> + return 1;
> +}
> +
> +/* Helper function. See kfd_fill_gpu_cache_info for parameter description */
> +static int fill_in_l2_l3_pcache(struct kfd_cache_properties_ext **props_ext,
> + struct kfd_gpu_cache_info *pcache_info,
> + struct kfd_cu_info *cu_info,
> + int cache_type, unsigned int cu_processor_id)
> +{
> + unsigned int cu_sibling_map_mask;
> + int first_active_cu;
> + int i, j, k;
> + struct kfd_cache_properties_ext *pcache = NULL;
> +
> + cu_sibling_map_mask = cu_info->cu_bitmap[0][0];
> + cu_sibling_map_mask &=
> + ((1 << pcache_info[cache_type].num_cu_shared) - 1);
> + first_active_cu = ffs(cu_sibling_map_mask);
> +
> + /* A CU could be inactive. In the case of a shared cache, find the first
> + * active CU; in the case of a non-shared cache, check whether the CU is
> + * inactive. If it is inactive, skip it.
> + */
> + if (first_active_cu) {
> + pcache = kfd_alloc_struct(pcache);
> + if (!pcache)
> + return -ENOMEM;
> +
> + memset(pcache, 0, sizeof(struct kfd_cache_properties_ext));
> + pcache->processor_id_low = cu_processor_id
> + + (first_active_cu - 1);
> + pcache->cache_level = pcache_info[cache_type].cache_level;
> + pcache->cache_size = pcache_info[cache_type].cache_size;
> +
> + if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_DATA_CACHE)
> + pcache->cache_type |= HSA_CACHE_TYPE_DATA;
> + if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_INST_CACHE)
> + pcache->cache_type |= HSA_CACHE_TYPE_INSTRUCTION;
> + if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_CPU_CACHE)
> + pcache->cache_type |= HSA_CACHE_TYPE_CPU;
> + if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_SIMD_CACHE)
> + pcache->cache_type |= HSA_CACHE_TYPE_HSACU;
> +
> + /* Sibling map is w.r.t processor_id_low, so shift out
> + * inactive CU
> + */
> + cu_sibling_map_mask = cu_sibling_map_mask >> (first_active_cu - 1);
> + k = 0;
> +
> + for (i = 0; i < cu_info->num_shader_engines; i++) {
> + for (j = 0; j < cu_info->num_shader_arrays_per_engine; j++) {
> + pcache->sibling_map[k] = (uint8_t)(cu_sibling_map_mask & 0xFF);
> + pcache->sibling_map[k+1] = (uint8_t)((cu_sibling_map_mask >> 8) & 0xFF);
> + pcache->sibling_map[k+2] = (uint8_t)((cu_sibling_map_mask >> 16) & 0xFF);
> + pcache->sibling_map[k+3] = (uint8_t)((cu_sibling_map_mask >> 24) & 0xFF);
> + k += 4;
> +
> + cu_sibling_map_mask = cu_info->cu_bitmap[i % 4][j + i / 4];
> + cu_sibling_map_mask &= ((1 << pcache_info[cache_type].num_cu_shared) - 1);
> + }
> + }
> + *props_ext = pcache;
> + return 0;
> + }
> + return 1;
> +}
> +
> +#define KFD_MAX_CACHE_TYPES 6
> +
> +static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev,
> + struct kfd_gpu_cache_info *pcache_info)
> +{
> + struct amdgpu_device *adev = kdev->adev;
> + int i = 0;
> +
> + /* TCP L1 Cache per CU */
> + if (adev->gfx.config.gc_tcp_l1_size) {
> + pcache_info[i].cache_size = adev->gfx.config.gc_tcp_l1_size;
> + pcache_info[i].cache_level = 1;
> + pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_DATA_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE);
> + pcache_info[0].num_cu_shared = adev->gfx.config.gc_num_tcp_per_wpg / 2;
> + i++;
> + }
> + /* Scalar L1 Instruction Cache per SQC */
> + if (adev->gfx.config.gc_l1_instruction_cache_size_per_sqc) {
> + pcache_info[i].cache_size =
> + adev->gfx.config.gc_l1_instruction_cache_size_per_sqc;
> + pcache_info[i].cache_level = 1;
> + pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_INST_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE);
> + pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_sqc_per_wgp * 2;
> + i++;
> + }
> + /* Scalar L1 Data Cache per SQC */
> + if (adev->gfx.config.gc_l1_data_cache_size_per_sqc) {
> + pcache_info[i].cache_size = adev->gfx.config.gc_l1_data_cache_size_per_sqc;
> + pcache_info[i].cache_level = 1;
> + pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_DATA_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE);
> + pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_sqc_per_wgp * 2;
> + i++;
> + }
> + /* GL1 Data Cache per SA */
> + if (adev->gfx.config.gc_gl1c_per_sa &&
> + adev->gfx.config.gc_gl1c_size_per_instance) {
> + pcache_info[i].cache_size = adev->gfx.config.gc_gl1c_per_sa *
> + adev->gfx.config.gc_gl1c_size_per_instance;
> + pcache_info[i].cache_level = 1;
> + pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_DATA_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE);
> + pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh;
> + i++;
> + }
> + /* L2 Data Cache per GPU (Total Tex Cache) */
> + if (adev->gfx.config.gc_gl2c_per_gpu) {
> + pcache_info[i].cache_size = adev->gfx.config.gc_gl2c_per_gpu;
> + pcache_info[i].cache_level = 2;
> + pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_DATA_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE);
> + pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh;
> + i++;
> + }
> + /* L3 Data Cache per GPU */
> + if (adev->gmc.mall_size) {
> + pcache_info[i].cache_size = adev->gmc.mall_size / 1024;
> + pcache_info[i].cache_level = 3;
> + pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_DATA_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE);
> + pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh;
> + i++;
> + }
> + return i;
> +}
> +/* kfd_fill_cache_non_crat_info - Fill GPU cache info using kfd_gpu_cache_info
> + * tables
> + */
> +static int kfd_fill_cache_non_crat_info(struct kfd_topology_device *dev, struct kfd_dev *kdev)
> +{
> + struct kfd_gpu_cache_info *pcache_info = NULL;
> + struct kfd_gpu_cache_info cache_info[KFD_MAX_CACHE_TYPES];
> + int num_of_cache_types = 0;
> + int i, j, k;
> + int ct = 0;
> + unsigned int cu_processor_id;
> + int ret;
> + unsigned int num_cu_shared;
> + struct kfd_cu_info cu_info;
> + struct kfd_cu_info *pcu_info;
> + int gpu_processor_id;
> + struct kfd_cache_properties_ext *props_ext;
> + int num_of_entries = 0;
> +
> + amdgpu_amdkfd_get_cu_info(kdev->adev, &cu_info);
> + pcu_info = &cu_info;
> +
> + gpu_processor_id = kdev->processor_id_low;
> +
> + switch (kdev->adev->asic_type) {
> + case CHIP_KAVERI:
> + pcache_info = kaveri_cache_info;
> + num_of_cache_types = ARRAY_SIZE(kaveri_cache_info);
> + break;
> + case CHIP_HAWAII:
> + pcache_info = hawaii_cache_info;
> + num_of_cache_types = ARRAY_SIZE(hawaii_cache_info);
> + break;
> + case CHIP_CARRIZO:
> + pcache_info = carrizo_cache_info;
> + num_of_cache_types = ARRAY_SIZE(carrizo_cache_info);
> + break;
> + case CHIP_TONGA:
> + pcache_info = tonga_cache_info;
> + num_of_cache_types = ARRAY_SIZE(tonga_cache_info);
> + break;
> + case CHIP_FIJI:
> + pcache_info = fiji_cache_info;
> + num_of_cache_types = ARRAY_SIZE(fiji_cache_info);
> + break;
> + case CHIP_POLARIS10:
> + pcache_info = polaris10_cache_info;
> + num_of_cache_types = ARRAY_SIZE(polaris10_cache_info);
> + break;
> + case CHIP_POLARIS11:
> + pcache_info = polaris11_cache_info;
> + num_of_cache_types = ARRAY_SIZE(polaris11_cache_info);
> + break;
> + case CHIP_POLARIS12:
> + pcache_info = polaris12_cache_info;
> + num_of_cache_types = ARRAY_SIZE(polaris12_cache_info);
> + break;
> + case CHIP_VEGAM:
> + pcache_info = vegam_cache_info;
> + num_of_cache_types = ARRAY_SIZE(vegam_cache_info);
> + break;
> + default:
> + switch (KFD_GC_VERSION(kdev)) {
> + case IP_VERSION(9, 0, 1):
> + pcache_info = vega10_cache_info;
> + num_of_cache_types = ARRAY_SIZE(vega10_cache_info);
> + break;
> + case IP_VERSION(9, 2, 1):
> + pcache_info = vega12_cache_info;
> + num_of_cache_types = ARRAY_SIZE(vega12_cache_info);
> + break;
> + case IP_VERSION(9, 4, 0):
> + case IP_VERSION(9, 4, 1):
> + pcache_info = vega20_cache_info;
> + num_of_cache_types = ARRAY_SIZE(vega20_cache_info);
> + break;
> + case IP_VERSION(9, 4, 2):
> + pcache_info = aldebaran_cache_info;
> + num_of_cache_types = ARRAY_SIZE(aldebaran_cache_info);
> + break;
> + case IP_VERSION(9, 1, 0):
> + case IP_VERSION(9, 2, 2):
> + pcache_info = raven_cache_info;
> + num_of_cache_types = ARRAY_SIZE(raven_cache_info);
> + break;
> + case IP_VERSION(9, 3, 0):
> + pcache_info = renoir_cache_info;
> + num_of_cache_types = ARRAY_SIZE(renoir_cache_info);
> + break;
> + case IP_VERSION(10, 1, 10):
> + case IP_VERSION(10, 1, 2):
> + case IP_VERSION(10, 1, 3):
> + case IP_VERSION(10, 1, 4):
> + pcache_info = navi10_cache_info;
> + num_of_cache_types = ARRAY_SIZE(navi10_cache_info);
> + break;
> + case IP_VERSION(10, 1, 1):
> + pcache_info = navi14_cache_info;
> + num_of_cache_types = ARRAY_SIZE(navi14_cache_info);
> + break;
> + case IP_VERSION(10, 3, 0):
> + pcache_info = sienna_cichlid_cache_info;
> + num_of_cache_types = ARRAY_SIZE(sienna_cichlid_cache_info);
> + break;
> + case IP_VERSION(10, 3, 2):
> + pcache_info = navy_flounder_cache_info;
> + num_of_cache_types = ARRAY_SIZE(navy_flounder_cache_info);
> + break;
> + case IP_VERSION(10, 3, 4):
> + pcache_info = dimgrey_cavefish_cache_info;
> + num_of_cache_types = ARRAY_SIZE(dimgrey_cavefish_cache_info);
> + break;
> + case IP_VERSION(10, 3, 1):
> + pcache_info = vangogh_cache_info;
> + num_of_cache_types = ARRAY_SIZE(vangogh_cache_info);
> + break;
> + case IP_VERSION(10, 3, 5):
> + pcache_info = beige_goby_cache_info;
> + num_of_cache_types = ARRAY_SIZE(beige_goby_cache_info);
> + break;
> + case IP_VERSION(10, 3, 3):
> + case IP_VERSION(10, 3, 6): /* TODO: Double check these on production silicon */
> + case IP_VERSION(10, 3, 7): /* TODO: Double check these on production silicon */
> + pcache_info = yellow_carp_cache_info;
> + num_of_cache_types = ARRAY_SIZE(yellow_carp_cache_info);
> + break;
> + case IP_VERSION(11, 0, 0):
> + case IP_VERSION(11, 0, 1):
> + case IP_VERSION(11, 0, 2):
> + case IP_VERSION(11, 0, 3):
> + pcache_info = cache_info;
> + num_of_cache_types =
> + kfd_fill_gpu_cache_info_from_gfx_config(kdev, pcache_info);
> + break;
> + default:
> + return -EINVAL;
> + }
> + }
> +
> + /* For each type of cache listed in the kfd_gpu_cache_info table,
> + * go through all available Compute Units.
> + * The [i,j,k] loop behaves as follows:
> + * if kfd_gpu_cache_info.num_cu_shared == 1,
> + * it parses through all available CUs;
> + * if kfd_gpu_cache_info.num_cu_shared != 1,
> + * it considers only one CU from
> + * each shared unit.
> + */
> + for (ct = 0; ct < num_of_cache_types; ct++) {
> + cu_processor_id = gpu_processor_id;
> + if (pcache_info[ct].cache_level == 1) {
> + for (i = 0; i < pcu_info->num_shader_engines; i++) {
> + for (j = 0; j < pcu_info->num_shader_arrays_per_engine; j++) {
> + for (k = 0; k < pcu_info->num_cu_per_sh; k += pcache_info[ct].num_cu_shared) {
> +
> + ret = fill_in_l1_pcache(&props_ext, pcache_info, pcu_info,
> + pcu_info->cu_bitmap[i % 4][j + i / 4], ct,
> + cu_processor_id, k);
> +
> + if (ret < 0)
> + break;
> +
> + if (!ret) {
> + num_of_entries++;
> + list_add_tail(&props_ext->list, &dev->cache_props_ext);
> + }
> +
> + /* Move to next CU block */
> + num_cu_shared = ((k + pcache_info[ct].num_cu_shared) <=
> + pcu_info->num_cu_per_sh) ?
> + pcache_info[ct].num_cu_shared :
> + (pcu_info->num_cu_per_sh - k);
> + cu_processor_id += num_cu_shared;
> + }
> + }
> + }
> + } else {
> + ret = fill_in_l2_l3_pcache(&props_ext, pcache_info,
> + pcu_info, ct, cu_processor_id);
> +
> + if (ret < 0)
> + break;
> +
> + if (!ret) {
> + num_of_entries++;
> + list_add_tail(&props_ext->list, &dev->cache_props_ext);
> + }
> + }
> + }
> + pr_debug("Added [%d] GPU cache entries\n", num_of_entries);
> + return 0;
> +}
> +
> int kfd_topology_add_device(struct kfd_dev *gpu)
> {
> uint32_t gpu_id;
> @@ -1759,6 +2986,7 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
> topology_crat_proximity_domain--;
> return res;
> }
> +
> res = kfd_parse_crat_table(crat_image,
> &temp_topology_device_list,
> proximity_domain);
> @@ -1771,23 +2999,27 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
>
> kfd_topology_update_device_list(&temp_topology_device_list,
> &topology_device_list);
> + up_write(&topology_lock);
> +
> + dev = kfd_assign_gpu(gpu);
> + if (WARN_ON(!dev)) {
> + res = -ENODEV;
> + goto err;
> + }
> +
> + down_write(&topology_lock);
> + kfd_fill_cache_non_crat_info(dev, gpu);
>
> /* Update the SYSFS tree, since we added another topology
> * device
> */
> res = kfd_topology_update_sysfs();
> up_write(&topology_lock);
> -
> if (!res)
> sys_props.generation_count++;
> else
> pr_err("Failed to update GPU (ID: 0x%x) to sysfs topology. res=%d\n",
> gpu_id, res);
> - dev = kfd_assign_gpu(gpu);
> - if (WARN_ON(!dev)) {
> - res = -ENODEV;
> - goto err;
> - }
> }
>
> dev->gpu_id = gpu_id;
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
> index dc4e239c8f8f..fc35fe9fa914 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
> @@ -103,6 +103,25 @@ struct kfd_cache_properties {
> struct attribute attr;
> };
>
> +#define VCRAT_SIBLINGMAP_SIZE 64
> +
> +/* for GPUs with more CUs */
> +struct kfd_cache_properties_ext {
> + struct list_head list;
> + uint32_t processor_id_low;
> + uint32_t cache_level;
> + uint32_t cache_size;
> + uint32_t cacheline_size;
> + uint32_t cachelines_per_tag;
> + uint32_t cache_assoc;
> + uint32_t cache_latency;
> + uint32_t cache_type;
> + uint8_t sibling_map[VCRAT_SIBLINGMAP_SIZE];
> + struct kfd_dev *gpu;
> + struct kobject *kobj;
> + struct attribute attr;
> +};
> +
> struct kfd_iolink_properties {
> struct list_head list;
> uint32_t iolink_type;
> @@ -139,6 +158,7 @@ struct kfd_topology_device {
> struct list_head mem_props;
> uint32_t cache_count;
> struct list_head cache_props;
> + struct list_head cache_props_ext;
> struct list_head io_link_props;
> struct list_head p2p_link_props;
> #ifdef HAVE_AMD_IOMMU_PC_SUPPORTED
> @@ -148,6 +168,7 @@ struct kfd_topology_device {
> struct kobject *kobj_node;
> struct kobject *kobj_mem;
> struct kobject *kobj_cache;
> + struct kobject *kobj_cache_ext;
> struct kobject *kobj_iolink;
> struct kobject *kobj_p2plink;
> #ifdef HAVE_AMD_IOMMU_PC_SUPPORTED
More information about the amd-gfx
mailing list