[PATCH 2/2] drm/amdkfd: Fix the warning of array-index-out-of-bounds

Felix Kuehling felix.kuehling at amd.com
Tue Oct 25 14:25:22 UTC 2022


Am 2022-10-25 um 07:37 schrieb Ma, Jun:
> Hi Felix,
>
> On 10/25/2022 5:06 AM, Felix Kuehling wrote:
>> On 2022-10-24 07:26, Ma Jun wrote:
>>> For some GPUs with more CUs, the original sibling_map[32]
>>> in struct crat_subtype_cache is not enough
>>> to save the cache information when create the VCRAT table,
>>> so fill the cache info into struct kfd_cache_properties_ext
>>> directly to fix the problem.
>>>
>>> At the same time, a new directory
>>> "/sys/class/kfd/kfd/topology/nodes/*nodes_num*/caches_ext"
>>> is created for cache information showing.
>> Is this necessary because existing user mode cannot handle the larger
>> sibling map? If so, you'll also need to update the Thunk to parse the
>> information from caches_ext. Do you have a link to that patch?
>>
>> If user mode can handle a larger siblingmap in the existing sysfs
>> interface, we should not create new one.
>>
>> Another alternative is to add more lines to the existing cache info
>> sysfs entries. Older user mode will probably ignore the ones it doesn't
>> know about.
>>
> Yes, it's not necessary to make a new directory for cache.
>
> How about the code below?
>
> +#define CAHCHE_INFO_FROM_CRAT  0x00000001
> +#define CAHCHE_INFO_FROM_VCRAT 0x00000002
> +
> +#define VCRAT_SIBLINGMAP_SIZE 64
> +
>   struct kfd_cache_properties {
>          struct list_head        list;
>          uint32_t                processor_id_low;
> @@ -98,9 +103,11 @@ struct kfd_cache_properties {
>          uint32_t                cache_latency;
>          uint32_t                cache_type;
>          uint8_t                 sibling_map[CRAT_SIBLINGMAP_SIZE];
> +       uint8_t                 sibling_map_ext[VCRAT_SIBLINGMAP_SIZE];

You probably don't need the second array. Just make the original array 
larger. When you present the information in kfd_cache_show, it doesn't 
matter any more where the info came from.


>          struct kfd_dev          *gpu;
>          struct kobject          *kobj;
>          struct attribute        attr;
> +       uint32_t                flags;

Instead of a flag, this could be a variable uint32_t sibling_map_size. 
That way you only need a single loop in kfd_cache_show, and it's easy to 
change the size in the future if needed.

Regards,
   Felix


>   };
>
>          cache = container_of(attr, struct kfd_cache_properties, attr);
>          if (cache->gpu && kfd_devcgroup_check_permission(cache->gpu))
>                  return -EPERM;
> @@ -402,12 +1142,22 @@ static ssize_t kfd_cache_show(struct kobject *kobj, struct attribute *attr,
>          sysfs_show_32bit_prop(buffer, offs, "association", cache->cache_assoc);
>          sysfs_show_32bit_prop(buffer, offs, "latency", cache->cache_latency);
>          sysfs_show_32bit_prop(buffer, offs, "type", cache->cache_type);
> -       offs += snprintf(buffer+offs, PAGE_SIZE-offs, "sibling_map ");
> -       for (i = 0; i < CRAT_SIBLINGMAP_SIZE; i++)
> -               for (j = 0; j < sizeof(cache->sibling_map[0])*8; j++)
> -                       /* Check each bit */
> -                       offs += snprintf(buffer+offs, PAGE_SIZE-offs, "%d,",
> -                                        (cache->sibling_map[i] >> j) & 1);
> +
> +       if (cache->flags & CAHCHE_INFO_FROM_CRAT) {
> +               offs += snprintf(buffer+offs, PAGE_SIZE-offs, "sibling_map ");
> +               for (i = 0; i < CRAT_SIBLINGMAP_SIZE; i++)
> +                       for (j = 0; j < sizeof(cache->sibling_map[0])*8; j++)
> +                               /* Check each bit */
> +                               offs += snprintf(buffer+offs, PAGE_SIZE-offs, "%d,",
> +                                               (cache->sibling_map[i] >> j) & 1);
> +       } else if (cache->flags & CAHCHE_INFO_FROM_VCRAT) {
> +               offs += snprintf(buffer+offs, PAGE_SIZE-offs, "sibling_map ");
> +               for (i = 0; i < VCRAT_SIBLINGMAP_SIZE; i++)
> +                       for (j = 0; j < sizeof(cache->sibling_map_ext[0])*8; j++)
> +                               /* Check each bit */
> +                               offs += snprintf(buffer+offs, PAGE_SIZE-offs, "%d,",
> +                                               (cache->sibling_map_ext[i] >> j) & 1);
> +       }
>   
> Regards,
> Ma Jun
>
>> Regards,
>>     Felix
>>
>>
>>> The original directory "cache" is reserved for GPU which using real CRAT
>>> table.
>>>
>>> Signed-off-by: Ma Jun <Jun.Ma2 at amd.com>
>>> ---
>>>    drivers/gpu/drm/amd/amdkfd/kfd_crat.c     | 1229 +-------------------
>>>    drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 1246 ++++++++++++++++++++-
>>>    drivers/gpu/drm/amd/amdkfd/kfd_topology.h |   21 +
>>>    3 files changed, 1261 insertions(+), 1235 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
>>> index 4857ec5b9f46..e6928c60338e 100644
>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
>>> @@ -30,799 +30,6 @@
>>>    #include "amdgpu.h"
>>>    #include "amdgpu_amdkfd.h"
>>>    
>>> -/* Static table to describe GPU Cache information */
>>> -struct kfd_gpu_cache_info {
>>> -	uint32_t	cache_size;
>>> -	uint32_t	cache_level;
>>> -	uint32_t	flags;
>>> -	/* Indicates how many Compute Units share this cache
>>> -	 * within a SA. Value = 1 indicates the cache is not shared
>>> -	 */
>>> -	uint32_t	num_cu_shared;
>>> -};
>>> -
>>> -static struct kfd_gpu_cache_info kaveri_cache_info[] = {
>>> -	{
>>> -		/* TCP L1 Cache per CU */
>>> -		.cache_size = 16,
>>> -		.cache_level = 1,
>>> -		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> -				CRAT_CACHE_FLAGS_DATA_CACHE |
>>> -				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> -		.num_cu_shared = 1,
>>> -	},
>>> -	{
>>> -		/* Scalar L1 Instruction Cache (in SQC module) per bank */
>>> -		.cache_size = 16,
>>> -		.cache_level = 1,
>>> -		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> -				CRAT_CACHE_FLAGS_INST_CACHE |
>>> -				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> -		.num_cu_shared = 2,
>>> -	},
>>> -	{
>>> -		/* Scalar L1 Data Cache (in SQC module) per bank */
>>> -		.cache_size = 8,
>>> -		.cache_level = 1,
>>> -		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> -				CRAT_CACHE_FLAGS_DATA_CACHE |
>>> -				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> -		.num_cu_shared = 2,
>>> -	},
>>> -
>>> -	/* TODO: Add L2 Cache information */
>>> -};
>>> -
>>> -
>>> -static struct kfd_gpu_cache_info carrizo_cache_info[] = {
>>> -	{
>>> -		/* TCP L1 Cache per CU */
>>> -		.cache_size = 16,
>>> -		.cache_level = 1,
>>> -		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> -				CRAT_CACHE_FLAGS_DATA_CACHE |
>>> -				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> -		.num_cu_shared = 1,
>>> -	},
>>> -	{
>>> -		/* Scalar L1 Instruction Cache (in SQC module) per bank */
>>> -		.cache_size = 8,
>>> -		.cache_level = 1,
>>> -		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> -				CRAT_CACHE_FLAGS_INST_CACHE |
>>> -				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> -		.num_cu_shared = 4,
>>> -	},
>>> -	{
>>> -		/* Scalar L1 Data Cache (in SQC module) per bank. */
>>> -		.cache_size = 4,
>>> -		.cache_level = 1,
>>> -		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> -				CRAT_CACHE_FLAGS_DATA_CACHE |
>>> -				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> -		.num_cu_shared = 4,
>>> -	},
>>> -
>>> -	/* TODO: Add L2 Cache information */
>>> -};
>>> -
>>> -#define hawaii_cache_info kaveri_cache_info
>>> -#define tonga_cache_info carrizo_cache_info
>>> -#define fiji_cache_info  carrizo_cache_info
>>> -#define polaris10_cache_info carrizo_cache_info
>>> -#define polaris11_cache_info carrizo_cache_info
>>> -#define polaris12_cache_info carrizo_cache_info
>>> -#define vegam_cache_info carrizo_cache_info
>>> -
>>> -/* NOTE: L1 cache information has been updated and L2/L3
>>> - * cache information has been added for Vega10 and
>>> - * newer ASICs. The unit for cache_size is KiB.
>>> - * In future,  check & update cache details
>>> - * for every new ASIC is required.
>>> - */
>>> -
>>> -static struct kfd_gpu_cache_info vega10_cache_info[] = {
>>> -	{
>>> -		/* TCP L1 Cache per CU */
>>> -		.cache_size = 16,
>>> -		.cache_level = 1,
>>> -		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> -				CRAT_CACHE_FLAGS_DATA_CACHE |
>>> -				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> -		.num_cu_shared = 1,
>>> -	},
>>> -	{
>>> -		/* Scalar L1 Instruction Cache per SQC */
>>> -		.cache_size = 32,
>>> -		.cache_level = 1,
>>> -		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> -				CRAT_CACHE_FLAGS_INST_CACHE |
>>> -				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> -		.num_cu_shared = 3,
>>> -	},
>>> -	{
>>> -		/* Scalar L1 Data Cache per SQC */
>>> -		.cache_size = 16,
>>> -		.cache_level = 1,
>>> -		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> -				CRAT_CACHE_FLAGS_DATA_CACHE |
>>> -				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> -		.num_cu_shared = 3,
>>> -	},
>>> -	{
>>> -		/* L2 Data Cache per GPU (Total Tex Cache) */
>>> -		.cache_size = 4096,
>>> -		.cache_level = 2,
>>> -		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> -				CRAT_CACHE_FLAGS_DATA_CACHE |
>>> -				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> -		.num_cu_shared = 16,
>>> -	},
>>> -};
>>> -
>>> -static struct kfd_gpu_cache_info raven_cache_info[] = {
>>> -	{
>>> -		/* TCP L1 Cache per CU */
>>> -		.cache_size = 16,
>>> -		.cache_level = 1,
>>> -		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> -				CRAT_CACHE_FLAGS_DATA_CACHE |
>>> -				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> -		.num_cu_shared = 1,
>>> -	},
>>> -	{
>>> -		/* Scalar L1 Instruction Cache per SQC */
>>> -		.cache_size = 32,
>>> -		.cache_level = 1,
>>> -		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> -				CRAT_CACHE_FLAGS_INST_CACHE |
>>> -				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> -		.num_cu_shared = 3,
>>> -	},
>>> -	{
>>> -		/* Scalar L1 Data Cache per SQC */
>>> -		.cache_size = 16,
>>> -		.cache_level = 1,
>>> -		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> -				CRAT_CACHE_FLAGS_DATA_CACHE |
>>> -				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> -		.num_cu_shared = 3,
>>> -	},
>>> -	{
>>> -		/* L2 Data Cache per GPU (Total Tex Cache) */
>>> -		.cache_size = 1024,
>>> -		.cache_level = 2,
>>> -		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> -				CRAT_CACHE_FLAGS_DATA_CACHE |
>>> -				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> -		.num_cu_shared = 11,
>>> -	},
>>> -};
>>> -
>>> -static struct kfd_gpu_cache_info renoir_cache_info[] = {
>>> -	{
>>> -		/* TCP L1 Cache per CU */
>>> -		.cache_size = 16,
>>> -		.cache_level = 1,
>>> -		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> -				CRAT_CACHE_FLAGS_DATA_CACHE |
>>> -				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> -		.num_cu_shared = 1,
>>> -	},
>>> -	{
>>> -		/* Scalar L1 Instruction Cache per SQC */
>>> -		.cache_size = 32,
>>> -		.cache_level = 1,
>>> -		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> -				CRAT_CACHE_FLAGS_INST_CACHE |
>>> -				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> -		.num_cu_shared = 3,
>>> -	},
>>> -	{
>>> -		/* Scalar L1 Data Cache per SQC */
>>> -		.cache_size = 16,
>>> -		.cache_level = 1,
>>> -		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> -				CRAT_CACHE_FLAGS_DATA_CACHE |
>>> -				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> -		.num_cu_shared = 3,
>>> -	},
>>> -	{
>>> -		/* L2 Data Cache per GPU (Total Tex Cache) */
>>> -		.cache_size = 1024,
>>> -		.cache_level = 2,
>>> -		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> -				CRAT_CACHE_FLAGS_DATA_CACHE |
>>> -				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> -		.num_cu_shared = 8,
>>> -	},
>>> -};
>>> -
>>> -static struct kfd_gpu_cache_info vega12_cache_info[] = {
>>> -	{
>>> -		/* TCP L1 Cache per CU */
>>> -		.cache_size = 16,
>>> -		.cache_level = 1,
>>> -		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> -				CRAT_CACHE_FLAGS_DATA_CACHE |
>>> -				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> -		.num_cu_shared = 1,
>>> -	},
>>> -	{
>>> -		/* Scalar L1 Instruction Cache per SQC */
>>> -		.cache_size = 32,
>>> -		.cache_level = 1,
>>> -		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> -				CRAT_CACHE_FLAGS_INST_CACHE |
>>> -				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> -		.num_cu_shared = 3,
>>> -	},
>>> -	{
>>> -		/* Scalar L1 Data Cache per SQC */
>>> -		.cache_size = 16,
>>> -		.cache_level = 1,
>>> -		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> -				CRAT_CACHE_FLAGS_DATA_CACHE |
>>> -				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> -		.num_cu_shared = 3,
>>> -	},
>>> -	{
>>> -		/* L2 Data Cache per GPU (Total Tex Cache) */
>>> -		.cache_size = 2048,
>>> -		.cache_level = 2,
>>> -		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> -				CRAT_CACHE_FLAGS_DATA_CACHE |
>>> -				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> -		.num_cu_shared = 5,
>>> -	},
>>> -};
>>> -
>>> -static struct kfd_gpu_cache_info vega20_cache_info[] = {
>>> -	{
>>> -		/* TCP L1 Cache per CU */
>>> -		.cache_size = 16,
>>> -		.cache_level = 1,
>>> -		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> -				CRAT_CACHE_FLAGS_DATA_CACHE |
>>> -				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> -		.num_cu_shared = 1,
>>> -	},
>>> -	{
>>> -		/* Scalar L1 Instruction Cache per SQC */
>>> -		.cache_size = 32,
>>> -		.cache_level = 1,
>>> -		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> -				CRAT_CACHE_FLAGS_INST_CACHE |
>>> -				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> -		.num_cu_shared = 3,
>>> -	},
>>> -	{
>>> -		/* Scalar L1 Data Cache per SQC */
>>> -		.cache_size = 16,
>>> -		.cache_level = 1,
>>> -		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> -				CRAT_CACHE_FLAGS_DATA_CACHE |
>>> -				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> -		.num_cu_shared = 3,
>>> -	},
>>> -	{
>>> -		/* L2 Data Cache per GPU (Total Tex Cache) */
>>> -		.cache_size = 8192,
>>> -		.cache_level = 2,
>>> -		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> -				CRAT_CACHE_FLAGS_DATA_CACHE |
>>> -				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> -		.num_cu_shared = 16,
>>> -	},
>>> -};
>>> -
>>> -static struct kfd_gpu_cache_info aldebaran_cache_info[] = {
>>> -	{
>>> -		/* TCP L1 Cache per CU */
>>> -		.cache_size = 16,
>>> -		.cache_level = 1,
>>> -		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> -				CRAT_CACHE_FLAGS_DATA_CACHE |
>>> -				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> -		.num_cu_shared = 1,
>>> -	},
>>> -	{
>>> -		/* Scalar L1 Instruction Cache per SQC */
>>> -		.cache_size = 32,
>>> -		.cache_level = 1,
>>> -		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> -				CRAT_CACHE_FLAGS_INST_CACHE |
>>> -				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> -		.num_cu_shared = 2,
>>> -	},
>>> -	{
>>> -		/* Scalar L1 Data Cache per SQC */
>>> -		.cache_size = 16,
>>> -		.cache_level = 1,
>>> -		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> -				CRAT_CACHE_FLAGS_DATA_CACHE |
>>> -				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> -		.num_cu_shared = 2,
>>> -	},
>>> -	{
>>> -		/* L2 Data Cache per GPU (Total Tex Cache) */
>>> -		.cache_size = 8192,
>>> -		.cache_level = 2,
>>> -		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> -				CRAT_CACHE_FLAGS_DATA_CACHE |
>>> -				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> -		.num_cu_shared = 14,
>>> -	},
>>> -};
>>> -
>>> -static struct kfd_gpu_cache_info navi10_cache_info[] = {
>>> -	{
>>> -		/* TCP L1 Cache per CU */
>>> -		.cache_size = 16,
>>> -		.cache_level = 1,
>>> -		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> -				CRAT_CACHE_FLAGS_DATA_CACHE |
>>> -				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> -		.num_cu_shared = 1,
>>> -	},
>>> -	{
>>> -		/* Scalar L1 Instruction Cache per SQC */
>>> -		.cache_size = 32,
>>> -		.cache_level = 1,
>>> -		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> -				CRAT_CACHE_FLAGS_INST_CACHE |
>>> -				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> -		.num_cu_shared = 2,
>>> -	},
>>> -	{
>>> -		/* Scalar L1 Data Cache per SQC */
>>> -		.cache_size = 16,
>>> -		.cache_level = 1,
>>> -		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> -				CRAT_CACHE_FLAGS_DATA_CACHE |
>>> -				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> -		.num_cu_shared = 2,
>>> -	},
>>> -	{
>>> -		/* GL1 Data Cache per SA */
>>> -		.cache_size = 128,
>>> -		.cache_level = 1,
>>> -		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> -				CRAT_CACHE_FLAGS_DATA_CACHE |
>>> -				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> -		.num_cu_shared = 10,
>>> -	},
>>> -	{
>>> -		/* L2 Data Cache per GPU (Total Tex Cache) */
>>> -		.cache_size = 4096,
>>> -		.cache_level = 2,
>>> -		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> -				CRAT_CACHE_FLAGS_DATA_CACHE |
>>> -				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> -		.num_cu_shared = 10,
>>> -	},
>>> -};
>>> -
>>> -static struct kfd_gpu_cache_info vangogh_cache_info[] = {
>>> -	{
>>> -		/* TCP L1 Cache per CU */
>>> -		.cache_size = 16,
>>> -		.cache_level = 1,
>>> -		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> -				CRAT_CACHE_FLAGS_DATA_CACHE |
>>> -				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> -		.num_cu_shared = 1,
>>> -	},
>>> -	{
>>> -		/* Scalar L1 Instruction Cache per SQC */
>>> -		.cache_size = 32,
>>> -		.cache_level = 1,
>>> -		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> -				CRAT_CACHE_FLAGS_INST_CACHE |
>>> -				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> -		.num_cu_shared = 2,
>>> -	},
>>> -	{
>>> -		/* Scalar L1 Data Cache per SQC */
>>> -		.cache_size = 16,
>>> -		.cache_level = 1,
>>> -		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> -				CRAT_CACHE_FLAGS_DATA_CACHE |
>>> -				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> -		.num_cu_shared = 2,
>>> -	},
>>> -	{
>>> -		/* GL1 Data Cache per SA */
>>> -		.cache_size = 128,
>>> -		.cache_level = 1,
>>> -		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> -				CRAT_CACHE_FLAGS_DATA_CACHE |
>>> -				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> -		.num_cu_shared = 8,
>>> -	},
>>> -	{
>>> -		/* L2 Data Cache per GPU (Total Tex Cache) */
>>> -		.cache_size = 1024,
>>> -		.cache_level = 2,
>>> -		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> -				CRAT_CACHE_FLAGS_DATA_CACHE |
>>> -				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> -		.num_cu_shared = 8,
>>> -	},
>>> -};
>>> -
>>> -static struct kfd_gpu_cache_info navi14_cache_info[] = {
>>> -	{
>>> -		/* TCP L1 Cache per CU */
>>> -		.cache_size = 16,
>>> -		.cache_level = 1,
>>> -		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> -				CRAT_CACHE_FLAGS_DATA_CACHE |
>>> -				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> -		.num_cu_shared = 1,
>>> -	},
>>> -	{
>>> -		/* Scalar L1 Instruction Cache per SQC */
>>> -		.cache_size = 32,
>>> -		.cache_level = 1,
>>> -		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> -				CRAT_CACHE_FLAGS_INST_CACHE |
>>> -				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> -		.num_cu_shared = 2,
>>> -	},
>>> -	{
>>> -		/* Scalar L1 Data Cache per SQC */
>>> -		.cache_size = 16,
>>> -		.cache_level = 1,
>>> -		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> -				CRAT_CACHE_FLAGS_DATA_CACHE |
>>> -				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> -		.num_cu_shared = 2,
>>> -	},
>>> -	{
>>> -		/* GL1 Data Cache per SA */
>>> -		.cache_size = 128,
>>> -		.cache_level = 1,
>>> -		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> -				CRAT_CACHE_FLAGS_DATA_CACHE |
>>> -				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> -		.num_cu_shared = 12,
>>> -	},
>>> -	{
>>> -		/* L2 Data Cache per GPU (Total Tex Cache) */
>>> -		.cache_size = 2048,
>>> -		.cache_level = 2,
>>> -		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> -				CRAT_CACHE_FLAGS_DATA_CACHE |
>>> -				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> -		.num_cu_shared = 12,
>>> -	},
>>> -};
>>> -
>>> -static struct kfd_gpu_cache_info sienna_cichlid_cache_info[] = {
>>> -	{
>>> -		/* TCP L1 Cache per CU */
>>> -		.cache_size = 16,
>>> -		.cache_level = 1,
>>> -		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> -				CRAT_CACHE_FLAGS_DATA_CACHE |
>>> -				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> -		.num_cu_shared = 1,
>>> -	},
>>> -	{
>>> -		/* Scalar L1 Instruction Cache per SQC */
>>> -		.cache_size = 32,
>>> -		.cache_level = 1,
>>> -		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> -				CRAT_CACHE_FLAGS_INST_CACHE |
>>> -				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> -		.num_cu_shared = 2,
>>> -	},
>>> -	{
>>> -		/* Scalar L1 Data Cache per SQC */
>>> -		.cache_size = 16,
>>> -		.cache_level = 1,
>>> -		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> -				CRAT_CACHE_FLAGS_DATA_CACHE |
>>> -				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> -		.num_cu_shared = 2,
>>> -	},
>>> -	{
>>> -		/* GL1 Data Cache per SA */
>>> -		.cache_size = 128,
>>> -		.cache_level = 1,
>>> -		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> -				CRAT_CACHE_FLAGS_DATA_CACHE |
>>> -				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> -		.num_cu_shared = 10,
>>> -	},
>>> -	{
>>> -		/* L2 Data Cache per GPU (Total Tex Cache) */
>>> -		.cache_size = 4096,
>>> -		.cache_level = 2,
>>> -		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> -				CRAT_CACHE_FLAGS_DATA_CACHE |
>>> -				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> -		.num_cu_shared = 10,
>>> -	},
>>> -	{
>>> -		/* L3 Data Cache per GPU */
>>> -		.cache_size = 128*1024,
>>> -		.cache_level = 3,
>>> -		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> -				CRAT_CACHE_FLAGS_DATA_CACHE |
>>> -				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> -		.num_cu_shared = 10,
>>> -	},
>>> -};
>>> -
>>> -static struct kfd_gpu_cache_info navy_flounder_cache_info[] = {
>>> -	{
>>> -		/* TCP L1 Cache per CU */
>>> -		.cache_size = 16,
>>> -		.cache_level = 1,
>>> -		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> -				CRAT_CACHE_FLAGS_DATA_CACHE |
>>> -				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> -		.num_cu_shared = 1,
>>> -	},
>>> -	{
>>> -		/* Scalar L1 Instruction Cache per SQC */
>>> -		.cache_size = 32,
>>> -		.cache_level = 1,
>>> -		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> -				CRAT_CACHE_FLAGS_INST_CACHE |
>>> -				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> -		.num_cu_shared = 2,
>>> -	},
>>> -	{
>>> -		/* Scalar L1 Data Cache per SQC */
>>> -		.cache_size = 16,
>>> -		.cache_level = 1,
>>> -		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> -				CRAT_CACHE_FLAGS_DATA_CACHE |
>>> -				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> -		.num_cu_shared = 2,
>>> -	},
>>> -	{
>>> -		/* GL1 Data Cache per SA */
>>> -		.cache_size = 128,
>>> -		.cache_level = 1,
>>> -		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> -				CRAT_CACHE_FLAGS_DATA_CACHE |
>>> -				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> -		.num_cu_shared = 10,
>>> -	},
>>> -	{
>>> -		/* L2 Data Cache per GPU (Total Tex Cache) */
>>> -		.cache_size = 3072,
>>> -		.cache_level = 2,
>>> -		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> -				CRAT_CACHE_FLAGS_DATA_CACHE |
>>> -				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> -		.num_cu_shared = 10,
>>> -	},
>>> -	{
>>> -		/* L3 Data Cache per GPU */
>>> -		.cache_size = 96*1024,
>>> -		.cache_level = 3,
>>> -		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> -				CRAT_CACHE_FLAGS_DATA_CACHE |
>>> -				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> -		.num_cu_shared = 10,
>>> -	},
>>> -};
>>> -
>>> -static struct kfd_gpu_cache_info dimgrey_cavefish_cache_info[] = {
>>> -	{
>>> -		/* TCP L1 Cache per CU */
>>> -		.cache_size = 16,
>>> -		.cache_level = 1,
>>> -		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> -				CRAT_CACHE_FLAGS_DATA_CACHE |
>>> -				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> -		.num_cu_shared = 1,
>>> -	},
>>> -	{
>>> -		/* Scalar L1 Instruction Cache per SQC */
>>> -		.cache_size = 32,
>>> -		.cache_level = 1,
>>> -		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> -				CRAT_CACHE_FLAGS_INST_CACHE |
>>> -				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> -		.num_cu_shared = 2,
>>> -	},
>>> -	{
>>> -		/* Scalar L1 Data Cache per SQC */
>>> -		.cache_size = 16,
>>> -		.cache_level = 1,
>>> -		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> -				CRAT_CACHE_FLAGS_DATA_CACHE |
>>> -				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> -		.num_cu_shared = 2,
>>> -	},
>>> -	{
>>> -		/* GL1 Data Cache per SA */
>>> -		.cache_size = 128,
>>> -		.cache_level = 1,
>>> -		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> -				CRAT_CACHE_FLAGS_DATA_CACHE |
>>> -				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> -		.num_cu_shared = 8,
>>> -	},
>>> -	{
>>> -		/* L2 Data Cache per GPU (Total Tex Cache) */
>>> -		.cache_size = 2048,
>>> -		.cache_level = 2,
>>> -		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> -				CRAT_CACHE_FLAGS_DATA_CACHE |
>>> -				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> -		.num_cu_shared = 8,
>>> -	},
>>> -	{
>>> -		/* L3 Data Cache per GPU */
>>> -		.cache_size = 32*1024,
>>> -		.cache_level = 3,
>>> -		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> -				CRAT_CACHE_FLAGS_DATA_CACHE |
>>> -				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> -		.num_cu_shared = 8,
>>> -	},
>>> -};
>>> -
>>> -static struct kfd_gpu_cache_info beige_goby_cache_info[] = {
>>> -	{
>>> -		/* TCP L1 Cache per CU */
>>> -		.cache_size = 16,
>>> -		.cache_level = 1,
>>> -		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> -				CRAT_CACHE_FLAGS_DATA_CACHE |
>>> -				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> -		.num_cu_shared = 1,
>>> -	},
>>> -	{
>>> -		/* Scalar L1 Instruction Cache per SQC */
>>> -		.cache_size = 32,
>>> -		.cache_level = 1,
>>> -		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> -				CRAT_CACHE_FLAGS_INST_CACHE |
>>> -				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> -		.num_cu_shared = 2,
>>> -	},
>>> -	{
>>> -		/* Scalar L1 Data Cache per SQC */
>>> -		.cache_size = 16,
>>> -		.cache_level = 1,
>>> -		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> -				CRAT_CACHE_FLAGS_DATA_CACHE |
>>> -				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> -		.num_cu_shared = 2,
>>> -	},
>>> -	{
>>> -		/* GL1 Data Cache per SA */
>>> -		.cache_size = 128,
>>> -		.cache_level = 1,
>>> -		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> -				CRAT_CACHE_FLAGS_DATA_CACHE |
>>> -				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> -		.num_cu_shared = 8,
>>> -	},
>>> -	{
>>> -		/* L2 Data Cache per GPU (Total Tex Cache) */
>>> -		.cache_size = 1024,
>>> -		.cache_level = 2,
>>> -		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> -				CRAT_CACHE_FLAGS_DATA_CACHE |
>>> -				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> -		.num_cu_shared = 8,
>>> -	},
>>> -	{
>>> -		/* L3 Data Cache per GPU */
>>> -		.cache_size = 16*1024,
>>> -		.cache_level = 3,
>>> -		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> -				CRAT_CACHE_FLAGS_DATA_CACHE |
>>> -				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> -		.num_cu_shared = 8,
>>> -	},
>>> -};
>>> -
>>> -static struct kfd_gpu_cache_info yellow_carp_cache_info[] = {
>>> -	{
>>> -		/* TCP L1 Cache per CU */
>>> -		.cache_size = 16,
>>> -		.cache_level = 1,
>>> -		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> -				CRAT_CACHE_FLAGS_DATA_CACHE |
>>> -				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> -		.num_cu_shared = 1,
>>> -	},
>>> -	{
>>> -		/* Scalar L1 Instruction Cache per SQC */
>>> -		.cache_size = 32,
>>> -		.cache_level = 1,
>>> -		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> -				CRAT_CACHE_FLAGS_INST_CACHE |
>>> -				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> -		.num_cu_shared = 2,
>>> -	},
>>> -	{
>>> -		/* Scalar L1 Data Cache per SQC */
>>> -		.cache_size = 16,
>>> -		.cache_level = 1,
>>> -		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> -				CRAT_CACHE_FLAGS_DATA_CACHE |
>>> -				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> -		.num_cu_shared = 2,
>>> -	},
>>> -	{
>>> -		/* GL1 Data Cache per SA */
>>> -		.cache_size = 128,
>>> -		.cache_level = 1,
>>> -		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> -				CRAT_CACHE_FLAGS_DATA_CACHE |
>>> -				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> -		.num_cu_shared = 6,
>>> -	},
>>> -	{
>>> -		/* L2 Data Cache per GPU (Total Tex Cache) */
>>> -		.cache_size = 2048,
>>> -		.cache_level = 2,
>>> -		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> -				CRAT_CACHE_FLAGS_DATA_CACHE |
>>> -				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> -		.num_cu_shared = 6,
>>> -	},
>>> -};
>>> -
>>> -static struct kfd_gpu_cache_info gc_10_3_6_cache_info[] = {
>>> -       {
>>> -               /* TCP L1 Cache per CU */
>>> -               .cache_size = 16,
>>> -               .cache_level = 1,
>>> -               .flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> -                               CRAT_CACHE_FLAGS_DATA_CACHE |
>>> -                               CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> -               .num_cu_shared = 1,
>>> -       },
>>> -       {
>>> -               /* Scalar L1 Instruction Cache per SQC */
>>> -               .cache_size = 32,
>>> -               .cache_level = 1,
>>> -               .flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> -                               CRAT_CACHE_FLAGS_INST_CACHE |
>>> -                               CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> -               .num_cu_shared = 2,
>>> -       },
>>> -       {
>>> -               /* Scalar L1 Data Cache per SQC */
>>> -               .cache_size = 16,
>>> -               .cache_level = 1,
>>> -               .flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> -                               CRAT_CACHE_FLAGS_DATA_CACHE |
>>> -                               CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> -               .num_cu_shared = 2,
>>> -       },
>>> -       {
>>> -               /* GL1 Data Cache per SA */
>>> -               .cache_size = 128,
>>> -               .cache_level = 1,
>>> -               .flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> -                               CRAT_CACHE_FLAGS_DATA_CACHE |
>>> -                               CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> -               .num_cu_shared = 2,
>>> -       },
>>> -       {
>>> -               /* L2 Data Cache per GPU (Total Tex Cache) */
>>> -               .cache_size = 256,
>>> -               .cache_level = 2,
>>> -               .flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> -                               CRAT_CACHE_FLAGS_DATA_CACHE |
>>> -                               CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> -               .num_cu_shared = 2,
>>> -       },
>>> -};
>>> -
>>>    static void kfd_populated_cu_info_cpu(struct kfd_topology_device *dev,
>>>    		struct crat_subtype_computeunit *cu)
>>>    {
>>> @@ -1223,419 +430,6 @@ int kfd_parse_crat_table(void *crat_image, struct list_head *device_list,
>>>    	return ret;
>>>    }
>>>    
>>> -/* Helper function. See kfd_fill_gpu_cache_info for parameter description */
>>> -static int fill_in_l1_pcache(struct crat_subtype_cache *pcache,
>>> -				struct kfd_gpu_cache_info *pcache_info,
>>> -				struct kfd_cu_info *cu_info,
>>> -				int mem_available,
>>> -				int cu_bitmask,
>>> -				int cache_type, unsigned int cu_processor_id,
>>> -				int cu_block)
>>> -{
>>> -	unsigned int cu_sibling_map_mask;
>>> -	int first_active_cu;
>>> -
>>> -	/* First check if enough memory is available */
>>> -	if (sizeof(struct crat_subtype_cache) > mem_available)
>>> -		return -ENOMEM;
>>> -
>>> -	cu_sibling_map_mask = cu_bitmask;
>>> -	cu_sibling_map_mask >>= cu_block;
>>> -	cu_sibling_map_mask &=
>>> -		((1 << pcache_info[cache_type].num_cu_shared) - 1);
>>> -	first_active_cu = ffs(cu_sibling_map_mask);
>>> -
>>> -	/* CU could be inactive. In case of shared cache find the first active
>>> -	 * CU. and incase of non-shared cache check if the CU is inactive. If
>>> -	 * inactive active skip it
>>> -	 */
>>> -	if (first_active_cu) {
>>> -		memset(pcache, 0, sizeof(struct crat_subtype_cache));
>>> -		pcache->type = CRAT_SUBTYPE_CACHE_AFFINITY;
>>> -		pcache->length = sizeof(struct crat_subtype_cache);
>>> -		pcache->flags = pcache_info[cache_type].flags;
>>> -		pcache->processor_id_low = cu_processor_id
>>> -					 + (first_active_cu - 1);
>>> -		pcache->cache_level = pcache_info[cache_type].cache_level;
>>> -		pcache->cache_size = pcache_info[cache_type].cache_size;
>>> -
>>> -		/* Sibling map is w.r.t processor_id_low, so shift out
>>> -		 * inactive CU
>>> -		 */
>>> -		cu_sibling_map_mask =
>>> -			cu_sibling_map_mask >> (first_active_cu - 1);
>>> -
>>> -		pcache->sibling_map[0] = (uint8_t)(cu_sibling_map_mask & 0xFF);
>>> -		pcache->sibling_map[1] =
>>> -				(uint8_t)((cu_sibling_map_mask >> 8) & 0xFF);
>>> -		pcache->sibling_map[2] =
>>> -				(uint8_t)((cu_sibling_map_mask >> 16) & 0xFF);
>>> -		pcache->sibling_map[3] =
>>> -				(uint8_t)((cu_sibling_map_mask >> 24) & 0xFF);
>>> -		return 0;
>>> -	}
>>> -	return 1;
>>> -}
>>> -
>>> -/* Helper function. See kfd_fill_gpu_cache_info for parameter description */
>>> -static int fill_in_l2_l3_pcache(struct crat_subtype_cache *pcache,
>>> -				struct kfd_gpu_cache_info *pcache_info,
>>> -				struct kfd_cu_info *cu_info,
>>> -				int mem_available,
>>> -				int cache_type, unsigned int cu_processor_id)
>>> -{
>>> -	unsigned int cu_sibling_map_mask;
>>> -	int first_active_cu;
>>> -	int i, j, k;
>>> -
>>> -	/* First check if enough memory is available */
>>> -	if (sizeof(struct crat_subtype_cache) > mem_available)
>>> -		return -ENOMEM;
>>> -
>>> -	cu_sibling_map_mask = cu_info->cu_bitmap[0][0];
>>> -	cu_sibling_map_mask &=
>>> -		((1 << pcache_info[cache_type].num_cu_shared) - 1);
>>> -	first_active_cu = ffs(cu_sibling_map_mask);
>>> -
>>> -	/* CU could be inactive. In case of shared cache find the first active
>>> -	 * CU. and incase of non-shared cache check if the CU is inactive. If
>>> -	 * inactive active skip it
>>> -	 */
>>> -	if (first_active_cu) {
>>> -		memset(pcache, 0, sizeof(struct crat_subtype_cache));
>>> -		pcache->type = CRAT_SUBTYPE_CACHE_AFFINITY;
>>> -		pcache->length = sizeof(struct crat_subtype_cache);
>>> -		pcache->flags = pcache_info[cache_type].flags;
>>> -		pcache->processor_id_low = cu_processor_id
>>> -					 + (first_active_cu - 1);
>>> -		pcache->cache_level = pcache_info[cache_type].cache_level;
>>> -		pcache->cache_size = pcache_info[cache_type].cache_size;
>>> -
>>> -		/* Sibling map is w.r.t processor_id_low, so shift out
>>> -		 * inactive CU
>>> -		 */
>>> -		cu_sibling_map_mask =
>>> -			cu_sibling_map_mask >> (first_active_cu - 1);
>>> -		k = 0;
>>> -		for (i = 0; i < cu_info->num_shader_engines; i++) {
>>> -			for (j = 0; j < cu_info->num_shader_arrays_per_engine;
>>> -				j++) {
>>> -				pcache->sibling_map[k] =
>>> -				 (uint8_t)(cu_sibling_map_mask & 0xFF);
>>> -				pcache->sibling_map[k+1] =
>>> -				 (uint8_t)((cu_sibling_map_mask >> 8) & 0xFF);
>>> -				pcache->sibling_map[k+2] =
>>> -				 (uint8_t)((cu_sibling_map_mask >> 16) & 0xFF);
>>> -				pcache->sibling_map[k+3] =
>>> -				 (uint8_t)((cu_sibling_map_mask >> 24) & 0xFF);
>>> -				k += 4;
>>> -				cu_sibling_map_mask =
>>> -					cu_info->cu_bitmap[i % 4][j + i / 4];
>>> -				cu_sibling_map_mask &= (
>>> -				 (1 << pcache_info[cache_type].num_cu_shared)
>>> -				 - 1);
>>> -			}
>>> -		}
>>> -		return 0;
>>> -	}
>>> -	return 1;
>>> -}
>>> -
>>> -#define KFD_MAX_CACHE_TYPES 6
>>> -
>>> -static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev,
>>> -						   struct kfd_gpu_cache_info *pcache_info)
>>> -{
>>> -	struct amdgpu_device *adev = kdev->adev;
>>> -	int i = 0;
>>> -
>>> -	/* TCP L1 Cache per CU */
>>> -	if (adev->gfx.config.gc_tcp_l1_size) {
>>> -		pcache_info[i].cache_size = adev->gfx.config.gc_tcp_l1_size;
>>> -		pcache_info[i].cache_level = 1;
>>> -		pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> -					CRAT_CACHE_FLAGS_DATA_CACHE |
>>> -					CRAT_CACHE_FLAGS_SIMD_CACHE);
>>> -		pcache_info[0].num_cu_shared = adev->gfx.config.gc_num_tcp_per_wpg / 2;
>>> -		i++;
>>> -	}
>>> -	/* Scalar L1 Instruction Cache per SQC */
>>> -	if (adev->gfx.config.gc_l1_instruction_cache_size_per_sqc) {
>>> -		pcache_info[i].cache_size =
>>> -			adev->gfx.config.gc_l1_instruction_cache_size_per_sqc;
>>> -		pcache_info[i].cache_level = 1;
>>> -		pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> -					CRAT_CACHE_FLAGS_INST_CACHE |
>>> -					CRAT_CACHE_FLAGS_SIMD_CACHE);
>>> -		pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_sqc_per_wgp * 2;
>>> -		i++;
>>> -	}
>>> -	/* Scalar L1 Data Cache per SQC */
>>> -	if (adev->gfx.config.gc_l1_data_cache_size_per_sqc) {
>>> -		pcache_info[i].cache_size = adev->gfx.config.gc_l1_data_cache_size_per_sqc;
>>> -		pcache_info[i].cache_level = 1;
>>> -		pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> -					CRAT_CACHE_FLAGS_DATA_CACHE |
>>> -					CRAT_CACHE_FLAGS_SIMD_CACHE);
>>> -		pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_sqc_per_wgp * 2;
>>> -		i++;
>>> -	}
>>> -	/* GL1 Data Cache per SA */
>>> -	if (adev->gfx.config.gc_gl1c_per_sa &&
>>> -	    adev->gfx.config.gc_gl1c_size_per_instance) {
>>> -		pcache_info[i].cache_size = adev->gfx.config.gc_gl1c_per_sa *
>>> -			adev->gfx.config.gc_gl1c_size_per_instance;
>>> -		pcache_info[i].cache_level = 1;
>>> -		pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> -					CRAT_CACHE_FLAGS_DATA_CACHE |
>>> -					CRAT_CACHE_FLAGS_SIMD_CACHE);
>>> -		pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh;
>>> -		i++;
>>> -	}
>>> -	/* L2 Data Cache per GPU (Total Tex Cache) */
>>> -	if (adev->gfx.config.gc_gl2c_per_gpu) {
>>> -		pcache_info[i].cache_size = adev->gfx.config.gc_gl2c_per_gpu;
>>> -		pcache_info[i].cache_level = 2;
>>> -		pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> -					CRAT_CACHE_FLAGS_DATA_CACHE |
>>> -					CRAT_CACHE_FLAGS_SIMD_CACHE);
>>> -		pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh;
>>> -		i++;
>>> -	}
>>> -	/* L3 Data Cache per GPU */
>>> -	if (adev->gmc.mall_size) {
>>> -		pcache_info[i].cache_size = adev->gmc.mall_size / 1024;
>>> -		pcache_info[i].cache_level = 3;
>>> -		pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> -					CRAT_CACHE_FLAGS_DATA_CACHE |
>>> -					CRAT_CACHE_FLAGS_SIMD_CACHE);
>>> -		pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh;
>>> -		i++;
>>> -	}
>>> -	return i;
>>> -}
>>> -
>>> -/* kfd_fill_gpu_cache_info - Fill GPU cache info using kfd_gpu_cache_info
>>> - * tables
>>> - *
>>> - *	@kdev - [IN] GPU device
>>> - *	@gpu_processor_id - [IN] GPU processor ID to which these caches
>>> - *			    associate
>>> - *	@available_size - [IN] Amount of memory available in pcache
>>> - *	@cu_info - [IN] Compute Unit info obtained from KGD
>>> - *	@pcache - [OUT] memory into which cache data is to be filled in.
>>> - *	@size_filled - [OUT] amount of data used up in pcache.
>>> - *	@num_of_entries - [OUT] number of caches added
>>> - */
>>> -static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev,
>>> -			int gpu_processor_id,
>>> -			int available_size,
>>> -			struct kfd_cu_info *cu_info,
>>> -			struct crat_subtype_cache *pcache,
>>> -			int *size_filled,
>>> -			int *num_of_entries)
>>> -{
>>> -	struct kfd_gpu_cache_info *pcache_info;
>>> -	struct kfd_gpu_cache_info cache_info[KFD_MAX_CACHE_TYPES];
>>> -	int num_of_cache_types = 0;
>>> -	int i, j, k;
>>> -	int ct = 0;
>>> -	int mem_available = available_size;
>>> -	unsigned int cu_processor_id;
>>> -	int ret;
>>> -	unsigned int num_cu_shared;
>>> -
>>> -	switch (kdev->adev->asic_type) {
>>> -	case CHIP_KAVERI:
>>> -		pcache_info = kaveri_cache_info;
>>> -		num_of_cache_types = ARRAY_SIZE(kaveri_cache_info);
>>> -		break;
>>> -	case CHIP_HAWAII:
>>> -		pcache_info = hawaii_cache_info;
>>> -		num_of_cache_types = ARRAY_SIZE(hawaii_cache_info);
>>> -		break;
>>> -	case CHIP_CARRIZO:
>>> -		pcache_info = carrizo_cache_info;
>>> -		num_of_cache_types = ARRAY_SIZE(carrizo_cache_info);
>>> -		break;
>>> -	case CHIP_TONGA:
>>> -		pcache_info = tonga_cache_info;
>>> -		num_of_cache_types = ARRAY_SIZE(tonga_cache_info);
>>> -		break;
>>> -	case CHIP_FIJI:
>>> -		pcache_info = fiji_cache_info;
>>> -		num_of_cache_types = ARRAY_SIZE(fiji_cache_info);
>>> -		break;
>>> -	case CHIP_POLARIS10:
>>> -		pcache_info = polaris10_cache_info;
>>> -		num_of_cache_types = ARRAY_SIZE(polaris10_cache_info);
>>> -		break;
>>> -	case CHIP_POLARIS11:
>>> -		pcache_info = polaris11_cache_info;
>>> -		num_of_cache_types = ARRAY_SIZE(polaris11_cache_info);
>>> -		break;
>>> -	case CHIP_POLARIS12:
>>> -		pcache_info = polaris12_cache_info;
>>> -		num_of_cache_types = ARRAY_SIZE(polaris12_cache_info);
>>> -		break;
>>> -	case CHIP_VEGAM:
>>> -		pcache_info = vegam_cache_info;
>>> -		num_of_cache_types = ARRAY_SIZE(vegam_cache_info);
>>> -		break;
>>> -	default:
>>> -		switch (KFD_GC_VERSION(kdev)) {
>>> -		case IP_VERSION(9, 0, 1):
>>> -			pcache_info = vega10_cache_info;
>>> -			num_of_cache_types = ARRAY_SIZE(vega10_cache_info);
>>> -			break;
>>> -		case IP_VERSION(9, 2, 1):
>>> -			pcache_info = vega12_cache_info;
>>> -			num_of_cache_types = ARRAY_SIZE(vega12_cache_info);
>>> -			break;
>>> -		case IP_VERSION(9, 4, 0):
>>> -		case IP_VERSION(9, 4, 1):
>>> -			pcache_info = vega20_cache_info;
>>> -			num_of_cache_types = ARRAY_SIZE(vega20_cache_info);
>>> -			break;
>>> -		case IP_VERSION(9, 4, 2):
>>> -			pcache_info = aldebaran_cache_info;
>>> -			num_of_cache_types = ARRAY_SIZE(aldebaran_cache_info);
>>> -			break;
>>> -		case IP_VERSION(9, 1, 0):
>>> -		case IP_VERSION(9, 2, 2):
>>> -			pcache_info = raven_cache_info;
>>> -			num_of_cache_types = ARRAY_SIZE(raven_cache_info);
>>> -			break;
>>> -		case IP_VERSION(9, 3, 0):
>>> -			pcache_info = renoir_cache_info;
>>> -			num_of_cache_types = ARRAY_SIZE(renoir_cache_info);
>>> -			break;
>>> -		case IP_VERSION(10, 1, 10):
>>> -		case IP_VERSION(10, 1, 2):
>>> -		case IP_VERSION(10, 1, 3):
>>> -		case IP_VERSION(10, 1, 4):
>>> -			pcache_info = navi10_cache_info;
>>> -			num_of_cache_types = ARRAY_SIZE(navi10_cache_info);
>>> -			break;
>>> -		case IP_VERSION(10, 1, 1):
>>> -			pcache_info = navi14_cache_info;
>>> -			num_of_cache_types = ARRAY_SIZE(navi14_cache_info);
>>> -			break;
>>> -		case IP_VERSION(10, 3, 0):
>>> -			pcache_info = sienna_cichlid_cache_info;
>>> -			num_of_cache_types = ARRAY_SIZE(sienna_cichlid_cache_info);
>>> -			break;
>>> -		case IP_VERSION(10, 3, 2):
>>> -			pcache_info = navy_flounder_cache_info;
>>> -			num_of_cache_types = ARRAY_SIZE(navy_flounder_cache_info);
>>> -			break;
>>> -		case IP_VERSION(10, 3, 4):
>>> -			pcache_info = dimgrey_cavefish_cache_info;
>>> -			num_of_cache_types = ARRAY_SIZE(dimgrey_cavefish_cache_info);
>>> -			break;
>>> -		case IP_VERSION(10, 3, 1):
>>> -			pcache_info = vangogh_cache_info;
>>> -			num_of_cache_types = ARRAY_SIZE(vangogh_cache_info);
>>> -			break;
>>> -		case IP_VERSION(10, 3, 5):
>>> -			pcache_info = beige_goby_cache_info;
>>> -			num_of_cache_types = ARRAY_SIZE(beige_goby_cache_info);
>>> -			break;
>>> -		case IP_VERSION(10, 3, 3):
>>> -		case IP_VERSION(10, 3, 7): /* TODO: Double check these on production silicon */
>>> -			pcache_info = yellow_carp_cache_info;
>>> -			num_of_cache_types = ARRAY_SIZE(yellow_carp_cache_info);
>>> -			break;
>>> -		case IP_VERSION(10, 3, 6):
>>> -			pcache_info = gc_10_3_6_cache_info;
>>> -			num_of_cache_types = ARRAY_SIZE(gc_10_3_6_cache_info);
>>> -			break;
>>> -		case IP_VERSION(11, 0, 0):
>>> -		case IP_VERSION(11, 0, 1):
>>> -		case IP_VERSION(11, 0, 2):
>>> -		case IP_VERSION(11, 0, 3):
>>> -			pcache_info = cache_info;
>>> -			num_of_cache_types =
>>> -				kfd_fill_gpu_cache_info_from_gfx_config(kdev, pcache_info);
>>> -			break;
>>> -		default:
>>> -			return -EINVAL;
>>> -		}
>>> -	}
>>> -
>>> -	*size_filled = 0;
>>> -	*num_of_entries = 0;
>>> -
>>> -	/* For each type of cache listed in the kfd_gpu_cache_info table,
>>> -	 * go through all available Compute Units.
>>> -	 * The [i,j,k] loop will
>>> -	 *		if kfd_gpu_cache_info.num_cu_shared = 1
>>> -	 *			will parse through all available CU
>>> -	 *		If (kfd_gpu_cache_info.num_cu_shared != 1)
>>> -	 *			then it will consider only one CU from
>>> -	 *			the shared unit
>>> -	 */
>>> -
>>> -	for (ct = 0; ct < num_of_cache_types; ct++) {
>>> -	  cu_processor_id = gpu_processor_id;
>>> -	  if (pcache_info[ct].cache_level == 1) {
>>> -	    for (i = 0; i < cu_info->num_shader_engines; i++) {
>>> -	      for (j = 0; j < cu_info->num_shader_arrays_per_engine; j++) {
>>> -	        for (k = 0; k < cu_info->num_cu_per_sh;
>>> -		  k += pcache_info[ct].num_cu_shared) {
>>> -		  ret = fill_in_l1_pcache(pcache,
>>> -					pcache_info,
>>> -					cu_info,
>>> -					mem_available,
>>> -					cu_info->cu_bitmap[i % 4][j + i / 4],
>>> -					ct,
>>> -					cu_processor_id,
>>> -					k);
>>> -
>>> -		  if (ret < 0)
>>> -			break;
>>> -
>>> -		  if (!ret) {
>>> -				pcache++;
>>> -				(*num_of_entries)++;
>>> -				mem_available -= sizeof(*pcache);
>>> -				(*size_filled) += sizeof(*pcache);
>>> -		  }
>>> -
>>> -		  /* Move to next CU block */
>>> -		  num_cu_shared = ((k + pcache_info[ct].num_cu_shared) <=
>>> -					cu_info->num_cu_per_sh) ?
>>> -					pcache_info[ct].num_cu_shared :
>>> -					(cu_info->num_cu_per_sh - k);
>>> -		  cu_processor_id += num_cu_shared;
>>> -		}
>>> -	      }
>>> -	    }
>>> -	  } else {
>>> -			ret = fill_in_l2_l3_pcache(pcache,
>>> -				pcache_info,
>>> -				cu_info,
>>> -				mem_available,
>>> -				ct,
>>> -				cu_processor_id);
>>> -
>>> -			if (ret < 0)
>>> -				break;
>>> -
>>> -			if (!ret) {
>>> -				pcache++;
>>> -				(*num_of_entries)++;
>>> -				mem_available -= sizeof(*pcache);
>>> -				(*size_filled) += sizeof(*pcache);
>>> -			}
>>> -	  }
>>> -	}
>>> -
>>> -	pr_debug("Added [%d] GPU cache entries\n", *num_of_entries);
>>> -
>>> -	return 0;
>>> -}
>>> -
>>>    static bool kfd_ignore_crat(void)
>>>    {
>>>    	bool ret;
>>> @@ -2203,8 +997,6 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
>>>    	struct crat_subtype_computeunit *cu;
>>>    	struct kfd_cu_info cu_info;
>>>    	int avail_size = *size;
>>> -	int num_of_cache_entries = 0;
>>> -	int cache_mem_filled = 0;
>>>    	uint32_t nid = 0;
>>>    	int ret = 0;
>>>    
>>> @@ -2304,31 +1096,12 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
>>>    	crat_table->length += sizeof(struct crat_subtype_memory);
>>>    	crat_table->total_entries++;
>>>    
>>> -	/* TODO: Fill in cache information. This information is NOT readily
>>> -	 * available in KGD
>>> -	 */
>>> -	sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
>>> -		sub_type_hdr->length);
>>> -	ret = kfd_fill_gpu_cache_info(kdev, cu->processor_id_low,
>>> -				avail_size,
>>> -				&cu_info,
>>> -				(struct crat_subtype_cache *)sub_type_hdr,
>>> -				&cache_mem_filled,
>>> -				&num_of_cache_entries);
>>> -
>>> -	if (ret < 0)
>>> -		return ret;
>>> -
>>> -	crat_table->length += cache_mem_filled;
>>> -	crat_table->total_entries += num_of_cache_entries;
>>> -	avail_size -= cache_mem_filled;
>>> -
>>>    	/* Fill in Subtype: IO_LINKS
>>>    	 *  Only direct links are added here which is Link from GPU to
>>>    	 *  its NUMA node. Indirect links are added by userspace.
>>>    	 */
>>>    	sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
>>> -		cache_mem_filled);
>>> +		sub_type_hdr->length);
>>>    	ret = kfd_fill_gpu_direct_io_link_to_cpu(&avail_size, kdev,
>>>    		(struct crat_subtype_iolink *)sub_type_hdr, proximity_domain);
>>>    
>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
>>> index e0680d265a66..97e88c35be01 100644
>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
>>> @@ -50,6 +50,747 @@ static struct kfd_system_properties sys_props;
>>>    static DECLARE_RWSEM(topology_lock);
>>>    static uint32_t topology_crat_proximity_domain;
>>>    
>>> +/* Static table to describe GPU Cache information */
>>> +struct kfd_gpu_cache_info {
>>> +	uint32_t	cache_size;
>>> +	uint32_t	cache_level;
>>> +	uint32_t	flags;
>>> +	/* Indicates how many Compute Units share this cache
>>> +	 * within a SA. Value = 1 indicates the cache is not shared
>>> +	 */
>>> +	uint32_t	num_cu_shared;
>>> +};
>>> +
>>> +static struct kfd_gpu_cache_info kaveri_cache_info[] = {
>>> +	{
>>> +		/* TCP L1 Cache per CU */
>>> +		.cache_size = 16,
>>> +		.cache_level = 1,
>>> +		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> +				CRAT_CACHE_FLAGS_DATA_CACHE |
>>> +				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> +		.num_cu_shared = 1,
>>> +	},
>>> +	{
>>> +		/* Scalar L1 Instruction Cache (in SQC module) per bank */
>>> +		.cache_size = 16,
>>> +		.cache_level = 1,
>>> +		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> +				CRAT_CACHE_FLAGS_INST_CACHE |
>>> +				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> +		.num_cu_shared = 2,
>>> +	},
>>> +	{
>>> +		/* Scalar L1 Data Cache (in SQC module) per bank */
>>> +		.cache_size = 8,
>>> +		.cache_level = 1,
>>> +		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> +				CRAT_CACHE_FLAGS_DATA_CACHE |
>>> +				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> +		.num_cu_shared = 2,
>>> +	},
>>> +
>>> +	/* TODO: Add L2 Cache information */
>>> +};
>>> +
>>> +static struct kfd_gpu_cache_info carrizo_cache_info[] = {
>>> +	{
>>> +		/* TCP L1 Cache per CU */
>>> +		.cache_size = 16,
>>> +		.cache_level = 1,
>>> +		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> +				CRAT_CACHE_FLAGS_DATA_CACHE |
>>> +				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> +		.num_cu_shared = 1,
>>> +	},
>>> +	{
>>> +		/* Scalar L1 Instruction Cache (in SQC module) per bank */
>>> +		.cache_size = 8,
>>> +		.cache_level = 1,
>>> +		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> +				CRAT_CACHE_FLAGS_INST_CACHE |
>>> +				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> +		.num_cu_shared = 4,
>>> +	},
>>> +	{
>>> +		/* Scalar L1 Data Cache (in SQC module) per bank. */
>>> +		.cache_size = 4,
>>> +		.cache_level = 1,
>>> +		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> +				CRAT_CACHE_FLAGS_DATA_CACHE |
>>> +				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> +		.num_cu_shared = 4,
>>> +	},
>>> +
>>> +	/* TODO: Add L2 Cache information */
>>> +};
>>> +
>>> +#define hawaii_cache_info kaveri_cache_info
>>> +#define tonga_cache_info carrizo_cache_info
>>> +#define fiji_cache_info  carrizo_cache_info
>>> +#define polaris10_cache_info carrizo_cache_info
>>> +#define polaris11_cache_info carrizo_cache_info
>>> +#define polaris12_cache_info carrizo_cache_info
>>> +#define vegam_cache_info carrizo_cache_info
>>> +
>>> +/* NOTE: L1 cache information has been updated and L2/L3
>>> + * cache information has been added for Vega10 and
>>> + * newer ASICs. The unit for cache_size is KiB.
>>> + * In future,  check & update cache details
>>> + * for every new ASIC is required.
>>> + */
>>> +static struct kfd_gpu_cache_info vega10_cache_info[] = {
>>> +	{
>>> +		/* TCP L1 Cache per CU */
>>> +		.cache_size = 16,
>>> +		.cache_level = 1,
>>> +		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> +				CRAT_CACHE_FLAGS_DATA_CACHE |
>>> +				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> +		.num_cu_shared = 1,
>>> +	},
>>> +	{
>>> +		/* Scalar L1 Instruction Cache per SQC */
>>> +		.cache_size = 32,
>>> +		.cache_level = 1,
>>> +		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> +				CRAT_CACHE_FLAGS_INST_CACHE |
>>> +				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> +		.num_cu_shared = 3,
>>> +	},
>>> +	{
>>> +		/* Scalar L1 Data Cache per SQC */
>>> +		.cache_size = 16,
>>> +		.cache_level = 1,
>>> +		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> +				CRAT_CACHE_FLAGS_DATA_CACHE |
>>> +				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> +		.num_cu_shared = 3,
>>> +	},
>>> +	{
>>> +		/* L2 Data Cache per GPU (Total Tex Cache) */
>>> +		.cache_size = 4096,
>>> +		.cache_level = 2,
>>> +		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> +				CRAT_CACHE_FLAGS_DATA_CACHE |
>>> +				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> +		.num_cu_shared = 16,
>>> +	},
>>> +};
>>> +static struct kfd_gpu_cache_info raven_cache_info[] = {
>>> +	{
>>> +		/* TCP L1 Cache per CU */
>>> +		.cache_size = 16,
>>> +		.cache_level = 1,
>>> +		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> +				CRAT_CACHE_FLAGS_DATA_CACHE |
>>> +				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> +		.num_cu_shared = 1,
>>> +	},
>>> +	{
>>> +		/* Scalar L1 Instruction Cache per SQC */
>>> +		.cache_size = 32,
>>> +		.cache_level = 1,
>>> +		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> +				CRAT_CACHE_FLAGS_INST_CACHE |
>>> +				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> +		.num_cu_shared = 3,
>>> +	},
>>> +	{
>>> +		/* Scalar L1 Data Cache per SQC */
>>> +		.cache_size = 16,
>>> +		.cache_level = 1,
>>> +		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> +				CRAT_CACHE_FLAGS_DATA_CACHE |
>>> +				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> +		.num_cu_shared = 3,
>>> +	},
>>> +	{
>>> +		/* L2 Data Cache per GPU (Total Tex Cache) */
>>> +		.cache_size = 1024,
>>> +		.cache_level = 2,
>>> +		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> +				CRAT_CACHE_FLAGS_DATA_CACHE |
>>> +				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> +		.num_cu_shared = 11,
>>> +	},
>>> +};
>>> +
>>> +static struct kfd_gpu_cache_info renoir_cache_info[] = {
>>> +	{
>>> +		/* TCP L1 Cache per CU */
>>> +		.cache_size = 16,
>>> +		.cache_level = 1,
>>> +		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> +				CRAT_CACHE_FLAGS_DATA_CACHE |
>>> +				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> +		.num_cu_shared = 1,
>>> +	},
>>> +	{
>>> +		/* Scalar L1 Instruction Cache per SQC */
>>> +		.cache_size = 32,
>>> +		.cache_level = 1,
>>> +		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> +				CRAT_CACHE_FLAGS_INST_CACHE |
>>> +				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> +		.num_cu_shared = 3,
>>> +	},
>>> +	{
>>> +		/* Scalar L1 Data Cache per SQC */
>>> +		.cache_size = 16,
>>> +		.cache_level = 1,
>>> +		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> +				CRAT_CACHE_FLAGS_DATA_CACHE |
>>> +				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> +		.num_cu_shared = 3,
>>> +	},
>>> +	{
>>> +		/* L2 Data Cache per GPU (Total Tex Cache) */
>>> +		.cache_size = 1024,
>>> +		.cache_level = 2,
>>> +		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> +				CRAT_CACHE_FLAGS_DATA_CACHE |
>>> +				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> +		.num_cu_shared = 8,
>>> +	},
>>> +};
>>> +
>>> +static struct kfd_gpu_cache_info vega12_cache_info[] = {
>>> +	{
>>> +		/* TCP L1 Cache per CU */
>>> +		.cache_size = 16,
>>> +		.cache_level = 1,
>>> +		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> +				CRAT_CACHE_FLAGS_DATA_CACHE |
>>> +				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> +		.num_cu_shared = 1,
>>> +	},
>>> +	{
>>> +		/* Scalar L1 Instruction Cache per SQC */
>>> +		.cache_size = 32,
>>> +		.cache_level = 1,
>>> +		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> +				CRAT_CACHE_FLAGS_INST_CACHE |
>>> +				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> +		.num_cu_shared = 3,
>>> +	},
>>> +	{
>>> +		/* Scalar L1 Data Cache per SQC */
>>> +		.cache_size = 16,
>>> +		.cache_level = 1,
>>> +		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> +				CRAT_CACHE_FLAGS_DATA_CACHE |
>>> +				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> +		.num_cu_shared = 3,
>>> +	},
>>> +	{
>>> +		/* L2 Data Cache per GPU (Total Tex Cache) */
>>> +		.cache_size = 2048,
>>> +		.cache_level = 2,
>>> +		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> +				CRAT_CACHE_FLAGS_DATA_CACHE |
>>> +				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> +		.num_cu_shared = 5,
>>> +	},
>>> +};
>>> +
>>> +static struct kfd_gpu_cache_info vega20_cache_info[] = {
>>> +	{
>>> +		/* TCP L1 Cache per CU */
>>> +		.cache_size = 16,
>>> +		.cache_level = 1,
>>> +		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> +				CRAT_CACHE_FLAGS_DATA_CACHE |
>>> +				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> +		.num_cu_shared = 1,
>>> +	},
>>> +	{
>>> +		/* Scalar L1 Instruction Cache per SQC */
>>> +		.cache_size = 32,
>>> +		.cache_level = 1,
>>> +		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> +				CRAT_CACHE_FLAGS_INST_CACHE |
>>> +				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> +		.num_cu_shared = 3,
>>> +	},
>>> +	{
>>> +		/* Scalar L1 Data Cache per SQC */
>>> +		.cache_size = 16,
>>> +		.cache_level = 1,
>>> +		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> +				CRAT_CACHE_FLAGS_DATA_CACHE |
>>> +				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> +		.num_cu_shared = 3,
>>> +	},
>>> +	{
>>> +		/* L2 Data Cache per GPU (Total Tex Cache) */
>>> +		.cache_size = 8192,
>>> +		.cache_level = 2,
>>> +		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> +				CRAT_CACHE_FLAGS_DATA_CACHE |
>>> +				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> +		.num_cu_shared = 16,
>>> +	},
>>> +};
>>> +
>>> +static struct kfd_gpu_cache_info aldebaran_cache_info[] = {
>>> +	{
>>> +		/* TCP L1 Cache per CU */
>>> +		.cache_size = 16,
>>> +		.cache_level = 1,
>>> +		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> +				CRAT_CACHE_FLAGS_DATA_CACHE |
>>> +				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> +		.num_cu_shared = 1,
>>> +	},
>>> +	{
>>> +		/* Scalar L1 Instruction Cache per SQC */
>>> +		.cache_size = 32,
>>> +		.cache_level = 1,
>>> +		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> +				CRAT_CACHE_FLAGS_INST_CACHE |
>>> +				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> +		.num_cu_shared = 2,
>>> +	},
>>> +	{
>>> +		/* Scalar L1 Data Cache per SQC */
>>> +		.cache_size = 16,
>>> +		.cache_level = 1,
>>> +		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> +				CRAT_CACHE_FLAGS_DATA_CACHE |
>>> +				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> +		.num_cu_shared = 2,
>>> +	},
>>> +	{
>>> +		/* L2 Data Cache per GPU (Total Tex Cache) */
>>> +		.cache_size = 8192,
>>> +		.cache_level = 2,
>>> +		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> +				CRAT_CACHE_FLAGS_DATA_CACHE |
>>> +				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> +		.num_cu_shared = 14,
>>> +	},
>>> +};
>>> +
>>> +static struct kfd_gpu_cache_info navi10_cache_info[] = {
>>> +	{
>>> +		/* TCP L1 Cache per CU */
>>> +		.cache_size = 16,
>>> +		.cache_level = 1,
>>> +		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> +				CRAT_CACHE_FLAGS_DATA_CACHE |
>>> +				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> +		.num_cu_shared = 1,
>>> +	},
>>> +	{
>>> +		/* Scalar L1 Instruction Cache per SQC */
>>> +		.cache_size = 32,
>>> +		.cache_level = 1,
>>> +		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> +				CRAT_CACHE_FLAGS_INST_CACHE |
>>> +				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> +		.num_cu_shared = 2,
>>> +	},
>>> +	{
>>> +		/* Scalar L1 Data Cache per SQC */
>>> +		.cache_size = 16,
>>> +		.cache_level = 1,
>>> +		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> +				CRAT_CACHE_FLAGS_DATA_CACHE |
>>> +				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> +		.num_cu_shared = 2,
>>> +	},
>>> +	{
>>> +		/* GL1 Data Cache per SA */
>>> +		.cache_size = 128,
>>> +		.cache_level = 1,
>>> +		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> +				CRAT_CACHE_FLAGS_DATA_CACHE |
>>> +				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> +		.num_cu_shared = 10,
>>> +	},
>>> +	{
>>> +		/* L2 Data Cache per GPU (Total Tex Cache) */
>>> +		.cache_size = 4096,
>>> +		.cache_level = 2,
>>> +		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> +				CRAT_CACHE_FLAGS_DATA_CACHE |
>>> +				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> +		.num_cu_shared = 10,
>>> +	},
>>> +};
>>> +
>>> +static struct kfd_gpu_cache_info vangogh_cache_info[] = {
>>> +	{
>>> +		/* TCP L1 Cache per CU */
>>> +		.cache_size = 16,
>>> +		.cache_level = 1,
>>> +		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> +				CRAT_CACHE_FLAGS_DATA_CACHE |
>>> +				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> +		.num_cu_shared = 1,
>>> +	},
>>> +	{
>>> +		/* Scalar L1 Instruction Cache per SQC */
>>> +		.cache_size = 32,
>>> +		.cache_level = 1,
>>> +		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> +				CRAT_CACHE_FLAGS_INST_CACHE |
>>> +				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> +		.num_cu_shared = 2,
>>> +	},
>>> +	{
>>> +		/* Scalar L1 Data Cache per SQC */
>>> +		.cache_size = 16,
>>> +		.cache_level = 1,
>>> +		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> +				CRAT_CACHE_FLAGS_DATA_CACHE |
>>> +				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> +		.num_cu_shared = 2,
>>> +	},
>>> +	{
>>> +		/* GL1 Data Cache per SA */
>>> +		.cache_size = 128,
>>> +		.cache_level = 1,
>>> +		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> +				CRAT_CACHE_FLAGS_DATA_CACHE |
>>> +				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> +		.num_cu_shared = 8,
>>> +	},
>>> +	{
>>> +		/* L2 Data Cache per GPU (Total Tex Cache) */
>>> +		.cache_size = 1024,
>>> +		.cache_level = 2,
>>> +		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> +				CRAT_CACHE_FLAGS_DATA_CACHE |
>>> +				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> +		.num_cu_shared = 8,
>>> +	},
>>> +};
>>> +
>>> +static struct kfd_gpu_cache_info navi14_cache_info[] = {
>>> +	{
>>> +		/* TCP L1 Cache per CU */
>>> +		.cache_size = 16,
>>> +		.cache_level = 1,
>>> +		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> +				CRAT_CACHE_FLAGS_DATA_CACHE |
>>> +				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> +		.num_cu_shared = 1,
>>> +	},
>>> +	{
>>> +		/* Scalar L1 Instruction Cache per SQC */
>>> +		.cache_size = 32,
>>> +		.cache_level = 1,
>>> +		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> +				CRAT_CACHE_FLAGS_INST_CACHE |
>>> +				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> +		.num_cu_shared = 2,
>>> +	},
>>> +	{
>>> +		/* Scalar L1 Data Cache per SQC */
>>> +		.cache_size = 16,
>>> +		.cache_level = 1,
>>> +		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> +				CRAT_CACHE_FLAGS_DATA_CACHE |
>>> +				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> +		.num_cu_shared = 2,
>>> +	},
>>> +	{
>>> +		/* GL1 Data Cache per SA */
>>> +		.cache_size = 128,
>>> +		.cache_level = 1,
>>> +		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> +				CRAT_CACHE_FLAGS_DATA_CACHE |
>>> +				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> +		.num_cu_shared = 12,
>>> +	},
>>> +	{
>>> +		/* L2 Data Cache per GPU (Total Tex Cache) */
>>> +		.cache_size = 2048,
>>> +		.cache_level = 2,
>>> +		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> +				CRAT_CACHE_FLAGS_DATA_CACHE |
>>> +				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> +		.num_cu_shared = 12,
>>> +	},
>>> +};
>>> +
>>> +static struct kfd_gpu_cache_info sienna_cichlid_cache_info[] = {
>>> +	{
>>> +		/* TCP L1 Cache per CU */
>>> +		.cache_size = 16,
>>> +		.cache_level = 1,
>>> +		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> +				CRAT_CACHE_FLAGS_DATA_CACHE |
>>> +				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> +		.num_cu_shared = 1,
>>> +	},
>>> +	{
>>> +		/* Scalar L1 Instruction Cache per SQC */
>>> +		.cache_size = 32,
>>> +		.cache_level = 1,
>>> +		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> +				CRAT_CACHE_FLAGS_INST_CACHE |
>>> +				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> +		.num_cu_shared = 2,
>>> +	},
>>> +	{
>>> +		/* Scalar L1 Data Cache per SQC */
>>> +		.cache_size = 16,
>>> +		.cache_level = 1,
>>> +		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> +				CRAT_CACHE_FLAGS_DATA_CACHE |
>>> +				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> +		.num_cu_shared = 2,
>>> +	},
>>> +	{
>>> +		/* GL1 Data Cache per SA */
>>> +		.cache_size = 128,
>>> +		.cache_level = 1,
>>> +		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> +				CRAT_CACHE_FLAGS_DATA_CACHE |
>>> +				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> +		.num_cu_shared = 10,
>>> +	},
>>> +	{
>>> +		/* L2 Data Cache per GPU (Total Tex Cache) */
>>> +		.cache_size = 4096,
>>> +		.cache_level = 2,
>>> +		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> +				CRAT_CACHE_FLAGS_DATA_CACHE |
>>> +				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> +		.num_cu_shared = 10,
>>> +	},
>>> +	{
>>> +		/* L3 Data Cache per GPU */
>>> +		.cache_size = 128*1024,
>>> +		.cache_level = 3,
>>> +		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> +				CRAT_CACHE_FLAGS_DATA_CACHE |
>>> +				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> +		.num_cu_shared = 10,
>>> +	},
>>> +};
>>> +
>>> +static struct kfd_gpu_cache_info navy_flounder_cache_info[] = {
>>> +	{
>>> +		/* TCP L1 Cache per CU */
>>> +		.cache_size = 16,
>>> +		.cache_level = 1,
>>> +		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> +				CRAT_CACHE_FLAGS_DATA_CACHE |
>>> +				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> +		.num_cu_shared = 1,
>>> +	},
>>> +	{
>>> +		/* Scalar L1 Instruction Cache per SQC */
>>> +		.cache_size = 32,
>>> +		.cache_level = 1,
>>> +		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> +				CRAT_CACHE_FLAGS_INST_CACHE |
>>> +				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> +		.num_cu_shared = 2,
>>> +	},
>>> +	{
>>> +		/* Scalar L1 Data Cache per SQC */
>>> +		.cache_size = 16,
>>> +		.cache_level = 1,
>>> +		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> +				CRAT_CACHE_FLAGS_DATA_CACHE |
>>> +				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> +		.num_cu_shared = 2,
>>> +	},
>>> +	{
>>> +		/* GL1 Data Cache per SA */
>>> +		.cache_size = 128,
>>> +		.cache_level = 1,
>>> +		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> +				CRAT_CACHE_FLAGS_DATA_CACHE |
>>> +				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> +		.num_cu_shared = 10,
>>> +	},
>>> +	{
>>> +		/* L2 Data Cache per GPU (Total Tex Cache) */
>>> +		.cache_size = 3072,
>>> +		.cache_level = 2,
>>> +		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> +				CRAT_CACHE_FLAGS_DATA_CACHE |
>>> +				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> +		.num_cu_shared = 10,
>>> +	},
>>> +	{
>>> +		/* L3 Data Cache per GPU */
>>> +		.cache_size = 96*1024,
>>> +		.cache_level = 3,
>>> +		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> +				CRAT_CACHE_FLAGS_DATA_CACHE |
>>> +				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> +		.num_cu_shared = 10,
>>> +	},
>>> +};
>>> +
>>> +static struct kfd_gpu_cache_info dimgrey_cavefish_cache_info[] = {
>>> +	{
>>> +		/* TCP L1 Cache per CU */
>>> +		.cache_size = 16,
>>> +		.cache_level = 1,
>>> +		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> +				CRAT_CACHE_FLAGS_DATA_CACHE |
>>> +				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> +		.num_cu_shared = 1,
>>> +	},
>>> +	{
>>> +		/* Scalar L1 Instruction Cache per SQC */
>>> +		.cache_size = 32,
>>> +		.cache_level = 1,
>>> +		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> +				CRAT_CACHE_FLAGS_INST_CACHE |
>>> +				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> +		.num_cu_shared = 2,
>>> +	},
>>> +	{
>>> +		/* Scalar L1 Data Cache per SQC */
>>> +		.cache_size = 16,
>>> +		.cache_level = 1,
>>> +		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> +				CRAT_CACHE_FLAGS_DATA_CACHE |
>>> +				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> +		.num_cu_shared = 2,
>>> +	},
>>> +	{
>>> +		/* GL1 Data Cache per SA */
>>> +		.cache_size = 128,
>>> +		.cache_level = 1,
>>> +		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> +				CRAT_CACHE_FLAGS_DATA_CACHE |
>>> +				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> +		.num_cu_shared = 8,
>>> +	},
>>> +	{
>>> +		/* L2 Data Cache per GPU (Total Tex Cache) */
>>> +		.cache_size = 2048,
>>> +		.cache_level = 2,
>>> +		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> +				CRAT_CACHE_FLAGS_DATA_CACHE |
>>> +				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> +		.num_cu_shared = 8,
>>> +	},
>>> +	{
>>> +		/* L3 Data Cache per GPU */
>>> +		.cache_size = 32*1024,
>>> +		.cache_level = 3,
>>> +		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> +				CRAT_CACHE_FLAGS_DATA_CACHE |
>>> +				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> +		.num_cu_shared = 8,
>>> +	},
>>> +};
>>> +
>>> +static struct kfd_gpu_cache_info beige_goby_cache_info[] = {
>>> +	{
>>> +		/* TCP L1 Cache per CU */
>>> +		.cache_size = 16,
>>> +		.cache_level = 1,
>>> +		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> +				CRAT_CACHE_FLAGS_DATA_CACHE |
>>> +				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> +		.num_cu_shared = 1,
>>> +	},
>>> +	{
>>> +		/* Scalar L1 Instruction Cache per SQC */
>>> +		.cache_size = 32,
>>> +		.cache_level = 1,
>>> +		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> +				CRAT_CACHE_FLAGS_INST_CACHE |
>>> +				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> +		.num_cu_shared = 2,
>>> +	},
>>> +	{
>>> +		/* Scalar L1 Data Cache per SQC */
>>> +		.cache_size = 16,
>>> +		.cache_level = 1,
>>> +		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> +				CRAT_CACHE_FLAGS_DATA_CACHE |
>>> +				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> +		.num_cu_shared = 2,
>>> +	},
>>> +	{
>>> +		/* GL1 Data Cache per SA */
>>> +		.cache_size = 128,
>>> +		.cache_level = 1,
>>> +		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> +				CRAT_CACHE_FLAGS_DATA_CACHE |
>>> +				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> +		.num_cu_shared = 8,
>>> +	},
>>> +	{
>>> +		/* L2 Data Cache per GPU (Total Tex Cache) */
>>> +		.cache_size = 1024,
>>> +		.cache_level = 2,
>>> +		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> +				CRAT_CACHE_FLAGS_DATA_CACHE |
>>> +				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> +		.num_cu_shared = 8,
>>> +	},
>>> +	{
>>> +		/* L3 Data Cache per GPU */
>>> +		.cache_size = 16*1024,
>>> +		.cache_level = 3,
>>> +		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> +				CRAT_CACHE_FLAGS_DATA_CACHE |
>>> +				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> +		.num_cu_shared = 8,
>>> +	},
>>> +};
>>> +static struct kfd_gpu_cache_info yellow_carp_cache_info[] = {
>>> +	{
>>> +		/* TCP L1 Cache per CU */
>>> +		.cache_size = 16,
>>> +		.cache_level = 1,
>>> +		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> +				CRAT_CACHE_FLAGS_DATA_CACHE |
>>> +				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> +		.num_cu_shared = 1,
>>> +	},
>>> +	{
>>> +		/* Scalar L1 Instruction Cache per SQC */
>>> +		.cache_size = 32,
>>> +		.cache_level = 1,
>>> +		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> +				CRAT_CACHE_FLAGS_INST_CACHE |
>>> +				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> +		.num_cu_shared = 2,
>>> +	},
>>> +	{
>>> +		/* Scalar L1 Data Cache per SQC */
>>> +		.cache_size = 16,
>>> +		.cache_level = 1,
>>> +		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> +				CRAT_CACHE_FLAGS_DATA_CACHE |
>>> +				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> +		.num_cu_shared = 2,
>>> +	},
>>> +	{
>>> +		/* GL1 Data Cache per SA */
>>> +		.cache_size = 128,
>>> +		.cache_level = 1,
>>> +		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> +				CRAT_CACHE_FLAGS_DATA_CACHE |
>>> +				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> +		.num_cu_shared = 6,
>>> +	},
>>> +	{
>>> +		/* L2 Data Cache per GPU (Total Tex Cache) */
>>> +		.cache_size = 2048,
>>> +		.cache_level = 2,
>>> +		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> +				CRAT_CACHE_FLAGS_DATA_CACHE |
>>> +				CRAT_CACHE_FLAGS_SIMD_CACHE),
>>> +		.num_cu_shared = 6,
>>> +	},
>>> +};
>>> +
>>>    struct kfd_topology_device *kfd_topology_device_by_proximity_domain_no_lock(
>>>    						uint32_t proximity_domain)
>>>    {
>>> @@ -149,6 +890,7 @@ static void kfd_release_topology_device(struct kfd_topology_device *dev)
>>>    {
>>>    	struct kfd_mem_properties *mem;
>>>    	struct kfd_cache_properties *cache;
>>> +	struct kfd_cache_properties_ext *cache_ext;
>>>    	struct kfd_iolink_properties *iolink;
>>>    	struct kfd_iolink_properties *p2plink;
>>>    #ifdef HAVE_AMD_IOMMU_PC_SUPPORTED
>>> @@ -171,6 +913,13 @@ static void kfd_release_topology_device(struct kfd_topology_device *dev)
>>>    		kfree(cache);
>>>    	}
>>>    
>>> +	while (dev->cache_props_ext.next != &dev->cache_props_ext) {
>>> +		cache_ext = container_of(dev->cache_props_ext.next,
>>> +				struct kfd_cache_properties_ext, list);
>>> +		list_del(&cache_ext->list);
>>> +		kfree(cache_ext);
>>> +	}
>>> +
>>>    	while (dev->io_link_props.next != &dev->io_link_props) {
>>>    		iolink = container_of(dev->io_link_props.next,
>>>    				struct kfd_iolink_properties, list);
>>> @@ -227,6 +976,7 @@ struct kfd_topology_device *kfd_create_topology_device(
>>>    
>>>    	INIT_LIST_HEAD(&dev->mem_props);
>>>    	INIT_LIST_HEAD(&dev->cache_props);
>>> +	INIT_LIST_HEAD(&dev->cache_props_ext);
>>>    	INIT_LIST_HEAD(&dev->io_link_props);
>>>    	INIT_LIST_HEAD(&dev->p2p_link_props);
>>>    #ifdef HAVE_AMD_IOMMU_PC_SUPPORTED
>>> @@ -387,7 +1137,6 @@ static ssize_t kfd_cache_show(struct kobject *kobj, struct attribute *attr,
>>>    
>>>    	/* Making sure that the buffer is an empty string */
>>>    	buffer[0] = 0;
>>> -
>>>    	cache = container_of(attr, struct kfd_cache_properties, attr);
>>>    	if (cache->gpu && kfd_devcgroup_check_permission(cache->gpu))
>>>    		return -EPERM;
>>> @@ -423,6 +1172,50 @@ static struct kobj_type cache_type = {
>>>    	.sysfs_ops = &cache_ops,
>>>    };
>>>    
>>> +static ssize_t kfd_cache_ext_show(struct kobject *kobj, struct attribute *attr,
>>> +		char *buffer)
>>> +{
>>> +	int offs = 0;
>>> +	uint32_t i, j;
>>> +	struct kfd_cache_properties_ext *cache;
>>> +
>>> +	/* Making sure that the buffer is an empty string */
>>> +	buffer[0] = 0;
>>> +	cache = container_of(attr, struct kfd_cache_properties_ext, attr);
>>> +	if (cache->gpu && kfd_devcgroup_check_permission(cache->gpu))
>>> +		return -EPERM;
>>> +	sysfs_show_32bit_prop(buffer, offs, "processor_id_low",
>>> +			cache->processor_id_low);
>>> +	sysfs_show_32bit_prop(buffer, offs, "level", cache->cache_level);
>>> +	sysfs_show_32bit_prop(buffer, offs, "size", cache->cache_size);
>>> +	sysfs_show_32bit_prop(buffer, offs, "cache_line_size",
>>> +			      cache->cacheline_size);
>>> +	sysfs_show_32bit_prop(buffer, offs, "cache_lines_per_tag",
>>> +			      cache->cachelines_per_tag);
>>> +	sysfs_show_32bit_prop(buffer, offs, "association", cache->cache_assoc);
>>> +	sysfs_show_32bit_prop(buffer, offs, "latency", cache->cache_latency);
>>> +	sysfs_show_32bit_prop(buffer, offs, "type", cache->cache_type);
>>> +	offs += snprintf(buffer+offs, PAGE_SIZE-offs, "sibling_map ");
>>> +	for (i = 0; i < CRAT_SIBLINGMAP_SIZE; i++)
>>> +		for (j = 0; j < sizeof(cache->sibling_map[0])*8; j++)
>>> +			/* Check each bit */
>>> +			offs += snprintf(buffer+offs, PAGE_SIZE-offs, "%d,",
>>> +					 (cache->sibling_map[i] >> j) & 1);
>>> +
>>> +	/* Replace the last "," with end of line */
>>> +	buffer[offs-1] = '\n';
>>> +	return offs;
>>> +}
>>> +
>>> +static const struct sysfs_ops cache_ext_ops = {
>>> +	.show = kfd_cache_ext_show,
>>> +};
>>> +
>>> +static struct kobj_type cache_ext_type = {
>>> +	.release = kfd_topology_kobj_release,
>>> +	.sysfs_ops = &cache_ext_ops,
>>> +};
>>> +
>>>    #ifdef HAVE_AMD_IOMMU_PC_SUPPORTED
>>>    /****** Sysfs of Performance Counters ******/
>>>    
>>> @@ -610,6 +1403,7 @@ static void kfd_remove_sysfs_node_entry(struct kfd_topology_device *dev)
>>>    	struct kfd_iolink_properties *p2plink;
>>>    	struct kfd_iolink_properties *iolink;
>>>    	struct kfd_cache_properties *cache;
>>> +	struct kfd_cache_properties_ext *cache_ext;
>>>    	struct kfd_mem_properties *mem;
>>>    #ifdef HAVE_AMD_IOMMU_PC_SUPPORTED
>>>    	struct kfd_perf_properties *perf;
>>> @@ -663,6 +1457,18 @@ static void kfd_remove_sysfs_node_entry(struct kfd_topology_device *dev)
>>>    		dev->kobj_cache = NULL;
>>>    	}
>>>    
>>> +	if (dev->kobj_cache_ext) {
>>> +		list_for_each_entry(cache_ext, &dev->cache_props_ext, list)
>>> +			if (cache_ext->kobj) {
>>> +				kfd_remove_sysfs_file(cache_ext->kobj,
>>> +							&cache_ext->attr);
>>> +				cache_ext->kobj = NULL;
>>> +			}
>>> +		kobject_del(dev->kobj_cache_ext);
>>> +		kobject_put(dev->kobj_cache_ext);
>>> +		dev->kobj_cache_ext = NULL;
>>> +	}
>>> +
>>>    	if (dev->kobj_mem) {
>>>    		list_for_each_entry(mem, &dev->mem_props, list)
>>>    			if (mem->kobj) {
>>> @@ -707,6 +1513,7 @@ static int kfd_build_sysfs_node_entry(struct kfd_topology_device *dev,
>>>    	struct kfd_iolink_properties *p2plink;
>>>    	struct kfd_iolink_properties *iolink;
>>>    	struct kfd_cache_properties *cache;
>>> +	struct kfd_cache_properties_ext *cache_ext;
>>>    	struct kfd_mem_properties *mem;
>>>    #ifdef HAVE_AMD_IOMMU_PC_SUPPORTED
>>>    	struct kfd_perf_properties *perf;
>>> @@ -741,6 +1548,10 @@ static int kfd_build_sysfs_node_entry(struct kfd_topology_device *dev,
>>>    	if (!dev->kobj_cache)
>>>    		return -ENOMEM;
>>>    
>>> +	dev->kobj_cache_ext = kobject_create_and_add("caches_ext", dev->kobj_node);
>>> +	if (!dev->kobj_cache_ext)
>>> +		return -ENOMEM;
>>> +
>>>    	dev->kobj_iolink = kobject_create_and_add("io_links", dev->kobj_node);
>>>    	if (!dev->kobj_iolink)
>>>    		return -ENOMEM;
>>> @@ -830,6 +1641,28 @@ static int kfd_build_sysfs_node_entry(struct kfd_topology_device *dev,
>>>    		i++;
>>>    	}
>>>    
>>> +	i = 0;
>>> +	list_for_each_entry(cache_ext, &dev->cache_props_ext, list) {
>>> +		cache_ext->kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL);
>>> +		if (!cache_ext->kobj)
>>> +			return -ENOMEM;
>>> +		ret = kobject_init_and_add(cache_ext->kobj, &cache_ext_type,
>>> +				dev->kobj_cache_ext, "%d", i);
>>> +		if (ret < 0) {
>>> +			kobject_put(cache_ext->kobj);
>>> +			return ret;
>>> +		}
>>> +
>>> +		cache_ext->attr.name = "properties";
>>> +		cache_ext->attr.mode = KFD_SYSFS_FILE_MODE;
>>> +		sysfs_attr_init(&cache_ext->attr);
>>> +		ret = sysfs_create_file(cache_ext->kobj, &cache_ext->attr);
>>> +		if (ret < 0)
>>> +			return ret;
>>> +		i++;
>>> +	}
>>> +
>>> +
>>>    	i = 0;
>>>    	list_for_each_entry(iolink, &dev->io_link_props, list) {
>>>    		iolink->kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL);
>>> @@ -1268,6 +2101,7 @@ static struct kfd_topology_device *kfd_assign_gpu(struct kfd_dev *gpu)
>>>    	struct kfd_topology_device *out_dev = NULL;
>>>    	struct kfd_mem_properties *mem;
>>>    	struct kfd_cache_properties *cache;
>>> +	struct kfd_cache_properties_ext *cache_ext;
>>>    	struct kfd_iolink_properties *iolink;
>>>    	struct kfd_iolink_properties *p2plink;
>>>    
>>> @@ -1288,6 +2122,8 @@ static struct kfd_topology_device *kfd_assign_gpu(struct kfd_dev *gpu)
>>>    				mem->gpu = dev->gpu;
>>>    			list_for_each_entry(cache, &dev->cache_props, list)
>>>    				cache->gpu = dev->gpu;
>>> +			list_for_each_entry(cache, &dev->cache_props_ext, list)
>>> +				cache_ext->gpu = dev->gpu;
>>>    			list_for_each_entry(iolink, &dev->io_link_props, list)
>>>    				iolink->gpu = dev->gpu;
>>>    			list_for_each_entry(p2plink, &dev->p2p_link_props, list)
>>> @@ -1721,6 +2557,397 @@ static void kfd_topology_set_capabilities(struct kfd_topology_device *dev)
>>>    		dev->node_props.capability |= HSA_CAP_TRAP_DEBUG_FIRMWARE_SUPPORTED;
>>>    }
>>>    
>>> +/* Helper function. See kfd_fill_gpu_cache_info for parameter description */
>>> +static int fill_in_l1_pcache(struct kfd_cache_properties_ext **props_ext,
>>> +				struct kfd_gpu_cache_info *pcache_info,
>>> +				struct kfd_cu_info *cu_info,
>>> +				int cu_bitmask,
>>> +				int cache_type, unsigned int cu_processor_id,
>>> +				int cu_block)
>>> +{
>>> +	unsigned int cu_sibling_map_mask;
>>> +	int first_active_cu;
>>> +	struct kfd_cache_properties_ext *pcache = NULL;
>>> +
>>> +	cu_sibling_map_mask = cu_bitmask;
>>> +	cu_sibling_map_mask >>= cu_block;
>>> +	cu_sibling_map_mask &= ((1 << pcache_info[cache_type].num_cu_shared) - 1);
>>> +	first_active_cu = ffs(cu_sibling_map_mask);
>>> +
>>> +	/* CU could be inactive. In case of shared cache find the first active
>>> +	 * CU. and incase of non-shared cache check if the CU is inactive. If
>>> +	 * inactive active skip it
>>> +	 */
>>> +	if (first_active_cu) {
>>> +		pcache = kfd_alloc_struct(pcache);
>>> +		if (!pcache)
>>> +			return -ENOMEM;
>>> +
>>> +		memset(pcache, 0, sizeof(struct kfd_cache_properties_ext));
>>> +		pcache->processor_id_low = cu_processor_id + (first_active_cu - 1);
>>> +		pcache->cache_level = pcache_info[cache_type].cache_level;
>>> +		pcache->cache_size = pcache_info[cache_type].cache_size;
>>> +
>>> +		if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_DATA_CACHE)
>>> +			pcache->cache_type |= HSA_CACHE_TYPE_DATA;
>>> +		if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_INST_CACHE)
>>> +			pcache->cache_type |= HSA_CACHE_TYPE_INSTRUCTION;
>>> +		if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_CPU_CACHE)
>>> +			pcache->cache_type |= HSA_CACHE_TYPE_CPU;
>>> +		if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_SIMD_CACHE)
>>> +			pcache->cache_type |= HSA_CACHE_TYPE_HSACU;
>>> +
>>> +		/* Sibling map is w.r.t processor_id_low, so shift out
>>> +		 * inactive CU
>>> +		 */
>>> +		cu_sibling_map_mask =
>>> +			cu_sibling_map_mask >> (first_active_cu - 1);
>>> +
>>> +		pcache->sibling_map[0] = (uint8_t)(cu_sibling_map_mask & 0xFF);
>>> +		pcache->sibling_map[1] =
>>> +				(uint8_t)((cu_sibling_map_mask >> 8) & 0xFF);
>>> +		pcache->sibling_map[2] =
>>> +				(uint8_t)((cu_sibling_map_mask >> 16) & 0xFF);
>>> +		pcache->sibling_map[3] =
>>> +				(uint8_t)((cu_sibling_map_mask >> 24) & 0xFF);
>>> +
>>> +		*props_ext = pcache;
>>> +
>>> +		return 0;
>>> +	}
>>> +	return 1;
>>> +}
>>> +
>>> +/* Helper function. See kfd_fill_gpu_cache_info for parameter description */
>>> +static int fill_in_l2_l3_pcache(struct kfd_cache_properties_ext **props_ext,
>>> +				struct kfd_gpu_cache_info *pcache_info,
>>> +				struct kfd_cu_info *cu_info,
>>> +				int cache_type, unsigned int cu_processor_id)
>>> +{
>>> +	unsigned int cu_sibling_map_mask;
>>> +	int first_active_cu;
>>> +	int i, j, k;
>>> +	struct kfd_cache_properties_ext *pcache = NULL;
>>> +
>>> +	cu_sibling_map_mask = cu_info->cu_bitmap[0][0];
>>> +	cu_sibling_map_mask &=
>>> +		((1 << pcache_info[cache_type].num_cu_shared) - 1);
>>> +	first_active_cu = ffs(cu_sibling_map_mask);
>>> +
>>> +	/* CU could be inactive. In case of shared cache find the first active
>>> +	 * CU. and incase of non-shared cache check if the CU is inactive. If
>>> +	 * inactive active skip it
>>> +	 */
>>> +	if (first_active_cu) {
>>> +		pcache = kfd_alloc_struct(pcache);
>>> +		if (!pcache)
>>> +			return -ENOMEM;
>>> +
>>> +		memset(pcache, 0, sizeof(struct kfd_cache_properties_ext));
>>> +		pcache->processor_id_low = cu_processor_id
>>> +					+ (first_active_cu - 1);
>>> +		pcache->cache_level = pcache_info[cache_type].cache_level;
>>> +		pcache->cache_size = pcache_info[cache_type].cache_size;
>>> +
>>> +		if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_DATA_CACHE)
>>> +			pcache->cache_type |= HSA_CACHE_TYPE_DATA;
>>> +		if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_INST_CACHE)
>>> +			pcache->cache_type |= HSA_CACHE_TYPE_INSTRUCTION;
>>> +		if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_CPU_CACHE)
>>> +			pcache->cache_type |= HSA_CACHE_TYPE_CPU;
>>> +		if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_SIMD_CACHE)
>>> +			pcache->cache_type |= HSA_CACHE_TYPE_HSACU;
>>> +
>>> +		/* Sibling map is w.r.t processor_id_low, so shift out
>>> +		 * inactive CU
>>> +		 */
>>> +		cu_sibling_map_mask = cu_sibling_map_mask >> (first_active_cu - 1);
>>> +		k = 0;
>>> +
>>> +		for (i = 0; i < cu_info->num_shader_engines; i++) {
>>> +			for (j = 0; j < cu_info->num_shader_arrays_per_engine; j++) {
>>> +				pcache->sibling_map[k] = (uint8_t)(cu_sibling_map_mask & 0xFF);
>>> +				pcache->sibling_map[k+1] = (uint8_t)((cu_sibling_map_mask >> 8) & 0xFF);
>>> +				pcache->sibling_map[k+2] = (uint8_t)((cu_sibling_map_mask >> 16) & 0xFF);
>>> +				pcache->sibling_map[k+3] = (uint8_t)((cu_sibling_map_mask >> 24) & 0xFF);
>>> +				k += 4;
>>> +
>>> +				cu_sibling_map_mask = cu_info->cu_bitmap[i % 4][j + i / 4];
>>> +				cu_sibling_map_mask &= ((1 << pcache_info[cache_type].num_cu_shared) - 1);
>>> +			}
>>> +		}
>>> +		*props_ext = pcache;
>>> +		return 0;
>>> +	}
>>> +	return 1;
>>> +}
>>> +
>>> +#define KFD_MAX_CACHE_TYPES 6
>>> +
>>> +static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev,
>>> +						   struct kfd_gpu_cache_info *pcache_info)
>>> +{
>>> +	struct amdgpu_device *adev = kdev->adev;
>>> +	int i = 0;
>>> +
>>> +	/* TCP L1 Cache per CU */
>>> +	if (adev->gfx.config.gc_tcp_l1_size) {
>>> +		pcache_info[i].cache_size = adev->gfx.config.gc_tcp_l1_size;
>>> +		pcache_info[i].cache_level = 1;
>>> +		pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> +					CRAT_CACHE_FLAGS_DATA_CACHE |
>>> +					CRAT_CACHE_FLAGS_SIMD_CACHE);
>>> +		pcache_info[0].num_cu_shared = adev->gfx.config.gc_num_tcp_per_wpg / 2;
>>> +		i++;
>>> +	}
>>> +	/* Scalar L1 Instruction Cache per SQC */
>>> +	if (adev->gfx.config.gc_l1_instruction_cache_size_per_sqc) {
>>> +		pcache_info[i].cache_size =
>>> +			adev->gfx.config.gc_l1_instruction_cache_size_per_sqc;
>>> +		pcache_info[i].cache_level = 1;
>>> +		pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> +					CRAT_CACHE_FLAGS_INST_CACHE |
>>> +					CRAT_CACHE_FLAGS_SIMD_CACHE);
>>> +		pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_sqc_per_wgp * 2;
>>> +		i++;
>>> +	}
>>> +	/* Scalar L1 Data Cache per SQC */
>>> +	if (adev->gfx.config.gc_l1_data_cache_size_per_sqc) {
>>> +		pcache_info[i].cache_size = adev->gfx.config.gc_l1_data_cache_size_per_sqc;
>>> +		pcache_info[i].cache_level = 1;
>>> +		pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> +					CRAT_CACHE_FLAGS_DATA_CACHE |
>>> +					CRAT_CACHE_FLAGS_SIMD_CACHE);
>>> +		pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_sqc_per_wgp * 2;
>>> +		i++;
>>> +	}
>>> +	/* GL1 Data Cache per SA */
>>> +	if (adev->gfx.config.gc_gl1c_per_sa &&
>>> +		adev->gfx.config.gc_gl1c_size_per_instance) {
>>> +		pcache_info[i].cache_size = adev->gfx.config.gc_gl1c_per_sa *
>>> +			adev->gfx.config.gc_gl1c_size_per_instance;
>>> +		pcache_info[i].cache_level = 1;
>>> +		pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> +					CRAT_CACHE_FLAGS_DATA_CACHE |
>>> +					CRAT_CACHE_FLAGS_SIMD_CACHE);
>>> +		pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh;
>>> +		i++;
>>> +	}
>>> +	/* L2 Data Cache per GPU (Total Tex Cache) */
>>> +	if (adev->gfx.config.gc_gl2c_per_gpu) {
>>> +		pcache_info[i].cache_size = adev->gfx.config.gc_gl2c_per_gpu;
>>> +		pcache_info[i].cache_level = 2;
>>> +		pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> +					CRAT_CACHE_FLAGS_DATA_CACHE |
>>> +					CRAT_CACHE_FLAGS_SIMD_CACHE);
>>> +		pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh;
>>> +		i++;
>>> +	}
>>> +	/* L3 Data Cache per GPU */
>>> +	if (adev->gmc.mall_size) {
>>> +		pcache_info[i].cache_size = adev->gmc.mall_size / 1024;
>>> +		pcache_info[i].cache_level = 3;
>>> +		pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
>>> +					CRAT_CACHE_FLAGS_DATA_CACHE |
>>> +					CRAT_CACHE_FLAGS_SIMD_CACHE);
>>> +		pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh;
>>> +		i++;
>>> +	}
>>> +	return i;
>>> +}
>>> +/* kfd_fill_cache_non_crat_info - Fill GPU cache info using kfd_gpu_cache_info
>>> + * tables
>>> + */
>>> +static int kfd_fill_cache_non_crat_info(struct kfd_topology_device *dev, struct kfd_dev *kdev)
>>> +{
>>> +	struct kfd_gpu_cache_info *pcache_info = NULL;
>>> +	struct kfd_gpu_cache_info cache_info[KFD_MAX_CACHE_TYPES];
>>> +	int num_of_cache_types = 0;
>>> +	int i, j, k;
>>> +	int ct = 0;
>>> +	unsigned int cu_processor_id;
>>> +	int ret;
>>> +	unsigned int num_cu_shared;
>>> +	struct kfd_cu_info cu_info;
>>> +	struct kfd_cu_info *pcu_info;
>>> +	int gpu_processor_id;
>>> +	struct kfd_cache_properties_ext *props_ext;
>>> +	int num_of_entries = 0;
>>> +
>>> +	amdgpu_amdkfd_get_cu_info(kdev->adev, &cu_info);
>>> +	pcu_info = &cu_info;
>>> +
>>> +	gpu_processor_id = kdev->processor_id_low;
>>> +
>>> +	switch (kdev->adev->asic_type) {
>>> +	case CHIP_KAVERI:
>>> +		pcache_info = kaveri_cache_info;
>>> +		num_of_cache_types = ARRAY_SIZE(kaveri_cache_info);
>>> +		break;
>>> +	case CHIP_HAWAII:
>>> +		pcache_info = hawaii_cache_info;
>>> +		num_of_cache_types = ARRAY_SIZE(hawaii_cache_info);
>>> +		break;
>>> +	case CHIP_CARRIZO:
>>> +		pcache_info = carrizo_cache_info;
>>> +		num_of_cache_types = ARRAY_SIZE(carrizo_cache_info);
>>> +		break;
>>> +	case CHIP_TONGA:
>>> +		pcache_info = tonga_cache_info;
>>> +		num_of_cache_types = ARRAY_SIZE(tonga_cache_info);
>>> +		break;
>>> +	case CHIP_FIJI:
>>> +		pcache_info = fiji_cache_info;
>>> +		num_of_cache_types = ARRAY_SIZE(fiji_cache_info);
>>> +		break;
>>> +	case CHIP_POLARIS10:
>>> +		pcache_info = polaris10_cache_info;
>>> +		num_of_cache_types = ARRAY_SIZE(polaris10_cache_info);
>>> +		break;
>>> +	case CHIP_POLARIS11:
>>> +		pcache_info = polaris11_cache_info;
>>> +		num_of_cache_types = ARRAY_SIZE(polaris11_cache_info);
>>> +		break;
>>> +	case CHIP_POLARIS12:
>>> +		pcache_info = polaris12_cache_info;
>>> +		num_of_cache_types = ARRAY_SIZE(polaris12_cache_info);
>>> +		break;
>>> +	case CHIP_VEGAM:
>>> +		pcache_info = vegam_cache_info;
>>> +		num_of_cache_types = ARRAY_SIZE(vegam_cache_info);
>>> +		break;
>>> +	default:
>>> +		switch (KFD_GC_VERSION(kdev)) {
>>> +		case IP_VERSION(9, 0, 1):
>>> +			pcache_info = vega10_cache_info;
>>> +			num_of_cache_types = ARRAY_SIZE(vega10_cache_info);
>>> +			break;
>>> +		case IP_VERSION(9, 2, 1):
>>> +			pcache_info = vega12_cache_info;
>>> +			num_of_cache_types = ARRAY_SIZE(vega12_cache_info);
>>> +			break;
>>> +		case IP_VERSION(9, 4, 0):
>>> +		case IP_VERSION(9, 4, 1):
>>> +			pcache_info = vega20_cache_info;
>>> +			num_of_cache_types = ARRAY_SIZE(vega20_cache_info);
>>> +			break;
>>> +		case IP_VERSION(9, 4, 2):
>>> +			pcache_info = aldebaran_cache_info;
>>> +			num_of_cache_types = ARRAY_SIZE(aldebaran_cache_info);
>>> +			break;
>>> +		case IP_VERSION(9, 1, 0):
>>> +		case IP_VERSION(9, 2, 2):
>>> +			pcache_info = raven_cache_info;
>>> +			num_of_cache_types = ARRAY_SIZE(raven_cache_info);
>>> +			break;
>>> +		case IP_VERSION(9, 3, 0):
>>> +			pcache_info = renoir_cache_info;
>>> +			num_of_cache_types = ARRAY_SIZE(renoir_cache_info);
>>> +			break;
>>> +		case IP_VERSION(10, 1, 10):
>>> +		case IP_VERSION(10, 1, 2):
>>> +		case IP_VERSION(10, 1, 3):
>>> +		case IP_VERSION(10, 1, 4):
>>> +			pcache_info = navi10_cache_info;
>>> +			num_of_cache_types = ARRAY_SIZE(navi10_cache_info);
>>> +			break;
>>> +		case IP_VERSION(10, 1, 1):
>>> +			pcache_info = navi14_cache_info;
>>> +			num_of_cache_types = ARRAY_SIZE(navi14_cache_info);
>>> +			break;
>>> +		case IP_VERSION(10, 3, 0):
>>> +			pcache_info = sienna_cichlid_cache_info;
>>> +			num_of_cache_types = ARRAY_SIZE(sienna_cichlid_cache_info);
>>> +			break;
>>> +		case IP_VERSION(10, 3, 2):
>>> +			pcache_info = navy_flounder_cache_info;
>>> +			num_of_cache_types = ARRAY_SIZE(navy_flounder_cache_info);
>>> +			break;
>>> +		case IP_VERSION(10, 3, 4):
>>> +			pcache_info = dimgrey_cavefish_cache_info;
>>> +			num_of_cache_types = ARRAY_SIZE(dimgrey_cavefish_cache_info);
>>> +			break;
>>> +		case IP_VERSION(10, 3, 1):
>>> +			pcache_info = vangogh_cache_info;
>>> +			num_of_cache_types = ARRAY_SIZE(vangogh_cache_info);
>>> +			break;
>>> +		case IP_VERSION(10, 3, 5):
>>> +			pcache_info = beige_goby_cache_info;
>>> +			num_of_cache_types = ARRAY_SIZE(beige_goby_cache_info);
>>> +			break;
>>> +		case IP_VERSION(10, 3, 3):
>>> +		case IP_VERSION(10, 3, 6): /* TODO: Double check these on production silicon */
>>> +		case IP_VERSION(10, 3, 7): /* TODO: Double check these on production silicon */
>>> +			pcache_info = yellow_carp_cache_info;
>>> +			num_of_cache_types = ARRAY_SIZE(yellow_carp_cache_info);
>>> +			break;
>>> +		case IP_VERSION(11, 0, 0):
>>> +		case IP_VERSION(11, 0, 1):
>>> +		case IP_VERSION(11, 0, 2):
>>> +		case IP_VERSION(11, 0, 3):
>>> +			pcache_info = cache_info;
>>> +			num_of_cache_types =
>>> +				kfd_fill_gpu_cache_info_from_gfx_config(kdev, pcache_info);
>>> +			break;
>>> +		default:
>>> +			return -EINVAL;
>>> +		}
>>> +	}
>>> +
>>> +	/* For each type of cache listed in the kfd_gpu_cache_info table,
>>> +	 * go through all available Compute Units.
>>> +	 * The [i,j,k] loop will
>>> +	 *		if kfd_gpu_cache_info.num_cu_shared = 1
>>> +	 *			will parse through all available CU
>>> +	 *		If (kfd_gpu_cache_info.num_cu_shared != 1)
>>> +	 *			then it will consider only one CU from
>>> +	 *			the shared unit
>>> +	 */
>>> +	for (ct = 0; ct < num_of_cache_types; ct++) {
>>> +		cu_processor_id = gpu_processor_id;
>>> +		if (pcache_info[ct].cache_level == 1) {
>>> +			for (i = 0; i < pcu_info->num_shader_engines; i++) {
>>> +				for (j = 0; j < pcu_info->num_shader_arrays_per_engine; j++) {
>>> +					for (k = 0; k < pcu_info->num_cu_per_sh; k += pcache_info[ct].num_cu_shared) {
>>> +
>>> +						ret = fill_in_l1_pcache(&props_ext, pcache_info, pcu_info,
>>> +										pcu_info->cu_bitmap[i % 4][j + i / 4], ct,
>>> +										cu_processor_id, k);
>>> +
>>> +						if (ret < 0)
>>> +							break;
>>> +
>>> +						if (!ret) {
>>> +							num_of_entries++;
>>> +							list_add_tail(&props_ext->list, &dev->cache_props_ext);
>>> +						}
>>> +
>>> +						/* Move to next CU block */
>>> +						num_cu_shared = ((k + pcache_info[ct].num_cu_shared) <=
>>> +							pcu_info->num_cu_per_sh) ?
>>> +							pcache_info[ct].num_cu_shared :
>>> +							(pcu_info->num_cu_per_sh - k);
>>> +						cu_processor_id += num_cu_shared;
>>> +					}
>>> +				}
>>> +			}
>>> +		} else {
>>> +			ret = fill_in_l2_l3_pcache(&props_ext, pcache_info,
>>> +								pcu_info, ct, cu_processor_id);
>>> +
>>> +			if (ret < 0)
>>> +				break;
>>> +
>>> +			if (!ret) {
>>> +				num_of_entries++;
>>> +				list_add_tail(&props_ext->list, &dev->cache_props_ext);
>>> +			}
>>> +		}
>>> +	}
>>> +	pr_debug("Added [%d] GPU cache entries\n", num_of_entries);
>>> +	return 0;
>>> +}
>>> +
>>>    int kfd_topology_add_device(struct kfd_dev *gpu)
>>>    {
>>>    	uint32_t gpu_id;
>>> @@ -1759,6 +2986,7 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
>>>    			topology_crat_proximity_domain--;
>>>    			return res;
>>>    		}
>>> +
>>>    		res = kfd_parse_crat_table(crat_image,
>>>    					   &temp_topology_device_list,
>>>    					   proximity_domain);
>>> @@ -1771,23 +2999,27 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
>>>    
>>>    		kfd_topology_update_device_list(&temp_topology_device_list,
>>>    			&topology_device_list);
>>> +		up_write(&topology_lock);
>>> +
>>> +		dev = kfd_assign_gpu(gpu);
>>> +		if (WARN_ON(!dev)) {
>>> +			res = -ENODEV;
>>> +			goto err;
>>> +		}
>>> +
>>> +		down_write(&topology_lock);
>>> +		kfd_fill_cache_non_crat_info(dev, gpu);
>>>    
>>>    		/* Update the SYSFS tree, since we added another topology
>>>    		 * device
>>>    		 */
>>>    		res = kfd_topology_update_sysfs();
>>>    		up_write(&topology_lock);
>>> -
>>>    		if (!res)
>>>    			sys_props.generation_count++;
>>>    		else
>>>    			pr_err("Failed to update GPU (ID: 0x%x) to sysfs topology. res=%d\n",
>>>    						gpu_id, res);
>>> -		dev = kfd_assign_gpu(gpu);
>>> -		if (WARN_ON(!dev)) {
>>> -			res = -ENODEV;
>>> -			goto err;
>>> -		}
>>>    	}
>>>    
>>>    	dev->gpu_id = gpu_id;
>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
>>> index dc4e239c8f8f..fc35fe9fa914 100644
>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
>>> @@ -103,6 +103,25 @@ struct kfd_cache_properties {
>>>    	struct attribute	attr;
>>>    };
>>>    
>>> +#define VCRAT_SIBLINGMAP_SIZE 64
>>> +
>>> +/* for GPUs with more CUs */
>>> +struct kfd_cache_properties_ext {
>>> +	struct list_head	list;
>>> +	uint32_t		processor_id_low;
>>> +	uint32_t		cache_level;
>>> +	uint32_t		cache_size;
>>> +	uint32_t		cacheline_size;
>>> +	uint32_t		cachelines_per_tag;
>>> +	uint32_t		cache_assoc;
>>> +	uint32_t		cache_latency;
>>> +	uint32_t		cache_type;
>>> +	uint8_t			sibling_map[VCRAT_SIBLINGMAP_SIZE];
>>> +	struct kfd_dev		*gpu;
>>> +	struct kobject		*kobj;
>>> +	struct attribute	attr;
>>> +};
>>> +
>>>    struct kfd_iolink_properties {
>>>    	struct list_head	list;
>>>    	uint32_t		iolink_type;
>>> @@ -139,6 +158,7 @@ struct kfd_topology_device {
>>>    	struct list_head		mem_props;
>>>    	uint32_t			cache_count;
>>>    	struct list_head		cache_props;
>>> +	struct list_head		cache_props_ext;
>>>    	struct list_head		io_link_props;
>>>    	struct list_head		p2p_link_props;
>>>    #ifdef HAVE_AMD_IOMMU_PC_SUPPORTED
>>> @@ -148,6 +168,7 @@ struct kfd_topology_device {
>>>    	struct kobject			*kobj_node;
>>>    	struct kobject			*kobj_mem;
>>>    	struct kobject			*kobj_cache;
>>> +	struct kobject			*kobj_cache_ext;
>>>    	struct kobject			*kobj_iolink;
>>>    	struct kobject			*kobj_p2plink;
>>>    #ifdef HAVE_AMD_IOMMU_PC_SUPPORTED


More information about the amd-gfx mailing list