[PATCH] drm/amdkfd: Add cache line sizes to KFD topology

Felix Kuehling felix.kuehling at amd.com
Mon Jan 22 20:01:52 UTC 2024


On 2024-01-19 21:21, Joseph Greathouse wrote:
> The KFD topology includes cache line size, but we have not been
> filling that information out unless we are parsing a CRAT table.
> Fill in this information for the devices where we have cache
> information structs, and pipe this information to the topology
> sysfs files.
>
> Signed-off-by: Joseph Greathouse <Joseph.Greathouse at amd.com>

Looks good to me in general. I can't be sure about the correctness of 
the information. Some observations:

  * Cache line sizes seem to be 64 or 128
  * On GFXv9 parts cache line sizes are 64, except on Aldebaran, where L2
    data cache lines are 128
  * On various Navis, most cache lines are 128, except the L1 scalar data
    and instruction caches as well as the L3 cache, which are 64
  * You fixed the L1 scalar data and instruction cache sizes for Carrizo
    (4 -> 16 and 8 -> 32, respectively). Was that intentional?

If that sounds correct and how it's meant to be, you can add my

Reviewed-by: Felix Kuehling <felix.kuehling at amd.com>


> ---
>   drivers/gpu/drm/amd/amdkfd/kfd_crat.c     | 93 ++++++++++++++++++++++-
>   drivers/gpu/drm/amd/amdkfd/kfd_crat.h     |  1 +
>   drivers/gpu/drm/amd/amdkfd/kfd_topology.c |  2 +
>   3 files changed, 94 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
> index cd8e459201f1..002b08fa632f 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
> @@ -55,6 +55,7 @@ static struct kfd_gpu_cache_info kaveri_cache_info[] = {
>   		/* TCP L1 Cache per CU */
>   		.cache_size = 16,
>   		.cache_level = 1,
> +		.cache_line_size = 64,
>   		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>   				CRAT_CACHE_FLAGS_DATA_CACHE |
>   				CRAT_CACHE_FLAGS_SIMD_CACHE),
> @@ -64,6 +65,7 @@ static struct kfd_gpu_cache_info kaveri_cache_info[] = {
>   		/* Scalar L1 Instruction Cache (in SQC module) per bank */
>   		.cache_size = 16,
>   		.cache_level = 1,
> +		.cache_line_size = 64,
>   		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>   				CRAT_CACHE_FLAGS_INST_CACHE |
>   				CRAT_CACHE_FLAGS_SIMD_CACHE),
> @@ -73,6 +75,7 @@ static struct kfd_gpu_cache_info kaveri_cache_info[] = {
>   		/* Scalar L1 Data Cache (in SQC module) per bank */
>   		.cache_size = 8,
>   		.cache_level = 1,
> +		.cache_line_size = 64,
>   		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>   				CRAT_CACHE_FLAGS_DATA_CACHE |
>   				CRAT_CACHE_FLAGS_SIMD_CACHE),
> @@ -88,6 +91,7 @@ static struct kfd_gpu_cache_info carrizo_cache_info[] = {
>   		/* TCP L1 Cache per CU */
>   		.cache_size = 16,
>   		.cache_level = 1,
> +		.cache_line_size = 64,
>   		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>   				CRAT_CACHE_FLAGS_DATA_CACHE |
>   				CRAT_CACHE_FLAGS_SIMD_CACHE),
> @@ -95,8 +99,9 @@ static struct kfd_gpu_cache_info carrizo_cache_info[] = {
>   	},
>   	{
>   		/* Scalar L1 Instruction Cache (in SQC module) per bank */
> -		.cache_size = 8,
> +		.cache_size = 32,
>   		.cache_level = 1,
> +		.cache_line_size = 64,
>   		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>   				CRAT_CACHE_FLAGS_INST_CACHE |
>   				CRAT_CACHE_FLAGS_SIMD_CACHE),
> @@ -104,8 +109,9 @@ static struct kfd_gpu_cache_info carrizo_cache_info[] = {
>   	},
>   	{
>   		/* Scalar L1 Data Cache (in SQC module) per bank. */
> -		.cache_size = 4,
> +		.cache_size = 16,
>   		.cache_level = 1,
> +		.cache_line_size = 64,
>   		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>   				CRAT_CACHE_FLAGS_DATA_CACHE |
>   				CRAT_CACHE_FLAGS_SIMD_CACHE),
> @@ -135,6 +141,7 @@ static struct kfd_gpu_cache_info vega10_cache_info[] = {
>   		/* TCP L1 Cache per CU */
>   		.cache_size = 16,
>   		.cache_level = 1,
> +		.cache_line_size = 64,
>   		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>   				CRAT_CACHE_FLAGS_DATA_CACHE |
>   				CRAT_CACHE_FLAGS_SIMD_CACHE),
> @@ -144,6 +151,7 @@ static struct kfd_gpu_cache_info vega10_cache_info[] = {
>   		/* Scalar L1 Instruction Cache per SQC */
>   		.cache_size = 32,
>   		.cache_level = 1,
> +		.cache_line_size = 64,
>   		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>   				CRAT_CACHE_FLAGS_INST_CACHE |
>   				CRAT_CACHE_FLAGS_SIMD_CACHE),
> @@ -153,6 +161,7 @@ static struct kfd_gpu_cache_info vega10_cache_info[] = {
>   		/* Scalar L1 Data Cache per SQC */
>   		.cache_size = 16,
>   		.cache_level = 1,
> +		.cache_line_size = 64,
>   		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>   				CRAT_CACHE_FLAGS_DATA_CACHE |
>   				CRAT_CACHE_FLAGS_SIMD_CACHE),
> @@ -162,6 +171,7 @@ static struct kfd_gpu_cache_info vega10_cache_info[] = {
>   		/* L2 Data Cache per GPU (Total Tex Cache) */
>   		.cache_size = 4096,
>   		.cache_level = 2,
> +		.cache_line_size = 64,
>   		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>   				CRAT_CACHE_FLAGS_DATA_CACHE |
>   				CRAT_CACHE_FLAGS_SIMD_CACHE),
> @@ -174,6 +184,7 @@ static struct kfd_gpu_cache_info raven_cache_info[] = {
>   		/* TCP L1 Cache per CU */
>   		.cache_size = 16,
>   		.cache_level = 1,
> +		.cache_line_size = 64,
>   		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>   				CRAT_CACHE_FLAGS_DATA_CACHE |
>   				CRAT_CACHE_FLAGS_SIMD_CACHE),
> @@ -183,6 +194,7 @@ static struct kfd_gpu_cache_info raven_cache_info[] = {
>   		/* Scalar L1 Instruction Cache per SQC */
>   		.cache_size = 32,
>   		.cache_level = 1,
> +		.cache_line_size = 64,
>   		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>   				CRAT_CACHE_FLAGS_INST_CACHE |
>   				CRAT_CACHE_FLAGS_SIMD_CACHE),
> @@ -192,6 +204,7 @@ static struct kfd_gpu_cache_info raven_cache_info[] = {
>   		/* Scalar L1 Data Cache per SQC */
>   		.cache_size = 16,
>   		.cache_level = 1,
> +		.cache_line_size = 64,
>   		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>   				CRAT_CACHE_FLAGS_DATA_CACHE |
>   				CRAT_CACHE_FLAGS_SIMD_CACHE),
> @@ -201,6 +214,7 @@ static struct kfd_gpu_cache_info raven_cache_info[] = {
>   		/* L2 Data Cache per GPU (Total Tex Cache) */
>   		.cache_size = 1024,
>   		.cache_level = 2,
> +		.cache_line_size = 64,
>   		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>   				CRAT_CACHE_FLAGS_DATA_CACHE |
>   				CRAT_CACHE_FLAGS_SIMD_CACHE),
> @@ -213,6 +227,7 @@ static struct kfd_gpu_cache_info renoir_cache_info[] = {
>   		/* TCP L1 Cache per CU */
>   		.cache_size = 16,
>   		.cache_level = 1,
> +		.cache_line_size = 64,
>   		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>   				CRAT_CACHE_FLAGS_DATA_CACHE |
>   				CRAT_CACHE_FLAGS_SIMD_CACHE),
> @@ -222,6 +237,7 @@ static struct kfd_gpu_cache_info renoir_cache_info[] = {
>   		/* Scalar L1 Instruction Cache per SQC */
>   		.cache_size = 32,
>   		.cache_level = 1,
> +		.cache_line_size = 64,
>   		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>   				CRAT_CACHE_FLAGS_INST_CACHE |
>   				CRAT_CACHE_FLAGS_SIMD_CACHE),
> @@ -231,6 +247,7 @@ static struct kfd_gpu_cache_info renoir_cache_info[] = {
>   		/* Scalar L1 Data Cache per SQC */
>   		.cache_size = 16,
>   		.cache_level = 1,
> +		.cache_line_size = 64,
>   		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>   				CRAT_CACHE_FLAGS_DATA_CACHE |
>   				CRAT_CACHE_FLAGS_SIMD_CACHE),
> @@ -240,6 +257,7 @@ static struct kfd_gpu_cache_info renoir_cache_info[] = {
>   		/* L2 Data Cache per GPU (Total Tex Cache) */
>   		.cache_size = 1024,
>   		.cache_level = 2,
> +		.cache_line_size = 64,
>   		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>   				CRAT_CACHE_FLAGS_DATA_CACHE |
>   				CRAT_CACHE_FLAGS_SIMD_CACHE),
> @@ -252,6 +270,7 @@ static struct kfd_gpu_cache_info vega12_cache_info[] = {
>   		/* TCP L1 Cache per CU */
>   		.cache_size = 16,
>   		.cache_level = 1,
> +		.cache_line_size = 64,
>   		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>   				CRAT_CACHE_FLAGS_DATA_CACHE |
>   				CRAT_CACHE_FLAGS_SIMD_CACHE),
> @@ -261,6 +280,7 @@ static struct kfd_gpu_cache_info vega12_cache_info[] = {
>   		/* Scalar L1 Instruction Cache per SQC */
>   		.cache_size = 32,
>   		.cache_level = 1,
> +		.cache_line_size = 64,
>   		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>   				CRAT_CACHE_FLAGS_INST_CACHE |
>   				CRAT_CACHE_FLAGS_SIMD_CACHE),
> @@ -270,6 +290,7 @@ static struct kfd_gpu_cache_info vega12_cache_info[] = {
>   		/* Scalar L1 Data Cache per SQC */
>   		.cache_size = 16,
>   		.cache_level = 1,
> +		.cache_line_size = 64,
>   		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>   				CRAT_CACHE_FLAGS_DATA_CACHE |
>   				CRAT_CACHE_FLAGS_SIMD_CACHE),
> @@ -279,6 +300,7 @@ static struct kfd_gpu_cache_info vega12_cache_info[] = {
>   		/* L2 Data Cache per GPU (Total Tex Cache) */
>   		.cache_size = 2048,
>   		.cache_level = 2,
> +		.cache_line_size = 64,
>   		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>   				CRAT_CACHE_FLAGS_DATA_CACHE |
>   				CRAT_CACHE_FLAGS_SIMD_CACHE),
> @@ -291,6 +313,7 @@ static struct kfd_gpu_cache_info vega20_cache_info[] = {
>   		/* TCP L1 Cache per CU */
>   		.cache_size = 16,
>   		.cache_level = 1,
> +		.cache_line_size = 64,
>   		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>   				CRAT_CACHE_FLAGS_DATA_CACHE |
>   				CRAT_CACHE_FLAGS_SIMD_CACHE),
> @@ -300,6 +323,7 @@ static struct kfd_gpu_cache_info vega20_cache_info[] = {
>   		/* Scalar L1 Instruction Cache per SQC */
>   		.cache_size = 32,
>   		.cache_level = 1,
> +		.cache_line_size = 64,
>   		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>   				CRAT_CACHE_FLAGS_INST_CACHE |
>   				CRAT_CACHE_FLAGS_SIMD_CACHE),
> @@ -309,6 +333,7 @@ static struct kfd_gpu_cache_info vega20_cache_info[] = {
>   		/* Scalar L1 Data Cache per SQC */
>   		.cache_size = 16,
>   		.cache_level = 1,
> +		.cache_line_size = 64,
>   		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>   				CRAT_CACHE_FLAGS_DATA_CACHE |
>   				CRAT_CACHE_FLAGS_SIMD_CACHE),
> @@ -318,6 +343,7 @@ static struct kfd_gpu_cache_info vega20_cache_info[] = {
>   		/* L2 Data Cache per GPU (Total Tex Cache) */
>   		.cache_size = 8192,
>   		.cache_level = 2,
> +		.cache_line_size = 64,
>   		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>   				CRAT_CACHE_FLAGS_DATA_CACHE |
>   				CRAT_CACHE_FLAGS_SIMD_CACHE),
> @@ -330,6 +356,7 @@ static struct kfd_gpu_cache_info aldebaran_cache_info[] = {
>   		/* TCP L1 Cache per CU */
>   		.cache_size = 16,
>   		.cache_level = 1,
> +		.cache_line_size = 64,
>   		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>   				CRAT_CACHE_FLAGS_DATA_CACHE |
>   				CRAT_CACHE_FLAGS_SIMD_CACHE),
> @@ -339,6 +366,7 @@ static struct kfd_gpu_cache_info aldebaran_cache_info[] = {
>   		/* Scalar L1 Instruction Cache per SQC */
>   		.cache_size = 32,
>   		.cache_level = 1,
> +		.cache_line_size = 64,
>   		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>   				CRAT_CACHE_FLAGS_INST_CACHE |
>   				CRAT_CACHE_FLAGS_SIMD_CACHE),
> @@ -348,6 +376,7 @@ static struct kfd_gpu_cache_info aldebaran_cache_info[] = {
>   		/* Scalar L1 Data Cache per SQC */
>   		.cache_size = 16,
>   		.cache_level = 1,
> +		.cache_line_size = 64,
>   		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>   				CRAT_CACHE_FLAGS_DATA_CACHE |
>   				CRAT_CACHE_FLAGS_SIMD_CACHE),
> @@ -357,6 +386,7 @@ static struct kfd_gpu_cache_info aldebaran_cache_info[] = {
>   		/* L2 Data Cache per GPU (Total Tex Cache) */
>   		.cache_size = 8192,
>   		.cache_level = 2,
> +		.cache_line_size = 128,
>   		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>   				CRAT_CACHE_FLAGS_DATA_CACHE |
>   				CRAT_CACHE_FLAGS_SIMD_CACHE),
> @@ -369,6 +399,7 @@ static struct kfd_gpu_cache_info navi10_cache_info[] = {
>   		/* TCP L1 Cache per CU */
>   		.cache_size = 16,
>   		.cache_level = 1,
> +		.cache_line_size = 128,
>   		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>   				CRAT_CACHE_FLAGS_DATA_CACHE |
>   				CRAT_CACHE_FLAGS_SIMD_CACHE),
> @@ -378,6 +409,7 @@ static struct kfd_gpu_cache_info navi10_cache_info[] = {
>   		/* Scalar L1 Instruction Cache per SQC */
>   		.cache_size = 32,
>   		.cache_level = 1,
> +		.cache_line_size = 64,
>   		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>   				CRAT_CACHE_FLAGS_INST_CACHE |
>   				CRAT_CACHE_FLAGS_SIMD_CACHE),
> @@ -387,6 +419,7 @@ static struct kfd_gpu_cache_info navi10_cache_info[] = {
>   		/* Scalar L1 Data Cache per SQC */
>   		.cache_size = 16,
>   		.cache_level = 1,
> +		.cache_line_size = 64,
>   		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>   				CRAT_CACHE_FLAGS_DATA_CACHE |
>   				CRAT_CACHE_FLAGS_SIMD_CACHE),
> @@ -396,6 +429,7 @@ static struct kfd_gpu_cache_info navi10_cache_info[] = {
>   		/* GL1 Data Cache per SA */
>   		.cache_size = 128,
>   		.cache_level = 1,
> +		.cache_line_size = 128,
>   		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>   				CRAT_CACHE_FLAGS_DATA_CACHE |
>   				CRAT_CACHE_FLAGS_SIMD_CACHE),
> @@ -405,6 +439,7 @@ static struct kfd_gpu_cache_info navi10_cache_info[] = {
>   		/* L2 Data Cache per GPU (Total Tex Cache) */
>   		.cache_size = 4096,
>   		.cache_level = 2,
> +		.cache_line_size = 128,
>   		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>   				CRAT_CACHE_FLAGS_DATA_CACHE |
>   				CRAT_CACHE_FLAGS_SIMD_CACHE),
> @@ -417,6 +452,7 @@ static struct kfd_gpu_cache_info vangogh_cache_info[] = {
>   		/* TCP L1 Cache per CU */
>   		.cache_size = 16,
>   		.cache_level = 1,
> +		.cache_line_size = 128,
>   		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>   				CRAT_CACHE_FLAGS_DATA_CACHE |
>   				CRAT_CACHE_FLAGS_SIMD_CACHE),
> @@ -426,6 +462,7 @@ static struct kfd_gpu_cache_info vangogh_cache_info[] = {
>   		/* Scalar L1 Instruction Cache per SQC */
>   		.cache_size = 32,
>   		.cache_level = 1,
> +		.cache_line_size = 64,
>   		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>   				CRAT_CACHE_FLAGS_INST_CACHE |
>   				CRAT_CACHE_FLAGS_SIMD_CACHE),
> @@ -435,6 +472,7 @@ static struct kfd_gpu_cache_info vangogh_cache_info[] = {
>   		/* Scalar L1 Data Cache per SQC */
>   		.cache_size = 16,
>   		.cache_level = 1,
> +		.cache_line_size = 64,
>   		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>   				CRAT_CACHE_FLAGS_DATA_CACHE |
>   				CRAT_CACHE_FLAGS_SIMD_CACHE),
> @@ -444,6 +482,7 @@ static struct kfd_gpu_cache_info vangogh_cache_info[] = {
>   		/* GL1 Data Cache per SA */
>   		.cache_size = 128,
>   		.cache_level = 1,
> +		.cache_line_size = 128,
>   		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>   				CRAT_CACHE_FLAGS_DATA_CACHE |
>   				CRAT_CACHE_FLAGS_SIMD_CACHE),
> @@ -453,6 +492,7 @@ static struct kfd_gpu_cache_info vangogh_cache_info[] = {
>   		/* L2 Data Cache per GPU (Total Tex Cache) */
>   		.cache_size = 1024,
>   		.cache_level = 2,
> +		.cache_line_size = 128,
>   		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>   				CRAT_CACHE_FLAGS_DATA_CACHE |
>   				CRAT_CACHE_FLAGS_SIMD_CACHE),
> @@ -465,6 +505,7 @@ static struct kfd_gpu_cache_info navi14_cache_info[] = {
>   		/* TCP L1 Cache per CU */
>   		.cache_size = 16,
>   		.cache_level = 1,
> +		.cache_line_size = 128,
>   		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>   				CRAT_CACHE_FLAGS_DATA_CACHE |
>   				CRAT_CACHE_FLAGS_SIMD_CACHE),
> @@ -474,6 +515,7 @@ static struct kfd_gpu_cache_info navi14_cache_info[] = {
>   		/* Scalar L1 Instruction Cache per SQC */
>   		.cache_size = 32,
>   		.cache_level = 1,
> +		.cache_line_size = 64,
>   		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>   				CRAT_CACHE_FLAGS_INST_CACHE |
>   				CRAT_CACHE_FLAGS_SIMD_CACHE),
> @@ -483,6 +525,7 @@ static struct kfd_gpu_cache_info navi14_cache_info[] = {
>   		/* Scalar L1 Data Cache per SQC */
>   		.cache_size = 16,
>   		.cache_level = 1,
> +		.cache_line_size = 64,
>   		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>   				CRAT_CACHE_FLAGS_DATA_CACHE |
>   				CRAT_CACHE_FLAGS_SIMD_CACHE),
> @@ -492,6 +535,7 @@ static struct kfd_gpu_cache_info navi14_cache_info[] = {
>   		/* GL1 Data Cache per SA */
>   		.cache_size = 128,
>   		.cache_level = 1,
> +		.cache_line_size = 128,
>   		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>   				CRAT_CACHE_FLAGS_DATA_CACHE |
>   				CRAT_CACHE_FLAGS_SIMD_CACHE),
> @@ -501,6 +545,7 @@ static struct kfd_gpu_cache_info navi14_cache_info[] = {
>   		/* L2 Data Cache per GPU (Total Tex Cache) */
>   		.cache_size = 2048,
>   		.cache_level = 2,
> +		.cache_line_size = 128,
>   		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>   				CRAT_CACHE_FLAGS_DATA_CACHE |
>   				CRAT_CACHE_FLAGS_SIMD_CACHE),
> @@ -513,6 +558,7 @@ static struct kfd_gpu_cache_info sienna_cichlid_cache_info[] = {
>   		/* TCP L1 Cache per CU */
>   		.cache_size = 16,
>   		.cache_level = 1,
> +		.cache_line_size = 128,
>   		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>   				CRAT_CACHE_FLAGS_DATA_CACHE |
>   				CRAT_CACHE_FLAGS_SIMD_CACHE),
> @@ -522,6 +568,7 @@ static struct kfd_gpu_cache_info sienna_cichlid_cache_info[] = {
>   		/* Scalar L1 Instruction Cache per SQC */
>   		.cache_size = 32,
>   		.cache_level = 1,
> +		.cache_line_size = 64,
>   		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>   				CRAT_CACHE_FLAGS_INST_CACHE |
>   				CRAT_CACHE_FLAGS_SIMD_CACHE),
> @@ -531,6 +578,7 @@ static struct kfd_gpu_cache_info sienna_cichlid_cache_info[] = {
>   		/* Scalar L1 Data Cache per SQC */
>   		.cache_size = 16,
>   		.cache_level = 1,
> +		.cache_line_size = 64,
>   		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>   				CRAT_CACHE_FLAGS_DATA_CACHE |
>   				CRAT_CACHE_FLAGS_SIMD_CACHE),
> @@ -540,6 +588,7 @@ static struct kfd_gpu_cache_info sienna_cichlid_cache_info[] = {
>   		/* GL1 Data Cache per SA */
>   		.cache_size = 128,
>   		.cache_level = 1,
> +		.cache_line_size = 128,
>   		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>   				CRAT_CACHE_FLAGS_DATA_CACHE |
>   				CRAT_CACHE_FLAGS_SIMD_CACHE),
> @@ -549,6 +598,7 @@ static struct kfd_gpu_cache_info sienna_cichlid_cache_info[] = {
>   		/* L2 Data Cache per GPU (Total Tex Cache) */
>   		.cache_size = 4096,
>   		.cache_level = 2,
> +		.cache_line_size = 128,
>   		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>   				CRAT_CACHE_FLAGS_DATA_CACHE |
>   				CRAT_CACHE_FLAGS_SIMD_CACHE),
> @@ -558,6 +608,7 @@ static struct kfd_gpu_cache_info sienna_cichlid_cache_info[] = {
>   		/* L3 Data Cache per GPU */
>   		.cache_size = 128*1024,
>   		.cache_level = 3,
> +		.cache_line_size = 64,
>   		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>   				CRAT_CACHE_FLAGS_DATA_CACHE |
>   				CRAT_CACHE_FLAGS_SIMD_CACHE),
> @@ -570,6 +621,7 @@ static struct kfd_gpu_cache_info navy_flounder_cache_info[] = {
>   		/* TCP L1 Cache per CU */
>   		.cache_size = 16,
>   		.cache_level = 1,
> +		.cache_line_size = 128,
>   		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>   				CRAT_CACHE_FLAGS_DATA_CACHE |
>   				CRAT_CACHE_FLAGS_SIMD_CACHE),
> @@ -579,6 +631,7 @@ static struct kfd_gpu_cache_info navy_flounder_cache_info[] = {
>   		/* Scalar L1 Instruction Cache per SQC */
>   		.cache_size = 32,
>   		.cache_level = 1,
> +		.cache_line_size = 64,
>   		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>   				CRAT_CACHE_FLAGS_INST_CACHE |
>   				CRAT_CACHE_FLAGS_SIMD_CACHE),
> @@ -588,6 +641,7 @@ static struct kfd_gpu_cache_info navy_flounder_cache_info[] = {
>   		/* Scalar L1 Data Cache per SQC */
>   		.cache_size = 16,
>   		.cache_level = 1,
> +		.cache_line_size = 64,
>   		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>   				CRAT_CACHE_FLAGS_DATA_CACHE |
>   				CRAT_CACHE_FLAGS_SIMD_CACHE),
> @@ -597,6 +651,7 @@ static struct kfd_gpu_cache_info navy_flounder_cache_info[] = {
>   		/* GL1 Data Cache per SA */
>   		.cache_size = 128,
>   		.cache_level = 1,
> +		.cache_line_size = 128,
>   		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>   				CRAT_CACHE_FLAGS_DATA_CACHE |
>   				CRAT_CACHE_FLAGS_SIMD_CACHE),
> @@ -606,6 +661,7 @@ static struct kfd_gpu_cache_info navy_flounder_cache_info[] = {
>   		/* L2 Data Cache per GPU (Total Tex Cache) */
>   		.cache_size = 3072,
>   		.cache_level = 2,
> +		.cache_line_size = 128,
>   		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>   				CRAT_CACHE_FLAGS_DATA_CACHE |
>   				CRAT_CACHE_FLAGS_SIMD_CACHE),
> @@ -615,6 +671,7 @@ static struct kfd_gpu_cache_info navy_flounder_cache_info[] = {
>   		/* L3 Data Cache per GPU */
>   		.cache_size = 96*1024,
>   		.cache_level = 3,
> +		.cache_line_size = 64,
>   		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>   				CRAT_CACHE_FLAGS_DATA_CACHE |
>   				CRAT_CACHE_FLAGS_SIMD_CACHE),
> @@ -627,6 +684,7 @@ static struct kfd_gpu_cache_info dimgrey_cavefish_cache_info[] = {
>   		/* TCP L1 Cache per CU */
>   		.cache_size = 16,
>   		.cache_level = 1,
> +		.cache_line_size = 128,
>   		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>   				CRAT_CACHE_FLAGS_DATA_CACHE |
>   				CRAT_CACHE_FLAGS_SIMD_CACHE),
> @@ -636,6 +694,7 @@ static struct kfd_gpu_cache_info dimgrey_cavefish_cache_info[] = {
>   		/* Scalar L1 Instruction Cache per SQC */
>   		.cache_size = 32,
>   		.cache_level = 1,
> +		.cache_line_size = 64,
>   		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>   				CRAT_CACHE_FLAGS_INST_CACHE |
>   				CRAT_CACHE_FLAGS_SIMD_CACHE),
> @@ -645,6 +704,7 @@ static struct kfd_gpu_cache_info dimgrey_cavefish_cache_info[] = {
>   		/* Scalar L1 Data Cache per SQC */
>   		.cache_size = 16,
>   		.cache_level = 1,
> +		.cache_line_size = 64,
>   		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>   				CRAT_CACHE_FLAGS_DATA_CACHE |
>   				CRAT_CACHE_FLAGS_SIMD_CACHE),
> @@ -654,6 +714,7 @@ static struct kfd_gpu_cache_info dimgrey_cavefish_cache_info[] = {
>   		/* GL1 Data Cache per SA */
>   		.cache_size = 128,
>   		.cache_level = 1,
> +		.cache_line_size = 128,
>   		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>   				CRAT_CACHE_FLAGS_DATA_CACHE |
>   				CRAT_CACHE_FLAGS_SIMD_CACHE),
> @@ -663,6 +724,7 @@ static struct kfd_gpu_cache_info dimgrey_cavefish_cache_info[] = {
>   		/* L2 Data Cache per GPU (Total Tex Cache) */
>   		.cache_size = 2048,
>   		.cache_level = 2,
> +		.cache_line_size = 128,
>   		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>   				CRAT_CACHE_FLAGS_DATA_CACHE |
>   				CRAT_CACHE_FLAGS_SIMD_CACHE),
> @@ -672,6 +734,7 @@ static struct kfd_gpu_cache_info dimgrey_cavefish_cache_info[] = {
>   		/* L3 Data Cache per GPU */
>   		.cache_size = 32*1024,
>   		.cache_level = 3,
> +		.cache_line_size = 64,
>   		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>   				CRAT_CACHE_FLAGS_DATA_CACHE |
>   				CRAT_CACHE_FLAGS_SIMD_CACHE),
> @@ -684,6 +747,7 @@ static struct kfd_gpu_cache_info beige_goby_cache_info[] = {
>   		/* TCP L1 Cache per CU */
>   		.cache_size = 16,
>   		.cache_level = 1,
> +		.cache_line_size = 128,
>   		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>   				CRAT_CACHE_FLAGS_DATA_CACHE |
>   				CRAT_CACHE_FLAGS_SIMD_CACHE),
> @@ -693,6 +757,7 @@ static struct kfd_gpu_cache_info beige_goby_cache_info[] = {
>   		/* Scalar L1 Instruction Cache per SQC */
>   		.cache_size = 32,
>   		.cache_level = 1,
> +		.cache_line_size = 64,
>   		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>   				CRAT_CACHE_FLAGS_INST_CACHE |
>   				CRAT_CACHE_FLAGS_SIMD_CACHE),
> @@ -702,6 +767,7 @@ static struct kfd_gpu_cache_info beige_goby_cache_info[] = {
>   		/* Scalar L1 Data Cache per SQC */
>   		.cache_size = 16,
>   		.cache_level = 1,
> +		.cache_line_size = 64,
>   		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>   				CRAT_CACHE_FLAGS_DATA_CACHE |
>   				CRAT_CACHE_FLAGS_SIMD_CACHE),
> @@ -711,6 +777,7 @@ static struct kfd_gpu_cache_info beige_goby_cache_info[] = {
>   		/* GL1 Data Cache per SA */
>   		.cache_size = 128,
>   		.cache_level = 1,
> +		.cache_line_size = 128,
>   		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>   				CRAT_CACHE_FLAGS_DATA_CACHE |
>   				CRAT_CACHE_FLAGS_SIMD_CACHE),
> @@ -720,6 +787,7 @@ static struct kfd_gpu_cache_info beige_goby_cache_info[] = {
>   		/* L2 Data Cache per GPU (Total Tex Cache) */
>   		.cache_size = 1024,
>   		.cache_level = 2,
> +		.cache_line_size = 128,
>   		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>   				CRAT_CACHE_FLAGS_DATA_CACHE |
>   				CRAT_CACHE_FLAGS_SIMD_CACHE),
> @@ -729,6 +797,7 @@ static struct kfd_gpu_cache_info beige_goby_cache_info[] = {
>   		/* L3 Data Cache per GPU */
>   		.cache_size = 16*1024,
>   		.cache_level = 3,
> +		.cache_line_size = 64,
>   		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>   				CRAT_CACHE_FLAGS_DATA_CACHE |
>   				CRAT_CACHE_FLAGS_SIMD_CACHE),
> @@ -741,6 +810,7 @@ static struct kfd_gpu_cache_info yellow_carp_cache_info[] = {
>   		/* TCP L1 Cache per CU */
>   		.cache_size = 16,
>   		.cache_level = 1,
> +		.cache_line_size = 128,
>   		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>   				CRAT_CACHE_FLAGS_DATA_CACHE |
>   				CRAT_CACHE_FLAGS_SIMD_CACHE),
> @@ -750,6 +820,7 @@ static struct kfd_gpu_cache_info yellow_carp_cache_info[] = {
>   		/* Scalar L1 Instruction Cache per SQC */
>   		.cache_size = 32,
>   		.cache_level = 1,
> +		.cache_line_size = 64,
>   		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>   				CRAT_CACHE_FLAGS_INST_CACHE |
>   				CRAT_CACHE_FLAGS_SIMD_CACHE),
> @@ -759,6 +830,7 @@ static struct kfd_gpu_cache_info yellow_carp_cache_info[] = {
>   		/* Scalar L1 Data Cache per SQC */
>   		.cache_size = 16,
>   		.cache_level = 1,
> +		.cache_line_size = 64,
>   		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>   				CRAT_CACHE_FLAGS_DATA_CACHE |
>   				CRAT_CACHE_FLAGS_SIMD_CACHE),
> @@ -768,6 +840,7 @@ static struct kfd_gpu_cache_info yellow_carp_cache_info[] = {
>   		/* GL1 Data Cache per SA */
>   		.cache_size = 128,
>   		.cache_level = 1,
> +		.cache_line_size = 128,
>   		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>   				CRAT_CACHE_FLAGS_DATA_CACHE |
>   				CRAT_CACHE_FLAGS_SIMD_CACHE),
> @@ -777,6 +850,7 @@ static struct kfd_gpu_cache_info yellow_carp_cache_info[] = {
>   		/* L2 Data Cache per GPU (Total Tex Cache) */
>   		.cache_size = 2048,
>   		.cache_level = 2,
> +		.cache_line_size = 128,
>   		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>   				CRAT_CACHE_FLAGS_DATA_CACHE |
>   				CRAT_CACHE_FLAGS_SIMD_CACHE),
> @@ -789,6 +863,7 @@ static struct kfd_gpu_cache_info gfx1037_cache_info[] = {
>   		/* TCP L1 Cache per CU */
>   		.cache_size = 16,
>   		.cache_level = 1,
> +		.cache_line_size = 128,
>   		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>   				CRAT_CACHE_FLAGS_DATA_CACHE |
>   				CRAT_CACHE_FLAGS_SIMD_CACHE),
> @@ -798,6 +873,7 @@ static struct kfd_gpu_cache_info gfx1037_cache_info[] = {
>   		/* Scalar L1 Instruction Cache per SQC */
>   		.cache_size = 32,
>   		.cache_level = 1,
> +		.cache_line_size = 64,
>   		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>   				CRAT_CACHE_FLAGS_INST_CACHE |
>   				CRAT_CACHE_FLAGS_SIMD_CACHE),
> @@ -807,6 +883,7 @@ static struct kfd_gpu_cache_info gfx1037_cache_info[] = {
>   		/* Scalar L1 Data Cache per SQC */
>   		.cache_size = 16,
>   		.cache_level = 1,
> +		.cache_line_size = 64,
>   		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>   				CRAT_CACHE_FLAGS_DATA_CACHE |
>   				CRAT_CACHE_FLAGS_SIMD_CACHE),
> @@ -816,6 +893,7 @@ static struct kfd_gpu_cache_info gfx1037_cache_info[] = {
>   		/* GL1 Data Cache per SA */
>   		.cache_size = 128,
>   		.cache_level = 1,
> +		.cache_line_size = 128,
>   		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>   				CRAT_CACHE_FLAGS_DATA_CACHE |
>   				CRAT_CACHE_FLAGS_SIMD_CACHE),
> @@ -825,6 +903,7 @@ static struct kfd_gpu_cache_info gfx1037_cache_info[] = {
>   		/* L2 Data Cache per GPU (Total Tex Cache) */
>   		.cache_size = 256,
>   		.cache_level = 2,
> +		.cache_line_size = 128,
>   		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>   				CRAT_CACHE_FLAGS_DATA_CACHE |
>   				CRAT_CACHE_FLAGS_SIMD_CACHE),
> @@ -837,6 +916,7 @@ static struct kfd_gpu_cache_info gc_10_3_6_cache_info[] = {
>   		/* TCP L1 Cache per CU */
>   		.cache_size = 16,
>   		.cache_level = 1,
> +		.cache_line_size = 128,
>   		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>   			  CRAT_CACHE_FLAGS_DATA_CACHE |
>   			  CRAT_CACHE_FLAGS_SIMD_CACHE),
> @@ -846,6 +926,7 @@ static struct kfd_gpu_cache_info gc_10_3_6_cache_info[] = {
>   		/* Scalar L1 Instruction Cache per SQC */
>   		.cache_size = 32,
>   		.cache_level = 1,
> +		.cache_line_size = 64,
>   		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>   			  CRAT_CACHE_FLAGS_INST_CACHE |
>   			  CRAT_CACHE_FLAGS_SIMD_CACHE),
> @@ -855,6 +936,7 @@ static struct kfd_gpu_cache_info gc_10_3_6_cache_info[] = {
>   		/* Scalar L1 Data Cache per SQC */
>   		.cache_size = 16,
>   		.cache_level = 1,
> +		.cache_line_size = 64,
>   		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>   			  CRAT_CACHE_FLAGS_DATA_CACHE |
>   			  CRAT_CACHE_FLAGS_SIMD_CACHE),
> @@ -864,6 +946,7 @@ static struct kfd_gpu_cache_info gc_10_3_6_cache_info[] = {
>   		/* GL1 Data Cache per SA */
>   		.cache_size = 128,
>   		.cache_level = 1,
> +		.cache_line_size = 128,
>   		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>   			  CRAT_CACHE_FLAGS_DATA_CACHE |
>   			  CRAT_CACHE_FLAGS_SIMD_CACHE),
> @@ -873,6 +956,7 @@ static struct kfd_gpu_cache_info gc_10_3_6_cache_info[] = {
>   		/* L2 Data Cache per GPU (Total Tex Cache) */
>   		.cache_size = 256,
>   		.cache_level = 2,
> +		.cache_line_size = 128,
>   		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>   			  CRAT_CACHE_FLAGS_DATA_CACHE |
>   			  CRAT_CACHE_FLAGS_SIMD_CACHE),
> @@ -885,6 +969,7 @@ static struct kfd_gpu_cache_info dummy_cache_info[] = {
>   		/* TCP L1 Cache per CU */
>   		.cache_size = 16,
>   		.cache_level = 1,
> +		.cache_line_size = 64,
>   		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>   				CRAT_CACHE_FLAGS_DATA_CACHE |
>   				CRAT_CACHE_FLAGS_SIMD_CACHE),
> @@ -894,6 +979,7 @@ static struct kfd_gpu_cache_info dummy_cache_info[] = {
>   		/* Scalar L1 Instruction Cache per SQC */
>   		.cache_size = 32,
>   		.cache_level = 1,
> +		.cache_line_size = 64,
>   		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>   				CRAT_CACHE_FLAGS_INST_CACHE |
>   				CRAT_CACHE_FLAGS_SIMD_CACHE),
> @@ -903,6 +989,7 @@ static struct kfd_gpu_cache_info dummy_cache_info[] = {
>   		/* Scalar L1 Data Cache per SQC */
>   		.cache_size = 16,
>   		.cache_level = 1,
> +		.cache_line_size = 64,
>   		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>   				CRAT_CACHE_FLAGS_DATA_CACHE |
>   				CRAT_CACHE_FLAGS_SIMD_CACHE),
> @@ -912,6 +999,7 @@ static struct kfd_gpu_cache_info dummy_cache_info[] = {
>   		/* GL1 Data Cache per SA */
>   		.cache_size = 128,
>   		.cache_level = 1,
> +		.cache_line_size = 64,
>   		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>   				CRAT_CACHE_FLAGS_DATA_CACHE |
>   				CRAT_CACHE_FLAGS_SIMD_CACHE),
> @@ -921,6 +1009,7 @@ static struct kfd_gpu_cache_info dummy_cache_info[] = {
>   		/* L2 Data Cache per GPU (Total Tex Cache) */
>   		.cache_size = 2048,
>   		.cache_level = 2,
> +		.cache_line_size = 64,
>   		.flags = (CRAT_CACHE_FLAGS_ENABLED |
>   				CRAT_CACHE_FLAGS_DATA_CACHE |
>   				CRAT_CACHE_FLAGS_SIMD_CACHE),
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.h b/drivers/gpu/drm/amd/amdkfd/kfd_crat.h
> index 74c2d7a0d628..300634b9f668 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.h
> @@ -303,6 +303,7 @@ struct kfd_node;
>   struct kfd_gpu_cache_info {
>   	uint32_t	cache_size;
>   	uint32_t	cache_level;
> +	uint32_t	cache_line_size;
>   	uint32_t	flags;
>   	/* Indicates how many Compute Units share this cache
>   	 * within a SA. Value = 1 indicates the cache is not shared
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
> index 83024c6bdd50..3df2a8ad86fb 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
> @@ -1564,6 +1564,7 @@ static int fill_in_l1_pcache(struct kfd_cache_properties **props_ext,
>   		pcache->processor_id_low = cu_processor_id + (first_active_cu - 1);
>   		pcache->cache_level = pcache_info[cache_type].cache_level;
>   		pcache->cache_size = pcache_info[cache_type].cache_size;
> +		pcache->cacheline_size = pcache_info[cache_type].cache_line_size;
>   
>   		if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_DATA_CACHE)
>   			pcache->cache_type |= HSA_CACHE_TYPE_DATA;
> @@ -1632,6 +1633,7 @@ static int fill_in_l2_l3_pcache(struct kfd_cache_properties **props_ext,
>   		pcache->processor_id_low = cu_processor_id
>   					+ (first_active_cu - 1);
>   		pcache->cache_level = pcache_info[cache_type].cache_level;
> +		pcache->cacheline_size = pcache_info[cache_type].cache_line_size;
>   
>   		if (KFD_GC_VERSION(knode) == IP_VERSION(9, 4, 3))
>   			mode = adev->gmc.gmc_funcs->query_mem_partition_mode(adev);
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/amd-gfx/attachments/20240122/05211cae/attachment-0001.htm>


More information about the amd-gfx mailing list