[PATCHv2 2/4] drm/amdkfd: Update cache info reporting for GFX v9.4.3
Joshi, Mukul
Mukul.Joshi at amd.com
Thu Sep 7 22:54:04 UTC 2023
> -----Original Message-----
> From: Kuehling, Felix <Felix.Kuehling at amd.com>
> Sent: Thursday, September 7, 2023 4:51 PM
> To: Joshi, Mukul <Mukul.Joshi at amd.com>; amd-gfx at lists.freedesktop.org
> Subject: Re: [PATCHv2 2/4] drm/amdkfd: Update cache info reporting for GFX
> v9.4.3
>
>
> On 2023-09-06 11:44, Mukul Joshi wrote:
> > Update cache info reporting in sysfs to report the correct number of
> > CUs and associated cache information based on different spatial
> > partitioning modes.
> >
> > Signed-off-by: Mukul Joshi <mukul.joshi at amd.com>
> > ---
> > v1->v2:
> > - Revert the change in kfd_crat.c
> > - Add a comment to not change value of CRAT_SIBLINGMAP_SIZE.
> >
> > drivers/gpu/drm/amd/amdkfd/kfd_crat.h | 4 ++
> > drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 82 +++++++++++++----------
> > drivers/gpu/drm/amd/amdkfd/kfd_topology.h | 2 +-
> > 3 files changed, 51 insertions(+), 37 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.h b/drivers/gpu/drm/amd/amdkfd/kfd_crat.h
> > index 387a8ef49385..74c2d7a0d628 100644
> > --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.h
> > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.h
> > @@ -79,6 +79,10 @@ struct crat_header {
> > #define CRAT_SUBTYPE_IOLINK_AFFINITY 5
> > #define CRAT_SUBTYPE_MAX 6
> >
> > +/*
> > + * Do not change the value of CRAT_SIBLINGMAP_SIZE from 32
> > + * as it breaks the ABI.
> > + */
> > #define CRAT_SIBLINGMAP_SIZE 32
> >
> > /*
> > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
> > index c54795682dfb..b98cc7930e4c 100644
> > --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
> > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
> > @@ -1596,14 +1596,17 @@ static int fill_in_l1_pcache(struct kfd_cache_properties **props_ext,
> >  static int fill_in_l2_l3_pcache(struct kfd_cache_properties **props_ext,
> >  				struct kfd_gpu_cache_info *pcache_info,
> >  				struct kfd_cu_info *cu_info,
> > -				int cache_type, unsigned int cu_processor_id)
> > +				int cache_type, unsigned int cu_processor_id,
> > +				struct kfd_node *knode)
> >  {
> >  	unsigned int cu_sibling_map_mask;
> >  	int first_active_cu;
> > -	int i, j, k;
> > +	int i, j, k, xcc, start, end;
> >  	struct kfd_cache_properties *pcache = NULL;
> >
> > -	cu_sibling_map_mask = cu_info->cu_bitmap[0][0][0];
> > +	start = ffs(knode->xcc_mask) - 1;
> > +	end = start + NUM_XCC(knode->xcc_mask);
> > +	cu_sibling_map_mask = cu_info->cu_bitmap[start][0][0];
> >  	cu_sibling_map_mask &=
> >  		((1 << pcache_info[cache_type].num_cu_shared) - 1);
> >  	first_active_cu = ffs(cu_sibling_map_mask);
> > @@ -1638,16 +1641,18 @@ static int fill_in_l2_l3_pcache(struct kfd_cache_properties **props_ext,
> >  		cu_sibling_map_mask = cu_sibling_map_mask >> (first_active_cu - 1);
> >  		k = 0;
> >
> > -		for (i = 0; i < cu_info->num_shader_engines; i++) {
> > -			for (j = 0; j < cu_info->num_shader_arrays_per_engine; j++) {
> > -				pcache->sibling_map[k] = (uint8_t)(cu_sibling_map_mask & 0xFF);
> > -				pcache->sibling_map[k+1] = (uint8_t)((cu_sibling_map_mask >> 8) & 0xFF);
> > -				pcache->sibling_map[k+2] = (uint8_t)((cu_sibling_map_mask >> 16) & 0xFF);
> > -				pcache->sibling_map[k+3] = (uint8_t)((cu_sibling_map_mask >> 24) & 0xFF);
> > -				k += 4;
> > -
> > -				cu_sibling_map_mask = cu_info->cu_bitmap[0][i % 4][j + i / 4];
> > -				cu_sibling_map_mask &= ((1 << pcache_info[cache_type].num_cu_shared) - 1);
> > +		for (xcc = start; xcc < end; xcc++) {
> > +			for (i = 0; i < cu_info->num_shader_engines; i++) {
> > +				for (j = 0; j < cu_info->num_shader_arrays_per_engine; j++) {
> > +					pcache->sibling_map[k] = (uint8_t)(cu_sibling_map_mask & 0xFF);
> > +					pcache->sibling_map[k+1] = (uint8_t)((cu_sibling_map_mask >> 8) & 0xFF);
> > +					pcache->sibling_map[k+2] = (uint8_t)((cu_sibling_map_mask >> 16) & 0xFF);
> > +					pcache->sibling_map[k+3] = (uint8_t)((cu_sibling_map_mask >> 24) & 0xFF);
> > +					k += 4;
> > +
> > +					cu_sibling_map_mask = cu_info->cu_bitmap[start][i % 4][j + i / 4];
>
> Shouldn't this use "xcc" as index instead of "start"?
>
Yes, that's correct. Thanks for the catch. I will fix this before committing.
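The refill at the bottom of the inner loop will then read from the CU bitmap of
the current XCC instead of the first enabled one, i.e. a sketch of the intended
one-line fix against the hunk above:

	/* Read the mask from the current XCC, not the first enabled one */
	cu_sibling_map_mask = cu_info->cu_bitmap[xcc][i % 4][j + i / 4];
	cu_sibling_map_mask &= ((1 << pcache_info[cache_type].num_cu_shared) - 1);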
> Other than that, this patch is
>
> Reviewed-by: Felix Kuehling <Felix.Kuehling at amd.com>
>
> P.S.: This whole function looks suspicious to me the way it exposes
> active/inactive CUs to user mode. I think user mode only wants to count active
> CUs in the sibling mask because it has no other information about inactive
> CUs. And reading the cu_info into cu_sibling_map_mask and then only using it
> in the next loop iteration is probably also wrong. It basically throws away
> cu_info from the last xcc,SE,SA. Or maybe I just don't understand what this
> function is trying to do.
>
Yes, the function is a bit convoluted. It starts with the following code before
entering the loop:

	cu_sibling_map_mask = cu_info->cu_bitmap[start][0][0];
	cu_sibling_map_mask &=
		((1 << pcache_info[cache_type].num_cu_shared) - 1);
	...
	cu_sibling_map_mask = cu_sibling_map_mask >> (first_active_cu - 1);
	...

It then re-uses cu_info->cu_bitmap[start][0][0] in the second loop iteration. I am
not sure it is doing the right thing either. I will take a look at it and work out
what exactly this function is meant to achieve.
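For discussion, here is a minimal standalone sketch of the packing pattern as I
currently read it. This is not the kernel code: the cu_bitmap dimensions and the
[i % 4][j + i / 4] indexing mirror the driver, everything else is simplified, and
the mask is read at the top of each iteration instead of being carried over from
the previous one:

	#include <stdint.h>

	/* Pack one 32-bit CU mask per (XCC, SE, SA) into a byte-wise
	 * sibling map, limited to the CUs that share the cache.
	 */
	static void pack_sibling_map(uint8_t *sibling_map,
				     uint32_t cu_bitmap[][4][4],
				     int num_xcc, int num_se, int num_sa,
				     int num_cu_shared)
	{
		uint32_t mask;
		int xcc, i, j, k = 0;

		for (xcc = 0; xcc < num_xcc; xcc++) {
			for (i = 0; i < num_se; i++) {
				for (j = 0; j < num_sa; j++) {
					mask = cu_bitmap[xcc][i % 4][j + i / 4];
					mask &= (1u << num_cu_shared) - 1;
					sibling_map[k++] = (uint8_t)(mask & 0xFF);
					sibling_map[k++] = (uint8_t)((mask >> 8) & 0xFF);
					sibling_map[k++] = (uint8_t)((mask >> 16) & 0xFF);
					sibling_map[k++] = (uint8_t)((mask >> 24) & 0xFF);
				}
			}
		}
	}

If that is what the function is meant to do, then carrying the mask over from the
previous iteration (plus the priming and shift by first_active_cu) would indeed
drop the bitmap of the last xcc/SE/SA, as you point out.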
Regards,
Mukul
>
> > +					cu_sibling_map_mask &= ((1 << pcache_info[cache_type].num_cu_shared) - 1);
> > +				}
> >  			}
> >  		}
> >  		pcache->sibling_map_size = k;
> > @@ -1665,7 +1670,7 @@ static int fill_in_l2_l3_pcache(struct kfd_cache_properties **props_ext,
> >  static void kfd_fill_cache_non_crat_info(struct kfd_topology_device *dev, struct kfd_node *kdev)
> >  {
> >  	struct kfd_gpu_cache_info *pcache_info = NULL;
> > -	int i, j, k;
> > +	int i, j, k, xcc, start, end;
> >  	int ct = 0;
> >  	unsigned int cu_processor_id;
> >  	int ret;
> > @@ -1699,37 +1704,42 @@ static void kfd_fill_cache_non_crat_info(struct kfd_topology_device *dev, struct kfd_node *kdev)
> >  	 * then it will consider only one CU from
> >  	 * the shared unit
> >  	 */
> > +	start = ffs(kdev->xcc_mask) - 1;
> > +	end = start + NUM_XCC(kdev->xcc_mask);
> > +
> >  	for (ct = 0; ct < num_of_cache_types; ct++) {
> >  		cu_processor_id = gpu_processor_id;
> >  		if (pcache_info[ct].cache_level == 1) {
> > -			for (i = 0; i < pcu_info->num_shader_engines; i++) {
> > -				for (j = 0; j < pcu_info->num_shader_arrays_per_engine; j++) {
> > -					for (k = 0; k < pcu_info->num_cu_per_sh; k += pcache_info[ct].num_cu_shared) {
> > -
> > -						ret = fill_in_l1_pcache(&props_ext, pcache_info, pcu_info,
> > -								pcu_info->cu_bitmap[0][i % 4][j + i / 4], ct,
> > -								cu_processor_id, k);
> > -
> > -						if (ret < 0)
> > -							break;
> > -
> > -						if (!ret) {
> > -							num_of_entries++;
> > -							list_add_tail(&props_ext->list, &dev->cache_props);
> > +			for (xcc = start; xcc < end; xcc++) {
> > +				for (i = 0; i < pcu_info->num_shader_engines; i++) {
> > +					for (j = 0; j < pcu_info->num_shader_arrays_per_engine; j++) {
> > +						for (k = 0; k < pcu_info->num_cu_per_sh; k += pcache_info[ct].num_cu_shared) {
> > +
> > +							ret = fill_in_l1_pcache(&props_ext, pcache_info, pcu_info,
> > +									pcu_info->cu_bitmap[xcc][i % 4][j + i / 4], ct,
> > +									cu_processor_id, k);
> > +
> > +							if (ret < 0)
> > +								break;
> > +
> > +							if (!ret) {
> > +								num_of_entries++;
> > +								list_add_tail(&props_ext->list, &dev->cache_props);
> > +							}
> > +
> > +							/* Move to next CU block */
> > +							num_cu_shared = ((k + pcache_info[ct].num_cu_shared) <=
> > +								pcu_info->num_cu_per_sh) ?
> > +								pcache_info[ct].num_cu_shared :
> > +								(pcu_info->num_cu_per_sh - k);
> > +							cu_processor_id += num_cu_shared;
> >  						}
> > -
> > -						/* Move to next CU block */
> > -						num_cu_shared = ((k + pcache_info[ct].num_cu_shared) <=
> > -							pcu_info->num_cu_per_sh) ?
> > -							pcache_info[ct].num_cu_shared :
> > -							(pcu_info->num_cu_per_sh - k);
> > -						cu_processor_id += num_cu_shared;
> >  					}
> >  				}
> >  			}
> >  		} else {
> >  			ret = fill_in_l2_l3_pcache(&props_ext, pcache_info,
> > -					pcu_info, ct, cu_processor_id);
> > +					pcu_info, ct, cu_processor_id, kdev);
> >
> >  			if (ret < 0)
> >  				break;
> > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
> > index dea32a9e5506..27386ce9a021 100644
> > --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
> > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
> > @@ -89,7 +89,7 @@ struct kfd_mem_properties {
> > struct attribute attr;
> > };
> >
> > -#define CACHE_SIBLINGMAP_SIZE 64
> > +#define CACHE_SIBLINGMAP_SIZE 128
> >
> > struct kfd_cache_properties {
> > struct list_head list;