[PATCH] drm/amdkfd: fix cu mask for asics with wgps

Felix Kuehling felix.kuehling at amd.com
Wed Jun 29 22:44:32 UTC 2022


On 2022-06-29 14:16, Jonathan Kim wrote:
> GFX10 and up have work group processors (WGP) and WGP mode is the native
> compile mode.
>
> KFD and ROCr have no visibility into whether a dispatch is operating
> in CU or WGP mode.
>
> Enforce CU masking to be pairwise contiguous in enablement and
> round robin distribute CUs across the SEs in a pairwise manner to
> assume WGP mode at all times.
>
> Signed-off-by: Jonathan Kim <jonathan.kim at amd.com>

Looks good to me. Three nit-picks inline. With those fixed, the patch is

Reviewed-by: Felix Kuehling <Felix.Kuehling at amd.com>


> ---
>   drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c      | 12 +++++++-----
>   .../drm/amd/amdkfd/kfd_process_queue_manager.c    | 15 +++++++++++++++
>   2 files changed, 22 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
> index 49a283be6b57..7febd1e69d13 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
> @@ -100,7 +100,9 @@ void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm,
>   {
>   	struct kfd_cu_info cu_info;
>   	uint32_t cu_per_sh[KFD_MAX_NUM_SE][KFD_MAX_NUM_SH_PER_SE] = {0};
> -	int i, se, sh, cu, cu_bitmap_sh_mul;
> +	bool wgp_mode_req = KFD_GC_VERSION(mm->dev) >= IP_VERSION(10, 0, 0);
> +	uint32_t en_mask = wgp_mode_req ? 0x3 : 0x1;
> +	int i, se, sh, cu, cu_bitmap_sh_mul, inc = wgp_mode_req ? 2 : 1;
>   
>   	amdgpu_amdkfd_get_cu_info(mm->dev->adev, &cu_info);
>   
> @@ -167,13 +169,13 @@ void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm,
>   		se_mask[i] = 0;
>   
>   	i = 0;
> -	for (cu = 0; cu < 16; cu++) {
> +	for (cu = 0; cu < 16; cu = cu + inc) {

cu += inc


>   		for (sh = 0; sh < cu_info.num_shader_arrays_per_engine; sh++) {
>   			for (se = 0; se < cu_info.num_shader_engines; se++) {
>   				if (cu_per_sh[se][sh] > cu) {
> -					if (cu_mask[i / 32] & (1 << (i % 32)))
> -						se_mask[se] |= 1 << (cu + sh * 16);
> -					i++;
> +					if (cu_mask[i / 32] & (en_mask << (i % 32)))
> +						se_mask[se] |= en_mask << (cu + sh * 16);
> +					i = i + inc;

i += inc;


>   					if (i == cu_mask_count)
>   						return;
>   				}
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
> index c9c205df4a14..fc0416f6f83e 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
> @@ -498,6 +498,21 @@ int pqm_update_mqd(struct process_queue_manager *pqm,
>   		return -EFAULT;
>   	}
>   
> +	/* ASICs that have WGPs must enforce pairwise enabled mask checks. */
> +	if (minfo && minfo->update_flag == UPDATE_FLAG_CU_MASK && minfo->cu_mask.ptr &&
> +			KFD_GC_VERSION(pqn->q->device) >= IP_VERSION(10, 0, 0)) {
> +		int i;
> +
> +		for (i = 0; i < minfo->cu_mask.count; i = i + 2) {

i += 2;

Regards,
   Felix


> +			uint32_t cu_pair = (minfo->cu_mask.ptr[i / 32] >> (i % 32)) & 0x3;
> +
> +			if (cu_pair && cu_pair != 0x3) {
> +				pr_debug("CUs must be adjacent pairwise enabled.\n");
> +				return -EINVAL;
> +			}
> +		}
> +	}
> +
>   	retval = pqn->q->device->dqm->ops.update_queue(pqn->q->device->dqm,
>   							pqn->q, minfo);
>   	if (retval != 0)


More information about the amd-gfx mailing list