[PATCH v4 2/3] drm/amdkfd: Set per-process flags only once for gfx9/10/11/12

Amber Lin Amber.Lin at amd.com
Thu Mar 6 20:45:14 UTC 2025


Reviewed-by: Amber Lin <Amber.Lin at amd.com>

Regards,
Amber

On 3/6/25 14:52, Harish Kasiviswanathan wrote:
> Define set_cache_memory_policy() for these ASICs and move all static
> changes from update_qpd(), which is called each time a queue is
> created, to set_cache_memory_policy(), which is called once during
> process initialization.
>
> Signed-off-by: Harish Kasiviswanathan <Harish.Kasiviswanathan at amd.com>
> ---
>   .../amd/amdkfd/kfd_device_queue_manager_v10.c | 41 +++++++++++--------
>   .../amd/amdkfd/kfd_device_queue_manager_v11.c | 41 +++++++++++--------
>   .../amd/amdkfd/kfd_device_queue_manager_v12.c | 41 +++++++++++--------
>   .../amd/amdkfd/kfd_device_queue_manager_v9.c  | 36 +++++++++++++++-
>   4 files changed, 107 insertions(+), 52 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v10.c
> index 245a90dfc2f6..b5f5f141353b 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v10.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v10.c
> @@ -31,10 +31,17 @@ static int update_qpd_v10(struct device_queue_manager *dqm,
>   			 struct qcm_process_device *qpd);
>   static void init_sdma_vm_v10(struct device_queue_manager *dqm, struct queue *q,
>   			    struct qcm_process_device *qpd);
> +static bool set_cache_memory_policy_v10(struct device_queue_manager *dqm,
> +				   struct qcm_process_device *qpd,
> +				   enum cache_policy default_policy,
> +				   enum cache_policy alternate_policy,
> +				   void __user *alternate_aperture_base,
> +				   uint64_t alternate_aperture_size);
>   
>   void device_queue_manager_init_v10(
>   	struct device_queue_manager_asic_ops *asic_ops)
>   {
> +	asic_ops->set_cache_memory_policy = set_cache_memory_policy_v10;
>   	asic_ops->update_qpd = update_qpd_v10;
>   	asic_ops->init_sdma_vm = init_sdma_vm_v10;
>   	asic_ops->mqd_manager_init = mqd_manager_init_v10;
> @@ -49,27 +56,27 @@ static uint32_t compute_sh_mem_bases_64bit(struct kfd_process_device *pdd)
>   		private_base;
>   }
>   
> -static int update_qpd_v10(struct device_queue_manager *dqm,
> -			 struct qcm_process_device *qpd)
> +static bool set_cache_memory_policy_v10(struct device_queue_manager *dqm,
> +				   struct qcm_process_device *qpd,
> +				   enum cache_policy default_policy,
> +				   enum cache_policy alternate_policy,
> +				   void __user *alternate_aperture_base,
> +				   uint64_t alternate_aperture_size)
>   {
> -	struct kfd_process_device *pdd;
> -
> -	pdd = qpd_to_pdd(qpd);
> -
> -	/* check if sh_mem_config register already configured */
> -	if (qpd->sh_mem_config == 0) {
> -		qpd->sh_mem_config =
> -			(SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
> -				SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) |
> -			(3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT);
> -		qpd->sh_mem_ape1_limit = 0;
> -		qpd->sh_mem_ape1_base = 0;
> -	}
> -
> -	qpd->sh_mem_bases = compute_sh_mem_bases_64bit(pdd);
> +	qpd->sh_mem_config = (SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
> +			      SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) |
> +			      (3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT);
> +	qpd->sh_mem_ape1_limit = 0;
> +	qpd->sh_mem_ape1_base = 0;
> +	qpd->sh_mem_bases = compute_sh_mem_bases_64bit(qpd_to_pdd(qpd));
>   
>   	pr_debug("sh_mem_bases 0x%X\n", qpd->sh_mem_bases);
> +	return true;
> +}
>   
> +static int update_qpd_v10(struct device_queue_manager *dqm,
> +			 struct qcm_process_device *qpd)
> +{
>   	return 0;
>   }
>   
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v11.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v11.c
> index 2e129da7acb4..f436878d0d62 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v11.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v11.c
> @@ -30,10 +30,17 @@ static int update_qpd_v11(struct device_queue_manager *dqm,
>   			 struct qcm_process_device *qpd);
>   static void init_sdma_vm_v11(struct device_queue_manager *dqm, struct queue *q,
>   			    struct qcm_process_device *qpd);
> +static bool set_cache_memory_policy_v11(struct device_queue_manager *dqm,
> +				   struct qcm_process_device *qpd,
> +				   enum cache_policy default_policy,
> +				   enum cache_policy alternate_policy,
> +				   void __user *alternate_aperture_base,
> +				   uint64_t alternate_aperture_size);
>   
>   void device_queue_manager_init_v11(
>   	struct device_queue_manager_asic_ops *asic_ops)
>   {
> +	asic_ops->set_cache_memory_policy = set_cache_memory_policy_v11;
>   	asic_ops->update_qpd = update_qpd_v11;
>   	asic_ops->init_sdma_vm = init_sdma_vm_v11;
>   	asic_ops->mqd_manager_init = mqd_manager_init_v11;
> @@ -48,28 +55,28 @@ static uint32_t compute_sh_mem_bases_64bit(struct kfd_process_device *pdd)
>   		private_base;
>   }
>   
> -static int update_qpd_v11(struct device_queue_manager *dqm,
> -			 struct qcm_process_device *qpd)
> +static bool set_cache_memory_policy_v11(struct device_queue_manager *dqm,
> +				   struct qcm_process_device *qpd,
> +				   enum cache_policy default_policy,
> +				   enum cache_policy alternate_policy,
> +				   void __user *alternate_aperture_base,
> +				   uint64_t alternate_aperture_size)
>   {
> -	struct kfd_process_device *pdd;
> -
> -	pdd = qpd_to_pdd(qpd);
> -
> -	/* check if sh_mem_config register already configured */
> -	if (qpd->sh_mem_config == 0) {
> -		qpd->sh_mem_config =
> -			(SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
> -				SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) |
> -			(3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT);
> -
> -		qpd->sh_mem_ape1_limit = 0;
> -		qpd->sh_mem_ape1_base = 0;
> -	}
> +	qpd->sh_mem_config = (SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
> +			      SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) |
> +			      (3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT);
>   
> -	qpd->sh_mem_bases = compute_sh_mem_bases_64bit(pdd);
> +	qpd->sh_mem_ape1_limit = 0;
> +	qpd->sh_mem_ape1_base = 0;
> +	qpd->sh_mem_bases = compute_sh_mem_bases_64bit(qpd_to_pdd(qpd));
>   
>   	pr_debug("sh_mem_bases 0x%X\n", qpd->sh_mem_bases);
> +	return true;
> +}
>   
> +static int update_qpd_v11(struct device_queue_manager *dqm,
> +			 struct qcm_process_device *qpd)
> +{
>   	return 0;
>   }
>   
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v12.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v12.c
> index 4f3295b29dfb..62ca1c8fcbaf 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v12.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v12.c
> @@ -30,10 +30,17 @@ static int update_qpd_v12(struct device_queue_manager *dqm,
>   			 struct qcm_process_device *qpd);
>   static void init_sdma_vm_v12(struct device_queue_manager *dqm, struct queue *q,
>   			    struct qcm_process_device *qpd);
> +static bool set_cache_memory_policy_v12(struct device_queue_manager *dqm,
> +				   struct qcm_process_device *qpd,
> +				   enum cache_policy default_policy,
> +				   enum cache_policy alternate_policy,
> +				   void __user *alternate_aperture_base,
> +				   uint64_t alternate_aperture_size);
>   
>   void device_queue_manager_init_v12(
>   	struct device_queue_manager_asic_ops *asic_ops)
>   {
> +	asic_ops->set_cache_memory_policy = set_cache_memory_policy_v12;
>   	asic_ops->update_qpd = update_qpd_v12;
>   	asic_ops->init_sdma_vm = init_sdma_vm_v12;
>   	asic_ops->mqd_manager_init = mqd_manager_init_v12;
> @@ -48,28 +55,28 @@ static uint32_t compute_sh_mem_bases_64bit(struct kfd_process_device *pdd)
>   		private_base;
>   }
>   
> -static int update_qpd_v12(struct device_queue_manager *dqm,
> -			 struct qcm_process_device *qpd)
> +static bool set_cache_memory_policy_v12(struct device_queue_manager *dqm,
> +				   struct qcm_process_device *qpd,
> +				   enum cache_policy default_policy,
> +				   enum cache_policy alternate_policy,
> +				   void __user *alternate_aperture_base,
> +				   uint64_t alternate_aperture_size)
>   {
> -	struct kfd_process_device *pdd;
> -
> -	pdd = qpd_to_pdd(qpd);
> -
> -	/* check if sh_mem_config register already configured */
> -	if (qpd->sh_mem_config == 0) {
> -		qpd->sh_mem_config =
> -			(SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
> -				SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) |
> -			(3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT);
> -
> -		qpd->sh_mem_ape1_limit = 0;
> -		qpd->sh_mem_ape1_base = 0;
> -	}
> +	qpd->sh_mem_config = (SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
> +			      SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) |
> +			      (3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT);
>   
> -	qpd->sh_mem_bases = compute_sh_mem_bases_64bit(pdd);
> +	qpd->sh_mem_ape1_limit = 0;
> +	qpd->sh_mem_ape1_base = 0;
> +	qpd->sh_mem_bases = compute_sh_mem_bases_64bit(qpd_to_pdd(qpd));
>   
>   	pr_debug("sh_mem_bases 0x%X\n", qpd->sh_mem_bases);
> +	return true;
> +}
>   
> +static int update_qpd_v12(struct device_queue_manager *dqm,
> +			 struct qcm_process_device *qpd)
> +{
>   	return 0;
>   }
>   
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c
> index 67137e674f1d..c734eb9b505f 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c
> @@ -30,10 +30,17 @@ static int update_qpd_v9(struct device_queue_manager *dqm,
>   			 struct qcm_process_device *qpd);
>   static void init_sdma_vm_v9(struct device_queue_manager *dqm, struct queue *q,
>   			    struct qcm_process_device *qpd);
> +static bool set_cache_memory_policy_v9(struct device_queue_manager *dqm,
> +				   struct qcm_process_device *qpd,
> +				   enum cache_policy default_policy,
> +				   enum cache_policy alternate_policy,
> +				   void __user *alternate_aperture_base,
> +				   uint64_t alternate_aperture_size);
>   
>   void device_queue_manager_init_v9(
>   	struct device_queue_manager_asic_ops *asic_ops)
>   {
> +	asic_ops->set_cache_memory_policy = set_cache_memory_policy_v9;
>   	asic_ops->update_qpd = update_qpd_v9;
>   	asic_ops->init_sdma_vm = init_sdma_vm_v9;
>   	asic_ops->mqd_manager_init = mqd_manager_init_v9;
> @@ -48,10 +55,37 @@ static uint32_t compute_sh_mem_bases_64bit(struct kfd_process_device *pdd)
>   		private_base;
>   }
>   
> +static bool set_cache_memory_policy_v9(struct device_queue_manager *dqm,
> +				   struct qcm_process_device *qpd,
> +				   enum cache_policy default_policy,
> +				   enum cache_policy alternate_policy,
> +				   void __user *alternate_aperture_base,
> +				   uint64_t alternate_aperture_size)
> +{
> +	qpd->sh_mem_config = SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
> +				SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
> +
> +	if (dqm->dev->kfd->noretry)
> +		qpd->sh_mem_config |= 1 << SH_MEM_CONFIG__RETRY_DISABLE__SHIFT;
> +
> +	if (KFD_GC_VERSION(dqm->dev->kfd) == IP_VERSION(9, 4, 3) ||
> +		KFD_GC_VERSION(dqm->dev->kfd) == IP_VERSION(9, 4, 4) ||
> +		KFD_GC_VERSION(dqm->dev->kfd) == IP_VERSION(9, 5, 0))
> +		qpd->sh_mem_config |= (1 << SH_MEM_CONFIG__F8_MODE__SHIFT);
> +
> +	qpd->sh_mem_ape1_limit = 0;
> +	qpd->sh_mem_ape1_base = 0;
> +	qpd->sh_mem_bases = compute_sh_mem_bases_64bit(qpd_to_pdd(qpd));
> +
> +	pr_debug("sh_mem_bases 0x%X sh_mem_config 0x%X\n", qpd->sh_mem_bases,
> +		 qpd->sh_mem_config);
> +	return true;
> +}
> +
>   static int update_qpd_v9(struct device_queue_manager *dqm,
>   			 struct qcm_process_device *qpd)
>   {
> -	struct kfd_process_device *pdd;
> +	struct kfd_process_device *pdd = qpd_to_pdd(qpd);
>   
>   	pdd = qpd_to_pdd(qpd);
>   



More information about the amd-gfx mailing list