[PATCH 20/24] drm/amdkfd: enable pc sampling work to trigger trap

Felix Kuehling felix.kuehling at amd.com
Wed Nov 22 22:31:45 UTC 2023


On 2023-11-03 09:11, James Zhu wrote:
> Enable a delay work to trigger pc sampling trap.
>
> Signed-off-by: James Zhu <James.Zhu at amd.com>
> ---
>   drivers/gpu/drm/amd/amdkfd/kfd_device.c      |  3 ++
>   drivers/gpu/drm/amd/amdkfd/kfd_pc_sampling.c | 39 ++++++++++++++++++++
>   drivers/gpu/drm/amd/amdkfd/kfd_pc_sampling.h |  1 +
>   drivers/gpu/drm/amd/amdkfd/kfd_priv.h        |  1 +
>   4 files changed, 44 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> index bcaeedac8fe0..fb21902e433a 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> @@ -35,6 +35,7 @@
>   #include "kfd_migrate.h"
>   #include "amdgpu.h"
>   #include "amdgpu_xcp.h"
> +#include "kfd_pc_sampling.h"
>   
>   #define MQD_SIZE_ALIGNED 768
>   
> @@ -537,6 +538,8 @@ static void kfd_pc_sampling_init(struct kfd_node *dev)
>   {
>   	mutex_init(&dev->pcs_data.mutex);
>   	idr_init_base(&dev->pcs_data.hosttrap_entry.base.pc_sampling_idr, 1);
> +	INIT_WORK(&dev->pcs_data.hosttrap_entry.base.pc_sampling_work,
> +		kfd_pc_sample_handler);
>   }
>   
>   static void kfd_pc_sampling_exit(struct kfd_node *dev)
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pc_sampling.c b/drivers/gpu/drm/amd/amdkfd/kfd_pc_sampling.c
> index 2c4ac5b4cc4b..e8f0559b618e 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_pc_sampling.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_pc_sampling.c
> @@ -38,6 +38,43 @@ struct supported_pc_sample_info supported_formats[] = {
>   	{ IP_VERSION(9, 4, 2), &sample_info_hosttrap_9_0_0 },
>   };
>   
> +void kfd_pc_sample_handler(struct work_struct *work)
> +{
> +	struct amdgpu_device *adev;
> +	struct kfd_node *node;
> +	uint32_t timeout = 0;
> +
> +	node = container_of(work, struct kfd_node,
> +					pcs_data.hosttrap_entry.base.pc_sampling_work);
> +
> +	mutex_lock(&node->pcs_data.mutex);
> +	if (node->pcs_data.hosttrap_entry.base.active_count &&
> +		node->pcs_data.hosttrap_entry.base.pc_sample_info.value &&
> +		node->kfd2kgd->trigger_pc_sample_trap) {
> +		switch (node->pcs_data.hosttrap_entry.base.pc_sample_info.type) {
> +		case KFD_IOCTL_PCS_TYPE_TIME_US:
> +			timeout = (uint32_t)node->pcs_data.hosttrap_entry.base.pc_sample_info.value;
> +			break;
> +		default:
> +			pr_debug("PC Sampling type %d not supported.",
> +					node->pcs_data.hosttrap_entry.base.pc_sample_info.type);
> +		}
> +	}
> +	mutex_unlock(&node->pcs_data.mutex);
> +	if (!timeout)
> +		return;
> +
> +	adev = node->adev;
> +	while (!READ_ONCE(node->pcs_data.hosttrap_entry.base.stop_enable)) {

This worker basically runs indefinitely (controlled by user mode).


> +		node->kfd2kgd->trigger_pc_sample_trap(adev, node->vm_info.last_vmid_kfd,
> +				&node->pcs_data.hosttrap_entry.base.target_simd,
> +				&node->pcs_data.hosttrap_entry.base.target_wave_slot,
> +				node->pcs_data.hosttrap_entry.base.pc_sample_info.method);
> +		pr_debug_ratelimited("triggered a host trap.");
> +		usleep_range(timeout, timeout + 10);

This will cause drift of the interval. Instead what you should do, is 
calculate the wait time at the end of every iteration based on the 
current time and the interval.


> +	}
> +}
> +
>   static int kfd_pc_sample_query_cap(struct kfd_process_device *pdd,
>   					struct kfd_ioctl_pc_sample_args __user *user_args)
>   {
> @@ -101,6 +138,7 @@ static int kfd_pc_sample_start(struct kfd_process_device *pdd,
>   		} else {
>   			kfd_process_set_trap_pc_sampling_flag(&pdd->qpd,
>   				pdd->dev->pcs_data.hosttrap_entry.base.pc_sample_info.method, true);
> +			schedule_work(&pdd->dev->pcs_data.hosttrap_entry.base.pc_sampling_work);

Scheduling a worker that runs indefinitely on the system workqueue is 
probably a bad idea. It could block other work items indefinitely. I 
think you are misusing the work queue API here. What you really want is 
probably, to crease a kernel thread.

Regards,
   Felix


>   			break;
>   		}
>   	}
> @@ -123,6 +161,7 @@ static int kfd_pc_sample_stop(struct kfd_process_device *pdd,
>   	mutex_unlock(&pdd->dev->pcs_data.mutex);
>   
>   	if (pc_sampling_stop) {
> +		cancel_work_sync(&pdd->dev->pcs_data.hosttrap_entry.base.pc_sampling_work);
>   		kfd_process_set_trap_pc_sampling_flag(&pdd->qpd,
>   			pdd->dev->pcs_data.hosttrap_entry.base.pc_sample_info.method, false);
>   
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pc_sampling.h b/drivers/gpu/drm/amd/amdkfd/kfd_pc_sampling.h
> index 4eeded4ea5b6..cb93909e6bd3 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_pc_sampling.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_pc_sampling.h
> @@ -30,5 +30,6 @@
>   
>   int kfd_pc_sample(struct kfd_process_device *pdd,
>   					struct kfd_ioctl_pc_sample_args __user *args);
> +void kfd_pc_sample_handler(struct work_struct *work);
>   
>   #endif /* KFD_PC_SAMPLING_H_ */
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> index badaa4d68cc4..b7062033fda4 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> @@ -263,6 +263,7 @@ struct kfd_dev_pc_sampling_data {
>   	uint32_t target_wave_slot;  /* target wave slot for trap */
>   	bool stop_enable;           /* pc sampling stop in process */
>   	struct idr pc_sampling_idr;
> +	struct work_struct pc_sampling_work;
>   	struct kfd_pc_sample_info pc_sample_info;
>   };
>   


More information about the amd-gfx mailing list