[PATCH v2] drm/amdkfd: Replace pr_err with dev_err

Felix Kuehling felix.kuehling at amd.com
Mon Aug 28 17:33:51 UTC 2023


On 2023-08-26 09:41, Asad Kamal wrote:
> Replace pr_err with dev_err to show the bus-id of the
> failing device in kfd queue errors
>
> Signed-off-by: Asad Kamal <asad.kamal at amd.com>
> Reviewed-by: Lijo Lazar <lijo.lazar at amd.com>

Reviewed-by: Felix Kuehling <Felix.Kuehling at amd.com>
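
For anyone skimming: a rough before/after sketch of what the dev_err
conversion buys in the kernel log (the bus address 0000:03:00.0 and the
doorbell value below are made-up placeholders, not taken from this patch):

	/* before: pr_err() gives no hint which GPU hit the error */
	pr_err("failed to add hardware queue to MES, doorbell=0x%x\n",
	       q->properties.doorbell_off);
	/* dmesg (roughly, modulo kfd's pr_fmt prefix):
	 *   failed to add hardware queue to MES, doorbell=0x80
	 */

	/* after: dev_err() prefixes the driver name and bus-id of the device */
	dev_err(adev->dev, "failed to add hardware queue to MES, doorbell=0x%x\n",
		q->properties.doorbell_off);
	/* dmesg (roughly):
	 *   amdgpu 0000:03:00.0: failed to add hardware queue to MES, doorbell=0x80
	 */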


> ---
>   .../drm/amd/amdkfd/kfd_device_queue_manager.c | 116 +++++++++++-------
>   drivers/gpu/drm/amd/amdkfd/kfd_priv.h         |   2 +-
>   2 files changed, 71 insertions(+), 47 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> index b166f30f083e..cd6cfffd6436 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> @@ -232,8 +232,8 @@ static int add_queue_mes(struct device_queue_manager *dqm, struct queue *q,
>   
>   	queue_type = convert_to_mes_queue_type(q->properties.type);
>   	if (queue_type < 0) {
> -		pr_err("Queue type not supported with MES, queue:%d\n",
> -				q->properties.type);
> +		dev_err(adev->dev, "Queue type not supported with MES, queue:%d\n",
> +			q->properties.type);
>   		return -EINVAL;
>   	}
>   	queue_input.queue_type = (uint32_t)queue_type;
> @@ -244,9 +244,9 @@ static int add_queue_mes(struct device_queue_manager *dqm, struct queue *q,
>   	r = adev->mes.funcs->add_hw_queue(&adev->mes, &queue_input);
>   	amdgpu_mes_unlock(&adev->mes);
>   	if (r) {
> -		pr_err("failed to add hardware queue to MES, doorbell=0x%x\n",
> +		dev_err(adev->dev, "failed to add hardware queue to MES, doorbell=0x%x\n",
>   			q->properties.doorbell_off);
> -		pr_err("MES might be in unrecoverable state, issue a GPU reset\n");
> +		dev_err(adev->dev, "MES might be in unrecoverable state, issue a GPU reset\n");
>   		kfd_hws_hang(dqm);
>   	}
>   
> @@ -272,9 +272,9 @@ static int remove_queue_mes(struct device_queue_manager *dqm, struct queue *q,
>   	amdgpu_mes_unlock(&adev->mes);
>   
>   	if (r) {
> -		pr_err("failed to remove hardware queue from MES, doorbell=0x%x\n",
> +		dev_err(adev->dev, "failed to remove hardware queue from MES, doorbell=0x%x\n",
>   			q->properties.doorbell_off);
> -		pr_err("MES might be in unrecoverable state, issue a GPU reset\n");
> +		dev_err(adev->dev, "MES might be in unrecoverable state, issue a GPU reset\n");
>   		kfd_hws_hang(dqm);
>   	}
>   
> @@ -284,6 +284,7 @@ static int remove_queue_mes(struct device_queue_manager *dqm, struct queue *q,
>   static int remove_all_queues_mes(struct device_queue_manager *dqm)
>   {
>   	struct device_process_node *cur;
> +	struct device *dev = dqm->dev->adev->dev;
>   	struct qcm_process_device *qpd;
>   	struct queue *q;
>   	int retval = 0;
> @@ -294,7 +295,7 @@ static int remove_all_queues_mes(struct device_queue_manager *dqm)
>   			if (q->properties.is_active) {
>   				retval = remove_queue_mes(dqm, q, qpd);
>   				if (retval) {
> -					pr_err("%s: Failed to remove queue %d for dev %d",
> +					dev_err(dev, "%s: Failed to remove queue %d for dev %d",
>   						__func__,
>   						q->properties.queue_id,
>   						dqm->dev->id);
> @@ -443,6 +444,7 @@ static int allocate_vmid(struct device_queue_manager *dqm,
>   			struct qcm_process_device *qpd,
>   			struct queue *q)
>   {
> +	struct device *dev = dqm->dev->adev->dev;
>   	int allocated_vmid = -1, i;
>   
>   	for (i = dqm->dev->vm_info.first_vmid_kfd;
> @@ -454,7 +456,7 @@ static int allocate_vmid(struct device_queue_manager *dqm,
>   	}
>   
>   	if (allocated_vmid < 0) {
> -		pr_err("no more vmid to allocate\n");
> +		dev_err(dev, "no more vmid to allocate\n");
>   		return -ENOSPC;
>   	}
>   
> @@ -510,10 +512,12 @@ static void deallocate_vmid(struct device_queue_manager *dqm,
>   				struct qcm_process_device *qpd,
>   				struct queue *q)
>   {
> +	struct device *dev = dqm->dev->adev->dev;
> +
>   	/* On GFX v7, CP doesn't flush TC at dequeue */
>   	if (q->device->adev->asic_type == CHIP_HAWAII)
>   		if (flush_texture_cache_nocpsch(q->device, qpd))
> -			pr_err("Failed to flush TC\n");
> +			dev_err(dev, "Failed to flush TC\n");
>   
>   	kfd_flush_tlb(qpd_to_pdd(qpd), TLB_FLUSH_LEGACY);
>   
> @@ -708,7 +712,7 @@ static int dbgdev_wave_reset_wavefronts(struct kfd_node *dev, struct kfd_process
>   	pr_debug("Killing all process wavefronts\n");
>   
>   	if (!dev->kfd2kgd->get_atc_vmid_pasid_mapping_info) {
> -		pr_err("no vmid pasid mapping supported \n");
> +		dev_err(dev->adev->dev, "no vmid pasid mapping supported\n");
>   		return -EOPNOTSUPP;
>   	}
>   
> @@ -729,7 +733,7 @@ static int dbgdev_wave_reset_wavefronts(struct kfd_node *dev, struct kfd_process
>   	}
>   
>   	if (vmid > last_vmid_to_scan) {
> -		pr_err("Didn't find vmid for pasid 0x%x\n", p->pasid);
> +		dev_err(dev->adev->dev, "Didn't find vmid for pasid 0x%x\n", p->pasid);
>   		return -EFAULT;
>   	}
>   
> @@ -821,6 +825,7 @@ static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
>   {
>   	int retval;
>   	uint64_t sdma_val = 0;
> +	struct device *dev = dqm->dev->adev->dev;
>   	struct kfd_process_device *pdd = qpd_to_pdd(qpd);
>   	struct mqd_manager *mqd_mgr =
>   		dqm->mqd_mgrs[get_mqd_type_from_queue_type(q->properties.type)];
> @@ -831,7 +836,7 @@ static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
>   		retval = read_sdma_queue_counter((uint64_t __user *)q->properties.read_ptr,
>   							&sdma_val);
>   		if (retval)
> -			pr_err("Failed to read SDMA queue counter for queue: %d\n",
> +			dev_err(dev, "Failed to read SDMA queue counter for queue: %d\n",
>   				q->properties.queue_id);
>   	}
>   
> @@ -850,6 +855,7 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q,
>   			struct mqd_update_info *minfo)
>   {
>   	int retval = 0;
> +	struct device *dev = dqm->dev->adev->dev;
>   	struct mqd_manager *mqd_mgr;
>   	struct kfd_process_device *pdd;
>   	bool prev_active = false;
> @@ -875,7 +881,7 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q,
>   			retval = remove_queue_mes(dqm, q, &pdd->qpd);
>   
>   		if (retval) {
> -			pr_err("unmap queue failed\n");
> +			dev_err(dev, "unmap queue failed\n");
>   			goto out_unlock;
>   		}
>   	} else if (prev_active &&
> @@ -894,7 +900,7 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q,
>   				 KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN),
>   				KFD_UNMAP_LATENCY_MS, q->pipe, q->queue);
>   		if (retval) {
> -			pr_err("destroy mqd failed\n");
> +			dev_err(dev, "destroy mqd failed\n");
>   			goto out_unlock;
>   		}
>   	}
> @@ -1088,6 +1094,7 @@ static int evict_process_queues_cpsch(struct device_queue_manager *dqm,
>   				      struct qcm_process_device *qpd)
>   {
>   	struct queue *q;
> +	struct device *dev = dqm->dev->adev->dev;
>   	struct kfd_process_device *pdd;
>   	int retval = 0;
>   
> @@ -1121,7 +1128,7 @@ static int evict_process_queues_cpsch(struct device_queue_manager *dqm,
>   		if (dqm->dev->kfd->shared_resources.enable_mes) {
>   			retval = remove_queue_mes(dqm, q, qpd);
>   			if (retval) {
> -				pr_err("Failed to evict queue %d\n",
> +				dev_err(dev, "Failed to evict queue %d\n",
>   					q->properties.queue_id);
>   				goto out;
>   			}
> @@ -1225,6 +1232,7 @@ static int restore_process_queues_cpsch(struct device_queue_manager *dqm,
>   					struct qcm_process_device *qpd)
>   {
>   	struct queue *q;
> +	struct device *dev = dqm->dev->adev->dev;
>   	struct kfd_process_device *pdd;
>   	uint64_t eviction_duration;
>   	int retval = 0;
> @@ -1265,7 +1273,7 @@ static int restore_process_queues_cpsch(struct device_queue_manager *dqm,
>   		if (dqm->dev->kfd->shared_resources.enable_mes) {
>   			retval = add_queue_mes(dqm, q, qpd);
>   			if (retval) {
> -				pr_err("Failed to restore queue %d\n",
> +				dev_err(dev, "Failed to restore queue %d\n",
>   					q->properties.queue_id);
>   				goto out;
>   			}
> @@ -1474,18 +1482,19 @@ static void pre_reset(struct device_queue_manager *dqm)
>   static int allocate_sdma_queue(struct device_queue_manager *dqm,
>   				struct queue *q, const uint32_t *restore_sdma_id)
>   {
> +	struct device *dev = dqm->dev->adev->dev;
>   	int bit;
>   
>   	if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
>   		if (bitmap_empty(dqm->sdma_bitmap, KFD_MAX_SDMA_QUEUES)) {
> -			pr_err("No more SDMA queue to allocate\n");
> +			dev_err(dev, "No more SDMA queue to allocate\n");
>   			return -ENOMEM;
>   		}
>   
>   		if (restore_sdma_id) {
>   			/* Re-use existing sdma_id */
>   			if (!test_bit(*restore_sdma_id, dqm->sdma_bitmap)) {
> -				pr_err("SDMA queue already in use\n");
> +				dev_err(dev, "SDMA queue already in use\n");
>   				return -EBUSY;
>   			}
>   			clear_bit(*restore_sdma_id, dqm->sdma_bitmap);
> @@ -1504,13 +1513,13 @@ static int allocate_sdma_queue(struct device_queue_manager *dqm,
>   				kfd_get_num_sdma_engines(dqm->dev);
>   	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
>   		if (bitmap_empty(dqm->xgmi_sdma_bitmap, KFD_MAX_SDMA_QUEUES)) {
> -			pr_err("No more XGMI SDMA queue to allocate\n");
> +			dev_err(dev, "No more XGMI SDMA queue to allocate\n");
>   			return -ENOMEM;
>   		}
>   		if (restore_sdma_id) {
>   			/* Re-use existing sdma_id */
>   			if (!test_bit(*restore_sdma_id, dqm->xgmi_sdma_bitmap)) {
> -				pr_err("SDMA queue already in use\n");
> +				dev_err(dev, "SDMA queue already in use\n");
>   				return -EBUSY;
>   			}
>   			clear_bit(*restore_sdma_id, dqm->xgmi_sdma_bitmap);
> @@ -1562,6 +1571,7 @@ static int set_sched_resources(struct device_queue_manager *dqm)
>   {
>   	int i, mec;
>   	struct scheduling_resources res;
> +	struct device *dev = dqm->dev->adev->dev;
>   
>   	res.vmid_mask = dqm->dev->compute_vmid_bitmap;
>   
> @@ -1582,7 +1592,7 @@ static int set_sched_resources(struct device_queue_manager *dqm)
>   		 * definition of res.queue_mask needs updating
>   		 */
>   		if (WARN_ON(i >= (sizeof(res.queue_mask)*8))) {
> -			pr_err("Invalid queue enabled by amdgpu: %d\n", i);
> +			dev_err(dev, "Invalid queue enabled by amdgpu: %d\n", i);
>   			break;
>   		}
>   
> @@ -1625,6 +1635,7 @@ static int initialize_cpsch(struct device_queue_manager *dqm)
>   
>   static int start_cpsch(struct device_queue_manager *dqm)
>   {
> +	struct device *dev = dqm->dev->adev->dev;
>   	int retval;
>   
>   	retval = 0;
> @@ -1671,7 +1682,7 @@ static int start_cpsch(struct device_queue_manager *dqm)
>   		retval = pm_update_grace_period(&dqm->packet_mgr,
>   						grace_period);
>   		if (retval)
> -			pr_err("Setting grace timeout failed\n");
> +			dev_err(dev, "Setting grace timeout failed\n");
>   		else if (dqm->dev->kfd2kgd->build_grace_period_packet_info)
>   			/* Update dqm->wait_times maintained in software */
>   			dqm->dev->kfd2kgd->build_grace_period_packet_info(
> @@ -1881,15 +1892,17 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
>   	return retval;
>   }
>   
> -int amdkfd_fence_wait_timeout(uint64_t *fence_addr,
> -				uint64_t fence_value,
> -				unsigned int timeout_ms)
> +int amdkfd_fence_wait_timeout(struct device_queue_manager *dqm,
> +			      uint64_t fence_value,
> +			      unsigned int timeout_ms)
>   {
>   	unsigned long end_jiffies = msecs_to_jiffies(timeout_ms) + jiffies;
> +	struct device *dev = dqm->dev->adev->dev;
> +	uint64_t *fence_addr =  dqm->fence_addr;
>   
>   	while (*fence_addr != fence_value) {
>   		if (time_after(jiffies, end_jiffies)) {
> -			pr_err("qcm fence wait loop timeout expired\n");
> +			dev_err(dev, "qcm fence wait loop timeout expired\n");
>   			/* In HWS case, this is used to halt the driver thread
>   			 * in order not to mess up CP states before doing
>   			 * scandumps for FW debugging.
> @@ -1908,6 +1921,7 @@ int amdkfd_fence_wait_timeout(uint64_t *fence_addr,
>   /* dqm->lock mutex has to be locked before calling this function */
>   static int map_queues_cpsch(struct device_queue_manager *dqm)
>   {
> +	struct device *dev = dqm->dev->adev->dev;
>   	int retval;
>   
>   	if (!dqm->sched_running)
> @@ -1920,7 +1934,7 @@ static int map_queues_cpsch(struct device_queue_manager *dqm)
>   	retval = pm_send_runlist(&dqm->packet_mgr, &dqm->queues);
>   	pr_debug("%s sent runlist\n", __func__);
>   	if (retval) {
> -		pr_err("failed to execute runlist\n");
> +		dev_err(dev, "failed to execute runlist\n");
>   		return retval;
>   	}
>   	dqm->active_runlist = true;
> @@ -1935,8 +1949,9 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm,
>   				uint32_t grace_period,
>   				bool reset)
>   {
> -	int retval = 0;
> +	struct device *dev = dqm->dev->adev->dev;
>   	struct mqd_manager *mqd_mgr;
> +	int retval = 0;
>   
>   	if (!dqm->sched_running)
>   		return 0;
> @@ -1959,10 +1974,10 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm,
>   	pm_send_query_status(&dqm->packet_mgr, dqm->fence_gpu_addr,
>   				KFD_FENCE_COMPLETED);
>   	/* should be timed out */
> -	retval = amdkfd_fence_wait_timeout(dqm->fence_addr, KFD_FENCE_COMPLETED,
> -				queue_preemption_timeout_ms);
> +	retval = amdkfd_fence_wait_timeout(dqm, KFD_FENCE_COMPLETED,
> +					   queue_preemption_timeout_ms);
>   	if (retval) {
> -		pr_err("The cp might be in an unrecoverable state due to an unsuccessful queues preemption\n");
> +		dev_err(dev, "The cp might be in an unrecoverable state due to an unsuccessful queues preemption\n");
>   		kfd_hws_hang(dqm);
>   		return retval;
>   	}
> @@ -1977,7 +1992,7 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm,
>   	 */
>   	mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ];
>   	if (mqd_mgr->read_doorbell_id(dqm->packet_mgr.priv_queue->queue->mqd)) {
> -		pr_err("HIQ MQD's queue_doorbell_id0 is not 0, Queue preemption time out\n");
> +		dev_err(dev, "HIQ MQD's queue_doorbell_id0 is not 0, Queue preemption time out\n");
>   		while (halt_if_hws_hang)
>   			schedule();
>   		return -ETIME;
> @@ -1987,7 +2002,7 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm,
>   	if (grace_period != USE_DEFAULT_GRACE_PERIOD) {
>   		if (pm_update_grace_period(&dqm->packet_mgr,
>   					USE_DEFAULT_GRACE_PERIOD))
> -			pr_err("Failed to reset grace period\n");
> +			dev_err(dev, "Failed to reset grace period\n");
>   	}
>   
>   	pm_release_ib(&dqm->packet_mgr);
> @@ -2061,6 +2076,7 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
>   	struct mqd_manager *mqd_mgr;
>   	uint64_t sdma_val = 0;
>   	struct kfd_process_device *pdd = qpd_to_pdd(qpd);
> +	struct device *dev = dqm->dev->adev->dev;
>   
>   	/* Get the SDMA queue stats */
>   	if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) ||
> @@ -2068,7 +2084,7 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
>   		retval = read_sdma_queue_counter((uint64_t __user *)q->properties.read_ptr,
>   							&sdma_val);
>   		if (retval)
> -			pr_err("Failed to read SDMA queue counter for queue: %d\n",
> +			dev_err(dev, "Failed to read SDMA queue counter for queue: %d\n",
>   				q->properties.queue_id);
>   	}
>   
> @@ -2349,6 +2365,7 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
>   {
>   	int retval;
>   	struct queue *q;
> +	struct device *dev = dqm->dev->adev->dev;
>   	struct kernel_queue *kq, *kq_next;
>   	struct mqd_manager *mqd_mgr;
>   	struct device_process_node *cur, *next_dpn;
> @@ -2382,7 +2399,7 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
>   			if (dqm->dev->kfd->shared_resources.enable_mes) {
>   				retval = remove_queue_mes(dqm, q, qpd);
>   				if (retval)
> -					pr_err("Failed to remove queue %d\n",
> +					dev_err(dev, "Failed to remove queue %d\n",
>   						q->properties.queue_id);
>   			}
>   		}
> @@ -2437,12 +2454,13 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
>   static int init_mqd_managers(struct device_queue_manager *dqm)
>   {
>   	int i, j;
> +	struct device *dev = dqm->dev->adev->dev;
>   	struct mqd_manager *mqd_mgr;
>   
>   	for (i = 0; i < KFD_MQD_TYPE_MAX; i++) {
>   		mqd_mgr = dqm->asic_ops.mqd_manager_init(i, dqm->dev);
>   		if (!mqd_mgr) {
> -			pr_err("mqd manager [%d] initialization failed\n", i);
> +			dev_err(dev, "mqd manager [%d] initialization failed\n", i);
>   			goto out_free;
>   		}
>   		dqm->mqd_mgrs[i] = mqd_mgr;
> @@ -2552,7 +2570,7 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_node *dev)
>   		dqm->ops.checkpoint_mqd = checkpoint_mqd;
>   		break;
>   	default:
> -		pr_err("Invalid scheduling policy %d\n", dqm->sched_policy);
> +		dev_err(dev->adev->dev, "Invalid scheduling policy %d\n", dqm->sched_policy);
>   		goto out_free;
>   	}
>   
> @@ -2590,7 +2608,7 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_node *dev)
>   		goto out_free;
>   
>   	if (!dev->kfd->shared_resources.enable_mes && allocate_hiq_sdma_mqd(dqm)) {
> -		pr_err("Failed to allocate hiq sdma mqd trunk buffer\n");
> +		dev_err(dev->adev->dev, "Failed to allocate hiq sdma mqd trunk buffer\n");
>   		goto out_free;
>   	}
>   
> @@ -2649,17 +2667,18 @@ int reserve_debug_trap_vmid(struct device_queue_manager *dqm,
>   				struct qcm_process_device *qpd)
>   {
>   	int r;
> +	struct device *dev = dqm->dev->adev->dev;
>   	int updated_vmid_mask;
>   
>   	if (dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
> -		pr_err("Unsupported on sched_policy: %i\n", dqm->sched_policy);
> +		dev_err(dev, "Unsupported on sched_policy: %i\n", dqm->sched_policy);
>   		return -EINVAL;
>   	}
>   
>   	dqm_lock(dqm);
>   
>   	if (dqm->trap_debug_vmid != 0) {
> -		pr_err("Trap debug id already reserved\n");
> +		dev_err(dev, "Trap debug id already reserved\n");
>   		r = -EBUSY;
>   		goto out_unlock;
>   	}
> @@ -2695,19 +2714,20 @@ int reserve_debug_trap_vmid(struct device_queue_manager *dqm,
>   int release_debug_trap_vmid(struct device_queue_manager *dqm,
>   			struct qcm_process_device *qpd)
>   {
> +	struct device *dev = dqm->dev->adev->dev;
>   	int r;
>   	int updated_vmid_mask;
>   	uint32_t trap_debug_vmid;
>   
>   	if (dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
> -		pr_err("Unsupported on sched_policy: %i\n", dqm->sched_policy);
> +		dev_err(dev, "Unsupported on sched_policy: %i\n", dqm->sched_policy);
>   		return -EINVAL;
>   	}
>   
>   	dqm_lock(dqm);
>   	trap_debug_vmid = dqm->trap_debug_vmid;
>   	if (dqm->trap_debug_vmid == 0) {
> -		pr_err("Trap debug id is not reserved\n");
> +		dev_err(dev, "Trap debug id is not reserved\n");
>   		r = -EINVAL;
>   		goto out_unlock;
>   	}
> @@ -2844,6 +2864,7 @@ int resume_queues(struct kfd_process *p,
>   	for (i = 0; i < p->n_pdds; i++) {
>   		struct kfd_process_device *pdd = p->pdds[i];
>   		struct device_queue_manager *dqm = pdd->dev->dqm;
> +		struct device *dev = dqm->dev->adev->dev;
>   		struct qcm_process_device *qpd = &pdd->qpd;
>   		struct queue *q;
>   		int r, per_device_resumed = 0;
> @@ -2894,7 +2915,7 @@ int resume_queues(struct kfd_process *p,
>   					0,
>   					USE_DEFAULT_GRACE_PERIOD);
>   		if (r) {
> -			pr_err("Failed to resume process queues\n");
> +			dev_err(dev, "Failed to resume process queues\n");
>   			if (queue_ids) {
>   				list_for_each_entry(q, &qpd->queues_list, list) {
>   					int q_idx = q_array_get_index(
> @@ -2946,6 +2967,7 @@ int suspend_queues(struct kfd_process *p,
>   	for (i = 0; i < p->n_pdds; i++) {
>   		struct kfd_process_device *pdd = p->pdds[i];
>   		struct device_queue_manager *dqm = pdd->dev->dqm;
> +		struct device *dev = dqm->dev->adev->dev;
>   		struct qcm_process_device *qpd = &pdd->qpd;
>   		struct queue *q;
>   		int r, per_device_suspended = 0;
> @@ -2994,7 +3016,7 @@ int suspend_queues(struct kfd_process *p,
>   			grace_period);
>   
>   		if (r)
> -			pr_err("Failed to suspend process queues.\n");
> +			dev_err(dev, "Failed to suspend process queues.\n");
>   		else
>   			total_suspended += per_device_suspended;
>   
> @@ -3081,10 +3103,11 @@ void set_queue_snapshot_entry(struct queue *q,
>   
>   int debug_lock_and_unmap(struct device_queue_manager *dqm)
>   {
> +	struct device *dev = dqm->dev->adev->dev;
>   	int r;
>   
>   	if (dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
> -		pr_err("Unsupported on sched_policy: %i\n", dqm->sched_policy);
> +		dev_err(dev, "Unsupported on sched_policy: %i\n", dqm->sched_policy);
>   		return -EINVAL;
>   	}
>   
> @@ -3102,10 +3125,11 @@ int debug_lock_and_unmap(struct device_queue_manager *dqm)
>   
>   int debug_map_and_unlock(struct device_queue_manager *dqm)
>   {
> +	struct device *dev = dqm->dev->adev->dev;
>   	int r;
>   
>   	if (dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
> -		pr_err("Unsupported on sched_policy: %i\n", dqm->sched_policy);
> +		dev_err(dev, "Unsupported on sched_policy: %i\n", dqm->sched_policy);
>   		return -EINVAL;
>   	}
>   
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> index 3d9ce44d88da..b315311dfe2a 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> @@ -1343,7 +1343,7 @@ int pqm_get_queue_snapshot(struct process_queue_manager *pqm,
>   			   int *num_qss_entries,
>   			   uint32_t *entry_size);
>   
> -int amdkfd_fence_wait_timeout(uint64_t *fence_addr,
> +int amdkfd_fence_wait_timeout(struct device_queue_manager *dqm,
>   			      uint64_t fence_value,
>   			      unsigned int timeout_ms);
>   
