[PATCH 2/2] drm/amdkfd: Move pm_create_runlist_ib() out of pm_send_runlist()

Felix Kuehling felix.kuehling at amd.com
Fri Nov 22 21:21:32 UTC 2019


I'm not sure about this one. Looks like the interface is getting 
needlessly more complicated. Now the caller has to keep track of the 
runlist IB address and size just to pass those to another function. I 
could understand this if there was a use case that needs to separate the 
allocation of the runlist and sending it to the HW. But I don't see that.

Some background for why I think the interface is the way it is: The 
runlist IB is continuously executed by the HWS firmware. If the runlist 
is oversubscribed, the HWS firmware will loop through it. So the IB must 
remain allocated until pm_send_unmap_queue is called. Currently 
pm_send_runlist creates the runlist IB and sends it to the HWS. You're 
separating that into creation and sending. Do you see a case where you 
need to send the same runlist multiple times? Or do something else 
between creating the runlist and sending it to the HWS?

pm_release_ib releases the runlist IB, assuming that he HWS is no longer 
using it. Maybe this could be combined with pm_send_unmap_queue. I'm not 
100% sure because there are some filter parameters that may leave some 
queues mapped. If the two can be combined, I'd suggest the following 
name and interface changes to reflect how I think this is being used today:

  * pm_send_runlist -> pm_create_and_send_runlist
  * pm_send_unmap_queue + pm_release_ib -> pm_preempt_and_free_runlist

I see you're doing a lot of cleanup and refactoring in this area of the 
code. Is there some bigger picture here, some idea of the end-state 
you're trying to get to? Knowing where you're going with this may make 
it easier to review the code.

Regards,
   Felix

On 2019-11-21 4:26 p.m., Yong Zhao wrote:
> This is consistent with the calling sequence in unmap_queues_cpsch().
>
> Change-Id: Ieb6714422c812d4f6ebbece34e339871471e4b5e
> Signed-off-by: Yong Zhao <Yong.Zhao at amd.com>
> ---
>   .../drm/amd/amdkfd/kfd_device_queue_manager.c | 18 +++++++++++++++--
>   .../gpu/drm/amd/amdkfd/kfd_packet_manager.c   | 20 +++++--------------
>   drivers/gpu/drm/amd/amdkfd/kfd_priv.h         |  7 ++++++-
>   3 files changed, 27 insertions(+), 18 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> index 510f2d1bb8bb..fd7d90136b94 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> @@ -1302,6 +1302,8 @@ static int unmap_sdma_queues(struct device_queue_manager *dqm)
>   static int map_queues_cpsch(struct device_queue_manager *dqm)
>   {
>   	int retval;
> +	uint64_t rl_ib_gpu_addr;
> +	size_t rl_ib_size;
>   
>   	if (!dqm->sched_running)
>   		return 0;
> @@ -1310,15 +1312,27 @@ static int map_queues_cpsch(struct device_queue_manager *dqm)
>   	if (dqm->active_runlist)
>   		return 0;
>   
> -	retval = pm_send_runlist(&dqm->packets, &dqm->queues);
> +	retval = pm_create_runlist_ib(&dqm->packets, &dqm->queues,
> +				&rl_ib_gpu_addr, &rl_ib_size);
> +	if (retval)
> +		goto fail_create_runlist_ib;
> +
> +	pr_debug("runlist IB address: 0x%llX\n", rl_ib_gpu_addr);
> +
> +	retval = pm_send_runlist(&dqm->packets, &dqm->queues,
> +			rl_ib_gpu_addr, rl_ib_size);
>   	pr_debug("%s sent runlist\n", __func__);
>   	if (retval) {
>   		pr_err("failed to execute runlist\n");
> -		return retval;
> +		goto fail_create_runlist_ib;
>   	}
>   	dqm->active_runlist = true;
>   
>   	return retval;
> +
> +fail_create_runlist_ib:
> +	pm_destroy_runlist_ib(&dqm->packets);
> +	return retval;
>   }
>   
>   /* dqm->lock mutex has to be locked before calling this function */
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
> index 4a9433257428..6ec54e9f9392 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
> @@ -116,7 +116,7 @@ static int pm_allocate_runlist_ib(struct packet_manager *pm,
>   	return retval;
>   }
>   
> -static int pm_create_runlist_ib(struct packet_manager *pm,
> +int pm_create_runlist_ib(struct packet_manager *pm,
>   				struct list_head *queues,
>   				uint64_t *rl_gpu_addr,
>   				size_t *rl_size_bytes)
> @@ -149,7 +149,6 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
>   		/* build map process packet */
>   		if (proccesses_mapped >= pm->dqm->processes_count) {
>   			pr_debug("Not enough space left in runlist IB\n");
> -			pm_destroy_runlist_ib(pm);
>   			return -ENOMEM;
>   		}
>   
> @@ -299,20 +298,13 @@ int pm_send_set_resources(struct packet_manager *pm,
>   	return retval;
>   }
>   
> -int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues)
> +int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues,
> +			uint64_t rl_ib_gpu_addr, size_t rl_ib_size)
>   {
> -	uint64_t rl_gpu_ib_addr;
>   	uint32_t *rl_buffer;
> -	size_t rl_ib_size, packet_size_dwords;
> +	size_t packet_size_dwords;
>   	int retval;
>   
> -	retval = pm_create_runlist_ib(pm, dqm_queues, &rl_gpu_ib_addr,
> -					&rl_ib_size);
> -	if (retval)
> -		goto fail_create_runlist_ib;
> -
> -	pr_debug("runlist IB address: 0x%llX\n", rl_gpu_ib_addr);
> -
>   	packet_size_dwords = pm->pmf->runlist_size / sizeof(uint32_t);
>   	mutex_lock(&pm->lock);
>   
> @@ -321,7 +313,7 @@ int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues)
>   	if (retval)
>   		goto fail_acquire_packet_buffer;
>   
> -	retval = pm->pmf->runlist(pm, rl_buffer, rl_gpu_ib_addr,
> +	retval = pm->pmf->runlist(pm, rl_buffer, rl_ib_gpu_addr,
>   					rl_ib_size / sizeof(uint32_t), false);
>   	if (retval)
>   		goto fail_create_runlist;
> @@ -336,8 +328,6 @@ int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues)
>   	kq_rollback_packet(pm->priv_queue);
>   fail_acquire_packet_buffer:
>   	mutex_unlock(&pm->lock);
> -fail_create_runlist_ib:
> -	pm_destroy_runlist_ib(pm);
>   	return retval;
>   }
>   
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> index 389cda7c8f1a..6accb605b9f0 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> @@ -980,7 +980,8 @@ int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm);
>   void pm_uninit(struct packet_manager *pm);
>   int pm_send_set_resources(struct packet_manager *pm,
>   				struct scheduling_resources *res);
> -int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues);
> +int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues,
> +		uint64_t rl_ib_gpu_addr, size_t rl_ib_size);
>   int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address,
>   				uint32_t fence_value);
>   
> @@ -989,6 +990,10 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type,
>   			uint32_t filter_param, bool reset,
>   			unsigned int sdma_engine);
>   
> +int pm_create_runlist_ib(struct packet_manager *pm,
> +				struct list_head *queues,
> +				uint64_t *rl_gpu_addr,
> +				size_t *rl_size_bytes);
>   void pm_destroy_runlist_ib(struct packet_manager *pm);
>   
>   /* Following PM funcs can be shared among VI and AI */


More information about the amd-gfx mailing list