<!DOCTYPE html><html><head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
</head>
<body>
<p><br>
</p>
<div class="moz-cite-prefix">On 10/29/2024 10:01 AM, Mukul Joshi
wrote:<br>
</div>
<blockquote type="cite" cite="mid:9697eb8f-bab6-482a-ad82-0939ea9e17bc@amd.com">
<p><br>
</p>
<div class="moz-cite-prefix">On 10/28/2024 5:40 PM, Xiaogang.Chen
wrote:<br>
</div>
<blockquote type="cite" cite="mid:20241028214014.59940-1-xiaogang.chen@amd.com">
<pre class="moz-quote-pre" wrap="">From: Xiaogang Chen <a class="moz-txt-link-rfc2396E" href="mailto:xiaogang.chen@amd.com" moz-do-not-send="true"><xiaogang.chen@amd.com></a>
To allow users to better understand the cause triggering runlist oversubscription.
No function change.
Signed-off-by: Xiaogang Chen <a class="moz-txt-link-abbreviated moz-txt-link-freetext" href="mailto:Xiaogang.Chen@amd.com" moz-do-not-send="true">Xiaogang.Chen@amd.com</a>
---
.../gpu/drm/amd/amdkfd/kfd_packet_manager.c | 55 ++++++++++++++-----
1 file changed, 42 insertions(+), 13 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
index 37930629edc5..e22be6da23b7 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
@@ -28,6 +28,10 @@
#include "kfd_kernel_queue.h"
#include "kfd_priv.h"
+#define OVER_SUBSCRIPTION_PROCESS_COUNT 1 << 0
+#define OVER_SUBSCRIPTION_COMPUTE_QUEUE_COUNT 1 << 1
+#define OVER_SUBSCRIPTION_GWS_QUEUE_COUNT 1 << 2
+
static inline void inc_wptr(unsigned int *wptr, unsigned int increment_bytes,
unsigned int buffer_size_bytes)
{
@@ -40,7 +44,7 @@ static inline void inc_wptr(unsigned int *wptr, unsigned int increment_bytes,
static void pm_calc_rlib_size(struct packet_manager *pm,
unsigned int *rlib_size,
- bool *over_subscription)
+ int *over_subscription)
{
unsigned int process_count, queue_count, compute_queue_count, gws_queue_count;
unsigned int map_queue_size;
@@ -58,17 +62,20 @@ static void pm_calc_rlib_size(struct packet_manager *pm,
* hws_max_conc_proc has been done in
* kgd2kfd_device_init().
*/
- *over_subscription = false;
+ *over_subscription = 0;
if (node->max_proc_per_quantum > 1)
max_proc_per_quantum = node->max_proc_per_quantum;
- if ((process_count > max_proc_per_quantum) ||
- compute_queue_count > get_cp_queues_num(pm->dqm) ||
- gws_queue_count > 1) {
- *over_subscription = true;
+ if (process_count > max_proc_per_quantum)
+ *over_subscription = *over_subscription || OVER_SUBSCRIPTION_PROCESS_COUNT;</pre>
</blockquote>
<br>
<pre><font size="5" face="monospace">I think you want to use the Bitwise OR (|) and not
the Logical OR (||) here. This will always set over_subscription
to 1.</font></pre>
</blockquote>
<pre><font size="5" face="monospace">Yes — it should actually use the bitwise OR assignment (|=) here.</font></pre>
<pre><font size="5" face="monospace">Regards</font></pre>
<pre><font size="5" face="monospace">Xiaogang
</font></pre>
<blockquote type="cite" cite="mid:9697eb8f-bab6-482a-ad82-0939ea9e17bc@amd.com">
<pre>
<font size="5" face="monospace">Regards,</font></pre>
<pre>
<font size="5"><font face="monospace">Mukul</font></font></pre>
<font size="5"> </font><br>
<blockquote type="cite" cite="mid:20241028214014.59940-1-xiaogang.chen@amd.com">
<pre class="moz-quote-pre" wrap="">+ if (compute_queue_count > get_cp_queues_num(pm->dqm))
+ *over_subscription = *over_subscription || OVER_SUBSCRIPTION_COMPUTE_QUEUE_COUNT;
+ if (gws_queue_count > 1)
+ *over_subscription = *over_subscription || OVER_SUBSCRIPTION_GWS_QUEUE_COUNT;
+
+ if (*over_subscription)
dev_dbg(dev, "Over subscribed runlist\n");
- }
map_queue_size = pm->pmf->map_queues_size;
/* calculate run list ib allocation size */
@@ -89,7 +96,7 @@ static int pm_allocate_runlist_ib(struct packet_manager *pm,
unsigned int **rl_buffer,
uint64_t *rl_gpu_buffer,
unsigned int *rl_buffer_size,
- bool *is_over_subscription)
+ int *is_over_subscription)
{
struct kfd_node *node = pm->dqm->dev;
struct device *dev = node->adev->dev;
@@ -134,7 +141,7 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
struct qcm_process_device *qpd;
struct queue *q;
struct kernel_queue *kq;
- bool is_over_subscription;
+ int is_over_subscription;
rl_wptr = retval = processes_mapped = 0;
@@ -212,16 +219,38 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
dev_dbg(dev, "Finished map process and queues to runlist\n");
if (is_over_subscription) {
- if (!pm->is_over_subscription)
- dev_warn(
+ if (!pm->is_over_subscription) {
+
+ if (is_over_subscription & OVER_SUBSCRIPTION_PROCESS_COUNT) {
+ dev_warn(
dev,
- "Runlist is getting oversubscribed. Expect reduced ROCm performance.\n");
+ "process number is more than maximum number of processes that"
+ " HWS can schedule concurrently. Runlist is getting"
+ " oversubscribed. Expect reduced ROCm performance.\n");
+ }
+
+ if (is_over_subscription & OVER_SUBSCRIPTION_COMPUTE_QUEUE_COUNT) {
+ dev_warn(
+ dev,
+ "compute queue number is more than assigned compute queues."
+ " Runlist is getting"
+ " oversubscribed. Expect reduced ROCm performance.\n");
+ }
+
+ if (is_over_subscription & OVER_SUBSCRIPTION_GWS_QUEUE_COUNT) {
+ dev_warn(
+ dev,
+ "compute queue for cooperative workgroup is more than allowed."
+ " Runlist is getting"
+ " oversubscribed. Expect reduced ROCm performance.\n");
+ }
+ }
retval = pm->pmf->runlist(pm, &rl_buffer[rl_wptr],
*rl_gpu_addr,
alloc_size_bytes / sizeof(uint32_t),
true);
}
- pm->is_over_subscription = is_over_subscription;
+ pm->is_over_subscription = is_over_subscription ? true : false;
for (i = 0; i < alloc_size_bytes / sizeof(uint32_t); i++)
pr_debug("0x%2X ", rl_buffer[i]);
</pre>
</blockquote>
</blockquote>
</body>
</html>