<div dir="ltr"><div>Monk or perhaps Felix,</div><div><br></div><div>Do you by any chance know why the CS ioctl returns -EINVAL for all compute submissions if num_kcq <= 4 and what could cause that?</div><div><br></div><div>If not, is there any way to disable mid-IB preemption for compute?<br></div><div><br></div><div>Thanks,</div><div>Marek<br></div></div><br><div class="gmail_quote"><div dir="ltr" class="gmail_attr">On Fri, Jul 31, 2020 at 9:53 AM Felix Kuehling <<a href="mailto:felix.kuehling@amd.com">felix.kuehling@amd.com</a>> wrote:<br></div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">Am 2020-07-31 um 3:51 a.m. schrieb Monk Liu:<br>
> what:<br>
> the MQD save and restore of KCQs (kernel compute queues)<br>
> costs a lot of clock cycles during world switch, which significantly<br>
> hurts multi-VF performance<br>
><br>
> how:<br>
> introduce a parameter to control the number of KCQs, to avoid the<br>
> performance drop when no kernel compute queues are needed<br>
><br>
> notes:<br>
> this parameter only affects gfx 8/9/10<br>
><br>
> v2:<br>
> refine namings<br>
><br>
> v3:<br>
> choose the queues for each ring so that they are spread across pipes as evenly as possible.<br>
><br>
> v4:<br>
> fix indentation<br>
> some cleanups in gfx_compute_queue_acquire()<br>
><br>
> v5:<br>
> further fix on indentations<br>
> more cleanups in gfx_compute_queue_acquire()<br>
><br>
> TODO:<br>
> in the future we will let the hypervisor driver set this parameter<br>
> automatically, so the user no longer needs to configure it through<br>
> modprobe in the virtual machine<br>
><br>
> Signed-off-by: Monk Liu <<a href="mailto:Monk.Liu@amd.com" target="_blank">Monk.Liu@amd.com</a>><br>
<br>
This patch is Reviewed-by: Felix Kuehling <<a href="mailto:Felix.Kuehling@amd.com" target="_blank">Felix.Kuehling@amd.com</a>><br>
<br>
<br>
> ---<br>
> drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 +<br>
> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 5 +++<br>
> drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 4 +++<br>
> drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 49 ++++++++++++------------------<br>
> drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 30 +++++++++---------<br>
> drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 29 +++++++++---------<br>
> drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 31 ++++++++++---------<br>
> 7 files changed, 76 insertions(+), 73 deletions(-)<br>
><br>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h<br>
> index e97c088..de11136 100644<br>
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h<br>
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h<br>
> @@ -201,6 +201,7 @@ extern int amdgpu_si_support;<br>
> #ifdef CONFIG_DRM_AMDGPU_CIK<br>
> extern int amdgpu_cik_support;<br>
> #endif<br>
> +extern int amdgpu_num_kcq;<br>
> <br>
> #define AMDGPU_VM_MAX_NUM_CTX 4096<br>
> #define AMDGPU_SG_THRESHOLD (256*1024*1024)<br>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c<br>
> index 62ecac9..cf445bab 100644<br>
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c<br>
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c<br>
> @@ -1199,6 +1199,11 @@ static int amdgpu_device_check_arguments(struct amdgpu_device *adev)<br>
> <br>
> amdgpu_gmc_tmz_set(adev);<br>
> <br>
> + if (amdgpu_num_kcq > 8 || amdgpu_num_kcq < 0) {<br>
> + amdgpu_num_kcq = 8;<br>
> + dev_warn(adev->dev, "set kernel compute queue number to 8 due to invalid paramter provided by user\n");<br>
> + }<br>
> +<br>
> return 0;<br>
> }<br>
> <br>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c<br>
> index 6291f5f..b545c40 100644<br>
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c<br>
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c<br>
> @@ -150,6 +150,7 @@ int amdgpu_noretry;<br>
> int amdgpu_force_asic_type = -1;<br>
> int amdgpu_tmz = 0;<br>
> int amdgpu_reset_method = -1; /* auto */<br>
> +int amdgpu_num_kcq = -1;<br>
> <br>
> struct amdgpu_mgpu_info mgpu_info = {<br>
> .mutex = __MUTEX_INITIALIZER(mgpu_info.mutex),<br>
> @@ -765,6 +766,9 @@ module_param_named(tmz, amdgpu_tmz, int, 0444);<br>
> MODULE_PARM_DESC(reset_method, "GPU reset method (-1 = auto (default), 0 = legacy, 1 = mode0, 2 = mode1, 3 = mode2, 4 = baco)");<br>
> module_param_named(reset_method, amdgpu_reset_method, int, 0444);<br>
> <br>
> +MODULE_PARM_DESC(num_kcq, "number of kernel compute queue user want to setup (8 if set to greater than 8 or less than 0, only affect gfx 8+)");<br>
> +module_param_named(num_kcq, amdgpu_num_kcq, int, 0444);<br>
> +<br>
> static const struct pci_device_id pciidlist[] = {<br>
> #ifdef CONFIG_DRM_AMDGPU_SI<br>
> {0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},<br>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c<br>
> index 8eff017..0cd9de6 100644<br>
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c<br>
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c<br>
> @@ -202,40 +202,29 @@ bool amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device *adev,<br>
> <br>
> void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev)<br>
> {<br>
> - int i, queue, pipe, mec;<br>
> + int i, queue, pipe;<br>
> bool multipipe_policy = amdgpu_gfx_is_multipipe_capable(adev);<br>
> -<br>
> - /* policy for amdgpu compute queue ownership */<br>
> - for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {<br>
> - queue = i % adev->gfx.mec.num_queue_per_pipe;<br>
> - pipe = (i / adev->gfx.mec.num_queue_per_pipe)<br>
> - % adev->gfx.mec.num_pipe_per_mec;<br>
> - mec = (i / adev->gfx.mec.num_queue_per_pipe)<br>
> - / adev->gfx.mec.num_pipe_per_mec;<br>
> -<br>
> - /* we've run out of HW */<br>
> - if (mec >= adev->gfx.mec.num_mec)<br>
> - break;<br>
> -<br>
> - if (multipipe_policy) {<br>
> - /* policy: amdgpu owns the first two queues of the first MEC */<br>
> - if (mec == 0 && queue < 2)<br>
> - set_bit(i, adev->gfx.mec.queue_bitmap);<br>
> - } else {<br>
> - /* policy: amdgpu owns all queues in the first pipe */<br>
> - if (mec == 0 && pipe == 0)<br>
> - set_bit(i, adev->gfx.mec.queue_bitmap);<br>
> + int max_queues_per_mec = min(adev->gfx.mec.num_pipe_per_mec *<br>
> + adev->gfx.mec.num_queue_per_pipe,<br>
> + adev->gfx.num_compute_rings);<br>
> +<br>
> + if (multipipe_policy) {<br>
> + /* policy: make queues evenly cross all pipes on MEC1 only */<br>
> + for (i = 0; i < max_queues_per_mec; i++) {<br>
> + pipe = i % adev->gfx.mec.num_pipe_per_mec;<br>
> + queue = (i / adev->gfx.mec.num_pipe_per_mec) %<br>
> + adev->gfx.mec.num_queue_per_pipe;<br>
> +<br>
> + set_bit(pipe * adev->gfx.mec.num_queue_per_pipe + queue,<br>
> + adev->gfx.mec.queue_bitmap);<br>
> }<br>
> + } else {<br>
> + /* policy: amdgpu owns all queues in the given pipe */<br>
> + for (i = 0; i < max_queues_per_mec; ++i)<br>
> + set_bit(i, adev->gfx.mec.queue_bitmap);<br>
> }<br>
> <br>
> - /* update the number of active compute rings */<br>
> - adev->gfx.num_compute_rings =<br>
> - bitmap_weight(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);<br>
> -<br>
> - /* If you hit this case and edited the policy, you probably just<br>
> - * need to increase AMDGPU_MAX_COMPUTE_RINGS */<br>
> - if (WARN_ON(adev->gfx.num_compute_rings > AMDGPU_MAX_COMPUTE_RINGS))<br>
> - adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;<br>
> + dev_dbg(adev->dev, "mec queue bitmap weight=%d\n", bitmap_weight(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES));<br>
> }<br>
> <br>
> void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev)<br>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c<br>
> index f571e25..4172bc8 100644<br>
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c<br>
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c<br>
> @@ -4022,21 +4022,23 @@ static int gfx_v10_0_mec_init(struct amdgpu_device *adev)<br>
> amdgpu_gfx_compute_queue_acquire(adev);<br>
> mec_hpd_size = adev->gfx.num_compute_rings * GFX10_MEC_HPD_SIZE;<br>
> <br>
> - r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,<br>
> - AMDGPU_GEM_DOMAIN_GTT,<br>
> - &adev->gfx.mec.hpd_eop_obj,<br>
> - &adev->gfx.mec.hpd_eop_gpu_addr,<br>
> - (void **)&hpd);<br>
> - if (r) {<br>
> - dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);<br>
> - gfx_v10_0_mec_fini(adev);<br>
> - return r;<br>
> - }<br>
> + if (mec_hpd_size) {<br>
> + r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,<br>
> + AMDGPU_GEM_DOMAIN_GTT,<br>
> + &adev->gfx.mec.hpd_eop_obj,<br>
> + &adev->gfx.mec.hpd_eop_gpu_addr,<br>
> + (void **)&hpd);<br>
> + if (r) {<br>
> + dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);<br>
> + gfx_v10_0_mec_fini(adev);<br>
> + return r;<br>
> + }<br>
> <br>
> - memset(hpd, 0, mec_hpd_size);<br>
> + memset(hpd, 0, mec_hpd_size);<br>
> <br>
> - amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);<br>
> - amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);<br>
> + amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);<br>
> + amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);<br>
> + }<br>
> <br>
> if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {<br>
> mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;<br>
> @@ -7162,7 +7164,7 @@ static int gfx_v10_0_early_init(void *handle)<br>
> break;<br>
> }<br>
> <br>
> - adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;<br>
> + adev->gfx.num_compute_rings = amdgpu_num_kcq;<br>
> <br>
> gfx_v10_0_set_kiq_pm4_funcs(adev);<br>
> gfx_v10_0_set_ring_funcs(adev);<br>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c<br>
> index 8d72089..7df567a 100644<br>
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c<br>
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c<br>
> @@ -1343,21 +1343,22 @@ static int gfx_v8_0_mec_init(struct amdgpu_device *adev)<br>
> amdgpu_gfx_compute_queue_acquire(adev);<br>
> <br>
> mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;<br>
> + if (mec_hpd_size) {<br>
> + r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,<br>
> + AMDGPU_GEM_DOMAIN_VRAM,<br>
> + &adev->gfx.mec.hpd_eop_obj,<br>
> + &adev->gfx.mec.hpd_eop_gpu_addr,<br>
> + (void **)&hpd);<br>
> + if (r) {<br>
> + dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);<br>
> + return r;<br>
> + }<br>
> <br>
> - r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,<br>
> - AMDGPU_GEM_DOMAIN_VRAM,<br>
> - &adev->gfx.mec.hpd_eop_obj,<br>
> - &adev->gfx.mec.hpd_eop_gpu_addr,<br>
> - (void **)&hpd);<br>
> - if (r) {<br>
> - dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);<br>
> - return r;<br>
> - }<br>
> -<br>
> - memset(hpd, 0, mec_hpd_size);<br>
> + memset(hpd, 0, mec_hpd_size);<br>
> <br>
> - amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);<br>
> - amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);<br>
> + amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);<br>
> + amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);<br>
> + }<br>
> <br>
> return 0;<br>
> }<br>
> @@ -5294,7 +5295,7 @@ static int gfx_v8_0_early_init(void *handle)<br>
> struct amdgpu_device *adev = (struct amdgpu_device *)handle;<br>
> <br>
> adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;<br>
> - adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;<br>
> + adev->gfx.num_compute_rings = amdgpu_num_kcq;<br>
> adev->gfx.funcs = &gfx_v8_0_gfx_funcs;<br>
> gfx_v8_0_set_ring_funcs(adev);<br>
> gfx_v8_0_set_irq_funcs(adev);<br>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c<br>
> index e4e751f..ef07e59 100644<br>
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c<br>
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c<br>
> @@ -1938,22 +1938,23 @@ static int gfx_v9_0_mec_init(struct amdgpu_device *adev)<br>
> /* take ownership of the relevant compute queues */<br>
> amdgpu_gfx_compute_queue_acquire(adev);<br>
> mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;<br>
> + if (mec_hpd_size) {<br>
> + r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,<br>
> + AMDGPU_GEM_DOMAIN_VRAM,<br>
> + &adev->gfx.mec.hpd_eop_obj,<br>
> + &adev->gfx.mec.hpd_eop_gpu_addr,<br>
> + (void **)&hpd);<br>
> + if (r) {<br>
> + dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);<br>
> + gfx_v9_0_mec_fini(adev);<br>
> + return r;<br>
> + }<br>
> <br>
> - r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,<br>
> - AMDGPU_GEM_DOMAIN_VRAM,<br>
> - &adev->gfx.mec.hpd_eop_obj,<br>
> - &adev->gfx.mec.hpd_eop_gpu_addr,<br>
> - (void **)&hpd);<br>
> - if (r) {<br>
> - dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);<br>
> - gfx_v9_0_mec_fini(adev);<br>
> - return r;<br>
> - }<br>
> -<br>
> - memset(hpd, 0, mec_hpd_size);<br>
> + memset(hpd, 0, mec_hpd_size);<br>
> <br>
> - amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);<br>
> - amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);<br>
> + amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);<br>
> + amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);<br>
> + }<br>
> <br>
> mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;<br>
> <br>
> @@ -4625,7 +4626,7 @@ static int gfx_v9_0_early_init(void *handle)<br>
> adev->gfx.num_gfx_rings = 0;<br>
> else<br>
> adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;<br>
> - adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;<br>
> + adev->gfx.num_compute_rings = amdgpu_num_kcq;<br>
> gfx_v9_0_set_kiq_pm4_funcs(adev);<br>
> gfx_v9_0_set_ring_funcs(adev);<br>
> gfx_v9_0_set_irq_funcs(adev);<br>
_______________________________________________<br>
amd-gfx mailing list<br>
<a href="mailto:amd-gfx@lists.freedesktop.org" target="_blank">amd-gfx@lists.freedesktop.org</a><br>
<a href="https://lists.freedesktop.org/mailman/listinfo/amd-gfx" rel="noreferrer" target="_blank">https://lists.freedesktop.org/mailman/listinfo/amd-gfx</a><br>
</blockquote></div>