[PATCH] drm/amdgpu: introduce a new parameter to configure how many KCQ we want(v2)
Felix Kuehling
felix.kuehling at amd.com
Mon Jul 27 23:32:57 UTC 2020
Am 2020-07-27 um 6:47 a.m. schrieb Monk Liu:
> what:
> saving and restoring the MQDs of kernel compute queues costs a lot of clocks
> during world switch, which significantly impacts multi-VF performance
>
> how:
> introduce a parameter to control the number of kernel compute queues to
> avoid the performance drop when no kernel compute queues are needed
>
> notes:
> this parameter only affects gfx 8/9/10
>
> TODO:
> in the future we will let the hypervisor driver set this parameter
> automatically, so the user no longer needs to configure it through
> modprobe in the virtual machine
>
> Signed-off-by: Monk Liu <Monk.Liu at amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 +
> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 5 +++++
> drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 4 ++++
> drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 27 +++++++++++++-------------
> drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 30 +++++++++++++++--------------
> drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 29 ++++++++++++++--------------
> drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 31 +++++++++++++++---------------
> 7 files changed, 71 insertions(+), 56 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index e97c088..71a3d6a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -201,6 +201,7 @@ extern int amdgpu_si_support;
> #ifdef CONFIG_DRM_AMDGPU_CIK
> extern int amdgpu_cik_support;
> #endif
> +extern int amdgpu_num_kcq_user_set;
>
> #define AMDGPU_VM_MAX_NUM_CTX 4096
> #define AMDGPU_SG_THRESHOLD (256*1024*1024)
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index 62ecac9..18b93ef 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -1199,6 +1199,11 @@ static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
>
> amdgpu_gmc_tmz_set(adev);
>
> + if (amdgpu_num_kcq_user_set > 8 || amdgpu_num_kcq_user_set < 0) {
> + amdgpu_num_kcq_user_set = 8;
> + dev_warn(adev-dev, "set KCQ number to 8 due to invalid paramter provided by user\n");
> + }
> +
> return 0;
> }
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> index 6291f5f..03a94e9 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> @@ -150,6 +150,7 @@ int amdgpu_noretry;
> int amdgpu_force_asic_type = -1;
> int amdgpu_tmz = 0;
> int amdgpu_reset_method = -1; /* auto */
> +int amdgpu_num_kcq_user_set = 8;
>
> struct amdgpu_mgpu_info mgpu_info = {
> .mutex = __MUTEX_INITIALIZER(mgpu_info.mutex),
> @@ -765,6 +766,9 @@ module_param_named(tmz, amdgpu_tmz, int, 0444);
> MODULE_PARM_DESC(reset_method, "GPU reset method (-1 = auto (default), 0 = legacy, 1 = mode0, 2 = mode1, 3 = mode2, 4 = baco)");
> module_param_named(reset_method, amdgpu_reset_method, int, 0444);
>
> +MODULE_PARM_DESC(num_kcq, "number of KCQ user want to setup (8 if set to greater than 8 or less than 0, only affect gfx 8+)");
> +module_param_named(num_kcq, amdgpu_num_kcq_user_set, int, 0444);
> +
> static const struct pci_device_id pciidlist[] = {
> #ifdef CONFIG_DRM_AMDGPU_SI
> {0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> index 8eff017..0b59049 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> @@ -202,7 +202,7 @@ bool amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device *adev,
>
> void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev)
> {
> - int i, queue, pipe, mec;
> + int i, queue, pipe, mec, j = 0;
> bool multipipe_policy = amdgpu_gfx_is_multipipe_capable(adev);
>
> /* policy for amdgpu compute queue ownership */
> @@ -219,23 +219,24 @@ void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev)
>
> if (multipipe_policy) {
> /* policy: amdgpu owns the first two queues of the first MEC */
> - if (mec == 0 && queue < 2)
> - set_bit(i, adev->gfx.mec.queue_bitmap);
> + if (mec == 0 && queue < 2) {
> + if (j++ < adev->gfx.num_compute_rings)
This is not ideal, because it wouldn't distribute the queues evenly
across pipes if fewer than 7 queues are requested. I would change how
queue and pipe are calculated from i for the multipipe_policy case:
    if (multipipe_policy) {
        pipe = i % adev->gfx.mec.num_pipe_per_mec;
        queue = (i / adev->gfx.mec.num_pipe_per_mec)
                % adev->gfx.mec.num_queue_per_pipe;
    } else {
        /* previous way */
    }
Then your change will work better.
Regards,
Felix
> + set_bit(i, adev->gfx.mec.queue_bitmap);
> + else
> + break;
> + }
> } else {
> /* policy: amdgpu owns all queues in the first pipe */
> - if (mec == 0 && pipe == 0)
> - set_bit(i, adev->gfx.mec.queue_bitmap);
> + if (mec == 0 && pipe == 0) {
> + if (j++ < adev->gfx.num_compute_rings)
> + set_bit(i, adev->gfx.mec.queue_bitmap);
> + else
> + break;
> + }
> }
> }
>
> - /* update the number of active compute rings */
> - adev->gfx.num_compute_rings =
> - bitmap_weight(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
> -
> - /* If you hit this case and edited the policy, you probably just
> - * need to increase AMDGPU_MAX_COMPUTE_RINGS */
> - if (WARN_ON(adev->gfx.num_compute_rings > AMDGPU_MAX_COMPUTE_RINGS))
> - adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
> + dev_info(adev->dev, "mec queue bitmap weight=%d\n", bitmap_weight(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES));
> }
>
> void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev)
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> index db9f1e8..2ad8393 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> @@ -4022,21 +4022,23 @@ static int gfx_v10_0_mec_init(struct amdgpu_device *adev)
> amdgpu_gfx_compute_queue_acquire(adev);
> mec_hpd_size = adev->gfx.num_compute_rings * GFX10_MEC_HPD_SIZE;
>
> - r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
> - AMDGPU_GEM_DOMAIN_GTT,
> - &adev->gfx.mec.hpd_eop_obj,
> - &adev->gfx.mec.hpd_eop_gpu_addr,
> - (void **)&hpd);
> - if (r) {
> - dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
> - gfx_v10_0_mec_fini(adev);
> - return r;
> - }
> + if (mec_hpd_size) {
> + r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
> + AMDGPU_GEM_DOMAIN_GTT,
> + &adev->gfx.mec.hpd_eop_obj,
> + &adev->gfx.mec.hpd_eop_gpu_addr,
> + (void **)&hpd);
> + if (r) {
> + dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
> + gfx_v10_0_mec_fini(adev);
> + return r;
> + }
>
> - memset(hpd, 0, mec_hpd_size);
> + memset(hpd, 0, mec_hpd_size);
>
> - amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
> - amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
> + amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
> + amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
> + }
>
> if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
> mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
> @@ -7159,7 +7161,7 @@ static int gfx_v10_0_early_init(void *handle)
> break;
> }
>
> - adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
> + adev->gfx.num_compute_rings = amdgpu_num_kcq_user_set;
>
> gfx_v10_0_set_kiq_pm4_funcs(adev);
> gfx_v10_0_set_ring_funcs(adev);
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> index 8d72089..6d95b4b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> @@ -1343,21 +1343,22 @@ static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
> amdgpu_gfx_compute_queue_acquire(adev);
>
> mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;
> + if (mec_hpd_size) {
> + r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
> + AMDGPU_GEM_DOMAIN_VRAM,
> + &adev->gfx.mec.hpd_eop_obj,
> + &adev->gfx.mec.hpd_eop_gpu_addr,
> + (void **)&hpd);
> + if (r) {
> + dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
> + return r;
> + }
>
> - r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
> - AMDGPU_GEM_DOMAIN_VRAM,
> - &adev->gfx.mec.hpd_eop_obj,
> - &adev->gfx.mec.hpd_eop_gpu_addr,
> - (void **)&hpd);
> - if (r) {
> - dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
> - return r;
> - }
> -
> - memset(hpd, 0, mec_hpd_size);
> + memset(hpd, 0, mec_hpd_size);
>
> - amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
> - amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
> + amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
> + amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
> + }
>
> return 0;
> }
> @@ -5294,7 +5295,7 @@ static int gfx_v8_0_early_init(void *handle)
> struct amdgpu_device *adev = (struct amdgpu_device *)handle;
>
> adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
> - adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
> + adev->gfx.num_compute_rings = amdgpu_num_kcq_user_set;
> adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
> gfx_v8_0_set_ring_funcs(adev);
> gfx_v8_0_set_irq_funcs(adev);
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> index e4e751f..43bcfe3 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> @@ -1938,22 +1938,23 @@ static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
> /* take ownership of the relevant compute queues */
> amdgpu_gfx_compute_queue_acquire(adev);
> mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
> + if (mec_hpd_size) {
> + r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
> + AMDGPU_GEM_DOMAIN_VRAM,
> + &adev->gfx.mec.hpd_eop_obj,
> + &adev->gfx.mec.hpd_eop_gpu_addr,
> + (void **)&hpd);
> + if (r) {
> + dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
> + gfx_v9_0_mec_fini(adev);
> + return r;
> + }
>
> - r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
> - AMDGPU_GEM_DOMAIN_VRAM,
> - &adev->gfx.mec.hpd_eop_obj,
> - &adev->gfx.mec.hpd_eop_gpu_addr,
> - (void **)&hpd);
> - if (r) {
> - dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
> - gfx_v9_0_mec_fini(adev);
> - return r;
> - }
> -
> - memset(hpd, 0, mec_hpd_size);
> + memset(hpd, 0, mec_hpd_size);
>
> - amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
> - amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
> + amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
> + amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
> + }
>
> mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
>
> @@ -4625,7 +4626,7 @@ static int gfx_v9_0_early_init(void *handle)
> adev->gfx.num_gfx_rings = 0;
> else
> adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
> - adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
> + adev->gfx.num_compute_rings = amdgpu_num_kcq_user_set;
> gfx_v9_0_set_kiq_pm4_funcs(adev);
> gfx_v9_0_set_ring_funcs(adev);
> gfx_v9_0_set_irq_funcs(adev);
More information about the amd-gfx
mailing list