[PATCH 09/26] drm/amdgpu: allow split of queues with kfd at queue granularity v3
Felix Kuehling
felix.kuehling at amd.com
Tue Apr 11 21:36:23 UTC 2017
Comments inline.
On 17-04-06 02:21 AM, Andres Rodriguez wrote:
> Previously the queue/pipe split with kfd operated with pipe
> granularity. This patch allows amdgpu to take ownership of an arbitrary
> set of queues.
>
> It also consolidates the last few magic numbers in the compute
> initialization process into mec_init.
>
> v2: support for gfx9
> v3: renamed AMDGPU_MAX_QUEUES to AMDGPU_MAX_COMPUTE_QUEUES
>
> Reviewed-by: Edward O'Callaghan <funfunctor at folklore1984.net>
> Acked-by: Christian König <christian.koenig at amd.com>
> Signed-off-by: Andres Rodriguez <andresx7 at gmail.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu.h | 7 +++
> drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 83 +++++++++++++++++-------
> drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 81 +++++++++++++++++++-----
> drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 84 +++++++++++++++++++++++--
> drivers/gpu/drm/amd/include/kgd_kfd_interface.h | 1 +
> 5 files changed, 212 insertions(+), 44 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 8187587..b765aca 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
[snip]
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
> index 3340012..55e61a9 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
> @@ -42,21 +42,20 @@
> #include "gca/gfx_7_2_enum.h"
> #include "gca/gfx_7_2_sh_mask.h"
>
> #include "gmc/gmc_7_0_d.h"
> #include "gmc/gmc_7_0_sh_mask.h"
>
> #include "oss/oss_2_0_d.h"
> #include "oss/oss_2_0_sh_mask.h"
>
> #define GFX7_NUM_GFX_RINGS 1
> -#define GFX7_NUM_COMPUTE_RINGS 8
> #define GFX7_MEC_HPD_SIZE 2048
>
>
> static void gfx_v7_0_set_ring_funcs(struct amdgpu_device *adev);
> static void gfx_v7_0_set_irq_funcs(struct amdgpu_device *adev);
> static void gfx_v7_0_set_gds_init(struct amdgpu_device *adev);
>
> MODULE_FIRMWARE("radeon/bonaire_pfp.bin");
> MODULE_FIRMWARE("radeon/bonaire_me.bin");
> MODULE_FIRMWARE("radeon/bonaire_ce.bin");
> @@ -2817,47 +2816,79 @@ static void gfx_v7_0_mec_fini(struct amdgpu_device *adev)
> if (unlikely(r != 0))
> dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
> amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
> amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
>
> amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
> adev->gfx.mec.hpd_eop_obj = NULL;
> }
> }
>
> +static void gfx_v7_0_compute_queue_acquire(struct amdgpu_device *adev)
> +{
> + int i, queue, pipe, mec;
> +
> + /* policy for amdgpu compute queue ownership */
> + for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
> + queue = i % adev->gfx.mec.num_queue_per_pipe;
> + pipe = (i / adev->gfx.mec.num_queue_per_pipe)
> + % adev->gfx.mec.num_pipe_per_mec;
> + mec = (i / adev->gfx.mec.num_queue_per_pipe)
> + / adev->gfx.mec.num_pipe_per_mec;
> +
> + /* we've run out of HW */
> + if (mec > adev->gfx.mec.num_mec)
[FK] Shouldn't this be >=? mec is a 0-based index, so with num_mec MECs the
valid values are 0..num_mec-1; the current check (mec > num_mec) would let the
loop assign queues on a MEC one past the last one that exists in hardware.
> + break;
> +
> + /* policy: amdgpu owns all queues in the first pipe */
> + if (mec == 0 && pipe == 0)
> + set_bit(i, adev->gfx.mec.queue_bitmap);
> + }
> +
> + /* update the number of active compute rings */
> + adev->gfx.num_compute_rings =
> + bitmap_weight(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
> +
> + /* If you hit this case and edited the policy, you probably just
> + * need to increase AMDGPU_MAX_COMPUTE_RINGS */
> + WARN_ON(adev->gfx.num_compute_rings > AMDGPU_MAX_COMPUTE_RINGS);
> + if (adev->gfx.num_compute_rings > AMDGPU_MAX_COMPUTE_RINGS)
[FK] You can write this shorter:
if (WARN_ON(condition))
statement;
> + adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
> +}
> +
[snip]
> @@ -4769,42 +4800,52 @@ static int gfx_v7_0_sw_init(void *handle)
> ring = &adev->gfx.gfx_ring[i];
> ring->ring_obj = NULL;
> sprintf(ring->name, "gfx");
> r = amdgpu_ring_init(adev, ring, 1024,
> &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP);
> if (r)
> return r;
> }
>
> /* set up the compute queues */
> - for (i = 0; i < adev->gfx.num_compute_rings; i++) {
> + for (i = 0, ring_id = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; i++) {
> unsigned irq_type;
>
> - /* max 32 queues per MEC */
> - if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
> - DRM_ERROR("Too many (%d) compute rings!\n", i);
> - break;
> - }
> - ring = &adev->gfx.compute_ring[i];
> + if (!test_bit(i, adev->gfx.mec.queue_bitmap))
> + continue;
> +
> + ring = &adev->gfx.compute_ring[ring_id];
> +
> + /* mec0 is me1 */
> + ring->me = ((i / adev->gfx.mec.num_queue_per_pipe)
> + / adev->gfx.mec.num_pipe_per_mec)
> + + 1;
> + ring->pipe = (i / adev->gfx.mec.num_queue_per_pipe)
> + % adev->gfx.mec.num_pipe_per_mec;
> + ring->queue = i % adev->gfx.mec.num_queue_per_pipe;
> +
> ring->ring_obj = NULL;
> ring->use_doorbell = true;
> - ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i;
> - ring->me = 1; /* first MEC */
> - ring->pipe = i / 8;
> - ring->queue = i % 8;
> + ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id;
> sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
> - irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
> +
> + irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
> + + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
> + + ring->pipe;
> +
> /* type-2 packets are deprecated on MEC, use type-3 instead */
> r = amdgpu_ring_init(adev, ring, 1024,
> &adev->gfx.eop_irq, irq_type);
> if (r)
> return r;
> +
> + ring_id++;
[FK] Is there any case where ring_id != i? I'm just wondering what's the
purpose of a separate variable for this. I'm guessing it's for a later
change in the queue allocation policy ...
> }
>
> /* reserve GDS, GWS and OA resource for gfx */
> r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
> PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
> &adev->gds.gds_gfx_bo, NULL, NULL);
> if (r)
> return r;
>
> r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> index b34e597..c010b5d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> @@ -45,21 +45,20 @@
> #include "gca/gfx_8_0_enum.h"
> #include "gca/gfx_8_0_sh_mask.h"
> #include "gca/gfx_8_0_enum.h"
>
> #include "dce/dce_10_0_d.h"
> #include "dce/dce_10_0_sh_mask.h"
>
> #include "smu/smu_7_1_3_d.h"
>
> #define GFX8_NUM_GFX_RINGS 1
> -#define GFX8_NUM_COMPUTE_RINGS 8
> #define GFX8_MEC_HPD_SIZE 2048
>
>
> #define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
> #define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
> #define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
> #define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003
>
> #define ARRAY_MODE(x) ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
> #define PIPE_CONFIG(x) ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
> @@ -1410,47 +1409,82 @@ static int gfx_v8_0_kiq_init_ring(struct amdgpu_device *adev,
> }
> static void gfx_v8_0_kiq_free_ring(struct amdgpu_ring *ring,
> struct amdgpu_irq_src *irq)
> {
> amdgpu_wb_free(ring->adev, ring->adev->virt.reg_val_offs);
> amdgpu_ring_fini(ring);
> }
>
> #define GFX8_MEC_HPD_SIZE 2048
>
> +static void gfx_v8_0_compute_queue_acquire(struct amdgpu_device *adev)
> +{
> + int i, queue, pipe, mec;
> +
> + /* policy for amdgpu compute queue ownership */
> + for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
> + queue = i % adev->gfx.mec.num_queue_per_pipe;
> + pipe = (i / adev->gfx.mec.num_queue_per_pipe)
> + % adev->gfx.mec.num_pipe_per_mec;
> + mec = (i / adev->gfx.mec.num_queue_per_pipe)
> + / adev->gfx.mec.num_pipe_per_mec;
> +
> + /* we've run out of HW */
> + if (mec > adev->gfx.mec.num_mec)
[FK] Same issue as in the gfx_v7_0 version of this function: this should be
>=, since mec is a 0-based index.
> + break;
> +
> + /* policy: amdgpu owns all queues in the first pipe */
> + if (mec == 0 && pipe == 0)
> + set_bit(i, adev->gfx.mec.queue_bitmap);
> + }
[snip]
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> index 5dcc1b0..72eeda3 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> @@ -31,21 +31,20 @@
> #include "vega10/GC/gc_9_0_offset.h"
> #include "vega10/GC/gc_9_0_sh_mask.h"
> #include "vega10/vega10_enum.h"
> #include "vega10/HDP/hdp_4_0_offset.h"
>
> #include "soc15_common.h"
> #include "clearstate_gfx9.h"
> #include "v9_structs.h"
>
> #define GFX9_NUM_GFX_RINGS 1
> -#define GFX9_NUM_COMPUTE_RINGS 8
> #define GFX9_NUM_SE 4
> #define RLCG_UCODE_LOADING_START_ADDRESS 0x2000
>
> MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
> MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
> MODULE_FIRMWARE("amdgpu/vega10_me.bin");
> MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
> MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
> MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");
>
> @@ -469,45 +468,79 @@ static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
> amdgpu_bo_unpin(adev->gfx.mec.mec_fw_obj);
> amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
>
> amdgpu_bo_unref(&adev->gfx.mec.mec_fw_obj);
> adev->gfx.mec.mec_fw_obj = NULL;
> }
> }
>
> #define MEC_HPD_SIZE 2048
>
> +static void gfx_v9_0_compute_queue_acquire(struct amdgpu_device *adev)
> +{
> + int i, queue, pipe, mec;
> +
> + /* policy for amdgpu compute queue ownership */
> + for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
> + queue = i % adev->gfx.mec.num_queue_per_pipe;
> + pipe = (i / adev->gfx.mec.num_queue_per_pipe)
> + % adev->gfx.mec.num_pipe_per_mec;
> + mec = (i / adev->gfx.mec.num_queue_per_pipe)
> + / adev->gfx.mec.num_pipe_per_mec;
> +
> + /* we've run out of HW */
> + if (mec > adev->gfx.mec.num_mec)
> + break;
[FK] Same issue as in the gfx_v7_0 and gfx_v8_0 versions of this function:
this should be >=, since mec is a 0-based index.
> +
> + /* policy: amdgpu owns all queues in the first pipe */
> + if (mec == 0 && pipe == 0)
> + set_bit(i, adev->gfx.mec.queue_bitmap);
> + }
> +
> + /* update the number of active compute rings */
> + adev->gfx.num_compute_rings =
> + bitmap_weight(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
> +
> + /* If you hit this case and edited the policy, you probably just
> + * need to increase AMDGPU_MAX_COMPUTE_RINGS */
> + if (WARN_ON(adev->gfx.num_compute_rings > AMDGPU_MAX_COMPUTE_RINGS))
> + adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
> +}
> +
[snip]
More information about the amd-gfx
mailing list