[PATCH 2/4] drm/amdgpu: Add software ring callbacks for gfx9 (v6)
Christian König
christian.koenig at amd.com
Mon Sep 26 06:43:09 UTC 2022
Am 23.09.22 um 15:16 schrieb jiadong.zhu at amd.com:
> From: "Jiadong.Zhu" <Jiadong.Zhu at amd.com>
>
> Set ring functions with software ring callbacks on gfx9.
>
> The software ring could be tested by debugfs_test_ib case.
>
> v2: Set sw_ring 2 to enable software ring by default.
> v3: Remove the parameter for software ring enablement.
> v4: Use amdgpu_ring_init/fini for software rings.
> v5: Update for code format. Fix conflict.
> v6: Remove unnecessary checks and enable software ring on gfx9 by default.
>
> Acked-by: Luben Tuikov <luben.tuikov at amd.com>
> Cc: Christian Koenig <Christian.Koenig at amd.com>
> Cc: Luben Tuikov <Luben.Tuikov at amd.com>
> Cc: Andrey Grodzovsky <Andrey.Grodzovsky at amd.com>
> Signed-off-by: Jiadong.Zhu <Jiadong.Zhu at amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 1 +
> drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 1 +
> drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 108 ++++++++++++++++++++++-
> 3 files changed, 109 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> index 9996dadb39f7..4fdfc3ec134a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> @@ -348,6 +348,7 @@ struct amdgpu_gfx {
>
> bool is_poweron;
>
> + struct amdgpu_ring sw_gfx_ring[AMDGPU_MAX_SW_GFX_RINGS];
> struct amdgpu_ring_mux muxer;
> };
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> index 40b1277b4f0c..f08ee1ac281c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> @@ -39,6 +39,7 @@ struct amdgpu_vm;
> #define AMDGPU_MAX_RINGS 28
> #define AMDGPU_MAX_HWIP_RINGS 8
> #define AMDGPU_MAX_GFX_RINGS 2
> +#define AMDGPU_MAX_SW_GFX_RINGS 2
> #define AMDGPU_MAX_COMPUTE_RINGS 8
> #define AMDGPU_MAX_VCE_RINGS 3
> #define AMDGPU_MAX_UVD_ENC_RINGS 2
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> index 5349ca4d19e3..e688665cd1e0 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> @@ -47,6 +47,7 @@
>
> #include "amdgpu_ras.h"
>
> +#include "amdgpu_sw_ring.h"
> #include "gfx_v9_4.h"
> #include "gfx_v9_0.h"
> #include "gfx_v9_4_2.h"
> @@ -56,6 +57,7 @@
> #include "asic_reg/gc/gc_9_0_default.h"
>
> #define GFX9_NUM_GFX_RINGS 1
> +#define GFX9_NUM_SW_GFX_RINGS 2
> #define GFX9_MEC_HPD_SIZE 4096
> #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
> #define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L
> @@ -2273,6 +2275,7 @@ static int gfx_v9_0_sw_init(void *handle)
> struct amdgpu_ring *ring;
> struct amdgpu_kiq *kiq;
> struct amdgpu_device *adev = (struct amdgpu_device *)handle;
> + unsigned int hw_prio;
>
> switch (adev->ip_versions[GC_HWIP][0]) {
> case IP_VERSION(9, 0, 1):
> @@ -2356,6 +2359,9 @@ static int gfx_v9_0_sw_init(void *handle)
> sprintf(ring->name, "gfx_%d", i);
> ring->use_doorbell = true;
> ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
> +
> + /* disable scheduler on the real ring */
> + ring->no_scheduler = true;
> r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
> AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP,
> AMDGPU_RING_PRIO_DEFAULT, NULL);
> @@ -2363,6 +2369,42 @@ static int gfx_v9_0_sw_init(void *handle)
> return r;
> }
>
> + /* set up the software rings */
> + for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) {
> + ring = &adev->gfx.sw_gfx_ring[i];
> + ring->ring_obj = NULL;
> + if (!i)
> + sprintf(ring->name, "gfx_sw");
> + else
> + sprintf(ring->name, "gfx_sw_%d", i);
I think we should use something like gfx_low/gfx_high for the ring name
here.
The fact that this is implemented by a sw muxer is pretty much irrelevant
for userspace.
Maybe use a static array for the names or something like this.
Apart from that, the patch looks good to me.
Regards,
Christian.
> + ring->use_doorbell = true;
> + ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
> + ring->is_sw_ring = true;
> + hw_prio = (i == 1) ? AMDGPU_RING_PRIO_2 :
> + AMDGPU_RING_PRIO_DEFAULT;
> + r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
> + AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP, hw_prio,
> + NULL);
> + if (r)
> + return r;
> + ring->wptr = 0;
> + }
> +
> + /* init the muxer and add software rings */
> + r = amdgpu_ring_mux_init(&adev->gfx.muxer, &adev->gfx.gfx_ring[0],
> + GFX9_NUM_SW_GFX_RINGS);
> + if (r) {
> + DRM_ERROR("amdgpu_ring_mux_init failed(%d)\n", r);
> + return r;
> + }
> + for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) {
> + r = amdgpu_ring_mux_add_sw_ring(&adev->gfx.muxer, &adev->gfx.sw_gfx_ring[i]);
> + if (r) {
> + DRM_ERROR("amdgpu_ring_mux_add_sw_ring failed(%d)\n", r);
> + return r;
> + }
> + }
> +
> /* set up the compute queues - allocate horizontally across pipes */
> ring_id = 0;
> for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
> @@ -2413,6 +2455,10 @@ static int gfx_v9_0_sw_fini(void *handle)
> int i;
> struct amdgpu_device *adev = (struct amdgpu_device *)handle;
>
> + for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++)
> + amdgpu_ring_fini(&adev->gfx.sw_gfx_ring[i]);
> + amdgpu_ring_mux_fini(&adev->gfx.muxer);
> +
> for (i = 0; i < adev->gfx.num_gfx_rings; i++)
> amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
> for (i = 0; i < adev->gfx.num_compute_rings; i++)
> @@ -5877,7 +5923,9 @@ static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
>
> switch (me_id) {
> case 0:
> - amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
> + /* Fence signals are handled on the software rings*/
> + for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++)
> + amdgpu_fence_process(&adev->gfx.sw_gfx_ring[i]);
> break;
> case 1:
> case 2:
> @@ -6882,6 +6930,61 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
> .emit_mem_sync = gfx_v9_0_emit_mem_sync,
> };
>
> +static const struct amdgpu_ring_funcs gfx_v9_0_sw_ring_funcs_gfx = {
> + .type = AMDGPU_RING_TYPE_GFX,
> + .align_mask = 0xff,
> + .nop = PACKET3(PACKET3_NOP, 0x3FFF),
> + .support_64bit_ptrs = true,
> + .secure_submission_supported = true,
> + .vmhub = AMDGPU_GFXHUB_0,
> + .get_rptr = amdgpu_sw_ring_get_rptr_gfx,
> + .get_wptr = amdgpu_sw_ring_get_wptr_gfx,
> + .set_wptr = amdgpu_sw_ring_set_wptr_gfx,
> + .emit_frame_size = /* totally 242 maximum if 16 IBs */
> + 5 + /* COND_EXEC */
> + 7 + /* PIPELINE_SYNC */
> + SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
> + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
> + 2 + /* VM_FLUSH */
> + 8 + /* FENCE for VM_FLUSH */
> + 20 + /* GDS switch */
> + 4 + /* double SWITCH_BUFFER,
> + * the first COND_EXEC jump to the place just
> + * prior to this double SWITCH_BUFFER
> + */
> + 5 + /* COND_EXEC */
> + 7 + /* HDP_flush */
> + 4 + /* VGT_flush */
> + 14 + /* CE_META */
> + 31 + /* DE_META */
> + 3 + /* CNTX_CTRL */
> + 5 + /* HDP_INVL */
> + 8 + 8 + /* FENCE x2 */
> + 2 + /* SWITCH_BUFFER */
> + 7, /* gfx_v9_0_emit_mem_sync */
> + .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */
> + .emit_ib = gfx_v9_0_ring_emit_ib_gfx,
> + .emit_fence = gfx_v9_0_ring_emit_fence,
> + .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
> + .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
> + .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
> + .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
> + .test_ring = gfx_v9_0_ring_test_ring,
> + .test_ib = gfx_v9_0_ring_test_ib,
> + .insert_nop = amdgpu_sw_ring_insert_nop,
> + .pad_ib = amdgpu_ring_generic_pad_ib,
> + .emit_switch_buffer = gfx_v9_ring_emit_sb,
> + .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
> + .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
> + .patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
> + .emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
> + .emit_wreg = gfx_v9_0_ring_emit_wreg,
> + .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
> + .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
> + .soft_recovery = gfx_v9_0_ring_soft_recovery,
> + .emit_mem_sync = gfx_v9_0_emit_mem_sync,
> +};
> +
> static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
> .type = AMDGPU_RING_TYPE_COMPUTE,
> .align_mask = 0xff,
> @@ -6959,6 +7062,9 @@ static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
> for (i = 0; i < adev->gfx.num_gfx_rings; i++)
> adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
>
> + for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++)
> + adev->gfx.sw_gfx_ring[i].funcs = &gfx_v9_0_sw_ring_funcs_gfx;
> +
> for (i = 0; i < adev->gfx.num_compute_rings; i++)
> adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
> }
More information about the amd-gfx
mailing list