[PATCH 1/6] drm/amdgpu/gfx10: Implement cleaner shader support for GFX10 hardware

Alex Deucher alexdeucher at gmail.com
Wed Oct 2 16:41:14 UTC 2024


On Wed, Oct 2, 2024 at 12:41 AM Srinivasan Shanmugam
<srinivasan.shanmugam at amd.com> wrote:
>
> This patch adds support for the PACKET3_RUN_CLEANER_SHADER packet in the
> gfx_v10_0 module. This packet triggers the cleaner shader, which clears
> GPU memory before it is reused, helping to prevent data leakage between
> different processes.
>
> The patch modifies the gfx10_kiq_set_resources function to write the
> cleaner shader's memory controller address to the KIQ ring buffer. It
> also adds a new function, gfx_v10_0_ring_emit_cleaner_shader, which
> emits the PACKET3_RUN_CLEANER_SHADER packet to the ring buffer.
>
> Finally, the patch updates the ring function structures to include the
> new gfx_v10_0_ring_emit_cleaner_shader function. This allows the
> cleaner shader to be emitted as part of the ring's operations.
>
> Cc: Christian König <christian.koenig at amd.com>
> Cc: Alex Deucher <alexander.deucher at amd.com>
> Signed-off-by: Srinivasan Shanmugam <srinivasan.shanmugam at amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 40 +++++++++++++++++++++++---
>  1 file changed, 36 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> index c544ea2aea6e..2b230971c58a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> @@ -3677,13 +3677,19 @@ static int gfx_v10_0_set_powergating_state(void *handle,
>                                           enum amd_powergating_state state);
>  static void gfx10_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask)
>  {
> +       struct amdgpu_device *adev = kiq_ring->adev;
> +       u64 shader_mc_addr;
> +
> +       /* Cleaner shader MC address */
> +       shader_mc_addr = adev->gfx.cleaner_shader_gpu_addr >> 8;
> +
>         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
>         amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
>                           PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); /* vmid_mask:0 queue_type:0 (KIQ) */
>         amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
>         amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
> -       amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
> -       amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
> +       amdgpu_ring_write(kiq_ring, lower_32_bits(shader_mc_addr)); /* cleaner shader addr lo */
> +       amdgpu_ring_write(kiq_ring, upper_32_bits(shader_mc_addr)); /* cleaner shader addr hi */
>         amdgpu_ring_write(kiq_ring, 0); /* oac mask */
>         amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
>  }
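For reference on the hunk above: the cleaner shader GPU VA is shifted right
by 8 before being written, i.e. the packet takes the address in 256-byte
units, split across the two dwords that previously carried the GWS mask.
A standalone sketch of that split (not kernel code; the address value is
made up, and lower_32_bits/upper_32_bits just mirror the kernel helpers):

/*
 * Illustration only: shifting the cleaner shader GPU address into
 * 256-byte units and splitting it into the two SET_RESOURCES dwords.
 */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

static uint32_t lower_32_bits(uint64_t v) { return (uint32_t)v; }
static uint32_t upper_32_bits(uint64_t v) { return (uint32_t)(v >> 32); }

int main(void)
{
	uint64_t cleaner_shader_gpu_addr = 0x800012345600ULL; /* made-up VA */
	uint64_t shader_mc_addr = cleaner_shader_gpu_addr >> 8;

	printf("cleaner shader addr lo: 0x%08" PRIx32 "\n",
	       lower_32_bits(shader_mc_addr));
	printf("cleaner shader addr hi: 0x%08" PRIx32 "\n",
	       upper_32_bits(shader_mc_addr));
	return 0;
}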
> @@ -4557,6 +4563,7 @@ static void gfx_v10_0_gpu_early_init(struct amdgpu_device *adev)
>                 break;
>         }
>
> +       adev->gfx.xcc_mask = 1;

I think you can drop this.  The xcc mask is calculated in
amdgpu_discovery.c for devices which have an IP discovery table.

Alex

>         adev->gfx.config.gb_addr_config = gb_addr_config;
>
>         adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
> @@ -4726,6 +4733,11 @@ static int gfx_v10_0_sw_init(struct amdgpu_ip_block *ip_block)
>                 adev->gfx.mec.num_queue_per_pipe = 8;
>                 break;
>         }
> +       switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
> +       default:
> +               adev->gfx.enable_cleaner_shader = false;
> +               break;
> +       }
>
>         /* KIQ event */
>         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP,
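The new IP-version switch in sw_init above leaves the feature disabled by
default; presumably later patches in the series flip
adev->gfx.enable_cleaner_shader to true for the GC versions that ship a
cleaner shader binary. A standalone sketch of that gating pattern follows
(IP_VERSION() mirrors the amdgpu packing; the 10.3.0 case is purely
illustrative, not a claim about which parts the series actually enables):

/*
 * Standalone sketch (not from this patch): gating enable_cleaner_shader
 * on the GC IP version.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define IP_VERSION(mj, mn, rv)	(((mj) << 16) | ((mn) << 8) | (rv))

static bool cleaner_shader_enabled(uint32_t gc_version)
{
	switch (gc_version) {
	case IP_VERSION(10, 3, 0):	/* hypothetical: an enabled variant */
		return true;
	default:			/* as in the hunk above: off by default */
		return false;
	}
}

int main(void)
{
	printf("GC 10.3.0  -> %d\n", cleaner_shader_enabled(IP_VERSION(10, 3, 0)));
	printf("GC 10.1.10 -> %d\n", cleaner_shader_enabled(IP_VERSION(10, 1, 10)));
	return 0;
}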
> @@ -4842,6 +4854,9 @@ static int gfx_v10_0_sw_init(struct amdgpu_ip_block *ip_block)
>
>         gfx_v10_0_alloc_ip_dump(adev);
>
> +       r = amdgpu_gfx_sysfs_isolation_shader_init(adev);
> +       if (r)
> +               return r;
>         return 0;
>  }
>
> @@ -4881,6 +4896,8 @@ static int gfx_v10_0_sw_fini(struct amdgpu_ip_block *ip_block)
>         amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[0].ring);
>         amdgpu_gfx_kiq_fini(adev, 0);
>
> +       amdgpu_gfx_cleaner_shader_sw_fini(adev);
> +
>         gfx_v10_0_pfp_fini(adev);
>         gfx_v10_0_ce_fini(adev);
>         gfx_v10_0_me_fini(adev);
> @@ -4891,6 +4908,7 @@ static int gfx_v10_0_sw_fini(struct amdgpu_ip_block *ip_block)
>                 gfx_v10_0_rlc_backdoor_autoload_buffer_fini(adev);
>
>         gfx_v10_0_free_microcode(adev);
> +       amdgpu_gfx_sysfs_isolation_shader_fini(adev);
>
>         kfree(adev->gfx.ip_dump_core);
>         kfree(adev->gfx.ip_dump_compute_queues);
> @@ -7374,6 +7392,9 @@ static int gfx_v10_0_hw_init(void *handle)
>         if (!amdgpu_emu_mode)
>                 gfx_v10_0_init_golden_registers(adev);
>
> +       amdgpu_gfx_cleaner_shader_init(adev, adev->gfx.cleaner_shader_size,
> +                                      adev->gfx.cleaner_shader_ptr);
> +
>         if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
>                 /**
>                  * For gfx 10, rlc firmware loading relies on smu firmware is
> @@ -9699,6 +9720,13 @@ static void gfx_v10_ip_dump(struct amdgpu_ip_block *ip_block)
>         amdgpu_gfx_off_ctrl(adev, true);
>  }
>
> +static void gfx_v10_0_ring_emit_cleaner_shader(struct amdgpu_ring *ring)
> +{
> +       /* Emit the cleaner shader */
> +       amdgpu_ring_write(ring, PACKET3(PACKET3_RUN_CLEANER_SHADER, 0));
> +       amdgpu_ring_write(ring, 0);  /* RESERVED field, programmed to zero */
> +}
> +
>  static const struct amd_ip_funcs gfx_v10_0_ip_funcs = {
>         .name = "gfx_v10_0",
>         .early_init = gfx_v10_0_early_init,
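On the emission side, the new gfx_v10_0_ring_emit_cleaner_shader helper
above writes exactly two dwords: a type-3 header carrying the
RUN_CLEANER_SHADER opcode with a count of 0, followed by one reserved
dword programmed to zero, which is also why the emit_frame_size bumps
below are +2. A standalone sketch of the header encoding, assuming the
usual PACKET3() packing from nvd.h and a placeholder opcode value:

/*
 * Sketch only (not kernel code) of the PM4 type-3 header built by
 * PACKET3(PACKET3_RUN_CLEANER_SHADER, 0). The opcode value below is a
 * placeholder, not taken from this patch.
 */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define PACKET_TYPE3			3u
#define PACKET3(op, n)			((PACKET_TYPE3 << 30) |		\
					 (((op) & 0xFF) << 8) |		\
					 ((n) & 0x3FFF))
#define PACKET3_RUN_CLEANER_SHADER	0xD2	/* placeholder opcode */

int main(void)
{
	/* Two dwords total: header (count 0 => one body dword), then the
	 * reserved dword programmed to zero. */
	uint32_t pkt[2] = { PACKET3(PACKET3_RUN_CLEANER_SHADER, 0), 0 };

	printf("header:   0x%08" PRIx32 "\n", pkt[0]);
	printf("reserved: 0x%08" PRIx32 "\n", pkt[1]);
	return 0;
}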
> @@ -9749,7 +9777,8 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
>                 5 + /* HDP_INVL */
>                 8 + 8 + /* FENCE x2 */
>                 2 + /* SWITCH_BUFFER */
> -               8, /* gfx_v10_0_emit_mem_sync */
> +               8 + /* gfx_v10_0_emit_mem_sync */
> +               2, /* gfx_v10_0_ring_emit_cleaner_shader */
>         .emit_ib_size = 4, /* gfx_v10_0_ring_emit_ib_gfx */
>         .emit_ib = gfx_v10_0_ring_emit_ib_gfx,
>         .emit_fence = gfx_v10_0_ring_emit_fence,
> @@ -9772,6 +9801,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
>         .soft_recovery = gfx_v10_0_ring_soft_recovery,
>         .emit_mem_sync = gfx_v10_0_emit_mem_sync,
>         .reset = gfx_v10_0_reset_kgq,
> +       .emit_cleaner_shader = gfx_v10_0_ring_emit_cleaner_shader,
>  };
>
>  static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
> @@ -9791,7 +9821,8 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
>                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
>                 2 + /* gfx_v10_0_ring_emit_vm_flush */
>                 8 + 8 + 8 + /* gfx_v10_0_ring_emit_fence x3 for user fence, vm fence */
> -               8, /* gfx_v10_0_emit_mem_sync */
> +               8 + /* gfx_v10_0_emit_mem_sync */
> +               2, /* gfx_v10_0_ring_emit_cleaner_shader */
>         .emit_ib_size = 7, /* gfx_v10_0_ring_emit_ib_compute */
>         .emit_ib = gfx_v10_0_ring_emit_ib_compute,
>         .emit_fence = gfx_v10_0_ring_emit_fence,
> @@ -9809,6 +9840,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
>         .soft_recovery = gfx_v10_0_ring_soft_recovery,
>         .emit_mem_sync = gfx_v10_0_emit_mem_sync,
>         .reset = gfx_v10_0_reset_kcq,
> +       .emit_cleaner_shader = gfx_v10_0_ring_emit_cleaner_shader,
>  };
>
>  static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = {
> --
> 2.34.1
>

