[PATCH 11/15] drm/amdgpu/vcn: switch work handler to be per instance
Lazar, Lijo
lijo.lazar at amd.com
Thu Nov 21 07:40:08 UTC 2024
On 11/14/2024 3:14 AM, Alex Deucher wrote:
> Have a separate work handler for each VCN instance. This
> paves the way for per instance VCN power gating at runtime.
>
> Signed-off-by: Alex Deucher <alexander.deucher at amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 83 +++++++++++++------------
> drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h | 8 ++-
> drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c | 17 ++---
> drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c | 2 +-
> drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c | 4 +-
> drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c | 4 +-
> drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c | 5 +-
> drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c | 4 +-
> drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c | 5 +-
> drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c | 5 +-
> 10 files changed, 75 insertions(+), 62 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
> index b4a550795470..3383e4146c6a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
> @@ -118,12 +118,15 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
> unsigned int fw_shared_size, log_offset;
> int i, r;
>
> - INIT_DELAYED_WORK(&adev->vcn.idle_work, amdgpu_vcn_idle_work_handler);
> - mutex_init(&adev->vcn.vcn_pg_lock);
> mutex_init(&adev->vcn.vcn1_jpeg1_workaround);
> - atomic_set(&adev->vcn.total_submission_cnt, 0);
> - for (i = 0; i < adev->vcn.num_vcn_inst; i++)
> + for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
> + mutex_init(&adev->vcn.inst[i].vcn_pg_lock);
> + adev->vcn.inst[i].adev = adev;
> + adev->vcn.inst[i].inst = i;
> + atomic_set(&adev->vcn.inst[i].total_submission_cnt, 0);
> + INIT_DELAYED_WORK(&adev->vcn.inst[i].idle_work, amdgpu_vcn_idle_work_handler);
> atomic_set(&adev->vcn.inst[i].dpg_enc_submission_cnt, 0);
> + }
>
> if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
> (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
> @@ -271,10 +274,10 @@ int amdgpu_vcn_sw_fini(struct amdgpu_device *adev)
> amdgpu_ring_fini(&adev->vcn.inst[j].ring_enc[i]);
>
> amdgpu_ucode_release(&adev->vcn.inst[j].fw);
> + mutex_destroy(&adev->vcn.inst[j].vcn_pg_lock);
> }
>
> mutex_destroy(&adev->vcn.vcn1_jpeg1_workaround);
> - mutex_destroy(&adev->vcn.vcn_pg_lock);
>
> return 0;
> }
> @@ -325,8 +328,10 @@ int amdgpu_vcn_save_vcpu_bo(struct amdgpu_device *adev)
> int amdgpu_vcn_suspend(struct amdgpu_device *adev)
> {
> bool in_ras_intr = amdgpu_ras_intr_triggered();
> + int i;
>
> - cancel_delayed_work_sync(&adev->vcn.idle_work);
> + for (i = 0; i < adev->vcn.num_vcn_inst; ++i)
> + cancel_delayed_work_sync(&adev->vcn.inst[i].idle_work);
>
> /* err_event_athub will corrupt VCPU buffer, so we need to
> * restore fw data and clear buffer in amdgpu_vcn_resume() */
> @@ -382,46 +387,45 @@ int amdgpu_vcn_resume(struct amdgpu_device *adev)
>
> static void amdgpu_vcn_idle_work_handler(struct work_struct *work)
> {
> - struct amdgpu_device *adev =
> - container_of(work, struct amdgpu_device, vcn.idle_work.work);
> + struct amdgpu_vcn_inst *vcn_inst =
> + container_of(work, struct amdgpu_vcn_inst, idle_work.work);
> + struct amdgpu_device *adev = vcn_inst->adev;
> unsigned int fences = 0, fence[AMDGPU_MAX_VCN_INSTANCES] = {0};
> - unsigned int i, j;
> + unsigned int i = vcn_inst->inst, j;
> int r = 0;
>
> - for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
> - if (adev->vcn.harvest_config & (1 << j))
> - continue;
> -
> - for (i = 0; i < adev->vcn.num_enc_rings; ++i)
> - fence[j] += amdgpu_fence_count_emitted(&adev->vcn.inst[j].ring_enc[i]);
> + if (adev->vcn.harvest_config & (1 << i))
> + return;
This check is no longer required. Also, fence[AMDGPU_MAX_VCN_INSTANCES]
may be avoided. fences is good enough to count the total outstanding
fences for enc/dec rings.
Apart from that looks fine.
Thanks,
Lijo
>
> - /* Only set DPG pause for VCN3 or below, VCN4 and above will be handled by FW */
> - if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG &&
> - !adev->vcn.using_unified_queue) {
> - struct dpg_pause_state new_state;
> + for (j = 0; j < adev->vcn.num_enc_rings; ++j)
> + fence[i] += amdgpu_fence_count_emitted(&vcn_inst->ring_enc[j]);
>
> - if (fence[j] ||
> - unlikely(atomic_read(&adev->vcn.inst[j].dpg_enc_submission_cnt)))
> - new_state.fw_based = VCN_DPG_STATE__PAUSE;
> - else
> - new_state.fw_based = VCN_DPG_STATE__UNPAUSE;
> + /* Only set DPG pause for VCN3 or below, VCN4 and above will be handled by FW */
> + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG &&
> + !adev->vcn.using_unified_queue) {
> + struct dpg_pause_state new_state;
>
> - adev->vcn.pause_dpg_mode(adev, j, &new_state);
> - }
> + if (fence[i] ||
> + unlikely(atomic_read(&vcn_inst->dpg_enc_submission_cnt)))
> + new_state.fw_based = VCN_DPG_STATE__PAUSE;
> + else
> + new_state.fw_based = VCN_DPG_STATE__UNPAUSE;
>
> - fence[j] += amdgpu_fence_count_emitted(&adev->vcn.inst[j].ring_dec);
> - fences += fence[j];
> + adev->vcn.pause_dpg_mode(adev, i, &new_state);
> }
>
> - if (!fences && !atomic_read(&adev->vcn.total_submission_cnt)) {
> + fence[i] += amdgpu_fence_count_emitted(&vcn_inst->ring_dec);
> + fences += fence[i];
> +
> + if (!fences && !atomic_read(&vcn_inst->total_submission_cnt)) {
> amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
> - AMD_PG_STATE_GATE);
> + AMD_PG_STATE_GATE);
> r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO,
> - false);
> + false);
> if (r)
> dev_warn(adev->dev, "(%d) failed to disable video power profile mode\n", r);
> } else {
> - schedule_delayed_work(&adev->vcn.idle_work, VCN_IDLE_TIMEOUT);
> + schedule_delayed_work(&vcn_inst->idle_work, VCN_IDLE_TIMEOUT);
> }
> }
>
> @@ -430,18 +434,18 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring)
> struct amdgpu_device *adev = ring->adev;
> int r = 0;
>
> - atomic_inc(&adev->vcn.total_submission_cnt);
> + atomic_inc(&adev->vcn.inst[ring->me].total_submission_cnt);
>
> - if (!cancel_delayed_work_sync(&adev->vcn.idle_work)) {
> + if (!cancel_delayed_work_sync(&adev->vcn.inst[ring->me].idle_work)) {
> r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO,
> true);
> if (r)
> dev_warn(adev->dev, "(%d) failed to switch to video power profile mode\n", r);
> }
>
> - mutex_lock(&adev->vcn.vcn_pg_lock);
> + mutex_lock(&adev->vcn.inst[ring->me].vcn_pg_lock);
> amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
> - AMD_PG_STATE_UNGATE);
> + AMD_PG_STATE_UNGATE);
>
> /* Only set DPG pause for VCN3 or below, VCN4 and above will be handled by FW */
> if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG &&
> @@ -466,7 +470,7 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring)
>
> adev->vcn.pause_dpg_mode(adev, ring->me, &new_state);
> }
> - mutex_unlock(&adev->vcn.vcn_pg_lock);
> + mutex_unlock(&adev->vcn.inst[ring->me].vcn_pg_lock);
> }
>
> void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring)
> @@ -479,9 +483,10 @@ void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring)
> !adev->vcn.using_unified_queue)
> atomic_dec(&ring->adev->vcn.inst[ring->me].dpg_enc_submission_cnt);
>
> - atomic_dec(&ring->adev->vcn.total_submission_cnt);
> + atomic_dec(&ring->adev->vcn.inst[ring->me].total_submission_cnt);
>
> - schedule_delayed_work(&ring->adev->vcn.idle_work, VCN_IDLE_TIMEOUT);
> + schedule_delayed_work(&ring->adev->vcn.inst[ring->me].idle_work,
> + VCN_IDLE_TIMEOUT);
> }
>
> int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring)
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
> index 7b528123b36e..0d9a4d946eac 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
> @@ -279,6 +279,8 @@ struct amdgpu_vcn_fw_shared {
> };
>
> struct amdgpu_vcn_inst {
> + struct amdgpu_device *adev;
> + int inst;
> struct amdgpu_bo *vcpu_bo;
> void *cpu_addr;
> uint64_t gpu_addr;
> @@ -300,6 +302,9 @@ struct amdgpu_vcn_inst {
> const struct firmware *fw; /* VCN firmware */
> uint8_t vcn_config;
> uint32_t vcn_codec_disable_mask;
> + atomic_t total_submission_cnt;
> + struct mutex vcn_pg_lock;
> + struct delayed_work idle_work;
> };
>
> struct amdgpu_vcn_ras {
> @@ -308,7 +313,6 @@ struct amdgpu_vcn_ras {
>
> struct amdgpu_vcn {
> unsigned fw_version;
> - struct delayed_work idle_work;
> unsigned num_enc_rings;
> enum amd_powergating_state cur_state;
> bool indirect_sram;
> @@ -316,9 +320,7 @@ struct amdgpu_vcn {
> uint8_t num_vcn_inst;
> struct amdgpu_vcn_inst inst[AMDGPU_MAX_VCN_INSTANCES];
> struct amdgpu_vcn_reg internal;
> - struct mutex vcn_pg_lock;
> struct mutex vcn1_jpeg1_workaround;
> - atomic_t total_submission_cnt;
>
> unsigned harvest_config;
> int (*pause_dpg_mode)(struct amdgpu_device *adev,
> diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
> index 5ea96c983517..9ca964e11540 100644
> --- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
> @@ -150,7 +150,7 @@ static int vcn_v1_0_sw_init(struct amdgpu_ip_block *ip_block)
> return r;
>
> /* Override the work func */
> - adev->vcn.idle_work.work.func = vcn_v1_0_idle_work_handler;
> + adev->vcn.inst->idle_work.work.func = vcn_v1_0_idle_work_handler;
>
> amdgpu_vcn_setup_ucode(adev);
>
> @@ -277,7 +277,7 @@ static int vcn_v1_0_hw_fini(struct amdgpu_ip_block *ip_block)
> {
> struct amdgpu_device *adev = ip_block->adev;
>
> - cancel_delayed_work_sync(&adev->vcn.idle_work);
> + cancel_delayed_work_sync(&adev->vcn.inst->idle_work);
>
> if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) ||
> (adev->vcn.cur_state != AMD_PG_STATE_GATE &&
> @@ -301,7 +301,7 @@ static int vcn_v1_0_suspend(struct amdgpu_ip_block *ip_block)
> struct amdgpu_device *adev = ip_block->adev;
> bool idle_work_unexecuted;
>
> - idle_work_unexecuted = cancel_delayed_work_sync(&adev->vcn.idle_work);
> + idle_work_unexecuted = cancel_delayed_work_sync(&adev->vcn.inst->idle_work);
> if (idle_work_unexecuted) {
> if (adev->pm.dpm_enabled)
> amdgpu_dpm_enable_vcn(adev, false, 0);
> @@ -1828,8 +1828,9 @@ static int vcn_v1_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
>
> static void vcn_v1_0_idle_work_handler(struct work_struct *work)
> {
> - struct amdgpu_device *adev =
> - container_of(work, struct amdgpu_device, vcn.idle_work.work);
> + struct amdgpu_vcn_inst *vcn_inst =
> + container_of(work, struct amdgpu_vcn_inst, idle_work.work);
> + struct amdgpu_device *adev = vcn_inst->adev;
> unsigned int fences = 0, i;
>
> for (i = 0; i < adev->vcn.num_enc_rings; ++i)
> @@ -1862,14 +1863,14 @@ static void vcn_v1_0_idle_work_handler(struct work_struct *work)
> amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
> AMD_PG_STATE_GATE);
> } else {
> - schedule_delayed_work(&adev->vcn.idle_work, VCN_IDLE_TIMEOUT);
> + schedule_delayed_work(&adev->vcn.inst->idle_work, VCN_IDLE_TIMEOUT);
> }
> }
>
> static void vcn_v1_0_ring_begin_use(struct amdgpu_ring *ring)
> {
> struct amdgpu_device *adev = ring->adev;
> - bool set_clocks = !cancel_delayed_work_sync(&adev->vcn.idle_work);
> + bool set_clocks = !cancel_delayed_work_sync(&adev->vcn.inst->idle_work);
>
> mutex_lock(&adev->vcn.vcn1_jpeg1_workaround);
>
> @@ -1921,7 +1922,7 @@ void vcn_v1_0_set_pg_for_begin_use(struct amdgpu_ring *ring, bool set_clocks)
>
> void vcn_v1_0_ring_end_use(struct amdgpu_ring *ring)
> {
> - schedule_delayed_work(&ring->adev->vcn.idle_work, VCN_IDLE_TIMEOUT);
> + schedule_delayed_work(&ring->adev->vcn.inst->idle_work, VCN_IDLE_TIMEOUT);
> mutex_unlock(&ring->adev->vcn.vcn1_jpeg1_workaround);
> }
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c
> index e42cfc731ad8..115f33c3ab68 100644
> --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c
> @@ -313,7 +313,7 @@ static int vcn_v2_0_hw_fini(struct amdgpu_ip_block *ip_block)
> {
> struct amdgpu_device *adev = ip_block->adev;
>
> - cancel_delayed_work_sync(&adev->vcn.idle_work);
> + cancel_delayed_work_sync(&adev->vcn.inst->idle_work);
>
> if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) ||
> (adev->vcn.cur_state != AMD_PG_STATE_GATE &&
> diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
> index b9be304aa294..6fb08ed09310 100644
> --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
> +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
> @@ -390,12 +390,12 @@ static int vcn_v2_5_hw_fini(struct amdgpu_ip_block *ip_block)
> struct amdgpu_device *adev = ip_block->adev;
> int i;
>
> - cancel_delayed_work_sync(&adev->vcn.idle_work);
> -
> for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
> if (adev->vcn.harvest_config & (1 << i))
> continue;
>
> + cancel_delayed_work_sync(&adev->vcn.inst[i].idle_work);
> +
> if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) ||
> (adev->vcn.cur_state != AMD_PG_STATE_GATE &&
> RREG32_SOC15(VCN, i, mmUVD_STATUS)))
> diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
> index a3627700ed48..70a1b85a4efa 100644
> --- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
> @@ -422,12 +422,12 @@ static int vcn_v3_0_hw_fini(struct amdgpu_ip_block *ip_block)
> struct amdgpu_device *adev = ip_block->adev;
> int i;
>
> - cancel_delayed_work_sync(&adev->vcn.idle_work);
> -
> for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
> if (adev->vcn.harvest_config & (1 << i))
> continue;
>
> + cancel_delayed_work_sync(&adev->vcn.inst[i].idle_work);
> +
> if (!amdgpu_sriov_vf(adev)) {
> if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) ||
> (adev->vcn.cur_state != AMD_PG_STATE_GATE &&
> diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
> index e11bd6cae8fc..656c2a1c4bf9 100644
> --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
> @@ -359,11 +359,12 @@ static int vcn_v4_0_hw_fini(struct amdgpu_ip_block *ip_block)
> struct amdgpu_device *adev = ip_block->adev;
> int i;
>
> - cancel_delayed_work_sync(&adev->vcn.idle_work);
> -
> for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
> if (adev->vcn.harvest_config & (1 << i))
> continue;
> +
> + cancel_delayed_work_sync(&adev->vcn.inst[i].idle_work);
> +
> if (!amdgpu_sriov_vf(adev)) {
> if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) ||
> (adev->vcn.cur_state != AMD_PG_STATE_GATE &&
> diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
> index d4d92d4e2cab..8eea78c498da 100644
> --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
> +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
> @@ -331,8 +331,10 @@ static int vcn_v4_0_3_hw_init(struct amdgpu_ip_block *ip_block)
> static int vcn_v4_0_3_hw_fini(struct amdgpu_ip_block *ip_block)
> {
> struct amdgpu_device *adev = ip_block->adev;
> + int i;
>
> - cancel_delayed_work_sync(&adev->vcn.idle_work);
> + for (i = 0; i < adev->vcn.num_vcn_inst; ++i)
> + cancel_delayed_work_sync(&adev->vcn.inst[i].idle_work);
>
> if (adev->vcn.cur_state != AMD_PG_STATE_GATE)
> vcn_v4_0_3_set_powergating_state(ip_block, AMD_PG_STATE_GATE);
> diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c
> index 26bd1954b5e3..7086f98c2ddc 100644
> --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c
> +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c
> @@ -300,11 +300,12 @@ static int vcn_v4_0_5_hw_fini(struct amdgpu_ip_block *ip_block)
> struct amdgpu_device *adev = ip_block->adev;
> int i;
>
> - cancel_delayed_work_sync(&adev->vcn.idle_work);
> -
> for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
> if (adev->vcn.harvest_config & (1 << i))
> continue;
> +
> + cancel_delayed_work_sync(&adev->vcn.inst[i].idle_work);
> +
> if (!amdgpu_sriov_vf(adev)) {
> if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) ||
> (adev->vcn.cur_state != AMD_PG_STATE_GATE &&
> diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c
> index 95b31167f552..4c641813aeff 100644
> --- a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c
> @@ -274,11 +274,12 @@ static int vcn_v5_0_0_hw_fini(struct amdgpu_ip_block *ip_block)
> struct amdgpu_device *adev = ip_block->adev;
> int i;
>
> - cancel_delayed_work_sync(&adev->vcn.idle_work);
> -
> for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
> if (adev->vcn.harvest_config & (1 << i))
> continue;
> +
> + cancel_delayed_work_sync(&adev->vcn.inst[i].idle_work);
> +
> if (!amdgpu_sriov_vf(adev)) {
> if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) ||
> (adev->vcn.cur_state != AMD_PG_STATE_GATE &&
More information about the amd-gfx
mailing list