<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
<style type="text/css" style="display:none;"> P {margin-top:0;margin-bottom:0;} </style>
</head>
<body dir="ltr">
<p style="font-family:Calibri;font-size:10pt;color:#0000FF;margin:5pt;font-style:normal;font-weight:normal;text-decoration:none;" align="Left">
[AMD Official Use Only - AMD Internal Distribution Only]<br>
</p>
<br>
<div>
<p class="elementToProof" style="margin-top: 0px; margin-bottom: 0px;"><span style="font-family: Aptos, Aptos_EmbeddedFont, Aptos_MSFontService, Calibri, Helvetica, sans-serif; font-size: 11pt; color: black;">V4 is Reviewed-by: Boyuan Zhang
<a href="mailto:Boyuan.Zhang@amd.com" id="OWAe9ae1d9d-ee89-4485-f7c4-71ccfa2343b8" class="OWAAutoLink" title="mailto:Boyuan.Zhang@amd.com" style="margin-top: 0px; margin-bottom: 0px;" data-linkindex="1">
&lt;Boyuan.Zhang@amd.com&gt;</a></span></p>
<div class="elementToProof" style="font-family: Aptos, Aptos_EmbeddedFont, Aptos_MSFontService, Calibri, Helvetica, sans-serif; font-size: 12pt; color: rgb(0, 0, 0);">
<br>
</div>
<div id="appendonsend"></div>
<hr style="display:inline-block;width:98%" tabindex="-1">
<div id="divRplyFwdMsg" dir="ltr"><font face="Calibri, sans-serif" style="font-size:11pt" color="#000000"><b>From:</b> amd-gfx &lt;amd-gfx-bounces@lists.freedesktop.org&gt; on behalf of Alex Deucher &lt;alexdeucher@gmail.com&gt;<br>
<b>Sent:</b> March 7, 2025 10:22 AM<br>
<b>To:</b> Deucher, Alexander &lt;Alexander.Deucher@amd.com&gt;<br>
<b>Cc:</b> amd-gfx@lists.freedesktop.org &lt;amd-gfx@lists.freedesktop.org&gt;<br>
<b>Subject:</b> Re: [PATCH] drm/amdgpu/vcn: fix idle work handler for VCN 2.5</font>
<div> </div>
</div>
<div class="BodyFragment"><font size="2"><span style="font-size:11pt;">
<div class="PlainText">Ping?  This fixes a regression on VCN 2.5.<br>
<br>
Thanks,<br>
<br>
Alex<br>
<br>
On Thu, Mar 6, 2025 at 10:05 AM Alex Deucher &lt;alexdeucher@gmail.com&gt; wrote:<br>
><br>
> Ping?<br>
><br>
> Thanks,<br>
><br>
> Alex<br>
><br>
> On Wed, Mar 5, 2025 at 2:42 PM Alex Deucher &lt;alexander.deucher@amd.com&gt; wrote:<br>
> ><br>
> > VCN 2.5 uses the PG callback to enable VCN DPM which is<br>
> > a global state.  As such, we need to make sure all instances<br>
> > are in the same state.<br>
> ><br>
> > v2: switch to a ref count (Lijo)<br>
> > v3: switch to its own idle work handler<br>
> > v4: fix logic in DPG handling<br>
> ><br>
> > Fixes: 4ce4fe27205c ("drm/amdgpu/vcn: use per instance callbacks for idle work handler")<br>
> > Signed-off-by: Alex Deucher &lt;alexander.deucher@amd.com&gt;<br>
> > ---<br>
> >  drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c | 120 +++++++++++++++++++++++++-<br>
> >  1 file changed, 116 insertions(+), 4 deletions(-)<br>
> ><br>
> > diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c<br>
> > index dff1a88590363..ff03436698a4f 100644<br>
> > --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c<br>
> > +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c<br>
> > @@ -107,6 +107,115 @@ static int amdgpu_ih_clientid_vcns[] = {<br>
> >         SOC15_IH_CLIENTID_VCN1<br>
> >  };<br>
> ><br>
> > +static void vcn_v2_5_idle_work_handler(struct work_struct *work)<br>
> > +{<br>
> > +       struct amdgpu_vcn_inst *vcn_inst =<br>
> > +               container_of(work, struct amdgpu_vcn_inst, idle_work.work);<br>
> > +       struct amdgpu_device *adev = vcn_inst->adev;<br>
> > +       unsigned int fences = 0, fence[AMDGPU_MAX_VCN_INSTANCES] = {0};<br>
> > +       unsigned int i, j;<br>
> > +       int r = 0;<br>
> > +<br>
> > +       for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {<br>
> > +               struct amdgpu_vcn_inst *v = &adev->vcn.inst[i];<br>
> > +<br>
> > +               if (adev->vcn.harvest_config & (1 << i))<br>
> > +                       continue;<br>
> > +<br>
> > +               for (j = 0; j < v->num_enc_rings; ++j)<br>
> > +                       fence[i] += amdgpu_fence_count_emitted(&v->ring_enc[j]);<br>
> > +<br>
> > +               /* Only set DPG pause for VCN3 or below, VCN4 and above will be handled by FW */<br>
> > +               if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG &&<br>
> > +                   !v->using_unified_queue) {<br>
> > +                       struct dpg_pause_state new_state;<br>
> > +<br>
> > +                       if (fence[i] ||<br>
> > +                           unlikely(atomic_read(&v->dpg_enc_submission_cnt)))<br>
> > +                               new_state.fw_based = VCN_DPG_STATE__PAUSE;<br>
> > +                       else<br>
> > +                               new_state.fw_based = VCN_DPG_STATE__UNPAUSE;<br>
> > +<br>
> > +                       v->pause_dpg_mode(v, &new_state);<br>
> > +               }<br>
> > +<br>
> > +               fence[i] += amdgpu_fence_count_emitted(&v->ring_dec);<br>
> > +               fences += fence[i];<br>
> > +<br>
> > +       }<br>
> > +<br>
> > +       if (!fences && !atomic_read(&adev->vcn.inst[0].total_submission_cnt)) {<br>
> > +               amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,<br>
> > +                                                      AMD_PG_STATE_GATE);<br>
> > +               r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO,<br>
> > +                                                   false);<br>
> > +               if (r)<br>
> > +                       dev_warn(adev->dev, "(%d) failed to disable video power profile mode\n", r);<br>
> > +       } else {<br>
> > +               schedule_delayed_work(&adev->vcn.inst[0].idle_work, VCN_IDLE_TIMEOUT);<br>
> > +       }<br>
> > +}<br>
> > +<br>
> > +static void vcn_v2_5_ring_begin_use(struct amdgpu_ring *ring)<br>
> > +{<br>
> > +       struct amdgpu_device *adev = ring->adev;<br>
> > +       struct amdgpu_vcn_inst *v = &adev->vcn.inst[ring->me];<br>
> > +       int r = 0;<br>
> > +<br>
> > +       atomic_inc(&adev->vcn.inst[0].total_submission_cnt);<br>
> > +<br>
> > +       if (!cancel_delayed_work_sync(&adev->vcn.inst[0].idle_work)) {<br>
> > +               r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO,<br>
> > +                                                   true);<br>
> > +               if (r)<br>
> > +                       dev_warn(adev->dev, "(%d) failed to switch to video power profile mode\n", r);<br>
> > +       }<br>
> > +<br>
> > +       mutex_lock(&adev->vcn.inst[0].vcn_pg_lock);<br>
> > +       amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,<br>
> > +                                              AMD_PG_STATE_UNGATE);<br>
> > +<br>
> > +       /* Only set DPG pause for VCN3 or below, VCN4 and above will be handled by FW */<br>
> > +       if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG &&<br>
> > +           !v->using_unified_queue) {<br>
> > +               struct dpg_pause_state new_state;<br>
> > +<br>
> > +               if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC) {<br>
> > +                       atomic_inc(&v->dpg_enc_submission_cnt);<br>
> > +                       new_state.fw_based = VCN_DPG_STATE__PAUSE;<br>
> > +               } else {<br>
> > +                       unsigned int fences = 0;<br>
> > +                       unsigned int i;<br>
> > +<br>
> > +                       for (i = 0; i < v->num_enc_rings; ++i)<br>
> > +                               fences += amdgpu_fence_count_emitted(&v->ring_enc[i]);<br>
> > +<br>
> > +                       if (fences || atomic_read(&v->dpg_enc_submission_cnt))<br>
> > +                               new_state.fw_based = VCN_DPG_STATE__PAUSE;<br>
> > +                       else<br>
> > +                               new_state.fw_based = VCN_DPG_STATE__UNPAUSE;<br>
> > +               }<br>
> > +               v->pause_dpg_mode(v, &new_state);<br>
> > +       }<br>
> > +       mutex_unlock(&adev->vcn.inst[0].vcn_pg_lock);<br>
> > +}<br>
> > +<br>
> > +static void vcn_v2_5_ring_end_use(struct amdgpu_ring *ring)<br>
> > +{<br>
> > +       struct amdgpu_device *adev = ring->adev;<br>
> > +<br>
> > +       /* Only set DPG pause for VCN3 or below, VCN4 and above will be handled by FW */<br>
> > +       if (ring->adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG &&<br>
> > +           ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC &&<br>
> > +           !adev->vcn.inst[ring->me].using_unified_queue)<br>
> > +               atomic_dec(&adev->vcn.inst[ring->me].dpg_enc_submission_cnt);<br>
> > +<br>
> > +       atomic_dec(&adev->vcn.inst[0].total_submission_cnt);<br>
> > +<br>
> > +       schedule_delayed_work(&adev->vcn.inst[0].idle_work,<br>
> > +                             VCN_IDLE_TIMEOUT);<br>
> > +}<br>
> > +<br>
> >  /**<br>
> >   * vcn_v2_5_early_init - set function pointers and load microcode<br>
> >   *<br>
> > @@ -201,6 +310,9 @@ static int vcn_v2_5_sw_init(struct amdgpu_ip_block *ip_block)<br>
> >                 if (r)<br>
> >                         return r;<br>
> ><br>
> > +               /* Override the work func */<br>
> > +               adev->vcn.inst[j].idle_work.work.func = vcn_v2_5_idle_work_handler;<br>
> > +<br>
> >                 amdgpu_vcn_setup_ucode(adev, j);<br>
> ><br>
> >                 r = amdgpu_vcn_resume(adev, j);<br>
> > @@ -1661,8 +1773,8 @@ static const struct amdgpu_ring_funcs vcn_v2_5_dec_ring_vm_funcs = {<br>
> >         .insert_start = vcn_v2_0_dec_ring_insert_start,<br>
> >         .insert_end = vcn_v2_0_dec_ring_insert_end,<br>
> >         .pad_ib = amdgpu_ring_generic_pad_ib,<br>
> > -       .begin_use = amdgpu_vcn_ring_begin_use,<br>
> > -       .end_use = amdgpu_vcn_ring_end_use,<br>
> > +       .begin_use = vcn_v2_5_ring_begin_use,<br>
> > +       .end_use = vcn_v2_5_ring_end_use,<br>
> >         .emit_wreg = vcn_v2_0_dec_ring_emit_wreg,<br>
> >         .emit_reg_wait = vcn_v2_0_dec_ring_emit_reg_wait,<br>
> >         .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,<br>
> > @@ -1759,8 +1871,8 @@ static const struct amdgpu_ring_funcs vcn_v2_5_enc_ring_vm_funcs = {<br>
> >         .insert_nop = amdgpu_ring_insert_nop,<br>
> >         .insert_end = vcn_v2_0_enc_ring_insert_end,<br>
> >         .pad_ib = amdgpu_ring_generic_pad_ib,<br>
> > -       .begin_use = amdgpu_vcn_ring_begin_use,<br>
> > -       .end_use = amdgpu_vcn_ring_end_use,<br>
> > +       .begin_use = vcn_v2_5_ring_begin_use,<br>
> > +       .end_use = vcn_v2_5_ring_end_use,<br>
> >         .emit_wreg = vcn_v2_0_enc_ring_emit_wreg,<br>
> >         .emit_reg_wait = vcn_v2_0_enc_ring_emit_reg_wait,<br>
> >         .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,<br>
> > --<br>
> > 2.48.1<br>
> ><br>
</div>
</span></font></div>
</div>
</body>
</html>