[PATCH] drm/amdgpu: SI support for UVD and VCE power managment
Luben Tuikov
luben.tuikov at amd.com
Fri Jun 26 01:01:07 UTC 2020
On 2020-06-24 7:49 a.m., Alex Jivin wrote:
> Port functionality from the Radeon driver to support
> UVD and VCE power management.
>
> Signed-off-by: Alex Jivin <alex.jivin at amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu.h | 9 +
> drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c | 67 +++--
> drivers/gpu/drm/amd/amdgpu/si.c | 361 ++++++++++++++++++++++++-
> drivers/gpu/drm/amd/amdgpu/si_dpm.c | 19 ++
> drivers/gpu/drm/amd/amdgpu/sid.h | 32 +++
> 5 files changed, 462 insertions(+), 26 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 6c7dd0a707c9..4b5a9a259a21 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -1083,6 +1083,15 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
> tmp_ |= ((val) & ~(mask)); \
> WREG32_PLL(reg, tmp_); \
> } while (0)
> +
> +#define WREG32_SMC_P(_Reg, _Val, _Mask) \
> + do { \
> + u32 tmp = RREG32_SMC(_Reg); \
> + tmp &= (_Mask); \
> + tmp |= ((_Val) & ~(_Mask)); \
> + WREG32_SMC(_Reg, tmp); \
> + } while (0)
> +
> #define DREG32_SYS(sqf, adev, reg) seq_printf((sqf), #reg " : 0x%08X\n", amdgpu_device_rreg((adev), (reg), false))
> #define RREG32_IO(reg) amdgpu_io_rreg(adev, (reg))
> #define WREG32_IO(reg, v) amdgpu_io_wreg(adev, (reg), (v))
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
> index 347b06d3c140..26c8e39a78bd 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
> @@ -3558,21 +3558,36 @@ void amdgpu_dpm_enable_uvd(struct amdgpu_device *adev, bool enable)
> {
> int ret = 0;
There is no reason to pre-initialize "ret" to 0.
Remove it here and define it locally in the "else"
as "int ret;".
>
> - ret = amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_UVD, !enable);
> - if (ret)
> - DRM_ERROR("Dpm %s uvd failed, ret = %d. \n",
> - enable ? "enable" : "disable", ret);
> -
> - /* enable/disable Low Memory PState for UVD (4k videos) */
> - if (adev->asic_type == CHIP_STONEY &&
> - adev->uvd.decode_image_width >= WIDTH_4K) {
> - struct pp_hwmgr *hwmgr = adev->powerplay.pp_handle;
> + if (adev->family == AMDGPU_FAMILY_SI) {
> + if (enable) {
> + mutex_lock(&adev->pm.mutex);
> + adev->pm.dpm.uvd_active = true;
> + adev->pm.dpm.state = POWER_STATE_TYPE_INTERNAL_UVD;
> + mutex_unlock(&adev->pm.mutex);
> + } else {
> + mutex_lock(&adev->pm.mutex);
> + adev->pm.dpm.uvd_active = false;
> + mutex_unlock(&adev->pm.mutex);
> + }
Move the mutex around the compound "if" to have a single matching
mutex_lock()
if (...) {
...
} else {
...
}
mutex_unlock();
I'd not be surprised if the compiler does this already, as it can prove
that the mutex will be taken and released, regardless of the value of "enable".
>
> - if (hwmgr && hwmgr->hwmgr_func &&
> - hwmgr->hwmgr_func->update_nbdpm_pstate)
> - hwmgr->hwmgr_func->update_nbdpm_pstate(hwmgr,
> - !enable,
> - true);
> + amdgpu_pm_compute_clocks(adev);
> + } else {
> + ret = amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_UVD, !enable);
> + if (ret)
> + DRM_ERROR("Dpm %s uvd failed, ret = %d. \n",
Remove the space before the quoted newline.
> + enable ? "enable" : "disable", ret);
> +
> + /* enable/disable Low Memory PState for UVD (4k videos) */
> + if (adev->asic_type == CHIP_STONEY &&
> + adev->uvd.decode_image_width >= WIDTH_4K) {
The line above should be aligned with the open parenthesis.
> + struct pp_hwmgr *hwmgr = adev->powerplay.pp_handle;
> +
> + if (hwmgr && hwmgr->hwmgr_func &&
> + hwmgr->hwmgr_func->update_nbdpm_pstate)
> + hwmgr->hwmgr_func->update_nbdpm_pstate(hwmgr,
> + !enable,
> + true);
> + }
> }
> }
>
> @@ -3580,10 +3595,26 @@ void amdgpu_dpm_enable_vce(struct amdgpu_device *adev, bool enable)
> {
> int ret = 0;
int ret;
>
> - ret = amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_VCE, !enable);
> - if (ret)
> - DRM_ERROR("Dpm %s vce failed, ret = %d. \n",
> - enable ? "enable" : "disable", ret);
> + if (adev->family == AMDGPU_FAMILY_SI) {
> + if (enable) {
> + mutex_lock(&adev->pm.mutex);
> + adev->pm.dpm.vce_active = true;
> + /* XXX select vce level based on ring/task */
> + adev->pm.dpm.vce_level = AMD_VCE_LEVEL_AC_ALL;
> + mutex_unlock(&adev->pm.mutex);
> + } else {
> + mutex_lock(&adev->pm.mutex);
> + adev->pm.dpm.vce_active = false;
> + mutex_unlock(&adev->pm.mutex);
> + }
Move the mutex outside the if() as it is independent of
the conditional "if (enable)", i.e. the mutex is ALWAYS
taken.
> +
> + amdgpu_pm_compute_clocks(adev);
> + } else {
> + ret = amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_VCE, !enable);
> + if (ret)
> + DRM_ERROR("Dpm %s vce failed, ret = %d. \n",
Remove the space before the quoted newline.
> + enable ? "enable" : "disable", ret);
> + }
> }
>
> void amdgpu_pm_print_power_states(struct amdgpu_device *adev)
> diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c
> index cda9aa5e4b9e..471489436b49 100644
> --- a/drivers/gpu/drm/amd/amdgpu/si.c
> +++ b/drivers/gpu/drm/amd/amdgpu/si.c
> @@ -1266,12 +1266,6 @@ static u32 si_get_xclk(struct amdgpu_device *adev)
> return reference_clock;
> }
>
> -//xxx:not implemented
> -static int si_set_uvd_clocks(struct amdgpu_device *adev, u32 vclk, u32 dclk)
> -{
> - return 0;
> -}
> -
> static void si_flush_hdp(struct amdgpu_device *adev, struct amdgpu_ring *ring)
> {
> if (!ring || !ring->funcs->emit_wreg) {
> @@ -1427,6 +1421,358 @@ static uint64_t si_get_pcie_replay_count(struct amdgpu_device *adev)
> return (nak_r + nak_g);
> }
>
> +static int si_uvd_send_upll_ctlreq(struct amdgpu_device *adev,
> + unsigned cg_upll_func_cntl)
> +{
> + unsigned i;
Use "unsigned int".
Or just "int". Both will do equally
well to iterate from 0 to SI_MAX_CTLACKS_ASSERTION_WAIT.
You're not using this variable in "size_t" promotion,
so it's all good.
> +
> + /* Make sure UPLL_CTLREQ is deasserted */
> + WREG32_P(cg_upll_func_cntl, 0, ~UPLL_CTLREQ_MASK);
> +
> + mdelay(10);
> +
> + /* Assert UPLL_CTLREQ */
> + WREG32_P(cg_upll_func_cntl, UPLL_CTLREQ_MASK, ~UPLL_CTLREQ_MASK);
Here you'll probably need to wait a little bit, for the hardware
to actually act on your register write, before
you read the status below. In my experience CPUs are very fast
compared to external HW over a bus, and the first back-to-back
READ will almost certainly return "not yet".
Traditionally the pause here to "do something" is larger than
the "polling pause" below--the "polling pause" being smaller
as it could be done "anytime now".
> +
> + /* Wait for CTLACK and CTLACK2 to get asserted */
> + for (i = 0; i < SI_MAX_CTLACKS_ASSERTION_WAIT; ++i) {
> + uint32_t mask = UPLL_CTLACK_MASK | UPLL_CTLACK2_MASK;
> +
> + if ((RREG32(cg_upll_func_cntl) & mask) == mask)
> + break;
> + mdelay(10);
> + }
> +
> + /* Deassert UPLL_CTLREQ */
> + WREG32_P(cg_upll_func_cntl, 0, ~UPLL_CTLREQ_MASK);
> +
> + if (i == SI_MAX_CTLACKS_ASSERTION_WAIT) {
> + DRM_ERROR("Timeout setting UVD clocks!\n");
> + return -ETIMEDOUT;
There is a corner case here. Look at your loop above.
You check for assertion, then you wait 10 ms, then
increment i, and if i >= SI_MAX... you quit and declare
defeat.
What if it was asserted during that very last delay?
10 ms is a long time...
> + }
> +
> + return 0;
> +}
> +
> +static unsigned si_uvd_calc_upll_post_div(unsigned vco_freq,
> + unsigned target_freq,
> + unsigned pd_min,
> + unsigned pd_even)
> +{
> + unsigned post_div = vco_freq / target_freq;
unsigned int;
> +
> + /* Adjust to post divider minimum value */
> + if (post_div < pd_min)
> + post_div = pd_min;
> +
> + /* We alway need a frequency less than or equal the target */
> + if ((vco_freq / post_div) > target_freq)
> + post_div += 1;
Had we not conditionally overwritten post_div with pd_min above,
then this check is equivalent to checking "vco_freq % target_freq > 0".
If there is a remainder, then increment post_div. It's almost as if
it is trying to find a divisor of vco_freq to get target_freq without
a remainder--when plugged into the loop of the calling function.
> +
> + /* Post dividers above a certain value must be even */
> + if (post_div > pd_even && post_div % 2)
> + post_div += 1;
If we didn't have the "if (post_div > pd_even"
then this would be just a single line without
a conditional:
post_div = (post_div + 1) & ~1.
> +
> + return post_div;
> +}
> +
> +/**
> + * si_calc_upll_dividers - calc UPLL clock dividers
> + *
> + * @adev: amdgpu_device pointer
> + * @vclk: wanted VCLK
> + * @dclk: wanted DCLK
> + * @vco_min: minimum VCO frequency
> + * @vco_max: maximum VCO frequency
> + * @fb_factor: factor to multiply vco freq with
> + * @fb_mask: limit and bitmask for feedback divider
> + * @pd_min: post divider minimum
> + * @pd_max: post divider maximum
> + * @pd_even: post divider must be even above this value
> + * @optimal_fb_div: resulting feedback divider
> + * @optimal_vclk_div: resulting vclk post divider
> + * @optimal_dclk_div: resulting dclk post divider
> + *
> + * Calculate dividers for UVDs UPLL (except APUs).
A more detailed comment here would be good--explaining exactly
what is wanted and how it is achieved. The iterative (almost
Newtonian-approximation like) method of calculating those values
should be described and most importantly, regardless of the method
used, what we want to get.
> + * Returns zero on success; -EINVAL on error.
> + */
> +static int si_calc_upll_dividers(struct amdgpu_device *adev,
> + unsigned vclk, unsigned dclk,
> + unsigned vco_min, unsigned vco_max,
> + unsigned fb_factor, unsigned fb_mask,
> + unsigned pd_min, unsigned pd_max,
> + unsigned pd_even,
> + unsigned *optimal_fb_div,
> + unsigned *optimal_vclk_div,
> + unsigned *optimal_dclk_div)
> +{
> + unsigned vco_freq, ref_freq = adev->clock.spll.reference_freq;
> +
> + /* Start off with something large */
> + unsigned optimal_score = ~0;
unsigned int;
> +
> + /* Loop through vco from low to high */
> + vco_min = max(max(vco_min, vclk), dclk);
> + for (vco_freq = vco_min; vco_freq <= vco_max; vco_freq += 100) {
> + uint64_t fb_div = (uint64_t)vco_freq * fb_factor;
> + unsigned vclk_div, dclk_div, score;
> +
> + do_div(fb_div, ref_freq);
> +
> + /* fb div out of range ? */
> + if (fb_div > fb_mask)
> + break; /* It can oly get worse */
> +
> + fb_div &= fb_mask;
> +
> + /* Calc vclk divider with current vco freq */
> + vclk_div = si_uvd_calc_upll_post_div(vco_freq, vclk,
> + pd_min, pd_even);
> + if (vclk_div > pd_max)
> + break; /* vco is too big, it has to stop */
> +
> + /* Calc dclk divider with current vco freq */
> + dclk_div = si_uvd_calc_upll_post_div(vco_freq, dclk,
> + pd_min, pd_even);
> + if (dclk_div > pd_max)
> + break; /* vco is too big, it has to stop */
> +
> + /* Calc score with current vco freq */
> + score = vclk - (vco_freq / vclk_div) + dclk - (vco_freq / dclk_div);
> +
> + /* Determine if this vco setting is better than current optimal settings */
> + if (score < optimal_score) {
> + *optimal_fb_div = fb_div;
> + *optimal_vclk_div = vclk_div;
> + *optimal_dclk_div = dclk_div;
> + optimal_score = score;
> + if (optimal_score == 0)
> + break; /* It can't get better than this */
> + }
> + }
Isn't there a closed form of this iterative calculation?
(I feel there should be, but not sure what it is trying to achieve.)
> +
> + /* Did we found a valid setup ? */
> + if (optimal_score == ~0)
> + return -EINVAL;
> +
> + return 0;
> +}
> +
> +static int si_set_uvd_clocks(struct amdgpu_device *adev, u32 vclk, u32 dclk)
> +{
> + unsigned fb_div = 0, vclk_div = 0, dclk_div = 0;
unsigned int;
> + int a;
> +
> + /* Bypass vclk and dclk with bclk */
> + WREG32_P(CG_UPLL_FUNC_CNTL_2,
> + VCLK_SRC_SEL(1) | DCLK_SRC_SEL(1),
> + ~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));
> +
> + /* Put PLL in bypass mode */
> + WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_BYPASS_EN_MASK, ~UPLL_BYPASS_EN_MASK);
> +
> + if (!vclk || !dclk) {
> + /* Keep the Bypass mode */
> + return 0;
> + }
> +
> + a = si_calc_upll_dividers(adev, vclk, dclk, 125000, 250000,
> + 16384, 0x03FFFFFF, 0, 128, 5,
> + &fb_div, &vclk_div, &dclk_div);
Hardcoded constants?
Will those hardcoded constants make sense for all boards in amdgpu?
How about the hardcoded time delays?
> + if (a)
> + return a;
> +
> + /* Set RESET_ANTI_MUX to 0 */
> + WREG32_P(CG_UPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);
> +
> + /* Set VCO_MODE to 1 */
> + WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_VCO_MODE_MASK, ~UPLL_VCO_MODE_MASK);
> +
> + /* Disable sleep mode */
> + WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_SLEEP_MASK);
> +
> + /* Deassert UPLL_RESET */
> + WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);
> +
> + mdelay(1);
> +
> + a = si_uvd_send_upll_ctlreq(adev, CG_UPLL_FUNC_CNTL);
> + if (a)
> + return a;
> +
> + /* Assert UPLL_RESET again */
> + WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_RESET_MASK, ~UPLL_RESET_MASK);
> +
> + /* Disable spread spectrum. */
> + WREG32_P(CG_UPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);
> +
> + /* Set feedback divider */
> + WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(fb_div), ~UPLL_FB_DIV_MASK);
> +
> + /* Set ref divider to 0 */
> + WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_REF_DIV_MASK);
> +
> + if (fb_div < 307200)
> + WREG32_P(CG_UPLL_FUNC_CNTL_4, 0, ~UPLL_SPARE_ISPARE9);
> + else
> + WREG32_P(CG_UPLL_FUNC_CNTL_4,
> + UPLL_SPARE_ISPARE9,
> + ~UPLL_SPARE_ISPARE9);
> +
> + /* Set PDIV_A and PDIV_B */
> + WREG32_P(CG_UPLL_FUNC_CNTL_2,
> + UPLL_PDIV_A(vclk_div) | UPLL_PDIV_B(dclk_div),
> + ~(UPLL_PDIV_A_MASK | UPLL_PDIV_B_MASK));
> +
> + /* Give the PLL some time to settle */
> + mdelay(15);
> +
> + /* Deassert PLL_RESET */
> + WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);
> +
> + mdelay(15);
> +
> + /* Switch from bypass mode to normal mode */
> + WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_BYPASS_EN_MASK);
> +
> + a = si_uvd_send_upll_ctlreq(adev, CG_UPLL_FUNC_CNTL);
> + if (a)
> + return a;
> +
> + /* Switch VCLK and DCLK selection */
> + WREG32_P(CG_UPLL_FUNC_CNTL_2,
> + VCLK_SRC_SEL(2) | DCLK_SRC_SEL(2),
> + ~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));
> +
> + mdelay(100);
> +
> + return 0;
> +}
> +
> +static int si_vce_send_vcepll_ctlreq(struct amdgpu_device *adev)
> +{
> + unsigned i;
> +
> + /* Make sure VCEPLL_CTLREQ is deasserted */
> + WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~UPLL_CTLREQ_MASK);
> +
> + mdelay(10);
> +
> + /* Assert UPLL_CTLREQ */
> + WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, UPLL_CTLREQ_MASK, ~UPLL_CTLREQ_MASK);
> +
> + /* Wait for CTLACK and CTLACK2 to get asserted */
> + for (i = 0; i < SI_MAX_CTLACKS_ASSERTION_WAIT; ++i) {
> + uint32_t mask = UPLL_CTLACK_MASK | UPLL_CTLACK2_MASK;
> +
> + if ((RREG32_SMC(CG_VCEPLL_FUNC_CNTL) & mask) == mask)
> + break;
> + mdelay(10);
> + }
> +
> + /* Deassert UPLL_CTLREQ */
> + WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~UPLL_CTLREQ_MASK);
> +
> + if (i == SI_MAX_CTLACKS_ASSERTION_WAIT) {
> + DRM_ERROR("Timeout setting UVD clocks!\n");
> + return -ETIMEDOUT;
> + }
> +
> + return 0;
> +}
> +
> +static int si_set_vce_clocks(struct amdgpu_device *adev, u32 evclk, u32 ecclk)
> +{
> + unsigned fb_div = 0, evclk_div = 0, ecclk_div = 0;
unsigned int;
> + int a;
> +
> + /* Bypass evclk and ecclk with bclk */
> + WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2,
> + EVCLK_SRC_SEL(1) | ECCLK_SRC_SEL(1),
> + ~(EVCLK_SRC_SEL_MASK | ECCLK_SRC_SEL_MASK));
> +
> + /* Put PLL in bypass mode */
> + WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_BYPASS_EN_MASK,
> + ~VCEPLL_BYPASS_EN_MASK);
> +
> + if (!evclk || !ecclk) {
> + /* Keep the Bypass mode, put PLL to sleep */
> + WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_SLEEP_MASK,
> + ~VCEPLL_SLEEP_MASK);
> + return 0;
> + }
> +
> + a = si_calc_upll_dividers(adev, evclk, ecclk, 125000, 250000,
> + 16384, 0x03FFFFFF, 0, 128, 5,
> + &fb_div, &evclk_div, &ecclk_div);
> + if (a)
> + return a;
> +
> + /* Set RESET_ANTI_MUX to 0 */
> + WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);
> +
> + /* Set VCO_MODE to 1 */
> + WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_VCO_MODE_MASK,
> + ~VCEPLL_VCO_MODE_MASK);
> +
> + /* Toggle VCEPLL_SLEEP to 1 then back to 0 */
> + WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_SLEEP_MASK,
> + ~VCEPLL_SLEEP_MASK);
> + WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_SLEEP_MASK);
> +
> + /* Deassert VCEPLL_RESET */
> + WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_RESET_MASK);
> +
> + mdelay(1);
> +
> + a = si_vce_send_vcepll_ctlreq(adev);
> + if (a)
> + return a;
> +
> + /* Assert VCEPLL_RESET again */
> + WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_RESET_MASK, ~VCEPLL_RESET_MASK);
> +
> + /* Disable spread spectrum. */
> + WREG32_SMC_P(CG_VCEPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);
> +
> + /* Set feedback divider */
> + WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_3,
> + VCEPLL_FB_DIV(fb_div),
> + ~VCEPLL_FB_DIV_MASK);
> +
> + /* Set ref divider to 0 */
> + WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_REF_DIV_MASK);
> +
> + /* Set PDIV_A and PDIV_B */
> + WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2,
> + VCEPLL_PDIV_A(evclk_div) | VCEPLL_PDIV_B(ecclk_div),
> + ~(VCEPLL_PDIV_A_MASK | VCEPLL_PDIV_B_MASK));
> +
> + /* Give the PLL some time to settle */
> + mdelay(15);
> +
> + /* Deassert PLL_RESET */
> + WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_RESET_MASK);
> +
> + mdelay(15);
> +
> + /* Switch from bypass mode to normal mode */
> + WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_BYPASS_EN_MASK);
> +
> + a = si_vce_send_vcepll_ctlreq(adev);
> + if (a)
> + return a;
> +
> + /* Switch VCLK and DCLK selection */
> + WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2,
> + EVCLK_SRC_SEL(16) | ECCLK_SRC_SEL(16),
> + ~(EVCLK_SRC_SEL_MASK | ECCLK_SRC_SEL_MASK));
> +
> + mdelay(100);
> +
> + return 0;
> +}
> +
> static const struct amdgpu_asic_funcs si_asic_funcs =
> {
> .read_disabled_bios = &si_read_disabled_bios,
> @@ -1437,7 +1783,7 @@ static const struct amdgpu_asic_funcs si_asic_funcs =
> .set_vga_state = &si_vga_set_state,
> .get_xclk = &si_get_xclk,
> .set_uvd_clocks = &si_set_uvd_clocks,
> - .set_vce_clocks = NULL,
> + .set_vce_clocks = &si_set_vce_clocks,
> .get_pcie_lanes = &si_get_pcie_lanes,
> .set_pcie_lanes = &si_set_pcie_lanes,
> .get_config_memsize = &si_get_config_memsize,
> @@ -2228,4 +2574,3 @@ int si_set_ip_blocks(struct amdgpu_device *adev)
> }
> return 0;
> }
> -
> diff --git a/drivers/gpu/drm/amd/amdgpu/si_dpm.c b/drivers/gpu/drm/amd/amdgpu/si_dpm.c
> index c00ba4b23c9a..ea914b256ebd 100644
> --- a/drivers/gpu/drm/amd/amdgpu/si_dpm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/si_dpm.c
> @@ -6953,6 +6953,24 @@ static int si_power_control_set_level(struct amdgpu_device *adev)
> return 0;
> }
>
> +static void si_set_vce_clock(struct amdgpu_device *adev,
> + struct amdgpu_ps *new_rps,
> + struct amdgpu_ps *old_rps)
> +{
> + if ((old_rps->evclk != new_rps->evclk) ||
> + (old_rps->ecclk != new_rps->ecclk)) {
You don't need parenthesis around != due to precedence rules.
> + /* Turn the clocks on when encoding, off otherwise */
> + if (new_rps->evclk || new_rps->ecclk) {
> + /* Place holder for future VCE1.0 porting to amdgpu
> + vce_v1_0_enable_mgcg(adev, false, false);*/
Don't comment out code like this as it is not very visible that it is a comment.
Instead the kernel prefers block comments like this:
/* Existing comment.
* commented out code;
* ...
*/
> + } else {
> + /* Place holder for future VCE1.0 porting to amdgpu
> + vce_v1_0_enable_mgcg(adev, true, false);
> + amdgpu_asic_set_vce_clocks(adev, new_rps->evclk, new_rps->ecclk);*/
See note on comment above.
Since the porting is done (with this patch), shouldn't these be uncommented?
Regards,
Luben
> + }
> + }
> +}
> +
> static int si_dpm_set_power_state(void *handle)
> {
> struct amdgpu_device *adev = (struct amdgpu_device *)handle;
> @@ -7029,6 +7047,7 @@ static int si_dpm_set_power_state(void *handle)
> return ret;
> }
> ni_set_uvd_clock_after_set_eng_clock(adev, new_ps, old_ps);
> + si_set_vce_clock(adev, new_ps, old_ps);
> if (eg_pi->pcie_performance_request)
> si_notify_link_speed_change_after_state_change(adev, new_ps, old_ps);
> ret = si_set_power_state_conditionally_enable_ulv(adev, new_ps);
> diff --git a/drivers/gpu/drm/amd/amdgpu/sid.h b/drivers/gpu/drm/amd/amdgpu/sid.h
> index 75b5d441b628..6156a795ad6f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sid.h
> +++ b/drivers/gpu/drm/amd/amdgpu/sid.h
> @@ -47,6 +47,7 @@
> #define SI_MAX_LDS_NUM 0xFFFF
> #define SI_MAX_TCC 16
> #define SI_MAX_TCC_MASK 0xFFFF
> +#define SI_MAX_CTLACKS_ASSERTION_WAIT 100
>
> #define AMDGPU_NUM_OF_VMIDS 8
>
> @@ -2479,4 +2480,35 @@
>
> #define MC_VM_FB_OFFSET 0x81a
>
> +/* Discrete VCE clocks */
> +#define CG_VCEPLL_FUNC_CNTL 0xc0030600
> +#define VCEPLL_RESET_MASK 0x00000001
> +#define VCEPLL_SLEEP_MASK 0x00000002
> +#define VCEPLL_BYPASS_EN_MASK 0x00000004
> +#define VCEPLL_CTLREQ_MASK 0x00000008
> +#define VCEPLL_VCO_MODE_MASK 0x00000600
> +#define VCEPLL_REF_DIV_MASK 0x003F0000
> +#define VCEPLL_CTLACK_MASK 0x40000000
> +#define VCEPLL_CTLACK2_MASK 0x80000000
> +
> +#define CG_VCEPLL_FUNC_CNTL_2 0xc0030601
> +#define VCEPLL_PDIV_A(x) ((x) << 0)
> +#define VCEPLL_PDIV_A_MASK 0x0000007F
> +#define VCEPLL_PDIV_B(x) ((x) << 8)
> +#define VCEPLL_PDIV_B_MASK 0x00007F00
> +#define EVCLK_SRC_SEL(x) ((x) << 20)
> +#define EVCLK_SRC_SEL_MASK 0x01F00000
> +#define ECCLK_SRC_SEL(x) ((x) << 25)
> +#define ECCLK_SRC_SEL_MASK 0x3E000000
> +
> +#define CG_VCEPLL_FUNC_CNTL_3 0xc0030602
> +#define VCEPLL_FB_DIV(x) ((x) << 0)
> +#define VCEPLL_FB_DIV_MASK 0x01FFFFFF
> +
> +#define CG_VCEPLL_FUNC_CNTL_4 0xc0030603
> +
> +#define CG_VCEPLL_FUNC_CNTL_5 0xc0030604
> +#define CG_VCEPLL_SPREAD_SPECTRUM 0xc0030606
> +#define VCEPLL_SSEN_MASK 0x00000001
> +
> #endif
>
More information about the amd-gfx
mailing list