drm-openchrome: Branch 'drm-next-6.8' - 74 commits - drivers/gpu/drm

Tue Jan 23 21:18:17 UTC 2024

drivers/gpu/drm/amd/amdgpu/amdgpu.h                                    |    3 
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c                             |   11 
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h                             |    2 
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c                       |    4 
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c                             |   33 --
 drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c                          |    2 
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c                                |   47 ---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c                                |   21 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c                                |    7 
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c                                |    2 
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c                                |   26 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h                              |    1 
 drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c                              |    2 
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h                                 |    2 
 drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c                                |   10 
 drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h                               |    1 
 drivers/gpu/drm/amd/amdgpu/athub_v3_0.c                                |    8 
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c                                 |   15 -
 drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c                                 |    5 
 drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c                               |   10 
 drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c                                 |    3 
 drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c                                 |    3 
 drivers/gpu/drm/amd/amdgpu/umc_v6_7.c                                  |    2 
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.c                               |    6 
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h                                  |    2 
 drivers/gpu/drm/amd/amdkfd/kfd_process.c                               |    7 
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c                                   |   42 +--
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c                      |    8 
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c                |    2 
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c              |    5 
 drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c         |   92 +++++-
 drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.h         |   11 
 drivers/gpu/drm/amd/display/dc/core/dc.c                               |   18 -
 drivers/gpu/drm/amd/display/dc/core/dc_resource.c                      |   18 -
 drivers/gpu/drm/amd/display/dc/core/dc_state.c                         |    8 
 drivers/gpu/drm/amd/display/dc/dc.h                                    |    3 
 drivers/gpu/drm/amd/display/dc/dc_stream.h                             |    2 
 drivers/gpu/drm/amd/display/dc/dc_types.h                              |   12 
 drivers/gpu/drm/amd/display/dc/dce/dce_audio.c                         |    2 
 drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c                  |    9 
 drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c          |   14 +
 drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c                   |   11 
 drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c                   |    6 
 drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c                |   32 +-
 drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c          |    2 
 drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c              |    2 
 drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c                |    2 
 drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c                |   45 ---
 drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h                        |    1 
 drivers/gpu/drm/amd/display/dc/inc/resource.h                          |    3 
 drivers/gpu/drm/amd/display/dc/link/link_dpms.c                        |   50 +++
 drivers/gpu/drm/amd/display/dc/link/link_validation.c                  |   60 +++-
 drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia.c           |   36 +-
 drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c        |   60 +++-
 drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.h        |    9 
 drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c |   11 
 drivers/gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.c                 |   22 +
 drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.c                 |   15 -
 drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c         |    2 
 drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h         |    3 
 drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c       |    2 
 drivers/gpu/drm/amd/display/include/audio_types.h                      |    2 
 drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_11_0_offset.h         |    8 
 drivers/gpu/drm/amd/pm/amdgpu_pm.c                                     |   28 +-
 drivers/gpu/drm/amd/pm/powerplay/hwmgr/process_pptables_v1_0.c         |    2 
 drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c                    |   17 +
 drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c                   |   15 -
 drivers/gpu/drm/nouveau/nouveau_vmm.c                                  |    3 
 drivers/gpu/drm/xe/Kconfig                                             |    2 
 drivers/gpu/drm/xe/Makefile                                            |    1 
 drivers/gpu/drm/xe/compat-i915-headers/intel_wakeref.h                 |    2 
 drivers/gpu/drm/xe/tests/xe_bo.c                                       |    5 
 drivers/gpu/drm/xe/tests/xe_migrate.c                                  |    2 
 drivers/gpu/drm/xe/xe_bo.c                                             |   16 -
 drivers/gpu/drm/xe/xe_device.c                                         |    2 
 drivers/gpu/drm/xe/xe_device_types.h                                   |    8 
 drivers/gpu/drm/xe/xe_exec.c                                           |    7 
 drivers/gpu/drm/xe/xe_exec_queue.c                                     |    5 
 drivers/gpu/drm/xe/xe_exec_queue_types.h                               |    6 
 drivers/gpu/drm/xe/xe_gt_freq.c                                        |    3 
 drivers/gpu/drm/xe/xe_guc.c                                            |    7 
 drivers/gpu/drm/xe/xe_guc_submit.c                                     |    7 
 drivers/gpu/drm/xe/xe_migrate.c                                        |  133 ++++++----
 drivers/gpu/drm/xe/xe_mmio.c                                           |    2 
 drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c                                 |    4 
 drivers/gpu/drm/xe/xe_vm.c                                             |   15 -
 86 files changed, 693 insertions(+), 464 deletions(-)

New commits:
commit 5e172468736649a1ba59a02d10538d09f535a399
Merge: d6b706bb0a20 009f0a64f9cc
Author: Kevin Brace <kevinbrace at bracecomputerlab.com>
Date:   Tue Jan 23 14:38:42 2024 -0600

    Merge tag 'drm-next-2024-01-19' of git://anongit.freedesktop.org/drm/drm into drm-next-6.8
    
    drm fixes for 6.8-rc1
    
    amdgpu:
    - DSC fixes
    - DC resource pool fixes
    - OTG fix
    - DML2 fixes
    - Aux fix
    - GFX10 RLC firmware handling fix
    - Revert a broken workaround for SMU 13.0.2
    - DC writeback fix
    - Enable gfxoff when ROCm apps are active on gfx11 with the proper FW version
    
    amdkfd:
    - Fix dma-buf exports using GEM handles
    
    nouveau:
    - fix a unneeded WARN_ON triggering
    
    xe:
    - Fix for definition of wakeref_t
    - Fix for an error code aliasing
    - Fix for VM_UNBIND_ALL in the case there are no bound VMAs
    - Fixes for a number of __iomem address space mismatches reported by sparse
    - Fixes for the assignment of exec_queue priority
    - A Fix for skip_guc_pc not taking effect
    - Workaround for a build problem on GCC 11
    - A couple of fixes for error paths
    - Fix a Flat CCS compression metadata copy issue
    - Fix a misplace array bounds checking
    - Don't have display support depend on EXPERT (as discussed on IRC)

commit 009f0a64f9ccee9db9d758b883059e5c74bb7330
Merge: cacea81390fd bf3ff145df18
Author: Dave Airlie <airlied at redhat.com>
Date:   Fri Jan 19 16:13:44 2024 +1000

    Merge tag 'drm-xe-next-fixes-2024-01-16' of https://gitlab.freedesktop.org/drm/xe/kernel into drm-next
    
    Driver Changes:
    - Fix for definition of wakeref_t
    - Fix for an error code aliasing
    - Fix for VM_UNBIND_ALL in the case there are no bound VMAs
    - Fixes for a number of __iomem address space mismatches reported by sparse
    - Fixes for the assignment of exec_queue priority
    - A Fix for skip_guc_pc not taking effect
    - Workaround for a build problem on GCC 11
    - A couple of fixes for error paths
    - Fix a Flat CCS compression metadata copy issue
    - Fix a misplace array bounds checking
    - Don't have display support depend on EXPERT (as discussed on IRC)
    
    Signed-off-by: Dave Airlie <airlied at redhat.com>
    
    From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= <thomas.hellstrom at linux.intel.com>
    Link: https://patchwork.freedesktop.org/patch/msgid/20240116102204.106520-1-thomas.hellstrom@linux.intel.com

commit cacea81390fd8c8c85404e5eb2adeb83d87a912e
Author: Dave Airlie <airlied at redhat.com>
Date:   Thu Jan 18 06:19:57 2024 +1000

    nouveau/vmm: don't set addr on the fail path to avoid warning
    
    nvif_vmm_put gets called if addr is set, but if the allocation
    fails we don't need to call put, otherwise we get a warning like
    
    [523232.435671] ------------[ cut here ]------------
    [523232.435674] WARNING: CPU: 8 PID: 1505697 at drivers/gpu/drm/nouveau/nvif/vmm.c:68 nvif_vmm_put+0x72/0x80 [nouveau]
    [523232.435795] Modules linked in: uinput rfcomm snd_seq_dummy snd_hrtimer nf_conntrack_netbios_ns nf_conntrack_broadcast nft_fib_inet nft_fib_ipv4 nft_fib_ipv6 nft_fib nft_reject_inet nf_reject_ipv4 nf_reject_ipv6 nft_reject nft_ct nft_chain_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 ip_set nf_tables nfnetlink qrtr bnep sunrpc binfmt_misc intel_rapl_msr intel_rapl_common intel_uncore_frequency intel_uncore_frequency_common isst_if_common iwlmvm nfit libnvdimm vfat fat x86_pkg_temp_thermal intel_powerclamp mac80211 snd_soc_avs snd_soc_hda_codec coretemp snd_hda_ext_core snd_soc_core snd_hda_codec_realtek kvm_intel snd_hda_codec_hdmi snd_compress snd_hda_codec_generic ac97_bus snd_pcm_dmaengine snd_hda_intel libarc4 snd_intel_dspcfg snd_intel_sdw_acpi snd_hda_codec kvm iwlwifi snd_hda_core btusb snd_hwdep btrtl snd_seq btintel irqbypass btbcm rapl snd_seq_device eeepc_wmi btmtk intel_cstate iTCO_wdt cfg80211 snd_pcm asus_wmi bluetooth intel_pmc_bxt iTCO_vendor_support s
 nd_timer ledtrig_audio pktcdvd snd mei_me
    [523232.435828]  sparse_keymap intel_uncore i2c_i801 platform_profile wmi_bmof mei pcspkr ioatdma soundcore i2c_smbus rfkill idma64 dca joydev acpi_tad loop zram nouveau drm_ttm_helper ttm video drm_exec drm_gpuvm gpu_sched crct10dif_pclmul i2c_algo_bit nvme crc32_pclmul crc32c_intel drm_display_helper polyval_clmulni nvme_core polyval_generic e1000e mxm_wmi cec ghash_clmulni_intel r8169 sha512_ssse3 nvme_common wmi pinctrl_sunrisepoint uas usb_storage ip6_tables ip_tables fuse
    [523232.435849] CPU: 8 PID: 1505697 Comm: gnome-shell Tainted: G        W          6.6.0-rc7-nvk-uapi+ #12
    [523232.435851] Hardware name: System manufacturer System Product Name/ROG STRIX X299-E GAMING II, BIOS 1301 09/24/2021
    [523232.435852] RIP: 0010:nvif_vmm_put+0x72/0x80 [nouveau]
    [523232.435934] Code: 00 00 48 89 e2 be 02 00 00 00 48 c7 04 24 00 00 00 00 48 89 44 24 08 e8 fc bf ff ff 85
    c0 75 0a 48 c7 43 08 00 00 00 00 eb b3 <0f> 0b eb f2 e8 f5 c9 b2 e6 0f 1f 44 00 00 90 90 90 90 90 90 90 90
    [523232.435936] RSP: 0018:ffffc900077ffbd8 EFLAGS: 00010282
    [523232.435937] RAX: 00000000fffffffe RBX: ffffc900077ffc00 RCX: 0000000000000010
    [523232.435938] RDX: 0000000000000010 RSI: ffffc900077ffb38 RDI: ffffc900077ffbd8
    [523232.435940] RBP: ffff888e1c4f2140 R08: 0000000000000000 R09: 0000000000000000
    [523232.435940] R10: 0000000000000000 R11: 0000000000000000 R12: ffff888503811800
    [523232.435941] R13: ffffc900077ffca0 R14: ffff888e1c4f2140 R15: ffff88810317e1e0
    [523232.435942] FS:  00007f933a769640(0000) GS:ffff88905fa00000(0000) knlGS:0000000000000000
    [523232.435943] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
    [523232.435944] CR2: 00007f930bef7000 CR3: 00000005d0322001 CR4: 00000000003706e0
    [523232.435945] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
    [523232.435946] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
    [523232.435964] Call Trace:
    [523232.435965]  <TASK>
    [523232.435966]  ? nvif_vmm_put+0x72/0x80 [nouveau]
    [523232.436051]  ? __warn+0x81/0x130
    [523232.436055]  ? nvif_vmm_put+0x72/0x80 [nouveau]
    [523232.436138]  ? report_bug+0x171/0x1a0
    [523232.436142]  ? handle_bug+0x3c/0x80
    [523232.436144]  ? exc_invalid_op+0x17/0x70
    [523232.436145]  ? asm_exc_invalid_op+0x1a/0x20
    [523232.436149]  ? nvif_vmm_put+0x72/0x80 [nouveau]
    [523232.436230]  ? nvif_vmm_put+0x64/0x80 [nouveau]
    [523232.436342]  nouveau_vma_del+0x80/0xd0 [nouveau]
    [523232.436506]  nouveau_vma_new+0x1a0/0x210 [nouveau]
    [523232.436671]  nouveau_gem_object_open+0x1d0/0x1f0 [nouveau]
    [523232.436835]  drm_gem_handle_create_tail+0xd1/0x180
    [523232.436840]  drm_prime_fd_to_handle_ioctl+0x12e/0x200
    [523232.436844]  ? __pfx_drm_prime_fd_to_handle_ioctl+0x10/0x10
    [523232.436847]  drm_ioctl_kernel+0xd3/0x180
    [523232.436849]  drm_ioctl+0x26d/0x4b0
    [523232.436851]  ? __pfx_drm_prime_fd_to_handle_ioctl+0x10/0x10
    [523232.436855]  nouveau_drm_ioctl+0x5a/0xb0 [nouveau]
    [523232.437032]  __x64_sys_ioctl+0x94/0xd0
    [523232.437036]  do_syscall_64+0x5d/0x90
    [523232.437040]  ? syscall_exit_to_user_mode+0x2b/0x40
    [523232.437044]  ? do_syscall_64+0x6c/0x90
    [523232.437046]  entry_SYSCALL_64_after_hwframe+0x6e/0xd8
    
    Reported-by: Faith Ekstrand <faith.ekstrand at collabora.com>
    Cc: stable at vger.kernel.org
    Signed-off-by: Dave Airlie <airlied at redhat.com>
    Link: https://patchwork.freedesktop.org/patch/msgid/20240117213852.295565-1-airlied@gmail.com

diff --git a/drivers/gpu/drm/nouveau/nouveau_vmm.c b/drivers/gpu/drm/nouveau/nouveau_vmm.c
index a6602c012671..3dda885df5b2 100644
--- a/drivers/gpu/drm/nouveau/nouveau_vmm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_vmm.c
@@ -108,6 +108,9 @@ nouveau_vma_new(struct nouveau_bo *nvbo, struct nouveau_vmm *vmm,
 	} else {
 		ret = nvif_vmm_get(&vmm->vmm, PTES, false, mem->mem.page, 0,
 				   mem->mem.size, &tmp);
+		if (ret)
+			goto done;
+
 		vma->addr = tmp.addr;
 	}
 
commit 0a1123c7b9f17fb06cc51fb9ce2f880a512be408
Merge: 205e18c13545 aa0901a9008e
Author: Dave Airlie <airlied at redhat.com>
Date:   Fri Jan 19 10:21:26 2024 +1000

    Merge tag 'amd-drm-fixes-6.8-2024-01-18' of https://gitlab.freedesktop.org/agd5f/linux into drm-next
    
    amd-drm-fixes-6.8-2024-01-18:
    
    amdgpu:
    - DSC fixes
    - DC resource pool fixes
    - OTG fix
    - DML2 fixes
    - Aux fix
    - GFX10 RLC firmware handling fix
    - Revert a broken workaround for SMU 13.0.2
    - DC writeback fix
    - Enable gfxoff when ROCm apps are active on gfx11 with the proper FW version
    
    amdkfd:
    - Fix dma-buf exports using GEM handles
    
    Signed-off-by: Dave Airlie <airlied at redhat.com>
    
    From: Alex Deucher <alexander.deucher at amd.com>
    Link: https://patchwork.freedesktop.org/patch/msgid/20240118223127.4904-1-alexander.deucher@amd.com

commit aa0901a9008eeb2710292aff94e615adf7884d5f
Author: Ori Messinger <Ori.Messinger at amd.com>
Date:   Wed Nov 22 00:12:13 2023 -0500

    drm/amdgpu: Enable GFXOFF for Compute on GFX11
    
    On GFX version 11, GFXOFF was disabled due to a MES KIQ firmware
    issue, which has since been fixed after version 64.
    This patch only re-enables GFXOFF for GFX version 11 if the GPU's
    MES KIQ firmware version is newer than version 64.
    
    V2: Keep GFXOFF disabled on GFX11 if MES KIQ is below version 64.
    V3: Add parentheses to avoid GCC warning for parentheses:
    "suggest parentheses around comparison in operand of ‘&’"
    V4: Remove "V3" from commit title
    V5: Change commit description and insert 'Acked-by'
    
    Signed-off-by: Ori Messinger <Ori.Messinger at amd.com>
    Acked-by: Alex Deucher <alexander.deucher at amd.com>
    Reviewed-by: Harish Kasiviswanathan <Harish.Kasiviswanathan at amd.com>
    Signed-off-by: Alex Deucher <alexander.deucher at amd.com>
    Cc: stable at vger.kernel.org

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 81af6bf2f052..77e263660288 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -698,10 +698,8 @@ err:
 void amdgpu_amdkfd_set_compute_idle(struct amdgpu_device *adev, bool idle)
 {
 	enum amd_powergating_state state = idle ? AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE;
-	/* Temporary workaround to fix issues observed in some
-	 * compute applications when GFXOFF is enabled on GFX11.
-	 */
-	if (IP_VERSION_MAJ(amdgpu_ip_version(adev, GC_HWIP, 0)) == 11) {
+	if (IP_VERSION_MAJ(amdgpu_ip_version(adev, GC_HWIP, 0)) == 11 &&
+	    ((adev->mes.kiq_version & AMDGPU_MES_VERSION_MASK) <= 64)) {
 		pr_debug("GFXOFF is %s\n", idle ? "enabled" : "disabled");
 		amdgpu_gfx_off_ctrl(adev, idle);
 	} else if ((IP_VERSION_MAJ(amdgpu_ip_version(adev, GC_HWIP, 0)) == 9) &&
commit b2139c96dc954b58b81bc670fc4ea5f034ed062c
Author: Srinivasan Shanmugam <srinivasan.shanmugam at amd.com>
Date:   Sat Jan 13 14:32:27 2024 +0530

    drm/amd/display: Drop 'acrtc' and add 'new_crtc_state' NULL check for writeback requests.
    
    Return value of 'to_amdgpu_crtc' which is container_of(...) can't be
    null, so it's null check 'acrtc' is dropped.
    
    Fixing the below:
    drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm.c:9302 amdgpu_dm_atomic_commit_tail() error: we previously assumed 'acrtc' could be null (see line 9299)
    
    Added 'new_crtc_state' NULL check for function
    'drm_atomic_get_new_crtc_state' that retrieves the new state for a CRTC,
    while enabling writeback requests.
    
    Cc: stable at vger.kernel.org
    Cc: Alex Hung <alex.hung at amd.com>
    Cc: Aurabindo Pillai <aurabindo.pillai at amd.com>
    Cc: Rodrigo Siqueira <Rodrigo.Siqueira at amd.com>
    Cc: Hamza Mahfooz <hamza.mahfooz at amd.com>
    Signed-off-by: Srinivasan Shanmugam <srinivasan.shanmugam at amd.com>
    Reviewed-by: Alex Hung <alex.hung at amd.com>
    Signed-off-by: Alex Deucher <alexander.deucher at amd.com>

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 10b2a896c498..d55eeb30ccb2 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -9293,10 +9293,10 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
 		if (!new_con_state->writeback_job)
 			continue;
 
-		new_crtc_state = NULL;
+		new_crtc_state = drm_atomic_get_new_crtc_state(state, &acrtc->base);
 
-		if (acrtc)
-			new_crtc_state = drm_atomic_get_new_crtc_state(state, &acrtc->base);
+		if (!new_crtc_state)
+			continue;
 
 		if (acrtc->wb_enabled)
 			continue;
commit fb1c93c2e9604a884467a773790016199f78ca08
Author: Christian König <christian.koenig at amd.com>
Date:   Wed Jan 10 15:19:29 2024 +0100

    drm/amdgpu: revert "Adjust removal control flow for smu v13_0_2"
    
    Calling amdgpu_device_ip_resume_phase1() during shutdown leaves the
    HW in an active state and is an unbalanced use of the IP callbacks.
    
    Using the IP callbacks like this can lead to memory leaks, double
    free and imbalanced reference counters.
    
    Leaving the HW in an active state can lead to DMA accesses to memory now
    freed by the driver.
    
    Both is a complete no-go for driver unload so completely revert the
    workaround for now.
    
    This reverts commit f5c7e7797060255dbc8160734ccc5ad6183c5e04.
    
    Signed-off-by: Christian König <christian.koenig at amd.com>
    Acked-by: Alex Deucher <alexander.deucher at amd.com>
    Signed-off-by: Alex Deucher <alexander.deucher at amd.com>
    Cc: stable at vger.kernel.org

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index ecbc58269951..b158d27d0a71 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -5246,7 +5246,6 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle,
 	struct amdgpu_device *tmp_adev = NULL;
 	bool need_full_reset, skip_hw_reset, vram_lost = false;
 	int r = 0;
-	bool gpu_reset_for_dev_remove = 0;
 
 	/* Try reset handler method first */
 	tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
@@ -5266,10 +5265,6 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle,
 		test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
 	skip_hw_reset = test_bit(AMDGPU_SKIP_HW_RESET, &reset_context->flags);
 
-	gpu_reset_for_dev_remove =
-		test_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context->flags) &&
-			test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
-
 	/*
 	 * ASIC reset has to be done on all XGMI hive nodes ASAP
 	 * to allow proper links negotiation in FW (within 1 sec)
@@ -5312,18 +5307,6 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle,
 		amdgpu_ras_intr_cleared();
 	}
 
-	/* Since the mode1 reset affects base ip blocks, the
-	 * phase1 ip blocks need to be resumed. Otherwise there
-	 * will be a BIOS signature error and the psp bootloader
-	 * can't load kdb on the next amdgpu install.
-	 */
-	if (gpu_reset_for_dev_remove) {
-		list_for_each_entry(tmp_adev, device_list_handle, reset_list)
-			amdgpu_device_ip_resume_phase1(tmp_adev);
-
-		goto end;
-	}
-
 	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
 		if (need_full_reset) {
 			/* post card */
@@ -5560,11 +5543,6 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 	int i, r = 0;
 	bool need_emergency_restart = false;
 	bool audio_suspended = false;
-	bool gpu_reset_for_dev_remove = false;
-
-	gpu_reset_for_dev_remove =
-			test_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context->flags) &&
-				test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
 
 	/*
 	 * Special case: RAS triggered and full reset isn't supported
@@ -5602,7 +5580,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 	if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1)) {
 		list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
 			list_add_tail(&tmp_adev->reset_list, &device_list);
-			if (gpu_reset_for_dev_remove && adev->shutdown)
+			if (adev->shutdown)
 				tmp_adev->shutdown = true;
 		}
 		if (!list_is_first(&adev->reset_list, &device_list))
@@ -5687,10 +5665,6 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 
 retry:	/* Rest of adevs pre asic reset from XGMI hive. */
 	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
-		if (gpu_reset_for_dev_remove) {
-			/* Workaroud for ASICs need to disable SMC first */
-			amdgpu_device_smu_fini_early(tmp_adev);
-		}
 		r = amdgpu_device_pre_asic_reset(tmp_adev, reset_context);
 		/*TODO Should we stop ?*/
 		if (r) {
@@ -5722,9 +5696,6 @@ retry:	/* Rest of adevs pre asic reset from XGMI hive. */
 		r = amdgpu_do_asic_reset(device_list_handle, reset_context);
 		if (r && r == -EAGAIN)
 			goto retry;
-
-		if (!r && gpu_reset_for_dev_remove)
-			goto recover_end;
 	}
 
 skip_hw_reset:
@@ -5780,7 +5751,6 @@ skip_sched_resume:
 		amdgpu_ras_set_error_query_ready(tmp_adev, true);
 	}
 
-recover_end:
 	tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
 					    reset_list);
 	amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 5c9caf5fa075..cc69005f5b46 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -2337,38 +2337,6 @@ amdgpu_pci_remove(struct pci_dev *pdev)
 		pm_runtime_forbid(dev->dev);
 	}
 
-	if (amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 2) &&
-	    !amdgpu_sriov_vf(adev)) {
-		bool need_to_reset_gpu = false;
-
-		if (adev->gmc.xgmi.num_physical_nodes > 1) {
-			struct amdgpu_hive_info *hive;
-
-			hive = amdgpu_get_xgmi_hive(adev);
-			if (hive->device_remove_count == 0)
-				need_to_reset_gpu = true;
-			hive->device_remove_count++;
-			amdgpu_put_xgmi_hive(hive);
-		} else {
-			need_to_reset_gpu = true;
-		}
-
-		/* Workaround for ASICs need to reset SMU.
-		 * Called only when the first device is removed.
-		 */
-		if (need_to_reset_gpu) {
-			struct amdgpu_reset_context reset_context;
-
-			adev->shutdown = true;
-			memset(&reset_context, 0, sizeof(reset_context));
-			reset_context.method = AMD_RESET_METHOD_NONE;
-			reset_context.reset_req_dev = adev;
-			set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
-			set_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context.flags);
-			amdgpu_device_gpu_recover(adev, NULL, &reset_context);
-		}
-	}
-
 	amdgpu_driver_unload_kms(dev);
 
 	/*
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
index b0335a1c5e90..19899f6b9b2b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
@@ -32,7 +32,6 @@ enum AMDGPU_RESET_FLAGS {
 
 	AMDGPU_NEED_FULL_RESET = 0,
 	AMDGPU_SKIP_HW_RESET = 1,
-	AMDGPU_RESET_FOR_DEVICE_REMOVE = 2,
 };
 
 struct amdgpu_reset_context {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
index 6cab882e8061..1592c63b3099 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
@@ -43,7 +43,6 @@ struct amdgpu_hive_info {
 	} pstate;
 
 	struct amdgpu_reset_domain *reset_domain;
-	uint32_t device_remove_count;
 	atomic_t ras_recovery;
 };
 
commit 3c4e4eb5d872118fef1708abe933a410c5e07e3a
Author: Flora Cui <flora.cui at amd.com>
Date:   Wed Jan 10 19:23:56 2024 +0800

    drm/amdkfd: init drm_client with funcs hook
    
    otherwise drm_client_dev_unregister() would try to
    kfree(&adev->kfd.client).
    
    Fixes: 1819200166ce ("drm/amdkfd: Export DMABufs from KFD using GEM handles")
    Signed-off-by: Flora Cui <flora.cui at amd.com>
    Reviewed-by: Felix Kuehling <felix.kuehling at amd.com>
    Signed-off-by: Alex Deucher <alexander.deucher at amd.com>

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 067690ba7bff..81af6bf2f052 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -138,6 +138,9 @@ static void amdgpu_amdkfd_reset_work(struct work_struct *work)
 	amdgpu_device_gpu_recover(adev, NULL, &reset_context);
 }
 
+static const struct drm_client_funcs kfd_client_funcs = {
+	.unregister	= drm_client_release,
+};
 void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
 {
 	int i;
@@ -161,7 +164,7 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
 			.enable_mes = adev->enable_mes,
 		};
 
-		ret = drm_client_init(&adev->ddev, &adev->kfd.client, "kfd", NULL);
+		ret = drm_client_init(&adev->ddev, &adev->kfd.client, "kfd", &kfd_client_funcs);
 		if (ret) {
 			dev_err(adev->dev, "Failed to init DRM client: %d\n", ret);
 			return;
commit 05638ff6dd6f0f38734b6b3ee2c7cf15520f5c00
Author: Christophe JAILLET <christophe.jaillet at wanadoo.fr>
Date:   Sat Jan 13 15:58:21 2024 +0100

    drm/amd/display: Fix a switch statement in populate_dml_output_cfg_from_stream_state()
    
    It is likely that the statement related to 'dml_edp' is misplaced. So move
    it in the correct "case SIGNAL_TYPE_EDP".
    
    Fixes: 7966f319c66d ("drm/amd/display: Introduce DML2")
    Signed-off-by: Christophe JAILLET <christophe.jaillet at wanadoo.fr>
    Signed-off-by: Hamza Mahfooz <hamza.mahfooz at amd.com>
    Signed-off-by: Alex Deucher <alexander.deucher at amd.com>
    Cc: stable at vger.kernel.org

diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c
index fa6a93dd9629..64d01a9cd68c 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c
@@ -626,8 +626,8 @@ static void populate_dml_output_cfg_from_stream_state(struct dml_output_cfg_st *
 		if (is_dp2p0_output_encoder(pipe))
 			out->OutputEncoder[location] = dml_dp2p0;
 		break;
-		out->OutputEncoder[location] = dml_edp;
 	case SIGNAL_TYPE_EDP:
+		out->OutputEncoder[location] = dml_edp;
 		break;
 	case SIGNAL_TYPE_HDMI_TYPE_A:
 	case SIGNAL_TYPE_DVI_SINGLE_LINK:
commit bc03c02cc1991a066b23e69bbcc0f66e8f1f7453
Author: Ma Jun <Jun.Ma2 at amd.com>
Date:   Fri Jan 12 13:33:24 2024 +0800

    drm/amdgpu: Fix the null pointer when load rlc firmware
    
    If the RLC firmware is invalid because of wrong header size,
    the pointer to the rlc firmware is released in function
    amdgpu_ucode_request. There will be a null pointer error
    in subsequent use. So skip validation to fix it.
    
    Fixes: 3da9b71563cb ("drm/amd: Use `amdgpu_ucode_*` helpers for GFX10")
    Signed-off-by: Ma Jun <Jun.Ma2 at amd.com>
    Acked-by: Alex Deucher <alexander.deucher at amd.com>
    Signed-off-by: Alex Deucher <alexander.deucher at amd.com>
    Cc: stable at vger.kernel.org

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 73f6d7e72c73..d63cab294883 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -3996,16 +3996,13 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev)
 
 	if (!amdgpu_sriov_vf(adev)) {
 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", ucode_prefix);
-		err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, fw_name);
-		/* don't check this.  There are apparently firmwares in the wild with
-		 * incorrect size in the header
-		 */
-		if (err == -ENODEV)
-			goto out;
+		err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
 		if (err)
-			dev_dbg(adev->dev,
-				"gfx10: amdgpu_ucode_request() failed \"%s\"\n",
-				fw_name);
+			goto out;
+
+		/* don't validate this firmware. There are apparently firmwares
+		 * in the wild with incorrect size in the header
+		 */
 		rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
 		version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
 		version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
commit bfe79f5fff1300d96203383582b078c7b0aec80a
Author: Wayne Lin <Wayne.Lin at amd.com>
Date:   Tue Jan 2 14:20:37 2024 +0800

    drm/amd/display: Align the returned error code with legacy DP
    
    [Why]
    For usb4 connector, AUX transaction is handled by dmub utilizing a differnt
    code path comparing to legacy DP connector. If the usb4 DP connector is
    disconnected, AUX access will report EBUSY and cause igt at kms_dp_aux_dev
    fail.
    
    [How]
    Align the error code with the one reported by legacy DP as EIO.
    
    Cc: Mario Limonciello <mario.limonciello at amd.com>
    Cc: Alex Deucher <alexander.deucher at amd.com>
    Cc: stable at vger.kernel.org
    Acked-by: Alex Hung <alex.hung at amd.com>
    Signed-off-by: Wayne Lin <Wayne.Lin at amd.com>
    Tested-by: Daniel Wheeler <daniel.wheeler at amd.com>
    Signed-off-by: Alex Deucher <alexander.deucher at amd.com>

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
index eaf8d9f48244..85b7f58a7f35 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
@@ -979,6 +979,11 @@ int dm_helper_dmub_aux_transfer_sync(
 		struct aux_payload *payload,
 		enum aux_return_code_type *operation_result)
 {
+	if (!link->hpd_status) {
+		*operation_result = AUX_RET_ERROR_HPD_DISCON;
+		return -1;
+	}
+
 	return amdgpu_dm_process_dmub_aux_transfer_sync(ctx, link->link_index, payload,
 			operation_result);
 }
commit d3579f5df0536c2f0fabaa3ea80bb2d179884195
Author: Ovidiu Bunea <ovidiu.bunea at amd.com>
Date:   Mon Dec 18 21:40:45 2023 -0500

    drm/amd/display: Fix DML2 watermark calculation
    
    [Why]
    core_mode_programming in DML2 should output watermark calculations
    to locals, but it incorrectly uses mode_lib
    
    [How]
    update code to match HW DML2
    
    Cc: Mario Limonciello <mario.limonciello at amd.com>
    Cc: Alex Deucher <alexander.deucher at amd.com>
    Cc: stable at vger.kernel.org
    Reviewed-by: Charlene Liu <charlene.liu at amd.com>
    Acked-by: Alex Hung <alex.hung at amd.com>
    Signed-off-by: Ovidiu Bunea <ovidiu.bunea at amd.com>
    Tested-by: Daniel Wheeler <daniel.wheeler at amd.com>
    Signed-off-by: Alex Deucher <alexander.deucher at amd.com>

diff --git a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c
index a6b938a12de1..9be5ebf3a8c0 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c
@@ -9446,13 +9446,13 @@ void dml_core_mode_programming(struct display_mode_lib_st *mode_lib, const struc
 		CalculateWatermarks_params->CompressedBufferSizeInkByte = locals->CompressedBufferSizeInkByte;
 
 		// Output
-		CalculateWatermarks_params->Watermark = &s->dummy_watermark; // Watermarks *Watermark
-		CalculateWatermarks_params->DRAMClockChangeSupport = &mode_lib->ms.support.DRAMClockChangeSupport[0];
-		CalculateWatermarks_params->MaxActiveDRAMClockChangeLatencySupported = &s->dummy_single_array[0][0]; // dml_float_t *MaxActiveDRAMClockChangeLatencySupported[]
-		CalculateWatermarks_params->SubViewportLinesNeededInMALL = &mode_lib->ms.SubViewportLinesNeededInMALL[j]; // dml_uint_t SubViewportLinesNeededInMALL[]
-		CalculateWatermarks_params->FCLKChangeSupport = &mode_lib->ms.support.FCLKChangeSupport[0];
-		CalculateWatermarks_params->MaxActiveFCLKChangeLatencySupported = &s->dummy_single[0]; // dml_float_t *MaxActiveFCLKChangeLatencySupported
-		CalculateWatermarks_params->USRRetrainingSupport = &mode_lib->ms.support.USRRetrainingSupport[0];
+		CalculateWatermarks_params->Watermark = &locals->Watermark; // Watermarks *Watermark
+		CalculateWatermarks_params->DRAMClockChangeSupport = &locals->DRAMClockChangeSupport;
+		CalculateWatermarks_params->MaxActiveDRAMClockChangeLatencySupported = locals->MaxActiveDRAMClockChangeLatencySupported; // dml_float_t *MaxActiveDRAMClockChangeLatencySupported[]
+		CalculateWatermarks_params->SubViewportLinesNeededInMALL = locals->SubViewportLinesNeededInMALL; // dml_uint_t SubViewportLinesNeededInMALL[]
+		CalculateWatermarks_params->FCLKChangeSupport = &locals->FCLKChangeSupport;
+		CalculateWatermarks_params->MaxActiveFCLKChangeLatencySupported = &locals->MaxActiveFCLKChangeLatencySupported; // dml_float_t *MaxActiveFCLKChangeLatencySupported
+		CalculateWatermarks_params->USRRetrainingSupport = &locals->USRRetrainingSupport;
 
 		CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
 			&mode_lib->scratch,
commit 3ba2a0bfd8cf94eb225e1c60dff16e5c35bde1da
Author: Ilya Bakoulin <ilya.bakoulin at amd.com>
Date:   Wed Jan 3 09:42:04 2024 -0500

    drm/amd/display: Clear OPTC mem select on disable
    
    [Why]
    Not clearing the memory select bits prior to OPTC disable can cause DSC
    corruption issues when attempting to reuse a memory instance for another
    OPTC that enables ODM.
    
    [How]
    Clear the memory select bits prior to disabling an OPTC.
    
    Cc: Mario Limonciello <mario.limonciello at amd.com>
    Cc: Alex Deucher <alexander.deucher at amd.com>
    Cc: stable at vger.kernel.org
    Reviewed-by: Charlene Liu <charlene.liu at amd.com>
    Acked-by: Alex Hung <alex.hung at amd.com>
    Signed-off-by: Ilya Bakoulin <ilya.bakoulin at amd.com>
    Tested-by: Daniel Wheeler <daniel.wheeler at amd.com>
    Signed-off-by: Alex Deucher <alexander.deucher at amd.com>

diff --git a/drivers/gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.c
index 1788eb29474b..823493543325 100644
--- a/drivers/gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.c
+++ b/drivers/gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.c
@@ -173,6 +173,9 @@ static bool optc32_disable_crtc(struct timing_generator *optc)
 			OPTC_SEG3_SRC_SEL, 0xf,
 			OPTC_NUM_OF_INPUT_SEGMENT, 0);
 
+	REG_UPDATE(OPTC_MEMORY_CONFIG,
+			OPTC_MEM_SEL, 0);
+
 	/* disable otg request until end of the first line
 	 * in the vertical blank region
 	 */
diff --git a/drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.c
index 3d6c1b2c2b4d..5b1547508850 100644
--- a/drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.c
+++ b/drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.c
@@ -145,6 +145,9 @@ static bool optc35_disable_crtc(struct timing_generator *optc)
 			OPTC_SEG3_SRC_SEL, 0xf,
 			OPTC_NUM_OF_INPUT_SEGMENT, 0);
 
+	REG_UPDATE(OPTC_MEMORY_CONFIG,
+			OPTC_MEM_SEL, 0);
+
 	/* disable otg request until end of the first line
 	 * in the vertical blank region
 	 */
commit 4b56f7d47be87cde5f368b67bc7fac53a2c3e8d2
Author: Nicholas Kazlauskas <nicholas.kazlauskas at amd.com>
Date:   Fri Dec 15 11:01:42 2023 -0500

    drm/amd/display: Port DENTIST hang and TDR fixes to OTG disable W/A
    
    [Why]
    We can experience DENTIST hangs during optimize_bandwidth or TDRs if
    FIFO is toggled and hangs.
    
    [How]
    Port the DCN35 fixes to DCN314.
    
    Cc: Mario Limonciello <mario.limonciello at amd.com>
    Cc: Alex Deucher <alexander.deucher at amd.com>
    Cc: stable at vger.kernel.org
    Reviewed-by: Charlene Liu <charlene.liu at amd.com>
    Acked-by: Alex Hung <alex.hung at amd.com>
    Signed-off-by: Nicholas Kazlauskas <nicholas.kazlauskas at amd.com>
    Tested-by: Daniel Wheeler <daniel.wheeler at amd.com>
    Signed-off-by: Alex Deucher <alexander.deucher at amd.com>

diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c
index 878c0e7b78ab..a84f1e376dee 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c
@@ -145,30 +145,27 @@ static int dcn314_get_active_display_cnt_wa(
 	return display_count;
 }
 
-static void dcn314_disable_otg_wa(struct clk_mgr *clk_mgr_base, struct dc_state *context, bool disable)
+static void dcn314_disable_otg_wa(struct clk_mgr *clk_mgr_base, struct dc_state *context,
+				  bool safe_to_lower, bool disable)
 {
 	struct dc *dc = clk_mgr_base->ctx->dc;
 	int i;
 
 	for (i = 0; i < dc->res_pool->pipe_count; ++i) {
-		struct pipe_ctx *pipe = &dc->current_state->res_ctx.pipe_ctx[i];
+		struct pipe_ctx *pipe = safe_to_lower
+			? &context->res_ctx.pipe_ctx[i]
+			: &dc->current_state->res_ctx.pipe_ctx[i];
 
 		if (pipe->top_pipe || pipe->prev_odm_pipe)
 			continue;
 		if (pipe->stream && (pipe->stream->dpms_off || dc_is_virtual_signal(pipe->stream->signal))) {
-			struct stream_encoder *stream_enc = pipe->stream_res.stream_enc;
-
 			if (disable) {
-				if (stream_enc && stream_enc->funcs->disable_fifo)
-					pipe->stream_res.stream_enc->funcs->disable_fifo(stream_enc);
+				if (pipe->stream_res.tg && pipe->stream_res.tg->funcs->immediate_disable_crtc)
+					pipe->stream_res.tg->funcs->immediate_disable_crtc(pipe->stream_res.tg);
 
-				pipe->stream_res.tg->funcs->immediate_disable_crtc(pipe->stream_res.tg);
 				reset_sync_context_for_pipe(dc, context, i);
 			} else {
 				pipe->stream_res.tg->funcs->enable_crtc(pipe->stream_res.tg);
-
-				if (stream_enc && stream_enc->funcs->enable_fifo)
-					pipe->stream_res.stream_enc->funcs->enable_fifo(stream_enc);
 			}
 		}
 	}
@@ -297,11 +294,11 @@ void dcn314_update_clocks(struct clk_mgr *clk_mgr_base,
 	}
 
 	if (should_set_clock(safe_to_lower, new_clocks->dispclk_khz, clk_mgr_base->clks.dispclk_khz)) {
-		dcn314_disable_otg_wa(clk_mgr_base, context, true);
+		dcn314_disable_otg_wa(clk_mgr_base, context, safe_to_lower, true);
 
 		clk_mgr_base->clks.dispclk_khz = new_clocks->dispclk_khz;
 		dcn314_smu_set_dispclk(clk_mgr, clk_mgr_base->clks.dispclk_khz);
-		dcn314_disable_otg_wa(clk_mgr_base, context, false);
+		dcn314_disable_otg_wa(clk_mgr_base, context, safe_to_lower, false);
 
 		update_dispclk = true;
 	}
commit 8a51cc097dd590a86e8eec5398934ef389ff9a7b
Author: Charlene Liu <charlene.liu at amd.com>
Date:   Thu Dec 28 13:19:33 2023 -0500

    drm/amd/display: Add logging resource checks
    
    [Why]
    When mapping resources, resources could be unavailable.
    
    Cc: Mario Limonciello <mario.limonciello at amd.com>
    Cc: Alex Deucher <alexander.deucher at amd.com>
    Cc: stable at vger.kernel.org
    Reviewed-by: Sung joon Kim <sungjoon.kim at amd.com>
    Acked-by: Alex Hung <alex.hung at amd.com>
    Signed-off-by: Charlene Liu <charlene.liu at amd.com>
    Tested-by: Daniel Wheeler <daniel.wheeler at amd.com>
    Signed-off-by: Alex Deucher <alexander.deucher at amd.com>

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c
index 69e726630241..aa7c02ba948e 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
@@ -3522,7 +3522,7 @@ static void commit_planes_for_stream(struct dc *dc,
 	top_pipe_to_program = resource_get_otg_master_for_stream(
 				&context->res_ctx,
 				stream);
-
+	ASSERT(top_pipe_to_program != NULL);
 	for (i = 0; i < dc->res_pool->pipe_count; i++) {
 		struct pipe_ctx *old_pipe = &dc->current_state->res_ctx.pipe_ctx[i];
 
@@ -4345,6 +4345,8 @@ static bool should_commit_minimal_transition_for_windowed_mpo_odm(struct dc *dc,
 
 	cur_pipe = resource_get_otg_master_for_stream(&dc->current_state->res_ctx, stream);
 	new_pipe = resource_get_otg_master_for_stream(&context->res_ctx, stream);
+	if (!cur_pipe || !new_pipe)
+		return false;
 	cur_is_odm_in_use = resource_get_odm_slice_count(cur_pipe) > 1;
 	new_is_odm_in_use = resource_get_odm_slice_count(new_pipe) > 1;
 	if (cur_is_odm_in_use == new_is_odm_in_use)
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
index f2abc1096ffb..9fbdb09697fd 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
@@ -2194,6 +2194,10 @@ void resource_log_pipe_topology_update(struct dc *dc, struct dc_state *state)
 	for (stream_idx = 0; stream_idx < state->stream_count; stream_idx++) {
 		otg_master = resource_get_otg_master_for_stream(
 				&state->res_ctx, state->streams[stream_idx]);
+		if (!otg_master	|| otg_master->stream_res.tg == NULL) {
+			DC_LOG_DC("topology update: otg_master NULL stream_idx %d!\n", stream_idx);
+			return;
+		}
 		slice_count = resource_get_opp_heads_for_otg_master(otg_master,
 				&state->res_ctx, opp_heads);
 		for (slice_idx = 0; slice_idx < slice_count; slice_idx++) {
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_state.c b/drivers/gpu/drm/amd/display/dc/core/dc_state.c
index 56feee0ff01b..88c6436b28b6 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_state.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_state.c
@@ -434,8 +434,9 @@ bool dc_state_add_plane(
 
 	otg_master_pipe = resource_get_otg_master_for_stream(
 			&state->res_ctx, stream);
-	added = resource_append_dpp_pipes_for_plane_composition(state,
-			dc->current_state, pool, otg_master_pipe, plane_state);
+	if (otg_master_pipe)
+		added = resource_append_dpp_pipes_for_plane_composition(state,
+				dc->current_state, pool, otg_master_pipe, plane_state);
 
 	if (added) {
 		stream_status->plane_states[stream_status->plane_count] =
commit aa36d8971fccb55ef3241cbfff9d1799e31d8628
Author: Dillon Varone <dillon.varone at amd.com>
Date:   Thu Dec 28 21:36:39 2023 -0500

    drm/amd/display: Init link enc resources in dc_state only if res_pool presents
    
    [Why & How]
    res_pool is not initialized in all situations such as virtual
    environments, and therefore link encoder resources should not be
    initialized if res_pool is NULL.
    
    Cc: Mario Limonciello <mario.limonciello at amd.com>
    Cc: Alex Deucher <alexander.deucher at amd.com>
    Cc: stable at vger.kernel.org
    Reviewed-by: Martin Leung <martin.leung at amd.com>
    Acked-by: Alex Hung <alex.hung at amd.com>
    Signed-off-by: Dillon Varone <dillon.varone at amd.com>
    Tested-by: Daniel Wheeler <daniel.wheeler at amd.com>
    Signed-off-by: Alex Deucher <alexander.deucher at amd.com>

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_state.c b/drivers/gpu/drm/amd/display/dc/core/dc_state.c
index 460a8010c79f..56feee0ff01b 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_state.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_state.c
@@ -267,7 +267,8 @@ void dc_state_construct(struct dc *dc, struct dc_state *state)
 	state->clk_mgr = dc->clk_mgr;
 
 	/* Initialise DIG link encoder resource tracking variables. */
-	link_enc_cfg_init(dc, state);
+	if (dc->res_pool)
+		link_enc_cfg_init(dc, state);
 }
 
 void dc_state_destruct(struct dc_state *state)
commit 3bb9b1f958c3d986ed90a3ff009f1e77e9553207
Author: Srinivasan Shanmugam <srinivasan.shanmugam at amd.com>
Date:   Wed Jan 10 20:58:35 2024 +0530

    drm/amd/display: Fix late derefrence 'dsc' check in 'link_set_dsc_pps_packet()'
    
    In link_set_dsc_pps_packet(), 'struct display_stream_compressor *dsc'
    was dereferenced in a DC_LOGGER_INIT(dsc->ctx->logger); before the 'dsc'
    NULL pointer check.
    
    Fixes the below:
    drivers/gpu/drm/amd/amdgpu/../display/dc/link/link_dpms.c:905 link_set_dsc_pps_packet() warn: variable dereferenced before check 'dsc' (see line 903)
    
    Cc: stable at vger.kernel.org
    Cc: Aurabindo Pillai <aurabindo.pillai at amd.com>
    Cc: Rodrigo Siqueira <Rodrigo.Siqueira at amd.com>
    Cc: Hamza Mahfooz <hamza.mahfooz at amd.com>
    Cc: Wenjing Liu <wenjing.liu at amd.com>
    Cc: Qingqing Zhuo <qingqing.zhuo at amd.com>
    Signed-off-by: Srinivasan Shanmugam <srinivasan.shanmugam at amd.com>
    Reviewed-by: Aurabindo Pillai <aurabindo.pillai at amd.com>
    Signed-off-by: Alex Deucher <alexander.deucher at amd.com>

diff --git a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c
index 3de148004c06..3cbfbf8d107e 100644
--- a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c
+++ b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c
@@ -900,11 +900,15 @@ bool link_set_dsc_pps_packet(struct pipe_ctx *pipe_ctx, bool enable, bool immedi
 {
 	struct display_stream_compressor *dsc = pipe_ctx->stream_res.dsc;
 	struct dc_stream_state *stream = pipe_ctx->stream;
-	DC_LOGGER_INIT(dsc->ctx->logger);
 
-	if (!pipe_ctx->stream->timing.flags.DSC || !dsc)
+	if (!pipe_ctx->stream->timing.flags.DSC)
 		return false;
 
+	if (!dsc)
+		return false;
+
+	DC_LOGGER_INIT(dsc->ctx->logger);
+
 	if (enable) {
 		struct dsc_config dsc_cfg;
 		uint8_t dsc_packed_pps[128];
commit d7643fe6fb76edb1f2f1497bf5e8b8f4774b5129
Author: Nathan Chancellor <nathan at kernel.org>
Date:   Wed Jan 10 13:46:47 2024 -0700

    drm/amd/display: Avoid enum conversion warning
    
    Clang warns (or errors with CONFIG_WERROR=y) when performing arithmetic
    with different enumerated types, which is usually a bug:
    
        drivers/gpu/drm/amd/amdgpu/../display/dc/link/protocols/link_dp_dpia_bw.c:548:24: error: arithmetic between different enumeration types ('const enum dc_link_rate' and 'const enum dc_lane_count') [-Werror,-Wenum-enum-conversion]
          548 |                         link_cap->link_rate * link_cap->lane_count * LINK_RATE_REF_FREQ_IN_KHZ * 8;
              |                         ~~~~~~~~~~~~~~~~~~~ ^ ~~~~~~~~~~~~~~~~~~~~
        1 error generated.
    
    In this case, there is not a problem because the enumerated types are
    basically treated as '#define' values. Add an explicit cast to an
    integral type to silence the warning.
    
    Closes: https://github.com/ClangBuiltLinux/linux/issues/1976
    Fixes: 5f3bce13266e ("drm/amd/display: Request usb4 bw for mst streams")
    Signed-off-by: Nathan Chancellor <nathan at kernel.org>
    Signed-off-by: Alex Deucher <alexander.deucher at amd.com>

diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c
index 4ef1a6a1d129..dd0d2b206462 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c
@@ -544,8 +544,9 @@ int link_dp_dpia_get_dp_overhead_in_dp_tunneling(struct dc_link *link)
 		 */
 		const struct dc_link_settings *link_cap =
 			dc_link_get_link_cap(link);
-		uint32_t link_bw_in_kbps =
-			link_cap->link_rate * link_cap->lane_count * LINK_RATE_REF_FREQ_IN_KHZ * 8;
+		uint32_t link_bw_in_kbps = (uint32_t)link_cap->link_rate *
+					   (uint32_t)link_cap->lane_count *
+					   LINK_RATE_REF_FREQ_IN_KHZ * 8;
 		link_mst_overhead = (link_bw_in_kbps / 64) + ((link_bw_in_kbps % 64) ? 1 : 0);
 	}
 
commit a992c90d8ed3929b70ae815ce21ca5651cc0a692
Author: Lijo Lazar <lijo.lazar at amd.com>
Date:   Thu Jan 11 15:28:53 2024 +0530

    drm/amd/pm: Fix smuv13.0.6 current clock reporting
    
    When current clock is equal to max dpm level clock, the level is not
    indicated correctly with *. Fix by comparing current clock against dpm
    level value.
    
    Signed-off-by: Lijo Lazar <lijo.lazar at amd.com>
    Reviewed-by: Asad Kamal <asad.kamal at amd.com>
    Signed-off-by: Alex Deucher <alexander.deucher at amd.com>
    Cc: stable at vger.kernel.org # 6.7.x

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
index 7513d1cfeebd..a28649f21093 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
@@ -970,7 +970,9 @@ static int smu_v13_0_6_print_clks(struct smu_context *smu, char *buf, int size,
 			if (i < (clocks.num_levels - 1))
 				clk2 = clocks.data[i + 1].clocks_in_khz / 1000;
 
-			if (curr_clk >= clk1 && curr_clk < clk2) {
+			if (curr_clk == clk1) {
+				level = i;
+			} else if (curr_clk >= clk1 && curr_clk < clk2) {
 				level = (curr_clk - clk1) <= (clk2 - curr_clk) ?
 						i :
 						i + 1;
commit 91739a897c12dcec699e53f390be1b4abdeef3a0
Author: Lijo Lazar <lijo.lazar at amd.com>
Date:   Thu Jan 11 09:47:33 2024 +0530

    drm/amd/pm: Add error log for smu v13.0.6 reset
    
    For all mode-2 reset fail cases, add error log.
    
    Signed-off-by: Lijo Lazar <lijo.lazar at amd.com>
    Reviewed-by: Asad Kamal <asad.kamal at amd.com>
    Signed-off-by: Alex Deucher <alexander.deucher at amd.com>
    Cc: stable at vger.kernel.org # 6.7.x

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
index 4ebc6b421c2c..7513d1cfeebd 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
@@ -2235,17 +2235,18 @@ static int smu_v13_0_6_mode2_reset(struct smu_context *smu)
 			continue;
 		}
 
-		if (ret) {
-			dev_err(adev->dev,
-				"failed to send mode2 message \tparam: 0x%08x error code %d\n",
-				SMU_RESET_MODE_2, ret);
+		if (ret)
 			goto out;
-		}
+
 	} while (ret == -ETIME && timeout);
 
 out:
 	mutex_unlock(&smu->message_lock);
 
+	if (ret)
+		dev_err(adev->dev, "failed to send mode2 reset, error code %d",
+			ret);
+
 	return ret;
 }
 
commit d7a254fad873775ce6c32b77796c81e81e6b7f2e
Author: Srinivasan Shanmugam <srinivasan.shanmugam at amd.com>
Date:   Tue Jan 9 16:57:26 2024 +0530

    drm/amdkfd: Fix 'node' NULL check in 'svm_range_get_range_boundaries()'
    
    Range interval [start, last] is ordered by rb_tree, rb_prev, rb_next
    return value still needs NULL check, thus modified from "node" to "rb_node".
    
    Fixes the below:
    drivers/gpu/drm/amd/amdgpu/../amdkfd/kfd_svm.c:2691 svm_range_get_range_boundaries() warn: can 'node' even be NULL?
    
    Suggested-by: Philip Yang <Philip.Yang at amd.com>
    Cc: Felix Kuehling <Felix.Kuehling at amd.com>
    Cc: Christian König <christian.koenig at amd.com>
    Cc: Alex Deucher <alexander.deucher at amd.com>
    Signed-off-by: Srinivasan Shanmugam <srinivasan.shanmugam at amd.com>
    Reviewed-by: Felix Kuehling <felix.kuehling at amd.com>
    Signed-off-by: Alex Deucher <alexander.deucher at amd.com>

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 4e9f07c7a937..c50a0dc9c9c0 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -2654,6 +2654,7 @@ svm_range_get_range_boundaries(struct kfd_process *p, int64_t addr,
 {
 	struct vm_area_struct *vma;
 	struct interval_tree_node *node;
+	struct rb_node *rb_node;
 	unsigned long start_limit, end_limit;
 
 	vma = vma_lookup(p->mm, addr << PAGE_SHIFT);
@@ -2673,16 +2674,15 @@ svm_range_get_range_boundaries(struct kfd_process *p, int64_t addr,
 	if (node) {
 		end_limit = min(end_limit, node->start);
 		/* Last range that ends before the fault address */
-		node = container_of(rb_prev(&node->rb),
-				    struct interval_tree_node, rb);
+		rb_node = rb_prev(&node->rb);
 	} else {
 		/* Last range must end before addr because
 		 * there was no range after addr
 		 */
-		node = container_of(rb_last(&p->svms.objects.rb_root),
-				    struct interval_tree_node, rb);
+		rb_node = rb_last(&p->svms.objects.rb_root);
 	}
-	if (node) {
+	if (rb_node) {
+		node = container_of(rb_node, struct interval_tree_node, rb);
 		if (node->last >= addr) {
 			WARN(1, "Overlap with prev node and page fault addr\n");
 			return -EFAULT;
commit c3d5e297dcae88274dc6924db337a2159279eced
Author: Alex Deucher <alexander.deucher at amd.com>
Date:   Tue Jan 9 10:45:42 2024 -0500

    drm/amdgpu: drop exp hw support check for GC 9.4.3
    
    No longer needed.
    
    Reviewed-by: Hawking Zhang <Hawking.Zhang at amd.com>
    Signed-off-by: Alex Deucher <alexander.deucher at amd.com>
    Cc: stable at vger.kernel.org # 6.7.x

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
index 0431eafa86b5..c7d60dd0fb97 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
@@ -1963,8 +1963,6 @@ static int amdgpu_discovery_set_gc_ip_blocks(struct amdgpu_device *adev)
 		amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block);
 		break;
 	case IP_VERSION(9, 4, 3):
-		if (!amdgpu_exp_hw_support)
-			return -EINVAL;
 		amdgpu_device_ip_block_add(adev, &gfx_v9_4_3_ip_block);
 		break;
 	case IP_VERSION(10, 1, 10):
commit 51258acdc4758d43f03ec9cab6f3fa72a2838f0e
Author: Le Ma <le.ma at amd.com>
Date:   Tue Jan 9 18:06:35 2024 +0800

    drm/amdgpu: move debug options init prior to amdgpu device init
    
    To bring debug options into effect in early initialization phase
    
    Signed-off-by: Le Ma <le.ma at amd.com>
    Reviewed-by: Lijo Lazar <lijo.lazar at amd.com>
    Reviewed-by: Christian König <christian.koenig at amd.com>
    Signed-off-by: Alex Deucher <alexander.deucher at amd.com>

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 0776b0c5e4e4..5c9caf5fa075 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -2234,6 +2234,8 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
 
 	pci_set_drvdata(pdev, ddev);
 
+	amdgpu_init_debug_options(adev);
+
 	ret = amdgpu_driver_load_kms(adev, flags);
 	if (ret)
 		goto err_pci;
@@ -2314,8 +2316,6 @@ retry_init:
 			amdgpu_get_secondary_funcs(adev);
 	}
 
-	amdgpu_init_debug_options(adev);
-
 	return 0;
 
 err_pci:
commit d20e1aec8862e48a352ca86969cee6f530dd41d5
Author: Le Ma <le.ma at amd.com>
Date:   Tue Jan 9 17:44:39 2024 +0800

    drm/amdgpu: add debug flag to place fw bo on vram for frontdoor loading
    
    Use debug_mask=0x8 param to help isolating data path issues
    on new systems in early phase.
    
    v2: rename the flag for explicitness (lijo)
    
    Signed-off-by: Le Ma <le.ma at amd.com>
    Reviewed-by: Lijo Lazar <lijo.lazar at amd.com>
    Reviewed-by: Christian König <christian.koenig at amd.com>
    Signed-off-by: Alex Deucher <alexander.deucher at amd.com>

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 616b6c911767..3d8a48f46b01 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1144,6 +1144,7 @@ struct amdgpu_device {
 	bool                            debug_vm;
 	bool                            debug_largebar;
 	bool                            debug_disable_soft_recovery;
+	bool                            debug_use_vram_fw_buf;
 };
 
 static inline uint32_t amdgpu_ip_version(const struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 880137774b4e..0776b0c5e4e4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -128,6 +128,7 @@ enum AMDGPU_DEBUG_MASK {
 	AMDGPU_DEBUG_VM = BIT(0),
 	AMDGPU_DEBUG_LARGEBAR = BIT(1),
 	AMDGPU_DEBUG_DISABLE_GPU_SOFT_RECOVERY = BIT(2),
+	AMDGPU_DEBUG_USE_VRAM_FW_BUF = BIT(3),
 };
 
 unsigned int amdgpu_vram_limit = UINT_MAX;
@@ -2117,6 +2118,11 @@ static void amdgpu_init_debug_options(struct amdgpu_device *adev)
 		pr_info("debug: soft reset for GPU recovery disabled\n");
 		adev->debug_disable_soft_recovery = true;
 	}
+
+	if (amdgpu_debug_mask & AMDGPU_DEBUG_USE_VRAM_FW_BUF) {
+		pr_info("debug: place fw in vram for frontdoor loading\n");
+		adev->debug_use_vram_fw_buf = true;
+	}
 }
 
 static unsigned long amdgpu_fix_asic_type(struct pci_dev *pdev, unsigned long flags)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index 1bf975b8d083..0328616473f8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -466,7 +466,7 @@ static int psp_sw_init(void *handle)
 	}
 
 	ret = amdgpu_bo_create_kernel(adev, PSP_1_MEG, PSP_1_MEG,
-				      amdgpu_sriov_vf(adev) ?
+				      (amdgpu_sriov_vf(adev) || adev->debug_use_vram_fw_buf) ?
 				      AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT,
 				      &psp->fw_pri_bo,
 				      &psp->fw_pri_mc_addr,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
index 0efb2568cb65..3e12763e477a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
@@ -1062,7 +1062,8 @@ int amdgpu_ucode_create_bo(struct amdgpu_device *adev)
 {
 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_DIRECT) {
 		amdgpu_bo_create_kernel(adev, adev->firmware.fw_size, PAGE_SIZE,
-			amdgpu_sriov_vf(adev) ? AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT,
+			(amdgpu_sriov_vf(adev) || adev->debug_use_vram_fw_buf) ?
+			AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT,
 			&adev->firmware.fw_buf,
 			&adev->firmware.fw_buf_mc,
 			&adev->firmware.fw_buf_ptr);
commit 6c5683bd9ecaa7f199c3122c1010ece5d59b1aef
Author: Le Ma <le.ma at amd.com>
Date:   Tue Jan 9 12:06:25 2024 +0800

    Revert "drm/amdgpu: add param to specify fw bo location for front-door loading"
    
    This reverts commit c572abffe9f50c8ba33060865449313b3f588c35.
    
    Will use debug module param instead of independent module param.
    
    Signed-off-by: Le Ma <le.ma at amd.com>
    Reviewed-by: Lijo Lazar <lijo.lazar at amd.com>
    Reviewed-by: Christian König <christian.koenig at amd.com>
    Signed-off-by: Alex Deucher <alexander.deucher at amd.com>

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 9da14436a373..616b6c911767 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -254,8 +254,6 @@ extern int amdgpu_agp;
 
 extern int amdgpu_wbrf;
 
-extern int fw_bo_location;
-
 #define AMDGPU_VM_MAX_NUM_CTX			4096
 #define AMDGPU_SG_THRESHOLD			(256*1024*1024)
 #define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS	        3000
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 852cec98ff26..880137774b4e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -210,7 +210,6 @@ int amdgpu_seamless = -1; /* auto */
 uint amdgpu_debug_mask;
 int amdgpu_agp = -1; /* auto */
 int amdgpu_wbrf = -1;
-int fw_bo_location = -1;
 
 static void amdgpu_drv_delayed_reset_work_handler(struct work_struct *work);
 
@@ -990,10 +989,6 @@ MODULE_PARM_DESC(wbrf,
 	"Enable Wifi RFI interference mitigation (0 = disabled, 1 = enabled, -1 = auto(default)");
 module_param_named(wbrf, amdgpu_wbrf, int, 0444);
 
-MODULE_PARM_DESC(fw_bo_location,
-	"location to put firmware bo for frontdoor loading (-1 = auto (default), 0 = on ram, 1 = on vram");
-module_param(fw_bo_location, int, 0644);
-
 /* These devices are not supported by amdgpu.
  * They are supported by the mach64, r128, radeon drivers
  */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index 2addbdf88394..1bf975b8d083 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -466,7 +466,7 @@ static int psp_sw_init(void *handle)
 	}
 
 	ret = amdgpu_bo_create_kernel(adev, PSP_1_MEG, PSP_1_MEG,
-				      (amdgpu_sriov_vf(adev) || fw_bo_location == 1) ?
+				      amdgpu_sriov_vf(adev) ?
 				      AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT,
 				      &psp->fw_pri_bo,
 				      &psp->fw_pri_mc_addr,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
index d334e42fe0eb..0efb2568cb65 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
@@ -1062,8 +1062,7 @@ int amdgpu_ucode_create_bo(struct amdgpu_device *adev)
 {
 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_DIRECT) {
 		amdgpu_bo_create_kernel(adev, adev->firmware.fw_size, PAGE_SIZE,
-			(amdgpu_sriov_vf(adev) || fw_bo_location == 1) ?
-			AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT,
+			amdgpu_sriov_vf(adev) ? AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT,
 			&adev->firmware.fw_buf,
 			&adev->firmware.fw_buf_mc,
 			&adev->firmware.fw_buf_ptr);
commit 7073934f5d73f8b53308963cee36f0d389ea857c
Author: Srinivasan Shanmugam <srinivasan.shanmugam at amd.com>
Date:   Mon Jan 8 21:20:28 2024 +0530

    drm/amd/display: Fix variable deferencing before NULL check in edp_setup_replay()
    
    In edp_setup_replay(), 'struct dc *dc' & 'struct dmub_replay *replay'
    was dereferenced before the pointer 'link' & 'replay' NULL check.
    
    Fixes the below:
    drivers/gpu/drm/amd/amdgpu/../display/dc/link/protocols/link_edp_panel_control.c:947 edp_setup_replay() warn: variable dereferenced before check 'link' (see line 933)
    
    Cc: stable at vger.kernel.org
    Cc: Bhawanpreet Lakha <Bhawanpreet.Lakha at amd.com>
    Cc: Harry Wentland <harry.wentland at amd.com>
    Cc: Rodrigo Siqueira <Rodrigo.Siqueira at amd.com>
    Cc: Aurabindo Pillai <aurabindo.pillai at amd.com>
    Cc: Alex Deucher <alexander.deucher at amd.com>
    Signed-off-by: Srinivasan Shanmugam <srinivasan.shanmugam at amd.com>
    Reviewed-by: Aurabindo Pillai <aurabindo.pillai at amd.com>
    Signed-off-by: Alex Deucher <alexander.deucher at amd.com>

diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c
index 7f1196528218..046d3e205415 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c
@@ -930,8 +930,8 @@ bool edp_get_replay_state(const struct dc_link *link, uint64_t *state)
 bool edp_setup_replay(struct dc_link *link, const struct dc_stream_state *stream)
 {
 	/* To-do: Setup Replay */
-	struct dc *dc = link->ctx->dc;
-	struct dmub_replay *replay = dc->res_pool->replay;
+	struct dc *dc;
+	struct dmub_replay *replay;
 	int i;
 	unsigned int panel_inst;
 	struct replay_context replay_context = { 0 };
@@ -947,6 +947,10 @@ bool edp_setup_replay(struct dc_link *link, const struct dc_stream_state *stream
 	if (!link)
 		return false;
 
+	dc = link->ctx->dc;
+
+	replay = dc->res_pool->replay;
+
 	if (!replay)
 		return false;
 
@@ -975,8 +979,7 @@ bool edp_setup_replay(struct dc_link *link, const struct dc_stream_state *stream
 
 	replay_context.line_time_in_ns = lineTimeInNs;
 
-	if (replay)
-		link->replay_settings.replay_feature_enabled =
+	link->replay_settings.replay_feature_enabled =
 			replay->funcs->replay_copy_settings(replay, link, &replay_context, panel_inst);
 	if (link->replay_settings.replay_feature_enabled) {
 
commit 2b9a073b7304f4a9e130d04794c91a0c4f9a5c12
Author: Yifan Zhang <yifan1.zhang at amd.com>
Date:   Tue Jan 9 09:19:22 2024 +0800

    drm/amdgpu: update regGL2C_CTRL4 value in golden setting
    
    This patch to update regGL2C_CTRL4 in golden setting.
    
    Signed-off-by: Yifan Zhang <yifan1.zhang at amd.com>
    Reviewed-by: Tim Huang <Tim.Huang at amd.com>
    Signed-off-by: Alex Deucher <alexander.deucher at amd.com>
    Cc: stable at vger.kernel.org # 6.7.x

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
index c7242877d5d3..0ea0866c261f 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -115,7 +115,7 @@ static const struct soc15_reg_golden golden_settings_gc_11_5_0[] = {
 	SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xfffffff3),
 	SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_CTRL, 0xffffffff, 0xf37fff3f),
 	SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_CTRL3, 0xfffffffb, 0x00f40188),
-	SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_CTRL4, 0xf0ffffff, 0x8000b007),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_CTRL4, 0xf0ffffff, 0x80009007),
 	SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_CL_ENHANCE, 0xf1ffffff, 0x00880007),
 	SOC15_REG_GOLDEN_VALUE(GC, 0, regPC_CONFIG_CNTL_1, 0xffffffff, 0x00010000),
 	SOC15_REG_GOLDEN_VALUE(GC, 0, regTA_CNTL_AUX, 0xf7f7ffff, 0x01030000),
commit 8a44fdd3cf91debbd09b43bd2519ad2b2486ccf4
Author: Srinivasan Shanmugam <srinivasan.shanmugam at amd.com>
Date:   Thu Dec 21 18:13:11 2023 +0530

    drm/amdgpu: Release 'adev->pm.fw' before return in 'amdgpu_device_need_post()'
    
    In function 'amdgpu_device_need_post(struct amdgpu_device *adev)' -
    'adev->pm.fw' may not be released before return.
    
    Using the function release_firmware() to release adev->pm.fw.
    
    Thus fixing the below:
    drivers/gpu/drm/amd/amdgpu/amdgpu_device.c:1571 amdgpu_device_need_post() warn: 'adev->pm.fw' from request_firmware() not released on lines: 1554.
    
    Cc: Monk Liu <Monk.Liu at amd.com>
    Cc: Christian König <christian.koenig at amd.com>
    Cc: Alex Deucher <alexander.deucher at amd.com>
    Signed-off-by: Srinivasan Shanmugam <srinivasan.shanmugam at amd.com>
    Suggested-by: Lijo Lazar <lijo.lazar at amd.com>
    Reviewed-by: Alex Deucher <alexander.deucher at amd.com>
    Signed-off-by: Alex Deucher <alexander.deucher at amd.com>

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 5bb444bb36ce..ecbc58269951 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1544,6 +1544,7 @@ bool amdgpu_device_need_post(struct amdgpu_device *adev)
 				return true;
 
 			fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
+			release_firmware(adev->pm.fw);
 			if (fw_ver < 0x00160e00)
 				return true;
 		}
commit 8e8272f0dc22e11b2791dc778b07bd66c208d5a8
Author: Srinivasan Shanmugam <srinivasan.shanmugam at amd.com>
Date:   Fri Jan 5 10:08:58 2024 +0530

    drm/amdgpu: Fix unsigned comparison with less than zero in vpe_u1_8_from_fraction()
    
    The variables 'numerator' and 'denominator', are unsigned 16-bit integer
    types, that can never be less than 0.
    
    Thus fixing the below:
    drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c:62 vpe_u1_8_from_fraction() warn: unsigned 'numerator' is never less than zero.
    drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c:63 vpe_u1_8_from_fraction() warn: unsigned 'denominator' is never less than zero.
    
    Cc: Peyton Lee <peytolee at amd.com>
    Cc: Lang Yu <lang.yu at amd.com>
    Cc: Christian König <christian.koenig at amd.com>
    Cc: Alex Deucher <alexander.deucher at amd.com>
    Signed-off-by: Srinivasan Shanmugam <srinivasan.shanmugam at amd.com>
    Reviewed-by: Peyton Lee <peyton.lee at amd.com>
    Signed-off-by: Alex Deucher <alexander.deucher at amd.com>

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c
index 6f149b54d4d3..b9a15d51eb5c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c
@@ -59,11 +59,8 @@ static inline uint16_t complete_integer_division_u16(
 
 static uint16_t vpe_u1_8_from_fraction(uint16_t numerator, uint16_t denominator)
 {
-	bool arg1_negative = numerator < 0;
-	bool arg2_negative = denominator < 0;
-
-	uint16_t arg1_value = (uint16_t)(arg1_negative ? -numerator : numerator);
-	uint16_t arg2_value = (uint16_t)(arg2_negative ? -denominator : denominator);
+	u16 arg1_value = numerator;
+	u16 arg2_value = denominator;
 
 	uint16_t remainder;
 
@@ -100,9 +97,6 @@ static uint16_t vpe_u1_8_from_fraction(uint16_t numerator, uint16_t denominator)
 		res_value += summand;
 	}
 
-	if (arg1_negative ^ arg2_negative)
-		res_value = -res_value;
-
 	return res_value;
 }
 
commit fac4ebd79fed60e79cccafdad45a2bb8d3795044
Author: Srinivasan Shanmugam <srinivasan.shanmugam at amd.com>
Date:   Thu Jan 4 15:26:42 2024 +0530

    drm/amdgpu: Fix with right return code '-EIO' in 'amdgpu_gmc_vram_checking()'
    
    The amdgpu_gmc_vram_checking() function in emulation checks whether
    all of the memory range of shared system memory could be accessed by
    GPU, from this aspect, -EIO is returned for error scenarios.
    
    Fixes the below:
    drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c:919 gmc_v6_0_hw_init() warn: missing error code? 'r'
    drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c:1103 gmc_v7_0_hw_init() warn: missing error code? 'r'
    drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c:1223 gmc_v8_0_hw_init() warn: missing error code? 'r'
    drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c:2344 gmc_v9_0_hw_init() warn: missing error code? 'r'
    
    Cc: Xiaojian Du <Xiaojian.Du at amd.com>
    Cc: Lijo Lazar <lijo.lazar at amd.com>
    Cc: Christian König <christian.koenig at amd.com>
    Cc: Alex Deucher <alexander.deucher at amd.com>
    Signed-off-by: Srinivasan Shanmugam <srinivasan.shanmugam at amd.com>
    Suggested-by: Christian König <christian.koenig at amd.com>
    Reviewed-by: Christian König <christian.koenig at amd.com>
    Signed-off-by: Alex Deucher <alexander.deucher at amd.com>

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index d2f273d77e59..55784a9f26c4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -1045,21 +1045,28 @@ int amdgpu_gmc_vram_checking(struct amdgpu_device *adev)
 	 * seconds, so here, we just pick up three parts for emulation.
 	 */
 	ret = memcmp(vram_ptr, cptr, 10);
-	if (ret)
-		return ret;
+	if (ret) {
+		ret = -EIO;
+		goto release_buffer;
+	}
 
 	ret = memcmp(vram_ptr + (size / 2), cptr, 10);
-	if (ret)
-		return ret;
+	if (ret) {
+		ret = -EIO;
+		goto release_buffer;
+	}
 
 	ret = memcmp(vram_ptr + size - 10, cptr, 10);
-	if (ret)
-		return ret;
+	if (ret) {
+		ret = -EIO;
+		goto release_buffer;
+	}
 
+release_buffer:
 	amdgpu_bo_free_kernel(&vram_bo, &vram_gpu,
 			&vram_ptr);
 
-	return 0;
+	return ret;
 }
 
 static ssize_t current_memory_partition_show(
commit 30d8dffab7d00da7fd13ecdb7d41a1f25ed6a4af
Author: Victor Lu <victorchengchi.lu at amd.com>
Date:   Tue Dec 19 11:55:09 2023 -0500

    drm/amdgpu: Do not program VM_L2_CNTL under SRIOV
    
    VM_L2_CNTL* should not be programmed on driver unload under SRIOV.
    These regs are skipped during SRIOV driver init.
    
    Signed-off-by: Victor Lu <victorchengchi.lu at amd.com>
    Reviewed-by: Vignesh Chander <Vignesh.Chander at amd.com>
    Signed-off-by: Alex Deucher <alexander.deucher at amd.com>

diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c
index 95d06da544e2..49aecdcee006 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c
@@ -456,10 +456,12 @@ static void gfxhub_v1_2_xcc_gart_disable(struct amdgpu_device *adev,
 		WREG32_SOC15_RLC(GC, GET_INST(GC, j), regMC_VM_MX_L1_TLB_CNTL, tmp);
 
 		/* Setup L2 cache */
-		tmp = RREG32_SOC15(GC, GET_INST(GC, j), regVM_L2_CNTL);
-		tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 0);
-		WREG32_SOC15(GC, GET_INST(GC, j), regVM_L2_CNTL, tmp);
-		WREG32_SOC15(GC, GET_INST(GC, j), regVM_L2_CNTL3, 0);
+		if (!amdgpu_sriov_vf(adev)) {
+			tmp = RREG32_SOC15(GC, GET_INST(GC, j), regVM_L2_CNTL);
+			tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 0);
+			WREG32_SOC15(GC, GET_INST(GC, j), regVM_L2_CNTL, tmp);
+			WREG32_SOC15(GC, GET_INST(GC, j), regVM_L2_CNTL3, 0);
+		}
 	}
 }
 
commit 6616b5e1999146b1304abe78232af810080c67e3
Author: Srinivasan Shanmugam <srinivasan.shanmugam at amd.com>
Date:   Fri Jan 5 12:05:09 2024 +0530

    drm/amd/powerplay: Fix kzalloc parameter 'ATOM_Tonga_PPM_Table' in 'get_platform_power_management_table()'
    
    In 'struct phm_ppm_table *ptr' allocation using kzalloc, an incorrect
    structure type is passed to sizeof() in kzalloc, larger structure types
    were used, thus using correct type 'struct phm_ppm_table' fixes the
    below:
    
    drivers/gpu/drm/amd/amdgpu/../pm/powerplay/hwmgr/process_pptables_v1_0.c:203 get_platform_power_management_table() warn: struct type mismatch 'phm_ppm_table vs _ATOM_Tonga_PPM_Table'
    
    Cc: Eric Huang <JinHuiEric.Huang at amd.com>
    Cc: Christian König <christian.koenig at amd.com>
    Cc: Alex Deucher <alexander.deucher at amd.com>
    Signed-off-by: Srinivasan Shanmugam <srinivasan.shanmugam at amd.com>
    Acked-by: Alex Deucher <alexander.deucher at amd.com>
    Signed-off-by: Alex Deucher <alexander.deucher at amd.com>

diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/process_pptables_v1_0.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/process_pptables_v1_0.c
index f2a55c1413f5..17882f8dfdd3 100644
--- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/process_pptables_v1_0.c
+++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/process_pptables_v1_0.c
@@ -200,7 +200,7 @@ static int get_platform_power_management_table(
 		struct pp_hwmgr *hwmgr,
 		ATOM_Tonga_PPM_Table *atom_ppm_table)
 {
-	struct phm_ppm_table *ptr = kzalloc(sizeof(ATOM_Tonga_PPM_Table), GFP_KERNEL);
+	struct phm_ppm_table *ptr = kzalloc(sizeof(*ptr), GFP_KERNEL);
 	struct phm_ppt_v1_information *pp_table_information =
 		(struct phm_ppt_v1_information *)(hwmgr->pptable);
 
commit c9edcc1864f8529fd24441da40a1275232b5efc4
Author: Yifan Zhang <yifan1.zhang at amd.com>
Date:   Mon Jan 8 16:05:27 2024 +0800

    drm/amdgpu: update ATHUB_MISC_CNTL offset for athub v3.3
    
    This patch to update ATHUB_MISC_CNTL offset for athub v3.3
    
    v2: correct a typo (Tim)
    v3: correct patch title (Lang)
    
    Signed-off-by: Yifan Zhang <yifan1.zhang at amd.com>
    Acked-by: Alex Deucher <alexander.deucher at amd.com>
    Reviewed-by: Tim Huang <Tim.Huang at amd.com>
    Signed-off-by: Alex Deucher <alexander.deucher at amd.com>

diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v3_0.c b/drivers/gpu/drm/amd/amdgpu/athub_v3_0.c
index f0737fb3a999..d1bba9c64e16 100644
--- a/drivers/gpu/drm/amd/amdgpu/athub_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/athub_v3_0.c
@@ -30,6 +30,8 @@
 
 #define regATHUB_MISC_CNTL_V3_0_1			0x00d7
 #define regATHUB_MISC_CNTL_V3_0_1_BASE_IDX		0
+#define regATHUB_MISC_CNTL_V3_3_0			0x00d8
+#define regATHUB_MISC_CNTL_V3_3_0_BASE_IDX		0
 
 
 static uint32_t athub_v3_0_get_cg_cntl(struct amdgpu_device *adev)
@@ -40,6 +42,9 @@ static uint32_t athub_v3_0_get_cg_cntl(struct amdgpu_device *adev)
 	case IP_VERSION(3, 0, 1):
 		data = RREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL_V3_0_1);
 		break;
+	case IP_VERSION(3, 3, 0):
+		data = RREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL_V3_3_0);
+		break;
 	default:
 		data = RREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL);
 		break;
@@ -53,6 +58,9 @@ static void athub_v3_0_set_cg_cntl(struct amdgpu_device *adev, uint32_t data)
 	case IP_VERSION(3, 0, 1):
 		WREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL_V3_0_1, data);
 		break;
+	case IP_VERSION(3, 3, 0):
+		WREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL_V3_3_0, data);
+		break;
 	default:
 		WREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL, data);
 		break;
commit 8f8cb7124e86c68ab09aa446664192d3829a40be
Author: Yifan Zhang <yifan1.zhang at amd.com>
Date:   Mon Jan 8 15:46:12 2024 +0800

    drm/amdgpu: update headers for nbio v7.11
    
    This patch is to update headers for nbio v7.11.
    
    Signed-off-by: Yifan Zhang <yifan1.zhang at amd.com>
    Acked-by: Alex Deucher <alexander.deucher at amd.com>
    Reviewed-by: Tim Huang <Tim.Huang at amd.com>
    Signed-off-by: Alex Deucher <alexander.deucher at amd.com>

diff --git a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_11_0_offset.h b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_11_0_offset.h
index 7ee3d291120d..6f80bfa7e41a 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_11_0_offset.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_11_0_offset.h
@@ -8707,10 +8707,10 @@
 #define regBIF_BX1_MM_CFGREGS_CNTL_BASE_IDX                                                             2
 #define regBIF_BX1_BX_RESET_CNTL                                                                        0x00f0
 #define regBIF_BX1_BX_RESET_CNTL_BASE_IDX                                                               2
-#define regBIF_BX1_INTERRUPT_CNTL                                                                       0x8e11
-#define regBIF_BX1_INTERRUPT_CNTL_BASE_IDX                                                              5
-#define regBIF_BX1_INTERRUPT_CNTL2                                                                      0x8e12
-#define regBIF_BX1_INTERRUPT_CNTL2_BASE_IDX                                                             5
+#define regBIF_BX1_INTERRUPT_CNTL                                                                       0x00f1
+#define regBIF_BX1_INTERRUPT_CNTL_BASE_IDX                                                              2
+#define regBIF_BX1_INTERRUPT_CNTL2                                                                      0x00f2
+#define regBIF_BX1_INTERRUPT_CNTL2_BASE_IDX                                                             2
 #define regBIF_BX1_CLKREQB_PAD_CNTL                                                                     0x00f8
 #define regBIF_BX1_CLKREQB_PAD_CNTL_BASE_IDX                                                            2
 #define regBIF_BX1_BIF_FEATURES_CONTROL_MISC                                                            0x00fb
commit 6127d7df4a5b66783da5a55ff60b3920a9c315a2
Author: Alex Deucher <alexander.deucher at amd.com>
Date:   Thu Dec 7 10:36:56 2023 -0500

    drm/amdgpu/pm: clarify debugfs pm output
    
    On APUs power is SoC power, not just GPU.
    Clarify that for UVD/VCE/VCN the IP is powered down,
    not disabled which can confusing and lead to concerns
    that the IP is actually not available.
    
    Reviewed-by: Yang Wang <kevinyang.wang at amd.com>
    Signed-off-by: Alex Deucher <alexander.deucher at amd.com>

diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
index f3cb490fe79b..087d57850304 100644
--- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
@@ -4349,11 +4349,19 @@ static int amdgpu_debugfs_pm_info_pp(struct seq_file *m, struct amdgpu_device *a
 	if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VDDNB, (void *)&value, &size))
 		seq_printf(m, "\t%u mV (VDDNB)\n", value);
 	size = sizeof(uint32_t);
-	if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_AVG_POWER, (void *)&query, &size))
-		seq_printf(m, "\t%u.%02u W (average GPU)\n", query >> 8, query & 0xff);
+	if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_AVG_POWER, (void *)&query, &size)) {
+		if (adev->flags & AMD_IS_APU)
+			seq_printf(m, "\t%u.%02u W (average SoC including CPU)\n", query >> 8, query & 0xff);
+		else
+			seq_printf(m, "\t%u.%02u W (average SoC)\n", query >> 8, query & 0xff);
+	}
 	size = sizeof(uint32_t);
-	if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_INPUT_POWER, (void *)&query, &size))
-		seq_printf(m, "\t%u.%02u W (current GPU)\n", query >> 8, query & 0xff);
+	if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_INPUT_POWER, (void *)&query, &size)) {
+		if (adev->flags & AMD_IS_APU)
+			seq_printf(m, "\t%u.%02u W (current SoC including CPU)\n", query >> 8, query & 0xff);
+		else
+			seq_printf(m, "\t%u.%02u W (current SoC)\n", query >> 8, query & 0xff);
+	}
 	size = sizeof(value);
 	seq_printf(m, "\n");
 
@@ -4379,9 +4387,9 @@ static int amdgpu_debugfs_pm_info_pp(struct seq_file *m, struct amdgpu_device *a
 		/* VCN clocks */
 		if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VCN_POWER_STATE, (void *)&value, &size)) {
 			if (!value) {
-				seq_printf(m, "VCN: Disabled\n");
+				seq_printf(m, "VCN: Powered down\n");
 			} else {
-				seq_printf(m, "VCN: Enabled\n");
+				seq_printf(m, "VCN: Powered up\n");
 				if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_UVD_DCLK, (void *)&value, &size))
 					seq_printf(m, "\t%u MHz (DCLK)\n", value/100);
 				if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_UVD_VCLK, (void *)&value, &size))
@@ -4393,9 +4401,9 @@ static int amdgpu_debugfs_pm_info_pp(struct seq_file *m, struct amdgpu_device *a
 		/* UVD clocks */
 		if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_UVD_POWER, (void *)&value, &size)) {
 			if (!value) {
-				seq_printf(m, "UVD: Disabled\n");
+				seq_printf(m, "UVD: Powered down\n");
 			} else {
-				seq_printf(m, "UVD: Enabled\n");
+				seq_printf(m, "UVD: Powered up\n");
 				if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_UVD_DCLK, (void *)&value, &size))
 					seq_printf(m, "\t%u MHz (DCLK)\n", value/100);
 				if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_UVD_VCLK, (void *)&value, &size))
@@ -4407,9 +4415,9 @@ static int amdgpu_debugfs_pm_info_pp(struct seq_file *m, struct amdgpu_device *a
 		/* VCE clocks */
 		if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VCE_POWER, (void *)&value, &size)) {
 			if (!value) {
-				seq_printf(m, "VCE: Disabled\n");
+				seq_printf(m, "VCE: Powered down\n");
 			} else {
-				seq_printf(m, "VCE: Enabled\n");
+				seq_printf(m, "VCE: Powered up\n");
 				if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VCE_ECCLK, (void *)&value, &size))
 					seq_printf(m, "\t%u MHz (ECCLK)\n", value/100);
 			}
commit d02069850fc102b07ae923535d5e212f2c8a34e9
Author: Alex Deucher <alexander.deucher at amd.com>
Date:   Mon Oct 2 14:43:06 2023 -0400

    drm/amdgpu: fall back to INPUT power for AVG power via INFO IOCTL
    
    For backwards compatibility with userspace.
    
    Fixes: 47f1724db4fe ("drm/amd: Introduce `AMDGPU_PP_SENSOR_GPU_INPUT_POWER`")
    Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2897
    Reviewed-by: Yang Wang <kevinyang.wang at amd.com>
    Signed-off-by: Alex Deucher <alexander.deucher at amd.com>

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index b5ebafd4a3ad..bf4f48fe438d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -1105,7 +1105,12 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 			if (amdgpu_dpm_read_sensor(adev,
 						   AMDGPU_PP_SENSOR_GPU_AVG_POWER,
 						   (void *)&ui32, &ui32_size)) {
-				return -EINVAL;
+				/* fall back to input power for backwards compat */
+				if (amdgpu_dpm_read_sensor(adev,
+							   AMDGPU_PP_SENSOR_GPU_INPUT_POWER,
+							   (void *)&ui32, &ui32_size)) {
+					return -EINVAL;
+				}
 			}
 			ui32 >>= 8;
 			break;
commit 25852d4b97572ff62ffee574cb8bb4bc551af23a
Author: Alex Deucher <alexander.deucher at amd.com>
Date:   Mon Oct 2 14:27:13 2023 -0400

    drm/amdgpu: fix avg vs input power reporting on smu7
    
    Hawaii, Bonaire, Fiji, and Tonga support average power, the others
    support current power.
    
    Reviewed-by: Yang Wang <kevinyang.wang at amd.com>
    Signed-off-by: Alex Deucher <alexander.deucher at amd.com>

diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c
index b1a8799e2dee..aa91730e4eaf 100644
--- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c
+++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c
@@ -3999,6 +3999,7 @@ static int smu7_read_sensor(struct pp_hwmgr *hwmgr, int idx,
 	uint32_t sclk, mclk, activity_percent;
 	uint32_t offset, val_vid;
 	struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
+	struct amdgpu_device *adev = hwmgr->adev;
 
 	/* size must be at least 4 bytes for all sensors */
 	if (*size < 4)
@@ -4042,7 +4043,21 @@ static int smu7_read_sensor(struct pp_hwmgr *hwmgr, int idx,
 		*size = 4;
 		return 0;
 	case AMDGPU_PP_SENSOR_GPU_INPUT_POWER:
-		return smu7_get_gpu_power(hwmgr, (uint32_t *)value);
+		if ((adev->asic_type != CHIP_HAWAII) &&
+		    (adev->asic_type != CHIP_BONAIRE) &&
+		    (adev->asic_type != CHIP_FIJI) &&
+		    (adev->asic_type != CHIP_TONGA))
+			return smu7_get_gpu_power(hwmgr, (uint32_t *)value);
+		else
+			return -EOPNOTSUPP;
+	case AMDGPU_PP_SENSOR_GPU_AVG_POWER:
+		if ((adev->asic_type != CHIP_HAWAII) &&
+		    (adev->asic_type != CHIP_BONAIRE) &&
+		    (adev->asic_type != CHIP_FIJI) &&
+		    (adev->asic_type != CHIP_TONGA))
+			return -EOPNOTSUPP;
+		else
+			return smu7_get_gpu_power(hwmgr, (uint32_t *)value);
 	case AMDGPU_PP_SENSOR_VDDGFX:
 		if ((data->vr_config & VRCONF_VDDGFX_MASK) ==
 		    (VR_SVI2_PLANE_2 << VRCONF_VDDGFX_SHIFT))
commit 02eed83abc1395a1207591aafad9bcfc5cb1abcb
Author: Dafna Hirschfeld <dhirschfeld at habana.ai>
Date:   Sun Jan 7 15:07:00 2024 +0200

    drm/amdkfd: fixes for HMM mem allocation
    
    Fix err return value and reset pgmap->type after checking it.
    
    Fixes: c83dee9b6394 ("drm/amdkfd: add SPM support for SVM")
    Reviewed-by: Felix Kuehling <felix.kuehling at amd.com>
    Signed-off-by: Dafna Hirschfeld <dhirschfeld at habana.ai>
    Signed-off-by: Felix Kuehling <felix.kuehling at amd.com>
    Signed-off-by: Alex Deucher <alexander.deucher at amd.com>

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index d630100b9e91..f856901055d3 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -1026,7 +1026,7 @@ int kgd2kfd_init_zone_device(struct amdgpu_device *adev)
 	} else {
 		res = devm_request_free_mem_region(adev->dev, &iomem_resource, size);
 		if (IS_ERR(res))
-			return -ENOMEM;
+			return PTR_ERR(res);
 		pgmap->range.start = res->start;
 		pgmap->range.end = res->end;
 		pgmap->type = MEMORY_DEVICE_PRIVATE;
@@ -1042,10 +1042,10 @@ int kgd2kfd_init_zone_device(struct amdgpu_device *adev)
 	r = devm_memremap_pages(adev->dev, pgmap);
 	if (IS_ERR(r)) {
 		pr_err("failed to register HMM device memory\n");
-		/* Disable SVM support capability */
-		pgmap->type = 0;
 		if (pgmap->type == MEMORY_DEVICE_PRIVATE)
 			devm_release_mem_region(adev->dev, res->start, resource_size(res));
+		/* Disable SVM support capability */
+		pgmap->type = 0;
 		return PTR_ERR(r);
 	}
 
commit bf3ff145df184698a8a80b33265064638572366f
Author: Jani Nikula <jani.nikula at intel.com>
Date:   Thu Jan 11 12:47:16 2024 +0200

    drm/xe: display support should not depend on EXPERT
    
    Remove the DRM_XE_DISPLAY config dependency on EXPERT. I can only
    presume the idea was only experts should be able to disable it, but the
    effect is the opposite.
    
    Reported-by: Eero Tamminen <eero.t.tamminen at intel.com>
    Reviewed-by: Francois Dugast <francois.dugast at intel.com>
    Signed-off-by: Jani Nikula <jani.nikula at intel.com>
    Link: https://patchwork.freedesktop.org/patch/msgid/20240111104716.3548744-1-jani.nikula@intel.com
    (cherry picked from commit 1c7531f50eaa425eca8ff726287b8df3a4a51e55)
    Signed-off-by: Thomas Hellström <thomas.hellstrom at linux.intel.com>

diff --git a/drivers/gpu/drm/xe/Kconfig b/drivers/gpu/drm/xe/Kconfig
index 1cced50d8d8c..e36ae1f0d885 100644
--- a/drivers/gpu/drm/xe/Kconfig
+++ b/drivers/gpu/drm/xe/Kconfig
@@ -47,7 +47,7 @@ config DRM_XE
 
 config DRM_XE_DISPLAY
 	bool "Enable display support"
-	depends on DRM_XE && EXPERT && DRM_XE=m
+	depends on DRM_XE && DRM_XE=m
 	select FB_IOMEM_HELPERS
 	select I2C
 	select I2C_ALGOBIT
commit 8049e3954aeaaeb488cd4e371526721c7fca297e
Author: Brian Welty <brian.welty at intel.com>
Date:   Wed Jan 10 16:21:11 2024 -0800

    drm/xe: Fix bounds checking in __xe_bo_placement_for_flags()
    
    Requesting all memory regions on PVC will fill bo->placements up to
    XE_BO_MAX_PLACEMENTS. The subsequent call to try_add_stolen() will trip
    over the bounds checking even though XE_PL_STOLEN is not expected to
    be used in this case.
    
    This is hit with igt at xe_exec_fault_mode@once-basic-prefetch:
        xe 0000:8c:00.0: [drm] Assertion `*c < (sizeof(bo->placements) / sizeof((bo->placements)[0]) + ((int)(sizeof(struct { int:(-!!(__builtin_types_compatible_p(typeof((bo->placements)), typeof(&(bo->placements)[0])))); }))))` failed!
        WARNING: CPU: 30 PID: 6161 at drivers/gpu/drm/xe/xe_bo.c:203 __xe_bo_placement_for_flags+0x218/0x240 [xe]
    
    Is fixed here by moving the bounds checks closer to where we actually
    write into the bo->placement array.
    
    Fixes: 8c54ee8a8606 ("drm/xe: Ensure that we don't access the placements array out-of-bounds")
    Link: https://patchwork.freedesktop.org/patch/msgid/20240111002111.10190-1-brian.welty@intel.com
    Signed-off-by: Matthew Brost <matthew.brost at intel.com>
    Signed-off-by: Brian Welty <brian.welty at intel.com>
    Reviewed-by: Matthew Brost <matthew.brost at intel.com>
    (cherry picked from commit 52e3fa3e3ea3ee05e32c1a8d72bb3ae306a4da64)
    Signed-off-by: Thomas Hellström <thomas.hellstrom at linux.intel.com>

diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index 3cd29bd015a0..0b0e262e2166 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -125,9 +125,9 @@ static struct xe_mem_region *res_to_mem_region(struct ttm_resource *res)
 static void try_add_system(struct xe_device *xe, struct xe_bo *bo,
 			   u32 bo_flags, u32 *c)
 {
-	xe_assert(xe, *c < ARRAY_SIZE(bo->placements));
-
 	if (bo_flags & XE_BO_CREATE_SYSTEM_BIT) {
+		xe_assert(xe, *c < ARRAY_SIZE(bo->placements));
+
 		bo->placements[*c] = (struct ttm_place) {
 			.mem_type = XE_PL_TT,
 		};
@@ -145,6 +145,8 @@ static void add_vram(struct xe_device *xe, struct xe_bo *bo,
 	struct xe_mem_region *vram;
 	u64 io_size;
 
+	xe_assert(xe, *c < ARRAY_SIZE(bo->placements));
+
 	vram = to_xe_ttm_vram_mgr(ttm_manager_type(&xe->ttm, mem_type))->vram;
 	xe_assert(xe, vram && vram->usable_size);
 	io_size = vram->io_size;
@@ -175,8 +177,6 @@ static void add_vram(struct xe_device *xe, struct xe_bo *bo,
 static void try_add_vram(struct xe_device *xe, struct xe_bo *bo,
 			 u32 bo_flags, u32 *c)
 {
-	xe_assert(xe, *c < ARRAY_SIZE(bo->placements));
-
 	if (bo->props.preferred_gt == XE_GT1) {
 		if (bo_flags & XE_BO_CREATE_VRAM1_BIT)
 			add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM1, c);
@@ -193,9 +193,9 @@ static void try_add_vram(struct xe_device *xe, struct xe_bo *bo,
 static void try_add_stolen(struct xe_device *xe, struct xe_bo *bo,
 			   u32 bo_flags, u32 *c)
 {
-	xe_assert(xe, *c < ARRAY_SIZE(bo->placements));
-
 	if (bo_flags & XE_BO_CREATE_STOLEN_BIT) {
+		xe_assert(xe, *c < ARRAY_SIZE(bo->placements));
+
 		bo->placements[*c] = (struct ttm_place) {
 			.mem_type = XE_PL_STOLEN,
 			.flags = bo_flags & (XE_BO_CREATE_PINNED_BIT |
commit 7425c43c268f859426d02ccb3f043bdbae31cca9
Author: Thomas Hellström <thomas.hellstrom at linux.intel.com>
Date:   Wed Jan 10 17:34:15 2024 +0100

    drm/xe/migrate: Fix CCS copy for small VRAM copy chunks
    
    Since the migrate code is using the identity map for addressing VRAM,
    copy chunks may become as small as 64K if the VRAM resource is fragmented.
    
    However, a chunk size smaller that 1MiB may lead to the *next* chunk's
    offset into the CCS metadata backup memory may not be page-aligned, and
    the XY_CTRL_SURF_COPY_BLT command can't handle that, and even if it could,
    the current code doesn't handle the offset calculaton correctly.
    
    To fix this, make sure we align the size of VRAM copy chunks to 1MiB. If
    the remaining data to copy is smaller than that, that's not a problem,
    so use the remaining size. If the VRAM copy cunk becomes fragmented due
    to the size alignment restriction, don't use the identity map, but instead
    emit PTEs into the page-table like we do for system memory.
    
    v2:
    - Rebase
    v3:
    - Future proof somewhat by taking into account the real data size to
      flat CCS metadata size ratio. (Matt Roper)
    - Invert a couple of if-statements for better readability.
    - Fix support for 4K-granularity VRAM sizes. (Tested on DG1).
    v4:
    - Fix up code comments
    - Fix debug printout format typo.
    v5:
    - Add a Fixes: tag.
    
    Cc: Matt Roper <matthew.d.roper at intel.com>
    Cc: Matthew Auld <matthew.william.auld at gmail.com>
    Cc: Matthew Brost <matthew.brost at intel.com>
    Fixes: e89b384cde62 ("drm/xe/migrate: Update emit_pte to cope with a size level than 4k")
    Signed-off-by: Thomas Hellström <thomas.hellstrom at linux.intel.com>
    Reviewed-by: Matthew Auld <matthew.auld at intel.com>
    Link: https://patchwork.freedesktop.org/patch/msgid/20240110163415.524165-1-thomas.hellstrom@linux.intel.com
    (cherry picked from commit ef51d7542d143f3fd9a48d4e2c307563661668aa)
    Signed-off-by: Thomas Hellström <thomas.hellstrom at linux.intel.com>

diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c
index 7a32faa2f688..a6523df0f1d3 100644
--- a/drivers/gpu/drm/xe/tests/xe_migrate.c
+++ b/drivers/gpu/drm/xe/tests/xe_migrate.c
@@ -331,7 +331,7 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test)
 		xe_res_first_sg(xe_bo_sg(pt), 0, pt->size, &src_it);
 
 	emit_pte(m, bb, NUM_KERNEL_PDE - 1, xe_bo_is_vram(pt), false,
-		 &src_it, XE_PAGE_SIZE, pt);
+		 &src_it, XE_PAGE_SIZE, pt->ttm.resource);
 
 	run_sanity_job(m, xe, bb, bb->len, "Writing PTE for our fake PT", test);
 
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index 02fca8f9adc2..e05e9e7282b6 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -62,6 +62,8 @@ struct xe_migrate {
 	 * out of the pt_bo.
 	 */
 	struct drm_suballoc_manager vm_update_sa;
+	/** @min_chunk_size: For dgfx, Minimum chunk size */
+	u64 min_chunk_size;
 };
 
 #define MAX_PREEMPTDISABLE_TRANSFER SZ_8M /* Around 1ms. */
@@ -363,6 +365,19 @@ struct xe_migrate *xe_migrate_init(struct xe_tile *tile)
 	if (err)
 		return ERR_PTR(err);
 
+	if (IS_DGFX(xe)) {
+		if (xe_device_has_flat_ccs(xe))
+			/* min chunk size corresponds to 4K of CCS Metadata */
+			m->min_chunk_size = SZ_4K * SZ_64K /
+				xe_device_ccs_bytes(xe, SZ_64K);
+		else
+			/* Somewhat arbitrary to avoid a huge amount of blits */
+			m->min_chunk_size = SZ_64K;
+		m->min_chunk_size = roundup_pow_of_two(m->min_chunk_size);
+		drm_dbg(&xe->drm, "Migrate min chunk size is 0x%08llx\n",
+			(unsigned long long)m->min_chunk_size);
+	}
+
 	return m;
 }
 
@@ -374,16 +389,35 @@ static u64 max_mem_transfer_per_pass(struct xe_device *xe)
 	return MAX_PREEMPTDISABLE_TRANSFER;
 }
 
-static u64 xe_migrate_res_sizes(struct xe_device *xe, struct xe_res_cursor *cur)
+static u64 xe_migrate_res_sizes(struct xe_migrate *m, struct xe_res_cursor *cur)
 {
-	/*
-	 * For VRAM we use identity mapped pages so we are limited to current
-	 * cursor size. For system we program the pages ourselves so we have no
-	 * such limitation.
-	 */
-	return min_t(u64, max_mem_transfer_per_pass(xe),
-		     mem_type_is_vram(cur->mem_type) ? cur->size :
-		     cur->remaining);
+	struct xe_device *xe = tile_to_xe(m->tile);
+	u64 size = min_t(u64, max_mem_transfer_per_pass(xe), cur->remaining);
+
+	if (mem_type_is_vram(cur->mem_type)) {
+		/*
+		 * VRAM we want to blit in chunks with sizes aligned to
+		 * min_chunk_size in order for the offset to CCS metadata to be
+		 * page-aligned. If it's the last chunk it may be smaller.
+		 *
+		 * Another constraint is that we need to limit the blit to
+		 * the VRAM block size, unless size is smaller than
+		 * min_chunk_size.
+		 */
+		u64 chunk = max_t(u64, cur->size, m->min_chunk_size);
+
+		size = min_t(u64, size, chunk);
+		if (size > m->min_chunk_size)
+			size = round_down(size, m->min_chunk_size);
+	}
+
+	return size;
+}
+
+static bool xe_migrate_allow_identity(u64 size, const struct xe_res_cursor *cur)
+{
+	/* If the chunk is not fragmented, allow identity map. */
+	return cur->size >= size;
 }
 
 static u32 pte_update_size(struct xe_migrate *m,
@@ -396,7 +430,12 @@ static u32 pte_update_size(struct xe_migrate *m,
 	u32 cmds = 0;
 
 	*L0_pt = pt_ofs;
-	if (!is_vram) {
+	if (is_vram && xe_migrate_allow_identity(*L0, cur)) {
+		/* Offset into identity map. */
+		*L0_ofs = xe_migrate_vram_ofs(tile_to_xe(m->tile),
+					      cur->start + vram_region_gpu_offset(res));
+		cmds += cmd_size;
+	} else {
 		/* Clip L0 to available size */
 		u64 size = min(*L0, (u64)avail_pts * SZ_2M);
 		u64 num_4k_pages = DIV_ROUND_UP(size, XE_PAGE_SIZE);
@@ -412,11 +451,6 @@ static u32 pte_update_size(struct xe_migrate *m,
 
 		/* Each chunk has a single blit command */
 		cmds += cmd_size;
-	} else {
-		/* Offset into identity map. */
-		*L0_ofs = xe_migrate_vram_ofs(tile_to_xe(m->tile),
-					      cur->start + vram_region_gpu_offset(res));
-		cmds += cmd_size;
 	}
 
 	return cmds;
@@ -426,10 +460,10 @@ static void emit_pte(struct xe_migrate *m,
 		     struct xe_bb *bb, u32 at_pt,
 		     bool is_vram, bool is_comp_pte,
 		     struct xe_res_cursor *cur,
-		     u32 size, struct xe_bo *bo)
+		     u32 size, struct ttm_resource *res)
 {
 	struct xe_device *xe = tile_to_xe(m->tile);
-
+	struct xe_vm *vm = m->q->vm;
 	u16 pat_index;
 	u32 ptes;
 	u64 ofs = at_pt * XE_PAGE_SIZE;
@@ -442,13 +476,6 @@ static void emit_pte(struct xe_migrate *m,
 	else
 		pat_index = xe->pat.idx[XE_CACHE_WB];
 
-	/*
-	 * FIXME: Emitting VRAM PTEs to L0 PTs is forbidden. Currently
-	 * we're only emitting VRAM PTEs during sanity tests, so when
-	 * that's moved to a Kunit test, we should condition VRAM PTEs
-	 * on running tests.
-	 */
-
 	ptes = DIV_ROUND_UP(size, XE_PAGE_SIZE);
 
 	while (ptes) {
@@ -468,20 +495,22 @@ static void emit_pte(struct xe_migrate *m,
 
 			addr = xe_res_dma(cur) & PAGE_MASK;
 			if (is_vram) {
-				/* Is this a 64K PTE entry? */
-				if ((m->q->vm->flags & XE_VM_FLAG_64K) &&
-				    !(cur_ofs & (16 * 8 - 1))) {
-					xe_tile_assert(m->tile, IS_ALIGNED(addr, SZ_64K));
+				if (vm->flags & XE_VM_FLAG_64K) {
+					u64 va = cur_ofs * XE_PAGE_SIZE / 8;
+
+					xe_assert(xe, (va & (SZ_64K - 1)) ==
+						  (addr & (SZ_64K - 1)));
+
 					flags |= XE_PTE_PS64;
 				}
 
-				addr += vram_region_gpu_offset(bo->ttm.resource);
+				addr += vram_region_gpu_offset(res);
 				devmem = true;
 			}
 
-			addr = m->q->vm->pt_ops->pte_encode_addr(m->tile->xe,
-								 addr, pat_index,
-								 0, devmem, flags);
+			addr = vm->pt_ops->pte_encode_addr(m->tile->xe,
+							   addr, pat_index,
+							   0, devmem, flags);
 			bb->cs[bb->len++] = lower_32_bits(addr);
 			bb->cs[bb->len++] = upper_32_bits(addr);
 
@@ -693,8 +722,8 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
 		bool usm = xe->info.has_usm;
 		u32 avail_pts = max_mem_transfer_per_pass(xe) / LEVEL0_PAGE_TABLE_ENCODE_SIZE;
 
-		src_L0 = xe_migrate_res_sizes(xe, &src_it);
-		dst_L0 = xe_migrate_res_sizes(xe, &dst_it);
+		src_L0 = xe_migrate_res_sizes(m, &src_it);
+		dst_L0 = xe_migrate_res_sizes(m, &dst_it);
 
 		drm_dbg(&xe->drm, "Pass %u, sizes: %llu & %llu\n",
 			pass++, src_L0, dst_L0);
@@ -715,6 +744,7 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
 						      &ccs_ofs, &ccs_pt, 0,
 						      2 * avail_pts,
 						      avail_pts);
+			xe_assert(xe, IS_ALIGNED(ccs_it.start, PAGE_SIZE));
 		}
 
 		/* Add copy commands size here */
@@ -727,20 +757,20 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
 			goto err_sync;
 		}
 
-		if (!src_is_vram)
-			emit_pte(m, bb, src_L0_pt, src_is_vram, true, &src_it, src_L0,
-				 src_bo);
-		else
+		if (src_is_vram && xe_migrate_allow_identity(src_L0, &src_it))
 			xe_res_next(&src_it, src_L0);
-
-		if (!dst_is_vram)
-			emit_pte(m, bb, dst_L0_pt, dst_is_vram, true, &dst_it, src_L0,
-				 dst_bo);
 		else
+			emit_pte(m, bb, src_L0_pt, src_is_vram, true, &src_it, src_L0,
+				 src);
+
+		if (dst_is_vram && xe_migrate_allow_identity(src_L0, &dst_it))
 			xe_res_next(&dst_it, src_L0);
+		else
+			emit_pte(m, bb, dst_L0_pt, dst_is_vram, true, &dst_it, src_L0,
+				 dst);
 
 		if (copy_system_ccs)
-			emit_pte(m, bb, ccs_pt, false, false, &ccs_it, ccs_size, src_bo);
+			emit_pte(m, bb, ccs_pt, false, false, &ccs_it, ccs_size, src);
 
 		bb->cs[bb->len++] = MI_BATCH_BUFFER_END;
 		update_idx = bb->len;
@@ -949,7 +979,7 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
 		bool usm = xe->info.has_usm;
 		u32 avail_pts = max_mem_transfer_per_pass(xe) / LEVEL0_PAGE_TABLE_ENCODE_SIZE;
 
-		clear_L0 = xe_migrate_res_sizes(xe, &src_it);
+		clear_L0 = xe_migrate_res_sizes(m, &src_it);
 
 		drm_dbg(&xe->drm, "Pass %u, size: %llu\n", pass++, clear_L0);
 
@@ -976,12 +1006,12 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
 
 		size -= clear_L0;
 		/* Preemption is enabled again by the ring ops. */
-		if (!clear_vram) {
-			emit_pte(m, bb, clear_L0_pt, clear_vram, true, &src_it, clear_L0,
-				 bo);
-		} else {
+		if (clear_vram && xe_migrate_allow_identity(clear_L0, &src_it))
 			xe_res_next(&src_it, clear_L0);
-		}
+		else
+			emit_pte(m, bb, clear_L0_pt, clear_vram, true, &src_it, clear_L0,
+				 dst);
+
 		bb->cs[bb->len++] = MI_BATCH_BUFFER_END;
 		update_idx = bb->len;
 
commit ec32f4f1bed87f0b87b9b0091231c8685db1138c
Author: Dan Carpenter <dan.carpenter at linaro.org>
Date:   Fri Jan 5 15:20:22 2024 +0300

    drm/xe: unlock on error path in xe_vm_add_compute_exec_queue()
    
    Drop the "&vm->lock" before returning.
    
    Fixes: 24f947d58fe5 ("drm/xe: Use DRM GPUVM helpers for external- and evicted objects")
    Signed-off-by: Dan Carpenter <dan.carpenter at linaro.org>
    Signed-off-by: Matthew Brost <matthew.brost at intel.com>
    Reviewed-by: Matthew Brost <matthew.brost at intel.com>
    (cherry picked from commit cf46019e8550a810cc023af7aa020ba43103b44d)
    Signed-off-by: Thomas Hellström <thomas.hellstrom at linux.intel.com>

diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index b0e3cab6a584..10b6995fbf29 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -335,13 +335,13 @@ int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
 	down_write(&vm->lock);
 	err = drm_gpuvm_exec_lock(&vm_exec);
 	if (err)
-		return err;
+		goto out_up_write;
 
 	pfence = xe_preempt_fence_create(q, q->compute.context,
 					 ++q->compute.seqno);
 	if (!pfence) {
 		err = -ENOMEM;
-		goto out_unlock;
+		goto out_fini;
 	}
 
 	list_add(&q->compute.link, &vm->preempt.exec_queues);
@@ -364,8 +364,9 @@ int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
 
 	up_read(&vm->userptr.notifier_lock);
 
-out_unlock:
+out_fini:
 	drm_exec_fini(exec);
+out_up_write:
 	up_write(&vm->lock);
 
 	return err;
commit 616576df35193bbadac31dc42a32d5943e183f45
Author: Dan Carpenter <dan.carpenter at linaro.org>
Date:   Fri Jan 5 15:20:35 2024 +0300

    drm/xe/selftests: Fix an error pointer dereference bug
    
    Check if "bo" is an error pointer before calling xe_bo_lock() on it.
    
    Fixes: d6abc18d6693 ("drm/xe/xe2: Modify xe_bo_test for system memory")
    Signed-off-by: Dan Carpenter <dan.carpenter at linaro.org>
    Signed-off-by: Matthew Brost <matthew.brost at intel.com>
    Reviewed-by: Matthew Brost <matthew.brost at intel.com>
    (cherry picked from commit 88ec23528b32ddb9ce2e8492f2629b0056353697)
    Signed-off-by: Thomas Hellström <thomas.hellstrom at linux.intel.com>

diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c
index 412b2e7ce40c..3436fd9cf2b2 100644
--- a/drivers/gpu/drm/xe/tests/xe_bo.c
+++ b/drivers/gpu/drm/xe/tests/xe_bo.c
@@ -125,14 +125,13 @@ static void ccs_test_run_tile(struct xe_device *xe, struct xe_tile *tile,
 
 	bo = xe_bo_create_user(xe, NULL, NULL, SZ_1M, DRM_XE_GEM_CPU_CACHING_WC,
 			       ttm_bo_type_device, bo_flags);
-
-	xe_bo_lock(bo, false);
-
 	if (IS_ERR(bo)) {
 		KUNIT_FAIL(test, "Failed to create bo.\n");
 		return;
 	}
 
+	xe_bo_lock(bo, false);
+
 	kunit_info(test, "Verifying that CCS data is cleared on creation.\n");
 	ret = ccs_test_migrate(tile, bo, false, 0ULL, 0xdeadbeefdeadbeefULL,
 			       test);
commit ffd915e41a4a2277fd8041dc77603df59acf3e01
Author: Dan Carpenter <dan.carpenter at linaro.org>
Date:   Fri Jan 5 15:22:23 2024 +0300

    drm/xe/device: clean up on error in probe()
    
    This error path should clean up before returning.
    
    Smatch detected this bug:
      drivers/gpu/drm/xe/xe_device.c:487 xe_device_probe() warn: missing unwind goto?
    
    Fixes: 4cb12b71923b ("drm/xe/xe2: Determine bios enablement for flat ccs on igfx")
    Signed-off-by: Dan Carpenter <dan.carpenter at linaro.org>
    Signed-off-by: Matthew Brost <matthew.brost at intel.com>
    Reviewed-by: Matthew Brost <matthew.brost at intel.com>
    (cherry picked from commit c10da95afa68060e13c5f920d96671943a7e54d9)
    Signed-off-by: Thomas Hellström <thomas.hellstrom at linux.intel.com>

diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index d9ae77fe7382..b8d8da546670 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -484,7 +484,7 @@ int xe_device_probe(struct xe_device *xe)
 
 	err = xe_device_set_has_flat_ccs(xe);
 	if (err)
-		return err;
+		goto err_irq_shutdown;
 
 	err = xe_mmio_probe_vram(xe);
 	if (err)
commit 190db3b1da8f40131d6153de7469abce16766302
Author: Paul E. McKenney <paulmck at kernel.org>
Date:   Wed Jan 10 06:48:29 2024 -0800

    drm/xe: Fix build bug for GCC 11
    
    Building drivers/gpu/drm/xe/xe_gt_pagefault.c with GCC 11 results
    in the following build errors:
    
    ./include/linux/fortify-string.h:57:33: error: writing 16 bytes into a region of size 0 [-Werror=stringop-overflow=]
       57 | #define __underlying_memcpy     __builtin_memcpy
          |                                 ^
    ./include/linux/fortify-string.h:644:9: note: in expansion of macro ‘__underlying_memcpy’
      644 |         __underlying_##op(p, q, __fortify_size);                        \
          |         ^~~~~~~~~~~~~
    ./include/linux/fortify-string.h:689:26: note: in expansion of macro ‘__fortify_memcpy_chk’
      689 | #define memcpy(p, q, s)  __fortify_memcpy_chk(p, q, s,                  \
          |                          ^~~~~~~~~~~~~~~~~~~~
    drivers/gpu/drm/xe/xe_gt_pagefault.c:340:17: note: in expansion of macro ‘memcpy’
      340 |                 memcpy(pf_queue->data + pf_queue->tail, msg, len * sizeof(u32));
          |                 ^~~~~~
    In file included from drivers/gpu/drm/xe/xe_device_types.h:17,
                     from drivers/gpu/drm/xe/xe_vm_types.h:16,
                     from drivers/gpu/drm/xe/xe_bo.h:13,
                     from drivers/gpu/drm/xe/xe_gt_pagefault.c:16:
    drivers/gpu/drm/xe/xe_gt_types.h:102:25: note: at offset [1144, 265324] into destination object ‘tile’ of size 8
      102 |         struct xe_tile *tile;
          |                         ^~~~
    
    Fix these by removing -Wstringop-overflow from drm/xe builds.
    
    Closes: https://lore.kernel.org/all/45ad1d0f-a10f-483e-848a-76a30252edbe@paulmck-laptop/
    Fixes: 7a8bc11782d3 ("drm/xe: Enable W=1 warnings by default")
    Suggested-by: Stephen Rothwell <sfr at rothwell.id.au>
    Signed-off-by: Paul E. McKenney <paulmck at kernel.org>
    [ This particular warning is broken on GCC11. In future changes it will
      be moved to the normal C flags in the top level Makefile (out of
      Makefile.extrawarn), but accounting for the compiler support. Just
      remove it out of xe's forced extra warnings for now ]
    Signed-off-by: Lucas De Marchi <lucas.demarchi at intel.com>
    (cherry picked from commit a109d19992294736abd4f4232ea639e03eb1f9e7)
    Signed-off-by: Thomas Hellström <thomas.hellstrom at linux.intel.com>

diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
index 53bd2a8ba1ae..efcf0ab7a1a6 100644
--- a/drivers/gpu/drm/xe/Makefile
+++ b/drivers/gpu/drm/xe/Makefile
@@ -17,7 +17,6 @@ subdir-ccflags-y += $(call cc-option, -Wunused-const-variable)
 subdir-ccflags-y += $(call cc-option, -Wpacked-not-aligned)
 subdir-ccflags-y += $(call cc-option, -Wformat-overflow)
 subdir-ccflags-y += $(call cc-option, -Wformat-truncation)
-subdir-ccflags-y += $(call cc-option, -Wstringop-overflow)
 subdir-ccflags-y += $(call cc-option, -Wstringop-truncation)
 # The following turn off the warnings enabled by -Wextra
 ifeq ($(findstring 2, $(KBUILD_EXTRA_WARN)),)
commit 23ca3d2fe367794d2816530fa6b141339fddc1c6
Author: Vinay Belgaumkar <vinay.belgaumkar at intel.com>
Date:   Mon Jan 8 14:58:42 2024 -0800

    drm/xe: Check skip_guc_pc before setting SLPC flag
    
    Don't set SLPC GuC feature ctl flag if skip_guc_pc is true.
    
    v2: Skip the freq related sysfs creation as well (Badal)
    v3: Remove unnecessary parenthesis (Lucas)
    
    Fixes: 975e4a3795d4 ("drm/xe: Manually setup C6 when skip_guc_pc is set")
    Fixes: bef52b5c7a19 ("drm/xe: Create a xe_gt_freq component for raw management and sysfs")
    Reviewed-by: Lucas De Marchi <lucas.demarchi at intel.com>
    Signed-off-by: Vinay Belgaumkar <vinay.belgaumkar at intel.com>
    Link: https://lore.kernel.org/r/20240108225842.966066-1-vinay.belgaumkar@intel.com
    Signed-off-by: Rodrigo Vivi <rodrigo.vivi at intel.com>
    (cherry picked from commit 69cac0a8f3ef8db4d62441c4a2686ec676c9facd)
    Signed-off-by: Thomas Hellström <thomas.hellstrom at linux.intel.com>

diff --git a/drivers/gpu/drm/xe/xe_gt_freq.c b/drivers/gpu/drm/xe/xe_gt_freq.c
index 3adfa6686e7c..e5b0f4ecdbe8 100644
--- a/drivers/gpu/drm/xe/xe_gt_freq.c
+++ b/drivers/gpu/drm/xe/xe_gt_freq.c
@@ -196,6 +196,9 @@ void xe_gt_freq_init(struct xe_gt *gt)
 	struct xe_device *xe = gt_to_xe(gt);
 	int err;
 
+	if (xe->info.skip_guc_pc)
+		return;
+
 	gt->freq = kobject_create_and_add("freq0", gt->sysfs);
 	if (!gt->freq) {
 		drm_warn(&xe->drm, "failed to add freq0 directory to %s\n",
diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
index 482cb0df9f15..0a61390c64a7 100644
--- a/drivers/gpu/drm/xe/xe_guc.c
+++ b/drivers/gpu/drm/xe/xe_guc.c
@@ -60,7 +60,12 @@ static u32 guc_ctl_debug_flags(struct xe_guc *guc)
 
 static u32 guc_ctl_feature_flags(struct xe_guc *guc)
 {
-	return GUC_CTL_ENABLE_SLPC;
+	u32 flags = 0;
+
+	if (!guc_to_xe(guc)->info.skip_guc_pc)
+		flags |= GUC_CTL_ENABLE_SLPC;
+
+	return flags;
 }
 
 static u32 guc_ctl_log_params_flags(struct xe_guc *guc)
commit 19c02225242498eea9267d444ee1276016368d49
Author: Brian Welty <brian.welty at intel.com>
Date:   Fri Jan 5 11:04:40 2024 -0800

    drm/xe: Fix modifying exec_queue priority in xe_migrate_init
    
    After exec_queue has been created, we cannot simply modify q->priority.
    This needs to be done by the backend via q->ops.  However in this case,
    it would be more efficient to simply pass a flag when creating the
    exec_queue and set the desired priority upfront during queue creation.
    
    To that end: new flag EXEC_QUEUE_FLAG_HIGH_PRIORITY is introduced.
    The priority field is moved to be with other scheduling properties and
    is now exec_queue.sched_props.priority. This is no longer set to initial
    value by the backend, but is now set within __xe_exec_queue_create().
    
    Fixes: b4eecedc75c1 ("drm/xe: Fix potential deadlock handling page faults")
    Signed-off-by: Brian Welty <brian.welty at intel.com>
    Signed-off-by: Matthew Brost <matthew.brost at intel.com>
    Reviewed-by: Matthew Brost <matthew.brost at intel.com>
    (cherry picked from commit a8004af338f6b3319476ecbed63ea49bf393fc1f)
    Signed-off-by: Thomas Hellström <thomas.hellstrom at linux.intel.com>

diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c
index 44fe8097b7cd..bcfc4127c7c5 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue.c
+++ b/drivers/gpu/drm/xe/xe_exec_queue.c
@@ -67,6 +67,11 @@ static struct xe_exec_queue *__xe_exec_queue_create(struct xe_device *xe,
 	q->sched_props.timeslice_us = hwe->eclass->sched_props.timeslice_us;
 	q->sched_props.preempt_timeout_us =
 				hwe->eclass->sched_props.preempt_timeout_us;
+	if (q->flags & EXEC_QUEUE_FLAG_KERNEL &&
+	    q->flags & EXEC_QUEUE_FLAG_HIGH_PRIORITY)
+		q->sched_props.priority = XE_EXEC_QUEUE_PRIORITY_KERNEL;
+	else
+		q->sched_props.priority = XE_EXEC_QUEUE_PRIORITY_NORMAL;
 
 	if (xe_exec_queue_is_parallel(q)) {
 		q->parallel.composite_fence_ctx = dma_fence_context_alloc(1);
diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h
index 3d7e704ec3d9..8d4b7feb8c30 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue_types.h
+++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h
@@ -52,8 +52,6 @@ struct xe_exec_queue {
 	struct xe_vm *vm;
 	/** @class: class of this exec queue */
 	enum xe_engine_class class;
-	/** @priority: priority of this exec queue */
-	enum xe_exec_queue_priority priority;
 	/**
 	 * @logical_mask: logical mask of where job submitted to exec queue can run
 	 */
@@ -84,6 +82,8 @@ struct xe_exec_queue {
 #define EXEC_QUEUE_FLAG_VM			BIT(4)
 /* child of VM queue for multi-tile VM jobs */
 #define EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD	BIT(5)
+/* kernel exec_queue only, set priority to highest level */
+#define EXEC_QUEUE_FLAG_HIGH_PRIORITY		BIT(6)
 
 	/**
 	 * @flags: flags for this exec queue, should statically setup aside from ban
@@ -142,6 +142,8 @@ struct xe_exec_queue {
 		u32 timeslice_us;
 		/** @preempt_timeout_us: preemption timeout in micro-seconds */
 		u32 preempt_timeout_us;
+		/** @priority: priority of this exec queue */
+		enum xe_exec_queue_priority priority;
 	} sched_props;
 
 	/** @compute: compute exec queue state */
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index 5de3ac47c462..54ffcfcdd41f 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -421,7 +421,7 @@ static void init_policies(struct xe_guc *guc, struct xe_exec_queue *q)
 {
 	struct exec_queue_policy policy;
 	struct xe_device *xe = guc_to_xe(guc);
-	enum xe_exec_queue_priority prio = q->priority;
+	enum xe_exec_queue_priority prio = q->sched_props.priority;
 	u32 timeslice_us = q->sched_props.timeslice_us;
 	u32 preempt_timeout_us = q->sched_props.preempt_timeout_us;
 
@@ -1231,7 +1231,6 @@ static int guc_exec_queue_init(struct xe_exec_queue *q)
 	err = xe_sched_entity_init(&ge->entity, sched);
 	if (err)
 		goto err_sched;
-	q->priority = XE_EXEC_QUEUE_PRIORITY_NORMAL;
 
 	if (xe_exec_queue_is_lr(q))
 		INIT_WORK(&q->guc->lr_tdr, xe_guc_exec_queue_lr_cleanup);
@@ -1301,14 +1300,14 @@ static int guc_exec_queue_set_priority(struct xe_exec_queue *q,
 {
 	struct xe_sched_msg *msg;
 
-	if (q->priority == priority || exec_queue_killed_or_banned(q))
+	if (q->sched_props.priority == priority || exec_queue_killed_or_banned(q))
 		return 0;
 
 	msg = kmalloc(sizeof(*msg), GFP_KERNEL);
 	if (!msg)
 		return -ENOMEM;
 
-	q->priority = priority;
+	q->sched_props.priority = priority;
 	guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS);
 
 	return 0;
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index adf1dab5eba2..02fca8f9adc2 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -344,7 +344,8 @@ struct xe_migrate *xe_migrate_init(struct xe_tile *tile)
 
 		m->q = xe_exec_queue_create(xe, vm, logical_mask, 1, hwe,
 					    EXEC_QUEUE_FLAG_KERNEL |
-					    EXEC_QUEUE_FLAG_PERMANENT);
+					    EXEC_QUEUE_FLAG_PERMANENT |
+					    EXEC_QUEUE_FLAG_HIGH_PRIORITY);
 	} else {
 		m->q = xe_exec_queue_create_class(xe, primary_gt, vm,
 						  XE_ENGINE_CLASS_COPY,
@@ -355,8 +356,6 @@ struct xe_migrate *xe_migrate_init(struct xe_tile *tile)
 		xe_vm_close_and_put(vm);
 		return ERR_CAST(m->q);
 	}
-	if (xe->info.has_usm)
-		m->q->priority = XE_EXEC_QUEUE_PRIORITY_KERNEL;
 
 	mutex_init(&m->job_mutex);
 
commit fef257eb6dcb9f39baee9ac44f064cd796ecfd0b
Author: Brian Welty <brian.welty at intel.com>
Date:   Fri Jan 5 11:04:39 2024 -0800

    drm/xe: Fix guc_exec_queue_set_priority
    
    We need to set q->priority prior to calling guc_exec_queue_add_msg() as
    that will call init_policies() and sets the scheduling properties to those
    stored in the exec_queue.
    
    Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs")
    Signed-off-by: Brian Welty <brian.welty at intel.com>
    Signed-off-by: Matthew Brost <matthew.brost at intel.com>
    Reviewed-by: Matthew Brost <matthew.brost at intel.com>
    (cherry picked from commit b16483f9f8120b530327879fa3ea576e897946da)
    Signed-off-by: Thomas Hellström <thomas.hellstrom at linux.intel.com>

diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index 21ac68e3246f..5de3ac47c462 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -1308,8 +1308,8 @@ static int guc_exec_queue_set_priority(struct xe_exec_queue *q,
 	if (!msg)
 		return -ENOMEM;
 
-	guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS);
 	q->priority = priority;
+	guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS);
 
 	return 0;
 }
commit 98949068eb559a31f162ab37f56a89bf6c3698ad
Author: Thomas Hellström <thomas.hellstrom at linux.intel.com>
Date:   Tue Jan 9 12:24:05 2024 +0100

    drm/xe: Annotate xe_ttm_stolen_mgr::mapping with __iomem
    
    The pointer points to IO memory, but the __iomem annotation was
    incorrectly placed. Annotate it correctly, update its usage accordingly
    and fix the corresponding sparse error.
    
    Fixes: d8b52a02cb40 ("drm/xe: Implement stolen memory.")
    Cc: Maarten Lankhorst <maarten.lankhorst at linux.intel.com>
    Cc: Matthew Brost <matthew.brost at intel.com>
    Cc: Rodrigo Vivi <rodrigo.vivi at intel.com>
    Signed-off-by: Thomas Hellström <thomas.hellstrom at linux.intel.com>
    Reviewed-by: Lucas De Marchi <lucas.demarchi at intel.com>
    Link: https://patchwork.freedesktop.org/patch/msgid/20240109112405.108136-5-thomas.hellstrom@linux.intel.com
    (cherry picked from commit dcddb6f0b06d454c9a3b2b240a43f0e7310c7f7c)
    Signed-off-by: Thomas Hellström <thomas.hellstrom at linux.intel.com>

diff --git a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
index d2b00d0bf1e2..e5d7d5e2bec1 100644
--- a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
+++ b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
@@ -31,7 +31,7 @@ struct xe_ttm_stolen_mgr {
 	/* GPU base offset */
 	resource_size_t stolen_base;
 
-	void *__iomem mapping;
+	void __iomem *mapping;
 };
 
 static inline struct xe_ttm_stolen_mgr *
@@ -275,7 +275,7 @@ static int __xe_ttm_stolen_io_mem_reserve_bar2(struct xe_device *xe,
 	drm_WARN_ON(&xe->drm, !(mem->placement & TTM_PL_FLAG_CONTIGUOUS));
 
 	if (mem->placement & TTM_PL_FLAG_CONTIGUOUS && mgr->mapping)
-		mem->bus.addr = (u8 *)mgr->mapping + mem->bus.offset;
+		mem->bus.addr = (u8 __force *)mgr->mapping + mem->bus.offset;
 
 	mem->bus.offset += mgr->io_base;
 	mem->bus.is_iomem = true;
commit 5c63e7574739c034e072dea0e0a6fcbe8d538666
Author: Thomas Hellström <thomas.hellstrom at linux.intel.com>
Date:   Tue Jan 9 12:24:04 2024 +0100

    drm/xe: Annotate multiple mmio pointers with __iomem
    
    There are a couple of pointers pointing to MMIO space. Annotate them
    with __iomem and fix the corresponding sparse warnings.
    
    Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs")
    Fixes: 3b0d4a557996 ("drm/xe: Move register MMIO into xe_tile")
    Fixes: 399a13323f0d ("drm/xe: add 28-bit address support in struct xe_reg")
    Cc: Rodrigo Vivi <rodrigo.vivi at intel.com>
    Cc: Matthew Brost <matthew.brost at intel.com>
    Cc: Lucas De Marchi <lucas.demarchi at intel.com>
    Cc: Matt Roper <matthew.d.roper at intel.com>
    Cc: Koby Elbaz <kelbaz at habana.ai>
    Cc: Ofir Bitton <obitton at habana.ai>
    Cc: Moti Haimovski <mhaimovski at habana.ai>
    Signed-off-by: Thomas Hellström <thomas.hellstrom at linux.intel.com>
    Reviewed-by: Lucas De Marchi <lucas.demarchi at intel.com>
    Reviewed-by: Matt Roper <matthew.d.roper at intel.com>
    Link: https://patchwork.freedesktop.org/patch/msgid/20240109112405.108136-4-thomas.hellstrom@linux.intel.com
    (cherry picked from commit 9d612ee52c6096bc70d43f54921ba2831ffbf1ad)
    Signed-off-by: Thomas Hellström <thomas.hellstrom at linux.intel.com>

diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index 4b38c6bc6c76..5dc9127a2029 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -146,7 +146,7 @@ struct xe_tile {
 		size_t size;
 
 		/** @regs: pointer to tile's MMIO space (starting with registers) */
-		void *regs;
+		void __iomem *regs;
 	} mmio;
 
 	/**
@@ -159,7 +159,7 @@ struct xe_tile {
 		size_t size;
 
 		/** @regs: pointer to tile's additional MMIO-extension space */
-		void *regs;
+		void __iomem *regs;
 	} mmio_ext;
 
 	/** @mem: memory management info for tile */
@@ -301,7 +301,7 @@ struct xe_device {
 		/** @size: size of MMIO space for device */
 		size_t size;
 		/** @regs: pointer to MMIO space for device */
-		void *regs;
+		void __iomem *regs;
 	} mmio;
 
 	/** @mem: memory info for device */
commit 77232e6a28447c2942558d05f1c3115bdf95a9e7
Author: Thomas Hellström <thomas.hellstrom at linux.intel.com>
Date:   Tue Jan 9 12:24:03 2024 +0100

    drm/xe: Annotate xe_mem_region::mapping with __iomem
    
    The pointer points to IO memory, but the __iomem annotation was
    incorrectly placed. Annotate it correctly, update its usage accordingly
    and fix the corresponding sparse error.
    
    Fixes: 0887a2e7ab62 ("drm/xe: Make xe_mem_region struct")
    Cc: Oak Zeng <oak.zeng at intel.com>
    Cc: Michael J. Ruhl <michael.j.ruhl at intel.com>
    Cc: Matthew Brost <matthew.brost at intel.com>
    Cc: Rodrigo Vivi <rodrigo.vivi at intel.com>
    Signed-off-by: Thomas Hellström <thomas.hellstrom at linux.intel.com>
    Reviewed-by: Lucas De Marchi <lucas.demarchi at intel.com>
    Link: https://patchwork.freedesktop.org/patch/msgid/20240109112405.108136-3-thomas.hellstrom@linux.intel.com
    (cherry picked from commit 20855b62a30538361e587cfc7c5245f07d4f826a)
    Signed-off-by: Thomas Hellström <thomas.hellstrom at linux.intel.com>

diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index 8e4a3b1f6b93..3cd29bd015a0 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -442,7 +442,7 @@ static int xe_ttm_io_mem_reserve(struct ttm_device *bdev,
 
 		if (vram->mapping &&
 		    mem->placement & TTM_PL_FLAG_CONTIGUOUS)
-			mem->bus.addr = (u8 *)vram->mapping +
+			mem->bus.addr = (u8 __force *)vram->mapping +
 				mem->bus.offset;
 
 		mem->bus.offset += vram->io_start;
@@ -734,7 +734,7 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
 			/* Create a new VMAP once kernel BO back in VRAM */
 			if (!ret && resource_is_vram(new_mem)) {
 				struct xe_mem_region *vram = res_to_mem_region(new_mem);
-				void *new_addr = vram->mapping +
+				void __iomem *new_addr = vram->mapping +
 					(new_mem->start << PAGE_SHIFT);
 
 				if (XE_WARN_ON(new_mem->start == XE_BO_INVALID_OFFSET)) {
diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index c45ef17b3473..4b38c6bc6c76 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -97,7 +97,7 @@ struct xe_mem_region {
 	 */
 	resource_size_t actual_physical_size;
 	/** @mapping: pointer to VRAM mappable space */
-	void *__iomem mapping;
+	void __iomem *mapping;
 };
 
 /**
commit 3ec276d06698189506f508f87c0f4f17c11e0251
Author: Thomas Hellström <thomas.hellstrom at linux.intel.com>
Date:   Tue Jan 9 12:24:02 2024 +0100

    drm/xe: Use __iomem for the regs pointer
    
    The regs pointer points to IO memory. Annotate it properly and
    fix the corresponding sparse warning.
    
    Fixes: a4e2f3a299ea ("drm/xe: refactor xe_mmio_probe_tiles to support MMIO extension")
    Cc: Koby Elbaz <kelbaz at habana.ai>
    Cc: Ofir Bitton <obitton at habana.ai>
    Cc: Moti Haimovski <mhaimovski at habana.ai>
    Cc: Rodrigo Vivi <rodrigo.vivi at intel.com>
    Signed-off-by: Thomas Hellström <thomas.hellstrom at linux.intel.com>
    Reviewed-by: Lucas De Marchi <lucas.demarchi at intel.com>
    Link: https://patchwork.freedesktop.org/patch/msgid/20240109112405.108136-2-thomas.hellstrom@linux.intel.com
    (cherry picked from commit 9d03bf30e78673d827484bbc17a6fd8f5e43a039)
    Signed-off-by: Thomas Hellström <thomas.hellstrom at linux.intel.com>

diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c
index f660cfb79f50..c8c5d74b6e90 100644
--- a/drivers/gpu/drm/xe/xe_mmio.c
+++ b/drivers/gpu/drm/xe/xe_mmio.c
@@ -303,7 +303,7 @@ void xe_mmio_probe_tiles(struct xe_device *xe)
 	u8 id, tile_count = xe->info.tile_count;
 	struct xe_gt *gt = xe_root_mmio_gt(xe);
 	struct xe_tile *tile;
-	void *regs;
+	void __iomem *regs;
 	u32 mtcfg;
 
 	if (tile_count == 1)
commit 457f4439833487acb18abdd55e95fbb17d43fdca
Author: Thomas Hellström <thomas.hellstrom at linux.intel.com>
Date:   Fri Dec 22 18:59:04 2023 +0100

    drm/xe/vm: Fix an error path
    
    If using the VM_BIND_OP_UNMAP_ALL without any bound vmas for the
    vm, we will end up dereferencing an uninitialized variable and leak a
    bo lock. Fix this.
    
    v2:
    - Updated commit message (Lucas De Marchi)
    
    Reported-by: Dafna Hirschfeld <dhirschfeld at habana.ai>
    Closes: https://lore.kernel.org/intel-xe/jrwua7ckbiozfcaodx4gg2h4taiuxs53j5zlpf3qzvyhyiyl2d@pbs3plurokrj/
    Suggested-by: Dafna Hirschfeld <dhirschfeld at habana.ai>
    Fixes: b06d47be7c83 ("drm/xe: Port Xe to GPUVA")
    Signed-off-by: Thomas Hellström <thomas.hellstrom at linux.intel.com>
    Acked-by: Lucas De Marchi <lucas.demarchi at intel.com>
    Reviewed-by: Lucas De Marchi <lucas.demarchi at intel.com>
    Link: https://patchwork.freedesktop.org/patch/msgid/20231222175904.16732-1-thomas.hellstrom@linux.intel.com
    (cherry picked from commit 9d0c1c5618be02c5acda7e6bbb728007b0632984)
    Signed-off-by: Thomas Hellström <thomas.hellstrom at linux.intel.com>

diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 0cfe7289b97e..b0e3cab6a584 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -2063,9 +2063,11 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo,
 		if (err)
 			return ERR_PTR(err);
 
-		vm_bo = drm_gpuvm_bo_find(&vm->gpuvm, obj);
-		if (!vm_bo)
-			break;
+		vm_bo = drm_gpuvm_bo_obtain(&vm->gpuvm, obj);
+		if (IS_ERR(vm_bo)) {
+			xe_bo_unlock(bo);
+			return ERR_CAST(vm_bo);
+		}
 
 		ops = drm_gpuvm_bo_unmap_ops_create(vm_bo);
 		drm_gpuvm_bo_put(vm_bo);
commit 56c253daabc8bd9dfbae52c3d9e0dd34977347a6
Author: Matthew Brost <matthew.brost at intel.com>
Date:   Thu Jan 4 00:00:39 2024 -0800

    drm/xe: Fix exec IOCTL long running exec queue ring full condition
    
    The intent is to return -EWOULDBLOCK to the user if a long running exec
    queue is full during the exec IOCTL. -EWOULDBLOCK aliases to -EAGAIN
    which results in the exec IOCTL doing a retry loop. Fix this by ensuring
    the retry loop is broken when returning -EWOULDBLOCK.
    
    Fixes: 8ae8a2e8dd21 ("drm/xe: Long running job update")
    Reported-by: Sai Gowtham Ch <sai.gowtham.ch at intel.com>
    Signed-off-by: Matthew Brost <matthew.brost at intel.com>
    Reviewed-by: Brian Welty <brian.welty at intel.com>
    (cherry picked from commit 97d0047cbb17318431eaf37dfe1a6855539340f9)
    Signed-off-by: Thomas Hellström <thomas.hellstrom at linux.intel.com>

diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c
index d30c0d0689bc..b853feed9ccc 100644
--- a/drivers/gpu/drm/xe/xe_exec.c
+++ b/drivers/gpu/drm/xe/xe_exec.c
@@ -115,7 +115,7 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 	struct xe_sched_job *job;
 	struct dma_fence *rebind_fence;
 	struct xe_vm *vm;
-	bool write_locked;
+	bool write_locked, skip_retry = false;
 	ktime_t end = 0;
 	int err = 0;
 
@@ -227,7 +227,8 @@ retry:
 	}
 
 	if (xe_exec_queue_is_lr(q) && xe_exec_queue_ring_full(q)) {
-		err = -EWOULDBLOCK;
+		err = -EWOULDBLOCK;	/* Aliased to -EAGAIN */
+		skip_retry = true;
 		goto err_exec;
 	}
 
@@ -337,7 +338,7 @@ err_unlock_list:
 		up_write(&vm->lock);
 	else
 		up_read(&vm->lock);
-	if (err == -EAGAIN)
+	if (err == -EAGAIN && !skip_retry)
 		goto retry;
 err_syncs:
 	for (i = 0; i < num_syncs; i++)
commit 7b1a8a5fcee4a85be1f540ac0e09761d421e562d
Author: José Roberto de Souza <jose.souza at intel.com>
Date:   Thu Jan 4 08:18:32 2024 -0800

    drm/xe: Fix definition of intel_wakeref_t
    
    i915 defines it as unsigned long so Xe should do the same to avoid
    compilation warnings:
    
      CC [M]  drivers/gpu/drm/i915/i915_gem.o
      CC [M]  drivers/gpu/drm/xe/i915-display/intel_display_power_well.o
    In file included from ./include/drm/drm_mm.h:51,
                     from drivers/gpu/drm/xe/xe_bo_types.h:11,
                     from drivers/gpu/drm/xe/xe_bo.h:11,
                     from ./drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_object.h:11,
                     from ./drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h:15,
                     from drivers/gpu/drm/i915/display/intel_display_power.c:8:
    drivers/gpu/drm/i915/display/intel_display_power.c: In function ‘print_async_put_domains_state’:
    drivers/gpu/drm/i915/display/intel_display_power.c:408:29: warning: format ‘%lu’ expects argument of type ‘long unsigned int’, but argument 5 has type ‘int’ [-Wformat=]
      408 |         drm_dbg(&i915->drm, "async_put_wakeref %lu\n",
          |                             ^~~~~~~~~~~~~~~~~~~~~~~~~
      409 |                 power_domains->async_put_wakeref);
          |                 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
          |                              |
          |                              int
    ./include/drm/drm_print.h:410:39: note: in definition of macro ‘drm_dev_dbg’
      410 |         __drm_dev_dbg(NULL, dev, cat, fmt, ##__VA_ARGS__)
          |                                       ^~~
    ./include/drm/drm_print.h:510:33: note: in expansion of macro ‘drm_dbg_driver’
      510 | #define drm_dbg(drm, fmt, ...)  drm_dbg_driver(drm, fmt, ##__VA_ARGS__)
          |                                 ^~~~~~~~~~~~~~
    drivers/gpu/drm/i915/display/intel_display_power.c:408:9: note: in expansion of macro ‘drm_dbg’
      408 |         drm_dbg(&i915->drm, "async_put_wakeref %lu\n",
          |         ^~~~~~~
    drivers/gpu/drm/i915/display/intel_display_power.c:408:50: note: format string is defined here
      408 |         drm_dbg(&i915->drm, "async_put_wakeref %lu\n",
          |                                                ~~^
          |                                                  |
          |                                                  long unsigned int
          |                                                %u
      CC [M]  drivers/gpu/drm/i915/i915_gem_evict.o
      CC [M]  drivers/gpu/drm/i915/i915_gem_gtt.o
      CC [M]  drivers/gpu/drm/xe/i915-display/intel_display_trace.o
      CC [M]  drivers/gpu/drm/xe/i915-display/intel_display_wa.o
      CC [M]  drivers/gpu/drm/i915/i915_query.o
    
    Fixes: 44e694958b95 ("drm/xe/display: Implement display support")
    Cc: Maarten Lankhorst <maarten.lankhorst at linux.intel.com>
    Reviewed-by: Jani Nikula <jani.nikula at intel.com>
    Signed-off-by: José Roberto de Souza <jose.souza at intel.com>
    (cherry picked from commit fdbadf504375886a0320ac6f84c850322a6b32e1)
    Signed-off-by: Thomas Hellström <thomas.hellstrom at linux.intel.com>

diff --git a/drivers/gpu/drm/xe/compat-i915-headers/intel_wakeref.h b/drivers/gpu/drm/xe/compat-i915-headers/intel_wakeref.h
index 1c5e30cf10ca..ecb1c0707706 100644
--- a/drivers/gpu/drm/xe/compat-i915-headers/intel_wakeref.h
+++ b/drivers/gpu/drm/xe/compat-i915-headers/intel_wakeref.h
@@ -5,4 +5,4 @@
 
 #include <linux/types.h>
 
-typedef bool intel_wakeref_t;
+typedef unsigned long intel_wakeref_t;
commit 7075893d1d68b2b3517be250a02d86e76554ed22
Author: Melissa Wen <mwen at igalia.com>
Date:   Fri Jan 5 21:02:09 2024 -0100

    drm/amd/display: cleanup inconsistent indenting in amdgpu_dm_color
    
    smatch warnings:
    amdgpu_dm_update_plane_color_mgmt() warn: inconsistent indenting
    
    Reported-by: kernel test robot <lkp at intel.com>
    Closes: https://lore.kernel.org/oe-kbuild-all/202401051643.PPdbmG1U-lkp@intel.com/
    Signed-off-by: Melissa Wen <mwen at igalia.com>
    Reviewed-by: Rodrigo Siqueira <Rodrigo.Siqueira at amd.com>
    Signed-off-by: Rodrigo Siqueira <Rodrigo.Siqueira at amd.com>
    Signed-off-by: Alex Deucher <alexander.deucher at amd.com>

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
index 9b527bffe11a..c87b64e464ed 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
@@ -1239,7 +1239,7 @@ int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc,
 	if (has_crtc_cm_degamma && ret != -EINVAL) {
 		drm_dbg_kms(crtc->base.crtc->dev,
 			    "doesn't support plane and CRTC degamma at the same time\n");
-			return -EINVAL;
+		return -EINVAL;
 	}
 
 	/* If we are here, it means we don't have plane degamma settings, check
commit 50e60184bfe72400c49f7806af97edaf693ecd45
Author: James Zhu <James.Zhu at amd.com>
Date:   Tue Jan 2 15:53:01 2024 -0500

    drm/amdgpu: make a correction on comment
    
    Use a generic comment for AMDGPU_VM_RESERVED_VRAM size.
    
    Signed-off-by: James Zhu <James.Zhu at amd.com>
    Reviewed-by: Christian König <christian.koenig at amd.com>
    Signed-off-by: Alex Deucher <alexander.deucher at amd.com>

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index b6cd565562ad..4740dd65b99d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -116,7 +116,7 @@ struct amdgpu_mem_stats;
 #define AMDGPU_VM_FAULT_STOP_FIRST	1
 #define AMDGPU_VM_FAULT_STOP_ALWAYS	2
 
-/* Reserve 4MB VRAM for page tables */
+/* How much VRAM be reserved for page tables */
 #define AMDGPU_VM_RESERVED_VRAM		(8ULL << 20)
 
 /*
commit c2ab9ce0ee7225fc05f58a6671c43b8a3684f530
Author: Ivan Lipski <ivlipski at amd.com>
Date:   Fri Jan 5 19:40:50 2024 -0500

    Revert "drm/amd/display: fix bandwidth validation failure on DCN 2.1"
    
    This commit causes dmesg-warn on several IGT tests on DCN 3.1.6: *ERROR*
    link_enc_cfg_validate: Invalid link encoder assignments - 0x1c
    
    Affected IGT tests include:
    - amdgpu/[amd_assr|amd_plane|amd_hotplug]
    - kms_atomic
    - kms_color
    - kms_flip
    - kms_properties
    - kms_universal_plane
    
    and some other tests
    
    This reverts commit 3a0fa3bc245ef92838a8296e0055569b8dff94c4.
    
    Cc: Melissa Wen <mwen at igalia.com>
    Cc: Hamza Mahfooz <hamza.mahfooz at amd.com>
    Reviewed-by: Rodrigo Siqueira <Rodrigo.Siqueira at amd.com>
    Signed-off-by: Ivan Lipski <ivlipski at amd.com>
    Signed-off-by: Rodrigo Siqueira <Rodrigo.Siqueira at amd.com>
    Signed-off-by: Alex Deucher <alexander.deucher at amd.com>

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index f6575d7dee97..10b2a896c498 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -10753,7 +10753,7 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,
 			DRM_DEBUG_DRIVER("drm_dp_mst_atomic_check() failed\n");
 			goto fail;
 		}
-		status = dc_validate_global_state(dc, dm_state->context, false);
+		status = dc_validate_global_state(dc, dm_state->context, true);
 		if (status != DC_OK) {
 			DRM_DEBUG_DRIVER("DC global validation failure: %s (%d)",
 				       dc_status_to_str(status), status);
commit c147ddc68e741aed78bba796effe049344d87ab8
Author: Felix Kuehling <felix.kuehling at amd.com>
Date:   Wed Jan 3 18:13:04 2024 -0500

    drm/amdkfd: Fix sparse __rcu annotation warnings
    
    Properly mark kfd_process->ef as __rcu and consistently use the right
    accessor functions.
    
    Reported-by: kernel test robot <lkp at intel.com>
    Closes: https://lore.kernel.org/oe-kbuild-all/202312052245.yFpBSgNH-lkp@intel.com/
    Signed-off-by: Felix Kuehling <felix.kuehling at amd.com>
    Reviewed-by: Christian König <christian.koenig at amd.com>
    Signed-off-by: Alex Deucher <alexander.deucher at amd.com>

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index cf6ed5fce291..f262b9d89541 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -311,7 +311,7 @@ void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct kgd_mem *mem);
 int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_device *adev, struct amdgpu_bo *bo);
 
 int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info,
-					    struct dma_fence **ef);
+					    struct dma_fence __rcu **ef);
 int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct amdgpu_device *adev,
 					      struct kfd_vm_fault_info *info);
 int amdgpu_amdkfd_gpuvm_import_dmabuf_fd(struct amdgpu_device *adev, int fd,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index d17b2452cb1f..f183d7faeeec 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -2802,7 +2802,7 @@ unlock_out:
 	put_task_struct(usertask);
 }
 
-static void replace_eviction_fence(struct dma_fence **ef,
+static void replace_eviction_fence(struct dma_fence __rcu **ef,
 				   struct dma_fence *new_ef)
 {
 	struct dma_fence *old_ef = rcu_replace_pointer(*ef, new_ef, true
@@ -2837,7 +2837,7 @@ static void replace_eviction_fence(struct dma_fence **ef,
  * 7.  Add fence to all PD and PT BOs.
  * 8.  Unreserve all BOs
  */
-int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
+int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence __rcu **ef)
 {
 	struct amdkfd_process_info *process_info = info;
 	struct amdgpu_vm *peer_vm;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 745024b31340..17fbedbf3651 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -917,7 +917,7 @@ struct kfd_process {
 	 * fence will be triggered during eviction and new one will be created
 	 * during restore
 	 */
-	struct dma_fence *ef;
+	struct dma_fence __rcu *ef;
 
 	/* Work items for evicting and restoring BOs */
 	struct delayed_work eviction_work;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 71df51fcc1b0..717a60d7a4ea 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -1110,6 +1110,7 @@ static void kfd_process_wq_release(struct work_struct *work)
 {
 	struct kfd_process *p = container_of(work, struct kfd_process,
 					     release_work);
+	struct dma_fence *ef;
 
 	kfd_process_dequeue_from_all_devices(p);
 	pqm_uninit(&p->pqm);
@@ -1118,7 +1119,9 @@ static void kfd_process_wq_release(struct work_struct *work)
 	 * destroyed. This allows any BOs to be freed without
 	 * triggering pointless evictions or waiting for fences.
 	 */
-	dma_fence_signal(p->ef);
+	synchronize_rcu();
+	ef = rcu_access_pointer(p->ef);
+	dma_fence_signal(ef);
 
 	kfd_process_remove_sysfs(p);
 
@@ -1127,7 +1130,7 @@ static void kfd_process_wq_release(struct work_struct *work)
 	svm_range_list_fini(p);
 
 	kfd_process_destroy_pdds(p);
-	dma_fence_put(p->ef);
+	dma_fence_put(ef);
 
 	kfd_event_free_process(p);
 
commit 2a9de42e8d3c82c6990d226198602be44f43f340
Author: Philip Yang <Philip.Yang at amd.com>
Date:   Fri Dec 29 15:19:25 2023 -0500

    drm/amdkfd: Fix lock dependency warning with srcu
    
    ======================================================
    WARNING: possible circular locking dependency detected
    6.5.0-kfd-yangp #2289 Not tainted
    ------------------------------------------------------
    kworker/0:2/996 is trying to acquire lock:
            (srcu){.+.+}-{0:0}, at: __synchronize_srcu+0x5/0x1a0
    
    but task is already holding lock:
            ((work_completion)(&svms->deferred_list_work)){+.+.}-{0:0}, at:
            process_one_work+0x211/0x560
    
    which lock already depends on the new lock.
    
    the existing dependency chain (in reverse order) is:
    
    -> #3 ((work_completion)(&svms->deferred_list_work)){+.+.}-{0:0}:
            __flush_work+0x88/0x4f0
            svm_range_list_lock_and_flush_work+0x3d/0x110 [amdgpu]
            svm_range_set_attr+0xd6/0x14c0 [amdgpu]
            kfd_ioctl+0x1d1/0x630 [amdgpu]
            __x64_sys_ioctl+0x88/0xc0
    
    -> #2 (&info->lock#2){+.+.}-{3:3}:
            __mutex_lock+0x99/0xc70
            amdgpu_amdkfd_gpuvm_restore_process_bos+0x54/0x740 [amdgpu]
            restore_process_helper+0x22/0x80 [amdgpu]
            restore_process_worker+0x2d/0xa0 [amdgpu]
            process_one_work+0x29b/0x560
            worker_thread+0x3d/0x3d0
    
    -> #1 ((work_completion)(&(&process->restore_work)->work)){+.+.}-{0:0}:
            __flush_work+0x88/0x4f0
            __cancel_work_timer+0x12c/0x1c0
            kfd_process_notifier_release_internal+0x37/0x1f0 [amdgpu]
            __mmu_notifier_release+0xad/0x240
            exit_mmap+0x6a/0x3a0
            mmput+0x6a/0x120
            do_exit+0x322/0xb90
            do_group_exit+0x37/0xa0
            __x64_sys_exit_group+0x18/0x20
            do_syscall_64+0x38/0x80
    
    -> #0 (srcu){.+.+}-{0:0}:
            __lock_acquire+0x1521/0x2510
            lock_sync+0x5f/0x90
            __synchronize_srcu+0x4f/0x1a0
            __mmu_notifier_release+0x128/0x240
            exit_mmap+0x6a/0x3a0
            mmput+0x6a/0x120
            svm_range_deferred_list_work+0x19f/0x350 [amdgpu]
            process_one_work+0x29b/0x560
            worker_thread+0x3d/0x3d0
    
    other info that might help us debug this:
    Chain exists of:
      srcu --> &info->lock#2 --> (work_completion)(&svms->deferred_list_work)
    
    Possible unsafe locking scenario:
    
            CPU0                    CPU1
            ----                    ----
            lock((work_completion)(&svms->deferred_list_work));
                            lock(&info->lock#2);
                            lock((work_completion)(&svms->deferred_list_work));
            sync(srcu);
    
    Signed-off-by: Philip Yang <Philip.Yang at amd.com>
    Reviewed-by: Felix Kuehling <felix.kuehling at amd.com>
    Signed-off-by: Alex Deucher <alexander.deucher at amd.com>

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index d46c145835e0..4e9f07c7a937 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -2340,8 +2340,10 @@ retry:
 		mutex_unlock(&svms->lock);
 		mmap_write_unlock(mm);
 
-		/* Pairs with mmget in svm_range_add_list_work */
-		mmput(mm);
+		/* Pairs with mmget in svm_range_add_list_work. If dropping the
+		 * last mm refcount, schedule release work to avoid circular locking
+		 */
+		mmput_async(mm);
 
 		spin_lock(&svms->deferred_list_lock);
 	}
commit 73cb81dc548f154547d9205d5b9603ba10e2a402
Author: Hawking Zhang <Hawking.Zhang at amd.com>
Date:   Fri Dec 29 14:54:35 2023 +0800

    drm/amdgpu: Packed socket_id to ras feature mask
    
    Initialize RAS feature mask bit[31:29] with socket_id.
    
    Signed-off-by: Hawking Zhang <Hawking.Zhang at amd.com>
    Reviewed-by: Tao Zhou <tao.zhou1 at amd.com>
    Signed-off-by: Alex Deucher <alexander.deucher at amd.com>

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 3f7f5c12961d..31823a30dea2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -2919,6 +2919,11 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
 
 	amdgpu_ras_query_poison_mode(adev);
 
+	/* Packed socket_id to ras feature mask bits[31:29] */
+	if (adev->smuio.funcs &&
+	    adev->smuio.funcs->get_socket_id)
+		con->features |= ((adev->smuio.funcs->get_socket_id(adev)) << 29);
+
 	/* Get RAS schema for particular SOC */
 	con->schema = amdgpu_get_ras_schema(adev);
 
commit fb1e91719983c529f85602fdd08c0b7dbf384b1c
Author: Candice Li <candice.li at amd.com>
Date:   Thu Jan 4 10:27:10 2024 +0800

    drm/amdgpu: Support poison error injection via ras_ctrl debugfs
    
    Support poison error injection.
    
    Signed-off-by: Candice Li <candice.li at amd.com>
    Reviewed-by: Tao Zhou <tao.zhou1 at amd.com>
    Signed-off-by: Alex Deucher <alexander.deucher at amd.com>

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 9a64584318e0..3f7f5c12961d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -305,11 +305,13 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f,
 			return -EINVAL;
 
 		data->head.block = block_id;
-		/* only ue and ce errors are supported */
+		/* only ue, ce and poison errors are supported */
 		if (!memcmp("ue", err, 2))
 			data->head.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
 		else if (!memcmp("ce", err, 2))
 			data->head.type = AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE;
+		else if (!memcmp("poison", err, 6))
+			data->head.type = AMDGPU_RAS_ERROR__POISON;
 		else
 			return -EINVAL;
 
@@ -431,9 +433,10 @@ static void amdgpu_ras_instance_mask_check(struct amdgpu_device *adev,
  * The block is one of: umc, sdma, gfx, etc.
  *	see ras_block_string[] for details
  *
- * The error type is one of: ue, ce, where,
+ * The error type is one of: ue, ce and poison where,
  *	ue is multi-uncorrectable
  *	ce is single-correctable
+ *	poison is poison
  *
  * The sub-block is a the sub-block index, pass 0 if there is no sub-block.
  * The address and value are hexadecimal numbers, leading 0x is optional.
commit f4a94dbb6dc0bed10a5fc63718d00f1de45b12c0
Author: Likun Gao <Likun.Gao at amd.com>
Date:   Fri Jan 5 17:33:34 2024 +0800

    drm/amdgpu: correct the cu count for gfx v11
    
    Correct the algorithm of active CU to skip disabled
    sa for gfx v11.
    
    Signed-off-by: Likun Gao <Likun.Gao at amd.com>
    Reviewed-by: Hawking Zhang <Hawking.Zhang at amd.com>
    Signed-off-by: Alex Deucher <alexander.deucher at amd.com>
    Cc: stable at vger.kernel.org

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
index 2fbcd9765980..c7242877d5d3 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -6383,6 +6383,9 @@ static int gfx_v11_0_get_cu_info(struct amdgpu_device *adev,
 	mutex_lock(&adev->grbm_idx_mutex);
 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
+			bitmap = i * adev->gfx.config.max_sh_per_se + j;
+			if (!((gfx_v11_0_get_sa_active_bitmap(adev) >> bitmap) & 1))
+				continue;
 			mask = 1;
 			counter = 0;
 			gfx_v11_0_select_se_sh(adev, i, j, 0xffffffff, 0);
commit 90bd01471d1c7f2d2db3c69259e247357991fe50
Author: Candice Li <candice.li at amd.com>
Date:   Thu Jan 4 09:34:25 2024 +0800

    drm/amdgpu: Drop unnecessary sentences about CE and deferred error.
    
    Remove "no user action is needed" for correctable and deferred error
    to avoid confusion.
    
    Signed-off-by: Candice Li <candice.li at amd.com>
    Reviewed-by: Tao Zhou <tao.zhou1 at amd.com>
    Reviewed-by: Hawking Zhang <Hawking.Zhang at amd.com>
    Signed-off-by: Alex Deucher <alexander.deucher at amd.com>

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index fc42fb6ee191..9a64584318e0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -1067,8 +1067,7 @@ static void amdgpu_ras_error_print_error_data(struct amdgpu_device *adev,
 			mcm_info = &err_info->mcm_info;
 			if (err_info->ce_count) {
 				dev_info(adev->dev, "socket: %d, die: %d, "
-					 "%lld new correctable hardware errors detected in %s block, "
-					 "no user action is needed\n",
+					 "%lld new correctable hardware errors detected in %s block\n",
 					 mcm_info->socket_id,
 					 mcm_info->die_id,
 					 err_info->ce_count,
@@ -1080,8 +1079,7 @@ static void amdgpu_ras_error_print_error_data(struct amdgpu_device *adev,
 			err_info = &err_node->err_info;
 			mcm_info = &err_info->mcm_info;
 			dev_info(adev->dev, "socket: %d, die: %d, "
-				 "%lld correctable hardware errors detected in total in %s block, "
-				 "no user action is needed\n",
+				 "%lld correctable hardware errors detected in total in %s block\n",
 				 mcm_info->socket_id, mcm_info->die_id, err_info->ce_count, blk_name);
 		}
 	}
@@ -1108,16 +1106,14 @@ static void amdgpu_ras_error_generate_report(struct amdgpu_device *adev,
 			   adev->smuio.funcs->get_die_id) {
 			dev_info(adev->dev, "socket: %d, die: %d "
 				 "%ld correctable hardware errors "
-				 "detected in %s block, no user "
-				 "action is needed.\n",
+				 "detected in %s block\n",
 				 adev->smuio.funcs->get_socket_id(adev),
 				 adev->smuio.funcs->get_die_id(adev),
 				 ras_mgr->err_data.ce_count,
 				 blk_name);
 		} else {
 			dev_info(adev->dev, "%ld correctable hardware errors "
-				 "detected in %s block, no user "
-				 "action is needed.\n",
+				 "detected in %s block\n",
 				 ras_mgr->err_data.ce_count,
 				 blk_name);
 		}
@@ -1920,7 +1916,7 @@ static void amdgpu_ras_interrupt_poison_creation_handler(struct ras_manager *obj
 				struct amdgpu_iv_entry *entry)
 {
 	dev_info(obj->adev->dev,
-		"Poison is created, no user action is needed.\n");
+		"Poison is created\n");
 }
 
 static void amdgpu_ras_interrupt_umc_handler(struct ras_manager *obj,
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
index 6d24c84924cb..19986ff6a48d 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
@@ -401,8 +401,7 @@ static void nbio_v7_4_handle_ras_controller_intr_no_bifring(struct amdgpu_device
 
 			if (err_data.ce_count)
 				dev_info(adev->dev, "%ld correctable hardware "
-						"errors detected in %s block, "
-						"no user action is needed.\n",
+						"errors detected in %s block\n",
 						obj->err_data.ce_count,
 						get_ras_block_str(adev->nbio.ras_if));
 
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c
index 25a3da83e0fb..e90f33780803 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c
@@ -597,8 +597,7 @@ static void nbio_v7_9_handle_ras_controller_intr_no_bifring(struct amdgpu_device
 
 			if (err_data.ce_count)
 				dev_info(adev->dev, "%ld correctable hardware "
-						"errors detected in %s block, "
-						"no user action is needed.\n",
+						"errors detected in %s block\n",
 						obj->err_data.ce_count,
 						get_ras_block_str(adev->nbio.ras_if));
 
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c
index 530549314ce4..a3ee3c4c650f 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c
@@ -64,7 +64,7 @@ static void umc_v6_7_query_error_status_helper(struct amdgpu_device *adev,
 	uint64_t reg_value;
 
 	if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1)
-		dev_info(adev->dev, "Deferred error, no user action is needed.\n");
+		dev_info(adev->dev, "Deferred error\n");
 
 	if (mc_umc_status)
 		dev_info(adev->dev, "MCA STATUS 0x%llx, umc_reg_offset 0x%x\n", mc_umc_status, umc_reg_offset);
commit d32156a07575d69916944ce0e2d4a71a4c95979d
Author: Aric Cyr <aric.cyr at amd.com>
Date:   Sun Dec 17 19:36:16 2023 -0500

    drm/amd/display: 3.2.266
    
    This version brings along following fixes:
    
    - Improve z8/z10 support.
    - Revert some of the VRR optimization.
    - Improve usb4 when using MST.
    
    Acked-by: Rodrigo Siqueira <rodrigo.siqueira at amd.com>
    Signed-off-by: Aric Cyr <aric.cyr at amd.com>
    Tested-by: Daniel Wheeler <daniel.wheeler at amd.com>
    Signed-off-by: Alex Deucher <alexander.deucher at amd.com>

diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h
index 7222f63caf28..5d7aa882416b 100644
--- a/drivers/gpu/drm/amd/display/dc/dc.h
+++ b/drivers/gpu/drm/amd/display/dc/dc.h
@@ -51,7 +51,7 @@ struct aux_payload;
 struct set_config_cmd_payload;
 struct dmub_notification;
 
-#define DC_VER "3.2.265"
+#define DC_VER "3.2.266"
 
 #define MAX_SURFACES 3
 #define MAX_PLANES 6
commit ab76bd72ee12d9117c3a16d749ffce84f5b235bf
Author: Meenakshikumar Somasundaram <meenakshikumar.somasundaram at amd.com>
Date:   Tue Dec 19 15:51:24 2023 -0500

    drm/amd/display: Dpia hpd status not in sync after S4
    
    [Why]
    Dpia hpd status not in sync causing driver not enabling BW Alloc after
    S4.
    
    [How]
    Update hpd_status of the link when querying hpd state from dmub in
    dpia_query_hpd_status().
    
    Reviewed-by: Nicholas Kazlauskas <nicholas.kazlauskas at amd.com>
    Acked-by: Rodrigo Siqueira <rodrigo.siqueira at amd.com>
    Signed-off-by: Meenakshikumar Somasundaram <meenakshikumar.somasundaram at amd.com>
    Tested-by: Daniel Wheeler <daniel.wheeler at amd.com>
    Signed-off-by: Alex Deucher <alexander.deucher at amd.com>

diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia.c
index 982eda3c46f5..6af42ba9885c 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia.c
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia.c
@@ -82,25 +82,33 @@ bool dpia_query_hpd_status(struct dc_link *link)
 {
 	union dmub_rb_cmd cmd = {0};
 	struct dc_dmub_srv *dmub_srv = link->ctx->dmub_srv;
-	bool is_hpd_high = false;
 
 	/* prepare QUERY_HPD command */
 	cmd.query_hpd.header.type = DMUB_CMD__QUERY_HPD_STATE;
 	cmd.query_hpd.data.instance = link->link_id.enum_id - ENUM_ID_1;
 	cmd.query_hpd.data.ch_type = AUX_CHANNEL_DPIA;
 
-	/* Return HPD status reported by DMUB if query successfully executed. */
-	if (dc_wake_and_execute_dmub_cmd(dmub_srv->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY) &&
-	    cmd.query_hpd.data.status == AUX_RET_SUCCESS)
-		is_hpd_high = cmd.query_hpd.data.result;
-
-	DC_LOG_DEBUG("%s: link(%d) dpia(%d) cmd_status(%d) result(%d)\n",
-		__func__,
-		link->link_index,
-		link->link_id.enum_id - ENUM_ID_1,
-		cmd.query_hpd.data.status,
-		cmd.query_hpd.data.result);
-
-	return is_hpd_high;
+	/* Query dpia hpd status from dmub */
+	if (dc_wake_and_execute_dmub_cmd(dmub_srv->ctx, &cmd,
+		DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY) &&
+	    cmd.query_hpd.data.status == AUX_RET_SUCCESS) {
+		DC_LOG_DEBUG("%s: for link(%d) dpia(%d) success, current_hpd_status(%d) new_hpd_status(%d)\n",
+			__func__,
+			link->link_index,
+			link->link_id.enum_id - ENUM_ID_1,
+			link->hpd_status,
+			cmd.query_hpd.data.result);
+		link->hpd_status = cmd.query_hpd.data.result;
+	} else {
+		DC_LOG_ERROR("%s: for link(%d) dpia(%d) failed with status(%d), current_hpd_status(%d) new_hpd_status(0)\n",
+			__func__,
+			link->link_index,
+			link->link_id.enum_id - ENUM_ID_1,
+			cmd.query_hpd.data.status,
+			link->hpd_status);
+		link->hpd_status = false;
+	}
+
+	return link->hpd_status;
 }
 
commit 2476bf4328d1a55db709ce9ad2c274d26040311b
Author: Charlene Liu <charlene.liu at amd.com>
Date:   Thu Dec 21 09:42:28 2023 -0500

    drm/amd/display: Update z8 latency
    
    Adjust z8 latency for performance.
    
    Reviewed-by: Muhammad Ahmed <ahmed.ahmed at amd.com>
    Acked-by: Rodrigo Siqueira <rodrigo.siqueira at amd.com>
    Signed-off-by: Charlene Liu <charlene.liu at amd.com>
    Tested-by: Daniel Wheeler <daniel.wheeler at amd.com>
    Signed-off-by: Alex Deucher <alexander.deucher at amd.com>

diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c
index 3d12dabd39e4..475c4ec43c01 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c
@@ -166,9 +166,9 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_5_soc = {
 	.num_states = 5,
 	.sr_exit_time_us = 14.0,
 	.sr_enter_plus_exit_time_us = 16.0,
-	.sr_exit_z8_time_us = 525.0,
-	.sr_enter_plus_exit_z8_time_us = 715.0,
-	.fclk_change_latency_us = 20.0,
+	.sr_exit_z8_time_us = 210.0,
+	.sr_enter_plus_exit_z8_time_us = 320.0,
+	.fclk_change_latency_us = 24.0,
 	.usr_retraining_latency_us = 2,
 	.writeback_latency_us = 12.0,
 
commit bf282eb92b84709d99186ad5940b9997eb3c1ff2
Author: Daniel Miess <daniel.miess at amd.com>
Date:   Wed Dec 20 10:34:32 2023 -0500

    Revert "drm/amd/display: Fix conversions between bytes and KB"
    
    This reverts commit d0f639c5869399bf6dde4d694d5f8c0ab8c0ec46.
    
    The previous commit causes failure to light up for 1080p
    eDP + 8k HDMI panel combo.
    
    Reviewed-by: Charlene Liu <charlene.liu at amd.com>
    Acked-by: Rodrigo Siqueira <rodrigo.siqueira at amd.com>
    Signed-off-by: Daniel Miess <daniel.miess at amd.com>
    Tested-by: Daniel Wheeler <daniel.wheeler at amd.com>
    Signed-off-by: Alex Deucher <alexander.deucher at amd.com>

diff --git a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c
index b95bf27f2fe2..a6b938a12de1 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c
@@ -6229,7 +6229,7 @@ static void set_calculate_prefetch_schedule_params(struct display_mode_lib_st *m
 				CalculatePrefetchSchedule_params->GPUVMEnable = mode_lib->ms.cache_display_cfg.plane.GPUVMEnable;
 				CalculatePrefetchSchedule_params->HostVMEnable = mode_lib->ms.cache_display_cfg.plane.HostVMEnable;
 				CalculatePrefetchSchedule_params->HostVMMaxNonCachedPageTableLevels = mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels;
-				CalculatePrefetchSchedule_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes;
+				CalculatePrefetchSchedule_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024;
 				CalculatePrefetchSchedule_params->DynamicMetadataEnable = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataEnable[k];
 				CalculatePrefetchSchedule_params->DynamicMetadataVMEnabled = mode_lib->ms.ip.dynamic_metadata_vm_enabled;
 				CalculatePrefetchSchedule_params->DynamicMetadataLinesBeforeActiveRequired = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataLinesBeforeActiveRequired[k];
@@ -6329,7 +6329,7 @@ static void dml_prefetch_check(struct display_mode_lib_st *mode_lib)
 				mode_lib->ms.NoOfDPPThisState,
 				mode_lib->ms.dpte_group_bytes,
 				s->HostVMInefficiencyFactor,
-				mode_lib->ms.soc.hostvm_min_page_size_kbytes,
+				mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024,
 				mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels);
 
 		s->NextMaxVStartup = s->MaxVStartupAllPlanes[j];
@@ -6542,7 +6542,7 @@ static void dml_prefetch_check(struct display_mode_lib_st *mode_lib)
 						mode_lib->ms.cache_display_cfg.plane.HostVMEnable,
 						mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels,
 						mode_lib->ms.cache_display_cfg.plane.GPUVMEnable,
-						mode_lib->ms.soc.hostvm_min_page_size_kbytes,
+						mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024,
 						mode_lib->ms.PDEAndMetaPTEBytesPerFrame[j][k],
 						mode_lib->ms.MetaRowBytes[j][k],
 						mode_lib->ms.DPTEBytesPerRow[j][k],
@@ -7687,7 +7687,7 @@ dml_bool_t dml_core_mode_support(struct display_mode_lib_st *mode_lib)
 		CalculateVMRowAndSwath_params->HostVMMaxNonCachedPageTableLevels = mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels;
 		CalculateVMRowAndSwath_params->GPUVMMaxPageTableLevels = mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels;
 		CalculateVMRowAndSwath_params->GPUVMMinPageSizeKBytes = mode_lib->ms.cache_display_cfg.plane.GPUVMMinPageSizeKBytes;
-		CalculateVMRowAndSwath_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes;
+		CalculateVMRowAndSwath_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024;
 		CalculateVMRowAndSwath_params->PTEBufferModeOverrideEn = mode_lib->ms.cache_display_cfg.plane.PTEBufferModeOverrideEn;
 		CalculateVMRowAndSwath_params->PTEBufferModeOverrideVal = mode_lib->ms.cache_display_cfg.plane.PTEBufferMode;
 		CalculateVMRowAndSwath_params->PTEBufferSizeNotExceeded = mode_lib->ms.PTEBufferSizeNotExceededPerState;
@@ -7957,7 +7957,7 @@ dml_bool_t dml_core_mode_support(struct display_mode_lib_st *mode_lib)
 		UseMinimumDCFCLK_params->GPUVMMaxPageTableLevels = mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels;
 		UseMinimumDCFCLK_params->HostVMEnable = mode_lib->ms.cache_display_cfg.plane.HostVMEnable;
 		UseMinimumDCFCLK_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes;
-		UseMinimumDCFCLK_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes;
+		UseMinimumDCFCLK_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024;
 		UseMinimumDCFCLK_params->HostVMMaxNonCachedPageTableLevels = mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels;
 		UseMinimumDCFCLK_params->DynamicMetadataVMEnabled = mode_lib->ms.ip.dynamic_metadata_vm_enabled;
 		UseMinimumDCFCLK_params->ImmediateFlipRequirement = s->ImmediateFlipRequiredFinal;
@@ -8699,7 +8699,7 @@ void dml_core_mode_programming(struct display_mode_lib_st *mode_lib, const struc
 	CalculateVMRowAndSwath_params->HostVMMaxNonCachedPageTableLevels = mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels;
 	CalculateVMRowAndSwath_params->GPUVMMaxPageTableLevels = mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels;
 	CalculateVMRowAndSwath_params->GPUVMMinPageSizeKBytes = mode_lib->ms.cache_display_cfg.plane.GPUVMMinPageSizeKBytes;
-	CalculateVMRowAndSwath_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes;
+	CalculateVMRowAndSwath_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024;
 	CalculateVMRowAndSwath_params->PTEBufferModeOverrideEn = mode_lib->ms.cache_display_cfg.plane.PTEBufferModeOverrideEn;
 	CalculateVMRowAndSwath_params->PTEBufferModeOverrideVal = mode_lib->ms.cache_display_cfg.plane.PTEBufferMode;
 	CalculateVMRowAndSwath_params->PTEBufferSizeNotExceeded = s->dummy_boolean_array[0];
@@ -8805,7 +8805,7 @@ void dml_core_mode_programming(struct display_mode_lib_st *mode_lib, const struc
 			mode_lib->ms.cache_display_cfg.hw.DPPPerSurface,
 			locals->dpte_group_bytes,
 			s->HostVMInefficiencyFactor,
-			mode_lib->ms.soc.hostvm_min_page_size_kbytes,
+			mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024,
 			mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels);
 
 	locals->TCalc = 24.0 / locals->DCFCLKDeepSleep;
@@ -8995,7 +8995,7 @@ void dml_core_mode_programming(struct display_mode_lib_st *mode_lib, const struc
 			CalculatePrefetchSchedule_params->GPUVMEnable = mode_lib->ms.cache_display_cfg.plane.GPUVMEnable;
 			CalculatePrefetchSchedule_params->HostVMEnable = mode_lib->ms.cache_display_cfg.plane.HostVMEnable;
 			CalculatePrefetchSchedule_params->HostVMMaxNonCachedPageTableLevels = mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels;
-			CalculatePrefetchSchedule_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes;
+			CalculatePrefetchSchedule_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024;
 			CalculatePrefetchSchedule_params->DynamicMetadataEnable = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataEnable[k];
 			CalculatePrefetchSchedule_params->DynamicMetadataVMEnabled = mode_lib->ms.ip.dynamic_metadata_vm_enabled;
 			CalculatePrefetchSchedule_params->DynamicMetadataLinesBeforeActiveRequired = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataLinesBeforeActiveRequired[k];
@@ -9240,7 +9240,7 @@ void dml_core_mode_programming(struct display_mode_lib_st *mode_lib, const struc
 						mode_lib->ms.cache_display_cfg.plane.HostVMEnable,
 						mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels,
 						mode_lib->ms.cache_display_cfg.plane.GPUVMEnable,
-						mode_lib->ms.soc.hostvm_min_page_size_kbytes,
+						mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024,
 						locals->PDEAndMetaPTEBytesFrame[k],
 						locals->MetaRowByte[k],
 						locals->PixelPTEBytesPerRow[k],
commit 5f3bce13266e6fe2f7a46f94d8bc94d5274e276b
Author: Peichen Huang <peichen.huang at amd.com>
Date:   Thu Dec 14 23:16:34 2023 +0800

    drm/amd/display: Request usb4 bw for mst streams
    
    [WHY]
    When usb4 bandwidth allocation mode is enabled, driver need to request
    bandwidth from connection manager. For mst link,  the requested
    bandwidth should be big enough for all remote streams.
    
    [HOW]
    - If mst link, the requested bandwidth should be the sum of all mst
      streams bandwidth added with dp MTPH overhead.
    - Allocate/deallcate usb4 bandwidth when setting dpms on/off.
    - When doing display mode validation, driver also need to consider total
      bandwidth of all mst streams for mst link.
    
    Reviewed-by: Cruise Hung <cruise.hung at amd.com>
    Acked-by: Rodrigo Siqueira <rodrigo.siqueira at amd.com>
    Signed-off-by: Peichen Huang <peichen.huang at amd.com>
    Tested-by: Daniel Wheeler <daniel.wheeler at amd.com>
    Signed-off-by: Alex Deucher <alexander.deucher at amd.com>

diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h
index 4f276169e05a..b08ccb8c68bc 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_types.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_types.h
@@ -1140,23 +1140,25 @@ struct dc_panel_config {
 	} ilr;
 };
 
+#define MAX_SINKS_PER_LINK 4
+
 /*
  *  USB4 DPIA BW ALLOCATION STRUCTS
  */
 struct dc_dpia_bw_alloc {
-	int sink_verified_bw;  // The Verified BW that sink can allocated and use that has been verified already
-	int sink_allocated_bw; // The Actual Allocated BW that sink currently allocated
-	int sink_max_bw;       // The Max BW that sink can require/support
+	int remote_sink_req_bw[MAX_SINKS_PER_LINK]; // BW requested by remote sinks
+	int link_verified_bw;  // The Verified BW that link can allocated and use that has been verified already
+	int link_max_bw;       // The Max BW that link can require/support
+	int allocated_bw;      // The Actual Allocated BW for this DPIA
 	int estimated_bw;      // The estimated available BW for this DPIA
 	int bw_granularity;    // BW Granularity
+	int dp_overhead;       // DP overhead in dp tunneling
 	bool bw_alloc_enabled; // The BW Alloc Mode Support is turned ON for all 3:  DP-Tx & Dpia & CM
 	bool response_ready;   // Response ready from the CM side
 	uint8_t nrd_max_lane_count; // Non-reduced max lane count
 	uint8_t nrd_max_link_rate; // Non-reduced max link rate
 };
 
-#define MAX_SINKS_PER_LINK 4
-
 enum dc_hpd_enable_select {
 	HPD_EN_FOR_ALL_EDP = 0,
 	HPD_EN_FOR_PRIMARY_EDP_ONLY,
diff --git a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c
index 5fe8b4871c77..3de148004c06 100644
--- a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c
+++ b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c
@@ -2005,17 +2005,11 @@ static enum dc_status enable_link_dp(struct dc_state *state,
 		}
 	}
 
-	/*
-	 * If the link is DP-over-USB4 do the following:
-	 * - Train with fallback when enabling DPIA link. Conventional links are
+	/* Train with fallback when enabling DPIA link. Conventional links are
 	 * trained with fallback during sink detection.
-	 * - Allocate only what the stream needs for bw in Gbps. Inform the CM
-	 * in case stream needs more or less bw from what has been allocated
-	 * earlier at plug time.
 	 */
-	if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) {
+	if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA)
 		do_fallback = true;
-	}
 
 	/*
 	 * Temporary w/a to get DP2.0 link rates to work with SST.
@@ -2197,6 +2191,32 @@ static enum dc_status enable_link(
 	return status;
 }
 
+static bool allocate_usb4_bandwidth_for_stream(struct dc_stream_state *stream, int bw)
+{
+	return true;
+}
+
+static bool allocate_usb4_bandwidth(struct dc_stream_state *stream)
+{
+	bool ret;
+
+	int bw = dc_bandwidth_in_kbps_from_timing(&stream->timing,
+			dc_link_get_highest_encoding_format(stream->sink->link));
+
+	ret = allocate_usb4_bandwidth_for_stream(stream, bw);
+
+	return ret;
+}
+
+static bool deallocate_usb4_bandwidth(struct dc_stream_state *stream)
+{
+	bool ret;
+
+	ret = allocate_usb4_bandwidth_for_stream(stream, 0);
+
+	return ret;
+}
+
 void link_set_dpms_off(struct pipe_ctx *pipe_ctx)
 {
 	struct dc  *dc = pipe_ctx->stream->ctx->dc;
@@ -2232,6 +2252,9 @@ void link_set_dpms_off(struct pipe_ctx *pipe_ctx)
 	update_psp_stream_config(pipe_ctx, true);
 	dc->hwss.blank_stream(pipe_ctx);
 
+	if (pipe_ctx->stream->link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA)
+		deallocate_usb4_bandwidth(pipe_ctx->stream);
+
 	if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST)
 		deallocate_mst_payload(pipe_ctx);
 	else if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT &&
@@ -2474,6 +2497,9 @@ void link_set_dpms_on(
 		}
 	}
 
+	if (pipe_ctx->stream->link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA)
+		allocate_usb4_bandwidth(pipe_ctx->stream);
+
 	if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST)
 		allocate_mst_payload(pipe_ctx);
 	else if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT &&
diff --git a/drivers/gpu/drm/amd/display/dc/link/link_validation.c b/drivers/gpu/drm/amd/display/dc/link/link_validation.c
index b45fda96eaf6..8fe66c367850 100644
--- a/drivers/gpu/drm/amd/display/dc/link/link_validation.c
+++ b/drivers/gpu/drm/amd/display/dc/link/link_validation.c
@@ -346,23 +346,61 @@ enum dc_status link_validate_mode_timing(
 	return DC_OK;
 }
 
+/*
+ * This function calculates the bandwidth required for the stream timing
+ * and aggregates the stream bandwidth for the respective dpia link
+ *
+ * @stream: pointer to the dc_stream_state struct instance
+ * @num_streams: number of streams to be validated
+ *
+ * return: true if validation is succeeded
+ */
 bool link_validate_dpia_bandwidth(const struct dc_stream_state *stream, const unsigned int num_streams)
 {
-	bool ret = true;
-	int bw_needed[MAX_DPIA_NUM];
-	struct dc_link *link[MAX_DPIA_NUM];
-
-	if (!num_streams || num_streams > MAX_DPIA_NUM)
-		return ret;
+	int bw_needed[MAX_DPIA_NUM] = {0};
+	struct dc_link *dpia_link[MAX_DPIA_NUM] = {0};
+	int num_dpias = 0;
 
 	for (uint8_t i = 0; i < num_streams; ++i) {
+		if (stream[i].signal == SIGNAL_TYPE_DISPLAY_PORT) {
+			/* new dpia sst stream, check whether it exceeds max dpia */
+			if (num_dpias >= MAX_DPIA_NUM)
+				return false;
 
-		link[i] = stream[i].link;
-		bw_needed[i] = dc_bandwidth_in_kbps_from_timing(&stream[i].timing,
-				dc_link_get_highest_encoding_format(link[i]));
+			dpia_link[num_dpias] = stream[i].link;
+			bw_needed[num_dpias] = dc_bandwidth_in_kbps_from_timing(&stream[i].timing,
+					dc_link_get_highest_encoding_format(dpia_link[num_dpias]));
+			num_dpias++;
+		} else if (stream[i].signal == SIGNAL_TYPE_DISPLAY_PORT_MST) {
+			uint8_t j = 0;
+			/* check whether its a known dpia link */
+			for (; j < num_dpias; ++j) {
+				if (dpia_link[j] == stream[i].link)
+					break;
+			}
+
+			if (j == num_dpias) {
+				/* new dpia mst stream, check whether it exceeds max dpia */
+				if (num_dpias >= MAX_DPIA_NUM)
+					return false;
+				else {
+					dpia_link[j] = stream[i].link;
+					num_dpias++;
+				}
+			}
+
+			bw_needed[j] += dc_bandwidth_in_kbps_from_timing(&stream[i].timing,
+				dc_link_get_highest_encoding_format(dpia_link[j]));
+		}
 	}
 
-	ret = dpia_validate_usb4_bw(link, bw_needed, num_streams);
+	/* Include dp overheads */
+	for (uint8_t i = 0; i < num_dpias; ++i) {
+		int dp_overhead = 0;
+
+		dp_overhead = link_dp_dpia_get_dp_overhead_in_dp_tunneling(dpia_link[i]);
+		bw_needed[i] += dp_overhead;
+	}
 
-	return ret;
+	return dpia_validate_usb4_bw(dpia_link, bw_needed, num_dpias);
 }
diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c
index a7aa8c9da868..4ef1a6a1d129 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c
@@ -54,12 +54,18 @@ static bool get_bw_alloc_proceed_flag(struct dc_link *tmp)
 static void reset_bw_alloc_struct(struct dc_link *link)
 {
 	link->dpia_bw_alloc_config.bw_alloc_enabled = false;
-	link->dpia_bw_alloc_config.sink_verified_bw = 0;
-	link->dpia_bw_alloc_config.sink_max_bw = 0;
+	link->dpia_bw_alloc_config.link_verified_bw = 0;
+	link->dpia_bw_alloc_config.link_max_bw = 0;
+	link->dpia_bw_alloc_config.allocated_bw = 0;
 	link->dpia_bw_alloc_config.estimated_bw = 0;
 	link->dpia_bw_alloc_config.bw_granularity = 0;
+	link->dpia_bw_alloc_config.dp_overhead = 0;
 	link->dpia_bw_alloc_config.response_ready = false;
-	link->dpia_bw_alloc_config.sink_allocated_bw = 0;
+	link->dpia_bw_alloc_config.nrd_max_lane_count = 0;
+	link->dpia_bw_alloc_config.nrd_max_link_rate = 0;
+	for (int i = 0; i < MAX_SINKS_PER_LINK; i++)
+		link->dpia_bw_alloc_config.remote_sink_req_bw[i] = 0;
+	DC_LOG_DEBUG("reset usb4 bw alloc of link(%d)\n", link->link_index);
 }
 
 #define BW_GRANULARITY_0 4 // 0.25 Gbps
@@ -210,8 +216,8 @@ static int get_host_router_total_dp_tunnel_bw(const struct dc *dc, uint8_t hr_in
 				link_dpia_primary->dpia_bw_alloc_config.bw_alloc_enabled) &&
 				(link_dpia_secondary->hpd_status &&
 				link_dpia_secondary->dpia_bw_alloc_config.bw_alloc_enabled)) {
-				total_bw += link_dpia_primary->dpia_bw_alloc_config.estimated_bw +
-					link_dpia_secondary->dpia_bw_alloc_config.sink_allocated_bw;
+					total_bw += link_dpia_primary->dpia_bw_alloc_config.estimated_bw +
+						link_dpia_secondary->dpia_bw_alloc_config.allocated_bw;
 			} else if (link_dpia_primary->hpd_status &&
 					link_dpia_primary->dpia_bw_alloc_config.bw_alloc_enabled) {
 				total_bw = link_dpia_primary->dpia_bw_alloc_config.estimated_bw;
@@ -264,7 +270,7 @@ static void set_usb4_req_bw_req(struct dc_link *link, int req_bw)
 
 	/* Error check whether requested and allocated are equal */
 	req_bw = requested_bw * (Kbps_TO_Gbps / link->dpia_bw_alloc_config.bw_granularity);
-	if (req_bw == link->dpia_bw_alloc_config.sink_allocated_bw) {
+	if (req_bw == link->dpia_bw_alloc_config.allocated_bw) {
 		DC_LOG_ERROR("%s: Request bw equals to allocated bw for link(%d)\n",
 			__func__, link->link_index);
 	}
@@ -387,9 +393,9 @@ void dpia_handle_bw_alloc_response(struct dc_link *link, uint8_t bw, uint8_t res
 		DC_LOG_DEBUG("%s: BW REQ SUCCESS for DP-TX Request for link(%d)\n",
 			__func__, link->link_index);
 		DC_LOG_DEBUG("%s: current allocated_bw(%d), new allocated_bw(%d)\n",
-			__func__, link->dpia_bw_alloc_config.sink_allocated_bw, bw_needed);
+			__func__, link->dpia_bw_alloc_config.allocated_bw, bw_needed);
 
-		link->dpia_bw_alloc_config.sink_allocated_bw = bw_needed;
+		link->dpia_bw_alloc_config.allocated_bw = bw_needed;
 
 		link->dpia_bw_alloc_config.response_ready = true;
 		break;
@@ -427,8 +433,8 @@ int dpia_handle_usb4_bandwidth_allocation_for_link(struct dc_link *link, int pea
 	if (link->hpd_status && peak_bw > 0) {
 
 		// If DP over USB4 then we need to check BW allocation
-		link->dpia_bw_alloc_config.sink_max_bw = peak_bw;
-		set_usb4_req_bw_req(link, link->dpia_bw_alloc_config.sink_max_bw);
+		link->dpia_bw_alloc_config.link_max_bw = peak_bw;
+		set_usb4_req_bw_req(link, link->dpia_bw_alloc_config.link_max_bw);
 
 		do {
 			if (timeout > 0)
@@ -440,8 +446,8 @@ int dpia_handle_usb4_bandwidth_allocation_for_link(struct dc_link *link, int pea
 
 		if (!timeout)
 			ret = 0;// ERROR TIMEOUT waiting for response for allocating bw
-		else if (link->dpia_bw_alloc_config.sink_allocated_bw > 0)
-			ret = link->dpia_bw_alloc_config.sink_allocated_bw;
+		else if (link->dpia_bw_alloc_config.allocated_bw > 0)
+			ret = link->dpia_bw_alloc_config.allocated_bw;
 	}
 	//2. Cold Unplug
 	else if (!link->hpd_status)
@@ -450,7 +456,6 @@ int dpia_handle_usb4_bandwidth_allocation_for_link(struct dc_link *link, int pea
 out:
 	return ret;
 }
-
 bool link_dp_dpia_allocate_usb4_bandwidth_for_stream(struct dc_link *link, int req_bw)
 {
 	bool ret = false;
@@ -458,7 +463,7 @@ bool link_dp_dpia_allocate_usb4_bandwidth_for_stream(struct dc_link *link, int r
 
 	DC_LOG_DEBUG("%s: ENTER: link(%d), hpd_status(%d), current allocated_bw(%d), req_bw(%d)\n",
 		__func__, link->link_index, link->hpd_status,
-		link->dpia_bw_alloc_config.sink_allocated_bw, req_bw);
+		link->dpia_bw_alloc_config.allocated_bw, req_bw);
 
 	if (!get_bw_alloc_proceed_flag(link))
 		goto out;
@@ -523,3 +528,29 @@ bool dpia_validate_usb4_bw(struct dc_link **link, int *bw_needed_per_dpia, const
 
 	return ret;
 }
+
+int link_dp_dpia_get_dp_overhead_in_dp_tunneling(struct dc_link *link)
+{
+	int dp_overhead = 0, link_mst_overhead = 0;
+
+	if (!get_bw_alloc_proceed_flag((link)))
+		return dp_overhead;
+
+	/* if its mst link, add MTPH overhead */
+	if ((link->type == dc_connection_mst_branch) &&
+		!link->dpcd_caps.channel_coding_cap.bits.DP_128b_132b_SUPPORTED) {
+		/* For 8b/10b encoding: MTP is 64 time slots long, slot 0 is used for MTPH
+		 * MST overhead is 1/64 of link bandwidth (excluding any overhead)
+		 */
+		const struct dc_link_settings *link_cap =
+			dc_link_get_link_cap(link);
+		uint32_t link_bw_in_kbps =
+			link_cap->link_rate * link_cap->lane_count * LINK_RATE_REF_FREQ_IN_KHZ * 8;
+		link_mst_overhead = (link_bw_in_kbps / 64) + ((link_bw_in_kbps % 64) ? 1 : 0);
+	}
+
+	/* add all the overheads */
+	dp_overhead = link_mst_overhead;
+
+	return dp_overhead;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.h b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.h
index 981bc4eb6120..3b6d8494f9d5 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.h
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.h
@@ -99,4 +99,13 @@ void dpia_handle_bw_alloc_response(struct dc_link *link, uint8_t bw, uint8_t res
  */
 bool dpia_validate_usb4_bw(struct dc_link **link, int *bw_needed, const unsigned int num_dpias);
 
+/*
+ * Obtain all the DP overheads in dp tunneling for the dpia link
+ *
+ * @link: pointer to the dc_link struct instance
+ *
+ * return: DP overheads in DP tunneling
+ */
+int link_dp_dpia_get_dp_overhead_in_dp_tunneling(struct dc_link *link);
+
 #endif /* DC_INC_LINK_DP_DPIA_BW_H_ */
commit a465536ebff88fcc42e131a1b09bbe3df829117b
Author: Martin Leung <martin.leung at amd.com>
Date:   Wed Dec 20 16:56:11 2023 -0500

    drm/amd/display: revert "Optimize VRR updates to only necessary ones"
    
    This reverts commit 6e4337f695c25162f0296934152506ad596fcebf.
    
    The original commit causes regression in corner case with HDMI at
    specific timings. Reverting from staging to get the full suite to
    retest.
    
    Reviewed-by: Rodrigo Siqueira <rodrigo.siqueira at amd.com>
    Signed-off-by: Martin Leung <martin.leung at amd.com>
    Tested-by: Daniel Wheeler <daniel.wheeler at amd.com>
    Signed-off-by: Alex Deucher <alexander.deucher at amd.com>

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c
index 2d7205058c64..69e726630241 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
@@ -411,12 +411,9 @@ bool dc_stream_adjust_vmin_vmax(struct dc *dc,
 	 * avoid conflicting with firmware updates.
 	 */
 	if (dc->ctx->dce_version > DCE_VERSION_MAX)
-		if (dc->optimized_required)
+		if (dc->optimized_required || dc->wm_optimized_required)
 			return false;
 
-	if (!memcmp(&stream->adjust, adjust, sizeof(*adjust)))
-		return true;
-
 	stream->adjust.v_total_max = adjust->v_total_max;
 	stream->adjust.v_total_mid = adjust->v_total_mid;
 	stream->adjust.v_total_mid_frame_num = adjust->v_total_mid_frame_num;
@@ -2230,6 +2227,7 @@ void dc_post_update_surfaces_to_stream(struct dc *dc)
 	}
 
 	dc->optimized_required = false;
+	dc->wm_optimized_required = false;
 }
 
 bool dc_set_generic_gpio_for_stereo(bool enable,
@@ -2652,6 +2650,8 @@ enum surface_update_type dc_check_update_surfaces_for_stream(
 		} else if (memcmp(&dc->current_state->bw_ctx.bw.dcn.clk, &dc->clk_mgr->clks, offsetof(struct dc_clocks, prev_p_state_change_support)) != 0) {
 			dc->optimized_required = true;
 		}
+
+		dc->optimized_required |= dc->wm_optimized_required;
 	}
 
 	return type;
@@ -2859,6 +2859,9 @@ static void copy_stream_update_to_stream(struct dc *dc,
 	if (update->vrr_active_fixed)
 		stream->vrr_active_fixed = *update->vrr_active_fixed;
 
+	if (update->crtc_timing_adjust)
+		stream->adjust = *update->crtc_timing_adjust;
+
 	if (update->dpms_off)
 		stream->dpms_off = *update->dpms_off;
 
@@ -4288,7 +4291,8 @@ static bool full_update_required(struct dc *dc,
 			stream_update->mst_bw_update ||
 			stream_update->func_shaper ||
 			stream_update->lut3d_func ||
-			stream_update->pending_test_pattern))
+			stream_update->pending_test_pattern ||
+			stream_update->crtc_timing_adjust))
 		return true;
 
 	if (stream) {
diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h
index f30a341bc090..7222f63caf28 100644
--- a/drivers/gpu/drm/amd/display/dc/dc.h
+++ b/drivers/gpu/drm/amd/display/dc/dc.h
@@ -1036,6 +1036,7 @@ struct dc {
 
 	/* Require to optimize clocks and bandwidth for added/removed planes */
 	bool optimized_required;
+	bool wm_optimized_required;
 	bool idle_optimizations_allowed;
 	bool enable_c20_dtm_b0;
 
diff --git a/drivers/gpu/drm/amd/display/dc/dc_stream.h b/drivers/gpu/drm/amd/display/dc/dc_stream.h
index a23eebd9933b..ee10941caa59 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_stream.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_stream.h
@@ -139,6 +139,7 @@ union stream_update_flags {
 		uint32_t wb_update:1;
 		uint32_t dsc_changed : 1;
 		uint32_t mst_bw : 1;
+		uint32_t crtc_timing_adjust : 1;
 		uint32_t fams_changed : 1;
 	} bits;
 
@@ -325,6 +326,7 @@ struct dc_stream_update {
 	struct dc_3dlut *lut3d_func;
 
 	struct test_pattern *pending_test_pattern;
+	struct dc_crtc_timing_adjust *crtc_timing_adjust;
 };
 
 bool dc_is_stream_unchanged(
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c
index 51dd2ae09b2a..6dd479e8a348 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c
@@ -3076,7 +3076,7 @@ void dcn10_prepare_bandwidth(
 			context,
 			false);
 
-	dc->optimized_required |= hubbub->funcs->program_watermarks(hubbub,
+	dc->wm_optimized_required = hubbub->funcs->program_watermarks(hubbub,
 			&context->bw_ctx.bw.dcn.watermarks,
 			dc->res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000,
 			true);
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c
index 0dfcb3cdcd20..e931342fcf4c 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c
@@ -2159,10 +2159,10 @@ void dcn20_prepare_bandwidth(
 	}
 
 	/* program dchubbub watermarks:
-	 * For assigning optimized_required, use |= operator since we don't want
+	 * For assigning wm_optimized_required, use |= operator since we don't want
 	 * to clear the value if the optimize has not happened yet
 	 */
-	dc->optimized_required |= hubbub->funcs->program_watermarks(hubbub,
+	dc->wm_optimized_required |= hubbub->funcs->program_watermarks(hubbub,
 					&context->bw_ctx.bw.dcn.watermarks,
 					dc->res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000,
 					false);
@@ -2175,10 +2175,10 @@ void dcn20_prepare_bandwidth(
 	if (hubbub->funcs->program_compbuf_size) {
 		if (context->bw_ctx.dml.ip.min_comp_buffer_size_kbytes) {
 			compbuf_size_kb = context->bw_ctx.dml.ip.min_comp_buffer_size_kbytes;
-			dc->optimized_required |= (compbuf_size_kb != dc->current_state->bw_ctx.dml.ip.min_comp_buffer_size_kbytes);
+			dc->wm_optimized_required |= (compbuf_size_kb != dc->current_state->bw_ctx.dml.ip.min_comp_buffer_size_kbytes);
 		} else {
 			compbuf_size_kb = context->bw_ctx.bw.dcn.compbuf_size_kb;
-			dc->optimized_required |= (compbuf_size_kb != dc->current_state->bw_ctx.bw.dcn.compbuf_size_kb);
+			dc->wm_optimized_required |= (compbuf_size_kb != dc->current_state->bw_ctx.bw.dcn.compbuf_size_kb);
 		}
 
 		hubbub->funcs->program_compbuf_size(hubbub, compbuf_size_kb, false);
commit 51c7e6ac24101af3147ebc45627810da367c6b66
Author: Martin Leung <martin.leung at amd.com>
Date:   Wed Dec 20 16:54:05 2023 -0500

    drm/amd/display: revert "for FPO & SubVP/DRR config program vmin/max"
    
    This reverts commit 6b2b782ad6a25734ae847d1659bea3f613dbb563.
    
    The original commit causes issues with certain features when DRR is
    disabled, need to revisit this change later after resolving issues with
    new DRR policy.
    
    Reviewed-by: Rodrigo Siqueira <rodrigo.siqueira at amd.com>
    Signed-off-by: Martin Leung <martin.leung at amd.com>
    Tested-by: Daniel Wheeler <daniel.wheeler at amd.com>
    Signed-off-by: Alex Deucher <alexander.deucher at amd.com>

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
index 57f0ddd15923..f2abc1096ffb 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
@@ -4986,20 +4986,6 @@ enum dc_status update_dp_encoder_resources_for_test_harness(const struct dc *dc,
 	return DC_OK;
 }
 
-bool resource_subvp_in_use(struct dc *dc,
-		struct dc_state *context)
-{
-	uint32_t i;
-
-	for (i = 0; i < dc->res_pool->pipe_count; i++) {
-		struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
-
-		if (dc_state_get_pipe_subvp_type(context, pipe) != SUBVP_NONE)
-			return true;
-	}
-	return false;
-}
-
 bool check_subvp_sw_cursor_fallback_req(const struct dc *dc, struct dc_stream_state *stream)
 {
 	if (!dc->debug.disable_subvp_high_refresh && is_subvp_high_refresh_candidate(stream))
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c
index e4a328b45c8a..87760600e154 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c
@@ -183,6 +183,20 @@ bool dcn32_all_pipes_have_stream_and_plane(struct dc *dc,
 	return true;
 }
 
+bool dcn32_subvp_in_use(struct dc *dc,
+		struct dc_state *context)
+{
+	uint32_t i;
+
+	for (i = 0; i < dc->res_pool->pipe_count; i++) {
+		struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+
+		if (dc_state_get_pipe_subvp_type(context, pipe) != SUBVP_NONE)
+			return true;
+	}
+	return false;
+}
+
 bool dcn32_mpo_in_use(struct dc_state *context)
 {
 	uint32_t i;
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
index aa68d010cbfd..9f37f717a1f8 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
@@ -33,7 +33,6 @@
 #include "dcn30/dcn30_resource.h"
 #include "link.h"
 #include "dc_state_priv.h"
-#include "resource.h"
 
 #define DC_LOGGER_INIT(logger)
 
@@ -292,7 +291,7 @@ int dcn32_find_dummy_latency_index_for_fw_based_mclk_switch(struct dc *dc,
 
 		/* for subvp + DRR case, if subvp pipes are still present we support pstate */
 		if (vba->DRAMClockChangeSupport[vlevel][vba->maxMpcComb] == dm_dram_clock_change_unsupported &&
-				resource_subvp_in_use(dc, context))
+				dcn32_subvp_in_use(dc, context))
 			vba->DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] = temp_clock_change_support;
 
 		if (vlevel < context->bw_ctx.dml.vba.soc.num_states &&
@@ -2273,7 +2272,7 @@ void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context,
 	unsigned int dummy_latency_index = 0;
 	int maxMpcComb = context->bw_ctx.dml.vba.maxMpcComb;
 	unsigned int min_dram_speed_mts = context->bw_ctx.dml.vba.DRAMSpeed;
-	bool subvp_active = resource_subvp_in_use(dc, context);
+	bool subvp_in_use = dcn32_subvp_in_use(dc, context);
 	unsigned int min_dram_speed_mts_margin;
 	bool need_fclk_lat_as_dummy = false;
 	bool is_subvp_p_drr = false;
@@ -2282,7 +2281,7 @@ void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context,
 	dc_assert_fp_enabled();
 
 	/* need to find dummy latency index for subvp */
-	if (subvp_active) {
+	if (subvp_in_use) {
 		/* Override DRAMClockChangeSupport for SubVP + DRR case where the DRR cannot switch without stretching it's VBLANK */
 		if (!pstate_en) {
 			context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][maxMpcComb] = dm_dram_clock_change_vblank_w_mall_sub_vp;
@@ -2468,7 +2467,7 @@ void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context,
 				dc->clk_mgr->bw_params->clk_table.entries[min_dram_speed_mts_offset].memclk_mhz * 16;
 		}
 
-		if (!context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching && !subvp_active) {
+		if (!context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching && !subvp_in_use) {
 			/* find largest table entry that is lower than dram speed,
 			 * but lower than DPM0 still uses DPM0
 			 */
@@ -3528,7 +3527,7 @@ void dcn32_set_clock_limits(const struct _vcs_dpi_soc_bounding_box_st *soc_bb)
 void dcn32_override_min_req_memclk(struct dc *dc, struct dc_state *context)
 {
 	// WA: restrict FPO and SubVP to use first non-strobe mode (DCN32 BW issue)
-	if ((context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching || resource_subvp_in_use(dc, context)) &&
+	if ((context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching || dcn32_subvp_in_use(dc, context)) &&
 			dc->dml.soc.num_chans <= 8) {
 		int num_mclk_levels = dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_memclk_levels;
 
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c
index bc71a9b058fe..0dfcb3cdcd20 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c
@@ -1882,42 +1882,6 @@ static void dcn20_program_pipe(
 	}
 }
 
-static void update_vmin_vmax_fams(struct dc *dc,
-		struct dc_state *context)
-{
-	uint32_t i;
-	struct drr_params params = {0};
-	bool subvp_in_use = resource_subvp_in_use(dc, context);
-
-	for (i = 0; i < dc->res_pool->pipe_count; i++) {
-		struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
-
-		if (resource_is_pipe_type(pipe, OTG_MASTER) &&
-				((subvp_in_use && dc_state_get_pipe_subvp_type(context, pipe) != SUBVP_PHANTOM &&
-				pipe->stream->allow_freesync) || (context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching && pipe->stream->fpo_in_use))) {
-			if (!pipe->stream->vrr_active_variable && !pipe->stream->vrr_active_fixed) {
-				struct timing_generator *tg = context->res_ctx.pipe_ctx[i].stream_res.tg;
-
-				/* DRR should be configured already if we're in active variable
-				 * or active fixed, so only program if we're not in this state
-				 */
-				params.vertical_total_min = pipe->stream->timing.v_total;
-				params.vertical_total_max = pipe->stream->timing.v_total;
-				tg->funcs->set_drr(tg, &params);
-			}
-		} else {
-			if (resource_is_pipe_type(pipe, OTG_MASTER) &&
-					!pipe->stream->vrr_active_variable &&
-					!pipe->stream->vrr_active_fixed) {
-				struct timing_generator *tg = context->res_ctx.pipe_ctx[i].stream_res.tg;
-				params.vertical_total_min = 0;
-				params.vertical_total_max = 0;
-				tg->funcs->set_drr(tg, &params);
-			}
-		}
-	}
-}
-
 void dcn20_program_front_end_for_ctx(
 		struct dc *dc,
 		struct dc_state *context)
@@ -1994,7 +1958,6 @@ void dcn20_program_front_end_for_ctx(
 				&& context->res_ctx.pipe_ctx[i].stream)
 			hws->funcs.blank_pixel_data(dc, &context->res_ctx.pipe_ctx[i], true);
 
-	update_vmin_vmax_fams(dc, context);
 
 	/* Disconnect mpcc */
 	for (i = 0; i < dc->res_pool->pipe_count; i++)
diff --git a/drivers/gpu/drm/amd/display/dc/inc/resource.h b/drivers/gpu/drm/amd/display/dc/inc/resource.h
index 1d51fed12e20..c958ef37b78a 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/resource.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/resource.h
@@ -609,9 +609,6 @@ bool dc_resource_acquire_secondary_pipe_for_mpc_odm_legacy(
 		struct pipe_ctx *sec_pipe,
 		bool odm);
 
-bool resource_subvp_in_use(struct dc *dc,
-		struct dc_state *context);
-
 /* A test harness interface that modifies dp encoder resources in the given dc
  * state and bypasses the need to revalidate. The interface assumes that the
  * test harness interface is called with pre-validated link config stored in the
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c
index ac04a9c9a3d8..c4d71e7f18af 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c
@@ -1899,7 +1899,7 @@ int dcn32_populate_dml_pipes_from_context(
 
 static struct dc_cap_funcs cap_funcs = {
 	.get_dcc_compression_cap = dcn20_get_dcc_compression_cap,
-	.get_subvp_en = resource_subvp_in_use,
+	.get_subvp_en = dcn32_subvp_in_use,
 };
 
 void dcn32_calculate_wm_and_dlg(struct dc *dc, struct dc_state *context,
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h
index 62611acd4bcb..0c87b0fabba7 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h
@@ -131,6 +131,9 @@ void dcn32_merge_pipes_for_subvp(struct dc *dc,
 bool dcn32_all_pipes_have_stream_and_plane(struct dc *dc,
 		struct dc_state *context);
 
+bool dcn32_subvp_in_use(struct dc *dc,
+		struct dc_state *context);
+
 bool dcn32_mpo_in_use(struct dc_state *context);
 
 bool dcn32_any_surfaces_rotated(struct dc *dc, struct dc_state *context);
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c
index e1ab207c46f1..74412e5f03fe 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c
@@ -1574,7 +1574,7 @@ static void dcn321_destroy_resource_pool(struct resource_pool **pool)
 
 static struct dc_cap_funcs cap_funcs = {
 	.get_dcc_compression_cap = dcn20_get_dcc_compression_cap,
-	.get_subvp_en = resource_subvp_in_use,
+	.get_subvp_en = dcn32_subvp_in_use,
 };
 
 static void dcn321_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params)
commit 7bdbfb4e36e34eb788e44f27666bf0a2b3b90803
Author: George Shen <george.shen at amd.com>
Date:   Sun Dec 17 17:17:57 2023 -0500

    drm/amd/display: Disconnect phantom pipe OPP from OPTC being disabled
    
    [Why]
    If an OPP is used for a different OPTC without first being disconnected
    from the previous OPTC, unexpected behaviour can occur. This also
    applies to phantom pipes, which is what the current logic missed.
    
    [How]
    Disconnect OPPs from OPTC for phantom pipes before disabling OTG master.
    
    Also move the disconnection to before the OTG master disable, since the
    register is double buffered.
    
    Reviewed-by: Dillon Varone <dillon.varone at amd.com>
    Acked-by: Rodrigo Siqueira <rodrigo.siqueira at amd.com>
    Signed-off-by: George Shen <george.shen at amd.com>
    Tested-by: Daniel Wheeler <daniel.wheeler at amd.com>
    Signed-off-by: Alex Deucher <alexander.deucher at amd.com>

diff --git a/drivers/gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.c
index 91ea0d4da06a..1788eb29474b 100644
--- a/drivers/gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.c
+++ b/drivers/gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.c
@@ -166,12 +166,6 @@ static bool optc32_disable_crtc(struct timing_generator *optc)
 {
 	struct optc *optc1 = DCN10TG_FROM_TG(optc);
 
-	/* disable otg request until end of the first line
-	 * in the vertical blank region
-	 */
-	REG_UPDATE(OTG_CONTROL,
-			OTG_MASTER_EN, 0);
-
 	REG_UPDATE_5(OPTC_DATA_SOURCE_SELECT,
 			OPTC_SEG0_SRC_SEL, 0xf,
 			OPTC_SEG1_SRC_SEL, 0xf,
@@ -179,6 +173,12 @@ static bool optc32_disable_crtc(struct timing_generator *optc)
 			OPTC_SEG3_SRC_SEL, 0xf,
 			OPTC_NUM_OF_INPUT_SEGMENT, 0);
 
+	/* disable otg request until end of the first line
+	 * in the vertical blank region
+	 */
+	REG_UPDATE(OTG_CONTROL,
+			OTG_MASTER_EN, 0);
+
 	REG_UPDATE(CONTROL,
 			VTG0_ENABLE, 0);
 
@@ -205,6 +205,13 @@ static void optc32_disable_phantom_otg(struct timing_generator *optc)
 {
 	struct optc *optc1 = DCN10TG_FROM_TG(optc);
 
+	REG_UPDATE_5(OPTC_DATA_SOURCE_SELECT,
+			OPTC_SEG0_SRC_SEL, 0xf,
+			OPTC_SEG1_SRC_SEL, 0xf,
+			OPTC_SEG2_SRC_SEL, 0xf,
+			OPTC_SEG3_SRC_SEL, 0xf,
+			OPTC_NUM_OF_INPUT_SEGMENT, 0);
+
 	REG_UPDATE(OTG_CONTROL, OTG_MASTER_EN, 0);
 }
 
diff --git a/drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.c
index 08a59cf449ca..3d6c1b2c2b4d 100644
--- a/drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.c
+++ b/drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.c
@@ -138,12 +138,6 @@ static bool optc35_disable_crtc(struct timing_generator *optc)
 {
 	struct optc *optc1 = DCN10TG_FROM_TG(optc);
 
-	/* disable otg request until end of the first line
-	 * in the vertical blank region
-	 */
-	REG_UPDATE(OTG_CONTROL,
-			OTG_MASTER_EN, 0);
-
 	REG_UPDATE_5(OPTC_DATA_SOURCE_SELECT,
 			OPTC_SEG0_SRC_SEL, 0xf,
 			OPTC_SEG1_SRC_SEL, 0xf,
@@ -151,6 +145,12 @@ static bool optc35_disable_crtc(struct timing_generator *optc)
 			OPTC_SEG3_SRC_SEL, 0xf,
 			OPTC_NUM_OF_INPUT_SEGMENT, 0);
 
+	/* disable otg request until end of the first line
+	 * in the vertical blank region
+	 */
+	REG_UPDATE(OTG_CONTROL,
+			OTG_MASTER_EN, 0);
+
 	REG_UPDATE(CONTROL,
 			VTG0_ENABLE, 0);
 
commit 17e74e11ac2b46e7514705ae7abfb93ac0e20bd6
Author: Martin Tsai <martin.tsai at amd.com>
Date:   Mon Dec 18 16:36:44 2023 +0800

    drm/amd/display: To adjust dprefclk by down spread percentage
    
    [Why]
    Panels show corruption with high refresh rate timings when ssc is
    enabled.
    
    [How]
    Read down-spread percentage from lut to adjust dprefclk. Issues come
    from S0i3 with this commit has been fixed by SMU.
    
    Reviewed-by: Nicholas Kazlauskas <nicholas.kazlauskas at amd.com>
    Acked-by: Rodrigo Siqueira <rodrigo.siqueira at amd.com>
    Signed-off-by: Martin Tsai <martin.tsai at amd.com>
    Tested-by: Daniel Wheeler <daniel.wheeler at amd.com>
    Signed-off-by: Alex Deucher <alexander.deucher at amd.com>

diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c
index 757528256326..878c0e7b78ab 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c
@@ -87,6 +87,20 @@ static const struct IP_BASE CLK_BASE = { { { { 0x00016C00, 0x02401800, 0, 0, 0,
 #define CLK1_CLK_PLL_REQ__PllSpineDiv_MASK	0x0000F000L
 #define CLK1_CLK_PLL_REQ__FbMult_frac_MASK	0xFFFF0000L
 
+#define regCLK1_CLK2_BYPASS_CNTL			0x029c
+#define regCLK1_CLK2_BYPASS_CNTL_BASE_IDX	0
+
+#define CLK1_CLK2_BYPASS_CNTL__CLK2_BYPASS_SEL__SHIFT	0x0
+#define CLK1_CLK2_BYPASS_CNTL__CLK2_BYPASS_DIV__SHIFT	0x10
+#define CLK1_CLK2_BYPASS_CNTL__CLK2_BYPASS_SEL_MASK		0x00000007L
+#define CLK1_CLK2_BYPASS_CNTL__CLK2_BYPASS_DIV_MASK		0x000F0000L
+
+#define regCLK6_0_CLK6_spll_field_8				0x464b
+#define regCLK6_0_CLK6_spll_field_8_BASE_IDX	0
+
+#define CLK6_0_CLK6_spll_field_8__spll_ssc_en__SHIFT	0xd
+#define CLK6_0_CLK6_spll_field_8__spll_ssc_en_MASK		0x00002000L
+
 #define REG(reg_name) \
 	(CLK_BASE.instance[0].segment[reg ## reg_name ## _BASE_IDX] + reg ## reg_name)
 
@@ -160,6 +174,37 @@ static void dcn314_disable_otg_wa(struct clk_mgr *clk_mgr_base, struct dc_state
 	}
 }
 
+bool dcn314_is_spll_ssc_enabled(struct clk_mgr *clk_mgr_base)
+{
+	struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base);
+	uint32_t ssc_enable;
+
+	REG_GET(CLK6_0_CLK6_spll_field_8, spll_ssc_en, &ssc_enable);
+
+	return ssc_enable == 1;
+}
+
+void dcn314_init_clocks(struct clk_mgr *clk_mgr)
+{
+	struct clk_mgr_internal *clk_mgr_int = TO_CLK_MGR_INTERNAL(clk_mgr);
+	uint32_t ref_dtbclk = clk_mgr->clks.ref_dtbclk_khz;
+
+	memset(&(clk_mgr->clks), 0, sizeof(struct dc_clocks));
+	// Assumption is that boot state always supports pstate
+	clk_mgr->clks.ref_dtbclk_khz = ref_dtbclk;	// restore ref_dtbclk
+	clk_mgr->clks.p_state_change_support = true;
+	clk_mgr->clks.prev_p_state_change_support = true;
+	clk_mgr->clks.pwr_state = DCN_PWR_STATE_UNKNOWN;
+	clk_mgr->clks.zstate_support = DCN_ZSTATE_SUPPORT_UNKNOWN;
+
+	// to adjust dp_dto reference clock if ssc is enable otherwise to apply dprefclk
+	if (dcn314_is_spll_ssc_enabled(clk_mgr))
+		clk_mgr->dp_dto_source_clock_in_khz =
+			dce_adjust_dp_ref_freq_for_ss(clk_mgr_int, clk_mgr->dprefclk_khz);
+	else
+		clk_mgr->dp_dto_source_clock_in_khz = clk_mgr->dprefclk_khz;
+}
+
 void dcn314_update_clocks(struct clk_mgr *clk_mgr_base,
 			struct dc_state *context,
 			bool safe_to_lower)
@@ -436,6 +481,11 @@ static DpmClocks314_t dummy_clocks;
 
 static struct dcn314_watermarks dummy_wms = { 0 };
 
+static struct dcn314_ss_info_table ss_info_table = {
+	.ss_divider = 1000,
+	.ss_percentage = {0, 0, 375, 375, 375}
+};
+
 static void dcn314_build_watermark_ranges(struct clk_bw_params *bw_params, struct dcn314_watermarks *table)
 {
 	int i, num_valid_sets;
@@ -708,13 +758,31 @@ static struct clk_mgr_funcs dcn314_funcs = {
 	.get_dp_ref_clk_frequency = dce12_get_dp_ref_freq_khz,
 	.get_dtb_ref_clk_frequency = dcn31_get_dtb_ref_freq_khz,
 	.update_clocks = dcn314_update_clocks,
-	.init_clocks = dcn31_init_clocks,
+	.init_clocks = dcn314_init_clocks,
 	.enable_pme_wa = dcn314_enable_pme_wa,
 	.are_clock_states_equal = dcn314_are_clock_states_equal,
 	.notify_wm_ranges = dcn314_notify_wm_ranges
 };
 extern struct clk_mgr_funcs dcn3_fpga_funcs;
 
+static void dcn314_read_ss_info_from_lut(struct clk_mgr_internal *clk_mgr)
+{
+	uint32_t clock_source;
+	//uint32_t ssc_enable;
+
+	REG_GET(CLK1_CLK2_BYPASS_CNTL, CLK2_BYPASS_SEL, &clock_source);
+	//REG_GET(CLK6_0_CLK6_spll_field_8, spll_ssc_en, &ssc_enable);
+
+	if (dcn314_is_spll_ssc_enabled(&clk_mgr->base) && (clock_source < ARRAY_SIZE(ss_info_table.ss_percentage))) {
+		clk_mgr->dprefclk_ss_percentage = ss_info_table.ss_percentage[clock_source];
+
+		if (clk_mgr->dprefclk_ss_percentage != 0) {
+			clk_mgr->ss_on_dprefclk = true;
+			clk_mgr->dprefclk_ss_divider = ss_info_table.ss_divider;
+		}
+	}
+}
+
 void dcn314_clk_mgr_construct(
 		struct dc_context *ctx,
 		struct clk_mgr_dcn314 *clk_mgr,
@@ -782,6 +850,7 @@ void dcn314_clk_mgr_construct(
 	clk_mgr->base.base.dprefclk_khz = 600000;
 	clk_mgr->base.base.clks.ref_dtbclk_khz = 600000;
 	dce_clock_read_ss_info(&clk_mgr->base);
+	dcn314_read_ss_info_from_lut(&clk_mgr->base);
 	/*if bios enabled SS, driver needs to adjust dtb clock, only enable with correct bios*/
 
 	clk_mgr->base.base.bw_params = &dcn314_bw_params;
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.h
index 171f84340eb2..002c28e80720 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.h
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.h
@@ -28,6 +28,8 @@
 #define __DCN314_CLK_MGR_H__
 #include "clk_mgr_internal.h"
 
+#define DCN314_NUM_CLOCK_SOURCES   5
+
 struct dcn314_watermarks;
 
 struct dcn314_smu_watermark_set {
@@ -40,9 +42,18 @@ struct clk_mgr_dcn314 {
 	struct dcn314_smu_watermark_set smu_wm_set;
 };
 
+struct dcn314_ss_info_table {
+	uint32_t ss_divider;
+	uint32_t ss_percentage[DCN314_NUM_CLOCK_SOURCES];
+};
+
 bool dcn314_are_clock_states_equal(struct dc_clocks *a,
 		struct dc_clocks *b);
 
+bool dcn314_is_spll_ssc_enabled(struct clk_mgr *clk_mgr_base);
+
+void dcn314_init_clocks(struct clk_mgr *clk_mgr);
+
 void dcn314_update_clocks(struct clk_mgr *clk_mgr_base,
 			struct dc_state *context,
 			bool safe_to_lower);
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_audio.c b/drivers/gpu/drm/amd/display/dc/dce/dce_audio.c
index 140598f18bbd..f0458b8f00af 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_audio.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_audio.c
@@ -782,7 +782,7 @@ static void get_azalia_clock_info_dp(
 	/*audio_dto_module = dpDtoSourceClockInkhz * 10,000;
 	 *  [khz] ->[100Hz] */
 	azalia_clock_info->audio_dto_module =
-		pll_info->dp_dto_source_clock_in_khz * 10;
+		pll_info->audio_dto_source_clock_in_khz * 10;
 }
 
 void dce_aud_wall_dto_setup(
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c
index 5d3f6fa1011e..970644b695cd 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c
@@ -975,6 +975,9 @@ static bool dcn31_program_pix_clk(
 			look_up_in_video_optimized_rate_tlb(pix_clk_params->requested_pix_clk_100hz / 10);
 	struct bp_pixel_clock_parameters bp_pc_params = {0};
 	enum transmitter_color_depth bp_pc_colour_depth = TRANSMITTER_COLOR_DEPTH_24;
+
+	if (clock_source->ctx->dc->clk_mgr->dp_dto_source_clock_in_khz != 0)
+		dp_dto_ref_khz = clock_source->ctx->dc->clk_mgr->dp_dto_source_clock_in_khz;
 	// For these signal types Driver to program DP_DTO without calling VBIOS Command table
 	if (dc_is_dp_signal(pix_clk_params->signal_type) || dc_is_virtual_signal(pix_clk_params->signal_type)) {
 		if (e) {
@@ -1088,6 +1091,10 @@ static bool get_pixel_clk_frequency_100hz(
 	struct dce110_clk_src *clk_src = TO_DCE110_CLK_SRC(clock_source);
 	unsigned int clock_hz = 0;
 	unsigned int modulo_hz = 0;
+	unsigned int dp_dto_ref_khz = clock_source->ctx->dc->clk_mgr->dprefclk_khz;
+
+	if (clock_source->ctx->dc->clk_mgr->dp_dto_source_clock_in_khz != 0)
+		dp_dto_ref_khz = clock_source->ctx->dc->clk_mgr->dp_dto_source_clock_in_khz;
 
 	if (clock_source->id == CLOCK_SOURCE_ID_DP_DTO) {
 		clock_hz = REG_READ(PHASE[inst]);
@@ -1100,7 +1107,7 @@ static bool get_pixel_clk_frequency_100hz(
 			modulo_hz = REG_READ(MODULO[inst]);
 			if (modulo_hz)
 				*pixel_clk_khz = div_u64((uint64_t)clock_hz*
-					clock_source->ctx->dc->clk_mgr->dprefclk_khz*10,
+					dp_dto_ref_khz*10,
 					modulo_hz);
 			else
 				*pixel_clk_khz = 0;
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c
index fb328cd06cea..5660f15da291 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c
@@ -1354,7 +1354,7 @@ static void build_audio_output(
 	if (state->clk_mgr &&
 		(pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT ||
 			pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST)) {
-		audio_output->pll_info.dp_dto_source_clock_in_khz =
+		audio_output->pll_info.audio_dto_source_clock_in_khz =
 				state->clk_mgr->funcs->get_dp_ref_clk_frequency(
 						state->clk_mgr);
 	}
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h
index cbba39d251e5..17e014d3bdc8 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h
@@ -333,6 +333,7 @@ struct clk_mgr {
 	bool force_smu_not_present;
 	bool dc_mode_softmax_enabled;
 	int dprefclk_khz; // Used by program pixel clock in clock source funcs, need to figureout where this goes
+	int dp_dto_source_clock_in_khz; // Used to program DP DTO with ss adjustment on DCN314
 	int dentist_vco_freq_khz;
 	struct clk_state_registers_and_bypass boot_snapshot;
 	struct clk_bw_params *bw_params;
diff --git a/drivers/gpu/drm/amd/display/include/audio_types.h b/drivers/gpu/drm/amd/display/include/audio_types.h
index 66a54da0641c..915a031a43cb 100644
--- a/drivers/gpu/drm/amd/display/include/audio_types.h
+++ b/drivers/gpu/drm/amd/display/include/audio_types.h
@@ -64,7 +64,7 @@ enum audio_dto_source {
 /* PLL information required for AZALIA DTO calculation */
 
 struct audio_pll_info {
-	uint32_t dp_dto_source_clock_in_khz;
+	uint32_t audio_dto_source_clock_in_khz;
 	uint32_t feed_back_divider;
 	enum audio_dto_source dto_source;
 	bool ss_enabled;
commit 47bf0f83fc86df1bf42b385a91aadb910137c5c9
Author: Felix Kuehling <felix.kuehling at amd.com>
Date:   Tue Jan 2 15:07:44 2024 -0500

    drm/amdkfd: Fix lock dependency warning
    
    ======================================================
    WARNING: possible circular locking dependency detected
    6.5.0-kfd-fkuehlin #276 Not tainted
    ------------------------------------------------------
    kworker/8:2/2676 is trying to acquire lock:
    ffff9435aae95c88 ((work_completion)(&svm_bo->eviction_work)){+.+.}-{0:0}, at: __flush_work+0x52/0x550
    
    but task is already holding lock:
    ffff9435cd8e1720 (&svms->lock){+.+.}-{3:3}, at: svm_range_deferred_list_work+0xe8/0x340 [amdgpu]
    
    which lock already depends on the new lock.
    
    the existing dependency chain (in reverse order) is:
    
    -> #2 (&svms->lock){+.+.}-{3:3}:
           __mutex_lock+0x97/0xd30
           kfd_ioctl_alloc_memory_of_gpu+0x6d/0x3c0 [amdgpu]
           kfd_ioctl+0x1b2/0x5d0 [amdgpu]
           __x64_sys_ioctl+0x86/0xc0
           do_syscall_64+0x39/0x80
           entry_SYSCALL_64_after_hwframe+0x63/0xcd
    
    -> #1 (&mm->mmap_lock){++++}-{3:3}:
           down_read+0x42/0x160
           svm_range_evict_svm_bo_worker+0x8b/0x340 [amdgpu]
           process_one_work+0x27a/0x540
           worker_thread+0x53/0x3e0
           kthread+0xeb/0x120
           ret_from_fork+0x31/0x50
           ret_from_fork_asm+0x11/0x20
    
    -> #0 ((work_completion)(&svm_bo->eviction_work)){+.+.}-{0:0}:
           __lock_acquire+0x1426/0x2200
           lock_acquire+0xc1/0x2b0
           __flush_work+0x80/0x550
           __cancel_work_timer+0x109/0x190
           svm_range_bo_release+0xdc/0x1c0 [amdgpu]
           svm_range_free+0x175/0x180 [amdgpu]
           svm_range_deferred_list_work+0x15d/0x340 [amdgpu]
           process_one_work+0x27a/0x540
           worker_thread+0x53/0x3e0
           kthread+0xeb/0x120
           ret_from_fork+0x31/0x50
           ret_from_fork_asm+0x11/0x20
    
    other info that might help us debug this:
    
    Chain exists of:
      (work_completion)(&svm_bo->eviction_work) --> &mm->mmap_lock --> &svms->lock
    
     Possible unsafe locking scenario:
    
           CPU0                    CPU1
           ----                    ----
      lock(&svms->lock);
                                   lock(&mm->mmap_lock);
                                   lock(&svms->lock);
      lock((work_completion)(&svm_bo->eviction_work));
    
    I believe this cannot really lead to a deadlock in practice, because
    svm_range_evict_svm_bo_worker only takes the mmap_read_lock if the BO
    refcount is non-0. That means it's impossible that svm_range_bo_release
    is running concurrently. However, there is no good way to annotate this.
    
    To avoid the problem, take a BO reference in
    svm_range_schedule_evict_svm_bo instead of in the worker. That way it's
    impossible for a BO to get freed while eviction work is pending and the
    cancel_work_sync call in svm_range_bo_release can be eliminated.
    
    v2: Use svm_bo_ref_unless_zero and explained why that's safe. Also
    removed redundant checks that are already done in
    amdkfd_fence_enable_signaling.
    
    Signed-off-by: Felix Kuehling <felix.kuehling at amd.com>
    Reviewed-by: Philip Yang <philip.yang at amd.com>
    Signed-off-by: Alex Deucher <alexander.deucher at amd.com>

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index ac84c4a2ca07..d46c145835e0 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -404,14 +404,9 @@ static void svm_range_bo_release(struct kref *kref)
 		spin_lock(&svm_bo->list_lock);
 	}
 	spin_unlock(&svm_bo->list_lock);
-	if (!dma_fence_is_signaled(&svm_bo->eviction_fence->base)) {
-		/* We're not in the eviction worker.
-		 * Signal the fence and synchronize with any
-		 * pending eviction work.
-		 */
+	if (!dma_fence_is_signaled(&svm_bo->eviction_fence->base))
+		/* We're not in the eviction worker. Signal the fence. */
 		dma_fence_signal(&svm_bo->eviction_fence->base);
-		cancel_work_sync(&svm_bo->eviction_work);
-	}
 	dma_fence_put(&svm_bo->eviction_fence->base);
 	amdgpu_bo_unref(&svm_bo->bo);
 	kfree(svm_bo);
@@ -3432,13 +3427,14 @@ svm_range_trigger_migration(struct mm_struct *mm, struct svm_range *prange,
 
 int svm_range_schedule_evict_svm_bo(struct amdgpu_amdkfd_fence *fence)
 {
-	if (!fence)
-		return -EINVAL;
-
-	if (dma_fence_is_signaled(&fence->base))
-		return 0;
-
-	if (fence->svm_bo) {
+	/* Dereferencing fence->svm_bo is safe here because the fence hasn't
+	 * signaled yet and we're under the protection of the fence->lock.
+	 * After the fence is signaled in svm_range_bo_release, we cannot get
+	 * here any more.
+	 *
+	 * Reference is dropped in svm_range_evict_svm_bo_worker.
+	 */
+	if (svm_bo_ref_unless_zero(fence->svm_bo)) {
 		WRITE_ONCE(fence->svm_bo->evicting, 1);
 		schedule_work(&fence->svm_bo->eviction_work);
 	}
@@ -3453,8 +3449,6 @@ static void svm_range_evict_svm_bo_worker(struct work_struct *work)
 	int r = 0;
 
 	svm_bo = container_of(work, struct svm_range_bo, eviction_work);
-	if (!svm_bo_ref_unless_zero(svm_bo))
-		return; /* svm_bo was freed while eviction was pending */
 
 	if (mmget_not_zero(svm_bo->eviction_fence->mm)) {
 		mm = svm_bo->eviction_fence->mm;