[openchrome-devel] drm-openchrome: Branch 'drm-next-5.19' - 63 commits - drivers/gpu/drm drivers/gpu/host1x drivers/gpu/Makefile include/linux/host1x_context_bus.h include/uapi/drm

Kevin Brace kevinbrace at kemper.freedesktop.org
Fri Jun 3 21:53:23 UTC 2022


 drivers/gpu/Makefile                                               |    3 
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c                   |    2 
 drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c                   |    8 
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c                             |    4 
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c                            |  223 
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h                            |   23 
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c                         |   10 
 drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c                      |   19 
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c                            |    6 
 drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c                         |   68 
 drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c                            |    9 
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c                            |   29 
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c                            |    2 
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c                         |    5 
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.h                         |    1 
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c                            |    2 
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c                            |   42 
 drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c                           |    4 
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c                            |    2 
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c                             |   48 
 drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c                             |    2 
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c                              |    6 
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c                              |   21 
 drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c                             |    3 
 drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c                             |    3 
 drivers/gpu/drm/amd/amdgpu/psp_v13_0.c                             |   15 
 drivers/gpu/drm/amd/amdgpu/soc21.c                                 |    1 
 drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h                     | 2974 +++++-----
 drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm             |  394 -
 drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm              |  244 
 drivers/gpu/drm/amd/amdkfd/kfd_device.c                            |    6 
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c                               |    2 
 drivers/gpu/drm/amd/amdkfd/kfd_topology.c                          |   22 
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c                  |   19 
 drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c          |    2 
 drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c     |    3 
 drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c     |    3 
 drivers/gpu/drm/amd/display/dc/core/dc.c                           |   15 
 drivers/gpu/drm/amd/display/dc/core/dc_link.c                      |   24 
 drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c                   |  248 
 drivers/gpu/drm/amd/display/dc/core/dc_link_dpia.c                 |   19 
 drivers/gpu/drm/amd/display/dc/core/dc_surface.c                   |    2 
 drivers/gpu/drm/amd/display/dc/dc.h                                |    6 
 drivers/gpu/drm/amd/display/dc/dc_link.h                           |    2 
 drivers/gpu/drm/amd/display/dc/dce/dce_aux.c                       |    3 
 drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c              |    9 
 drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c          |   27 
 drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c                 |   17 
 drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.c                  |   10 
 drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c              |  516 -
 drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.h              |    5 
 drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c            |    2 
 drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c            |    2 
 drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c            |    2 
 drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c              |    2 
 drivers/gpu/drm/amd/display/dc/dml/Makefile                        |    3 
 drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.c               |  617 ++
 drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.h               |   67 
 drivers/gpu/drm/amd/display/dc/inc/core_types.h                    |    7 
 drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h                    |    3 
 drivers/gpu/drm/amd/display/dc/inc/dc_link_dpia.h                  |    5 
 drivers/gpu/drm/amd/display/include/link_service_types.h           |    6 
 drivers/gpu/drm/amd/include/asic_reg/dce/dce_10_0_sh_mask.h        |    2 
 drivers/gpu/drm/amd/include/asic_reg/dce/dce_11_0_sh_mask.h        |    2 
 drivers/gpu/drm/amd/include/asic_reg/dce/dce_11_2_sh_mask.h        |    2 
 drivers/gpu/drm/amd/include/asic_reg/dce/dce_12_0_sh_mask.h        |    2 
 drivers/gpu/drm/amd/include/asic_reg/dce/dce_8_0_sh_mask.h         |    2 
 drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_1_0_sh_mask.h         |    2 
 drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_0_sh_mask.h       |    2 
 drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_3_sh_mask.h       |    4 
 drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_1_0_sh_mask.h       |    2 
 drivers/gpu/drm/amd/pm/amdgpu_dpm.c                                |    3 
 drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c                          |    1 
 drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h |   22 
 drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h                       |    2 
 drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c            |   57 
 drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c                 |    1 
 drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c                     |   47 
 drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c               |    8 
 drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c               |   62 
 drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c               |    3 
 drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c                        |    3 
 drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c                |    4 
 drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c                            |    8 
 drivers/gpu/drm/msm/dp/dp_ctrl.c                                   |    9 
 drivers/gpu/drm/msm/msm_mdss.c                                     |   57 
 drivers/gpu/drm/radeon/radeon_connectors.c                         |    4 
 drivers/gpu/host1x/Kconfig                                         |    5 
 drivers/gpu/host1x/Makefile                                        |    1 
 drivers/gpu/host1x/context_bus.c                                   |   31 
 include/linux/host1x_context_bus.h                                 |   15 
 include/uapi/drm/amdgpu_drm.h                                      |    8 
 92 files changed, 3667 insertions(+), 2553 deletions(-)

New commits:
commit b41a7e74a1e7b610147fce97d4f8e37594f2c37d
Merge: 02bcefe410c1 404204340c0d
Author: Kevin Brace <kevinbrace at gmx.com>
Date:   Fri Jun 3 16:51:26 2022 -0500

    Merge tag 'drm-next-2022-06-03-1' of git://anongit.freedesktop.org/drm/drm into drm-next-5.19
    
    drm next for 5.19-rc1 (part 2/fixes)
    
    msm:
    - Limiting WB modes to max sspp linewidth
    - Fixing the supported rotations to add 180 back for IGT
    - Fix to handle pm_runtime_get_sync() errors to avoid unclocked access
      in the bind() path for dpu driver
    - Fix the irq_free() without request issue which was a big-time
      hitter in the CI-runs.
    
    amdgpu:
    - Update fdinfo to the common drm format
    - uapi: Add VM_NOALLOC GPUVM attribute to prevent buffers for going into the MALL
      Add AMDGPU_GEM_CREATE_DISCARDABLE flag to create buffers that can be discarded on eviction
      Mesa code which uses these: https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16466
    - Link training fixes
    - DPIA fixes
    - Misc code cleanups
    - Aux fixes
    - Hotplug fixes
    - More FP clean up
    - Misc GFX9/10 fixes
    - Fix a possible memory leak in SMU shutdown
    - SMU 13 updates
    - RAS fixes
    - TMZ fixes
    - GC 11 updates
    - SMU 11 metrics fixes
    - Fix coverage blend mode for overlay plane
    - Note DDR vs LPDDR memory
    - Fuzz fix for CS IOCTL
    - Add new PCI DID
    
    amdkfd:
    - Clean up hive setup
    - Misc fixes
    
    tegra:
    - add some prelim 5.20 work to avoid inter-tree mess

commit 404204340c0dc54e4b779737201044dcb231f0bf
Merge: b8042ff4faa5 597b89d30b42
Author: Dave Airlie <airlied at redhat.com>
Date:   Fri Jun 3 11:35:46 2022 +1000

    Merge tag 'drm/tegra/for-5.19-prep-work' of https://gitlab.freedesktop.org/drm/tegra into drm-next
    
    drm/tegra: Preparatory work for v5.19
    
    This contains a single patch from a series that's ready to go for v5.10
    but is also a shared build-time dependency for an IOMMU series that is
    planned for v5.20. The idea is to take this into v5.19 to fulfill that
    dependency and remove the need for close coordination for the two
    series.
    
    Signed-off-by: Dave Airlie <airlied at redhat.com>
    
    From: Thierry Reding <thierry.reding at gmail.com>
    Link: https://patchwork.freedesktop.org/patch/msgid/20220601100335.3841301-1-thierry.reding@gmail.com

commit b8042ff4faa59ed3608beeeddc5d87ce55c62a98
Merge: bf23729c7a5f b9364eed9232
Author: Dave Airlie <airlied at redhat.com>
Date:   Fri Jun 3 11:19:11 2022 +1000

    Merge tag 'msm-next-5.19-fixes-06-01' of https://gitlab.freedesktop.org/abhinavk/msm into drm-next
    
    5.19 fixes for msm-next
    
    - Fix to add minimum ICC vote in the msm_mdss pm_resume path to address
       bootup splats
    - Fix to avoid dereferencing without checking in WB encoder
    - Fix to avoid crash during suspend in DP driver by ensuring interrupt
       mask bits are updated
    - Remove unused code from dpu_encoder_virt_atomic_check()
    - Fix to remove redundant init of dsc variable
    
    Signed-off-by: Dave Airlie <airlied at redhat.com>
    
    From: Abhinav Kumar <quic_abhinavk at quicinc.com>
    Link: https://patchwork.freedesktop.org/patch/msgid/927b201e-a734-a29d-b9fb-b9889e1f7795@quicinc.com

commit b9364eed9232f3d2a846f68c2307eb25c93cc2d0
Author: Douglas Anderson <dianders at chromium.org>
Date:   Tue May 31 16:01:26 2022 -0700

    drm/msm/dpu: Move min BW request and full BW disable back to mdss
    
    In commit a670ff578f1f ("drm/msm/dpu: always use mdp device to scale
    bandwidth") we fully moved interconnect stuff to the DPU driver. This
    had no change for sc7180 but _did_ have an impact for other SoCs. It
    made them match the sc7180 scheme.
    
    Unfortunately, the sc7180 scheme seems like it was a bit broken.
    Specifically the interconnect needs to be on for more than just the
    DPU driver's AXI bus. In the very least it also needs to be on for the
    DSI driver's AXI bus. This can be seen fairly easily by doing this on
    a ChromeOS sc7180-trogdor class device:
    
      set_power_policy --ac_screen_dim_delay=5 --ac_screen_off_delay=10
      sleep 10
      cd /sys/bus/platform/devices/ae94000.dsi/power
      echo on > control
    
    When you do that, you'll get a warning splat in the logs about
    "gcc_disp_hf_axi_clk status stuck at 'off'".
    
    One could argue that perhaps what I have done above is "illegal" and
    that it can't happen naturally in the system because in normal system
    usage the DPU is pretty much always on when DSI is on. That being
    said:
    * In official ChromeOS builds (admittedly a 5.4 kernel with backports)
      we have seen that splat at bootup.
    * Even though we don't use "autosuspend" for these components, we
      don't use the "put_sync" variants. Thus plausibly the DSI could stay
      "runtime enabled" past when the DPU is enabled. Techncially we
      shouldn't do that if the DPU's suspend ends up yanking our clock.
    
    Let's change things such that the "bare minimum" request for the
    interconnect happens in the mdss driver again. That means that all of
    the children can assume that the interconnect is on at the minimum
    bandwidth. We'll then let the DPU request the higher amount that it
    wants.
    
    It should be noted that this isn't as hacky of a solution as it might
    initially appear. Specifically:
    * Since MDSS and DPU individually get their own references to the
      interconnect then the framework will actually handle aggregating
      them. The two drivers are _not_ clobbering each other.
    * When the Qualcomm interconnect driver aggregates it takes the max of
      all the peaks. Thus having MDSS request a peak, as we're doing here,
      won't actually change the total interconnect bandwidth (it won't be
      added to the request for the DPU). This perhaps explains why the
      "average" requested in MDSS was historically 0 since that one
      _would_ be added in.
    
    NOTE also that in the downstream ChromeOS 5.4 and 5.15 kernels, we're
    also seeing some RPMH hangs that are addressed by this fix. These
    hangs are showing up in the field and on _some_ devices with enough
    stress testing of suspend/resume. Specifically right at suspend time
    with a stack crawl that looks like this (from chromeos-5.15 tree):
      rpmh_write_batch+0x19c/0x240
      qcom_icc_bcm_voter_commit+0x210/0x420
      qcom_icc_set+0x28/0x38
      apply_constraints+0x70/0xa4
      icc_set_bw+0x150/0x24c
      dpu_runtime_resume+0x50/0x1c4
      pm_generic_runtime_resume+0x30/0x44
      __genpd_runtime_resume+0x68/0x7c
      genpd_runtime_resume+0x12c/0x20c
      __rpm_callback+0x98/0x138
      rpm_callback+0x30/0x88
      rpm_resume+0x370/0x4a0
      __pm_runtime_resume+0x80/0xb0
      dpu_kms_enable_commit+0x24/0x30
      msm_atomic_commit_tail+0x12c/0x630
      commit_tail+0xac/0x150
      drm_atomic_helper_commit+0x114/0x11c
      drm_atomic_commit+0x68/0x78
      drm_atomic_helper_disable_all+0x158/0x1c8
      drm_atomic_helper_suspend+0xc0/0x1c0
      drm_mode_config_helper_suspend+0x2c/0x60
      msm_pm_prepare+0x2c/0x40
      pm_generic_prepare+0x30/0x44
      genpd_prepare+0x80/0xd0
      device_prepare+0x78/0x17c
      dpm_prepare+0xb0/0x384
      dpm_suspend_start+0x34/0xc0
    
    We don't completely understand all the mechanisms in play, but the
    hang seemed to come and go with random factors. It's not terribly
    surprising that the hang is gone after this patch since the line of
    code that was failing is no longer present in the kernel.
    
    Fixes: a670ff578f1f ("drm/msm/dpu: always use mdp device to scale bandwidth")
    Fixes: c33b7c0389e1 ("drm/msm/dpu: add support for clk and bw scaling for display")
    Signed-off-by: Douglas Anderson <dianders at chromium.org>
    Reviewed-by: Abhinav Kumar <quic_abhinavk at quicinc.com>
    Tested-by: Jessica Zhang <quic_jesszhan at quicinc.com> # RB3 (sdm845) and
    Reviewed-by: Stephen Boyd <swboyd at chromium.org>
    Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov at linaro.org>
    Patchwork: https://patchwork.freedesktop.org/patch/487884/
    Link: https://lore.kernel.org/r/20220531160059.v2.1.Ie7f6d4bf8cce28131da31a43354727e417cae98d@changeid
    Signed-off-by: Abhinav Kumar <quic_abhinavk at quicinc.com>

diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c
index bce47647d891..e23e2552e802 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c
@@ -49,8 +49,6 @@
 #define DPU_DEBUGFS_DIR "msm_dpu"
 #define DPU_DEBUGFS_HWMASKNAME "hw_log_mask"
 
-#define MIN_IB_BW	400000000ULL /* Min ib vote 400MB */
-
 static int dpu_kms_hw_init(struct msm_kms *kms);
 static void _dpu_kms_mmu_destroy(struct dpu_kms *dpu_kms);
 
@@ -1305,15 +1303,9 @@ static int __maybe_unused dpu_runtime_resume(struct device *dev)
 	struct dpu_kms *dpu_kms = to_dpu_kms(priv->kms);
 	struct drm_encoder *encoder;
 	struct drm_device *ddev;
-	int i;
 
 	ddev = dpu_kms->dev;
 
-	WARN_ON(!(dpu_kms->num_paths));
-	/* Min vote of BW is required before turning on AXI clk */
-	for (i = 0; i < dpu_kms->num_paths; i++)
-		icc_set_bw(dpu_kms->path[i], 0, Bps_to_icc(MIN_IB_BW));
-
 	rc = clk_bulk_prepare_enable(dpu_kms->num_clocks, dpu_kms->clocks);
 	if (rc) {
 		DPU_ERROR("clock enable failed rc:%d\n", rc);
diff --git a/drivers/gpu/drm/msm/msm_mdss.c b/drivers/gpu/drm/msm/msm_mdss.c
index 0454a571adf7..e13c5c12b775 100644
--- a/drivers/gpu/drm/msm/msm_mdss.c
+++ b/drivers/gpu/drm/msm/msm_mdss.c
@@ -5,6 +5,7 @@
 
 #include <linux/clk.h>
 #include <linux/delay.h>
+#include <linux/interconnect.h>
 #include <linux/irq.h>
 #include <linux/irqchip.h>
 #include <linux/irqdesc.h>
@@ -25,6 +26,8 @@
 #define UBWC_CTRL_2			0x150
 #define UBWC_PREDICTION_MODE		0x154
 
+#define MIN_IB_BW	400000000UL /* Min ib vote 400MB */
+
 struct msm_mdss {
 	struct device *dev;
 
@@ -36,8 +39,47 @@ struct msm_mdss {
 		unsigned long enabled_mask;
 		struct irq_domain *domain;
 	} irq_controller;
+	struct icc_path *path[2];
+	u32 num_paths;
 };
 
+static int msm_mdss_parse_data_bus_icc_path(struct device *dev,
+					    struct msm_mdss *msm_mdss)
+{
+	struct icc_path *path0 = of_icc_get(dev, "mdp0-mem");
+	struct icc_path *path1 = of_icc_get(dev, "mdp1-mem");
+
+	if (IS_ERR_OR_NULL(path0))
+		return PTR_ERR_OR_ZERO(path0);
+
+	msm_mdss->path[0] = path0;
+	msm_mdss->num_paths = 1;
+
+	if (!IS_ERR_OR_NULL(path1)) {
+		msm_mdss->path[1] = path1;
+		msm_mdss->num_paths++;
+	}
+
+	return 0;
+}
+
+static void msm_mdss_put_icc_path(void *data)
+{
+	struct msm_mdss *msm_mdss = data;
+	int i;
+
+	for (i = 0; i < msm_mdss->num_paths; i++)
+		icc_put(msm_mdss->path[i]);
+}
+
+static void msm_mdss_icc_request_bw(struct msm_mdss *msm_mdss, unsigned long bw)
+{
+	int i;
+
+	for (i = 0; i < msm_mdss->num_paths; i++)
+		icc_set_bw(msm_mdss->path[i], 0, Bps_to_icc(bw));
+}
+
 static void msm_mdss_irq(struct irq_desc *desc)
 {
 	struct msm_mdss *msm_mdss = irq_desc_get_handler_data(desc);
@@ -136,6 +178,13 @@ static int msm_mdss_enable(struct msm_mdss *msm_mdss)
 {
 	int ret;
 
+	/*
+	 * Several components have AXI clocks that can only be turned on if
+	 * the interconnect is enabled (non-zero bandwidth). Let's make sure
+	 * that the interconnects are at least at a minimum amount.
+	 */
+	msm_mdss_icc_request_bw(msm_mdss, MIN_IB_BW);
+
 	ret = clk_bulk_prepare_enable(msm_mdss->num_clocks, msm_mdss->clocks);
 	if (ret) {
 		dev_err(msm_mdss->dev, "clock enable failed, ret:%d\n", ret);
@@ -178,6 +227,7 @@ static int msm_mdss_enable(struct msm_mdss *msm_mdss)
 static int msm_mdss_disable(struct msm_mdss *msm_mdss)
 {
 	clk_bulk_disable_unprepare(msm_mdss->num_clocks, msm_mdss->clocks);
+	msm_mdss_icc_request_bw(msm_mdss, 0);
 
 	return 0;
 }
@@ -271,6 +321,13 @@ static struct msm_mdss *msm_mdss_init(struct platform_device *pdev, bool is_mdp5
 
 	dev_dbg(&pdev->dev, "mapped mdss address space @%pK\n", msm_mdss->mmio);
 
+	ret = msm_mdss_parse_data_bus_icc_path(&pdev->dev, msm_mdss);
+	if (ret)
+		return ERR_PTR(ret);
+	ret = devm_add_action_or_reset(&pdev->dev, msm_mdss_put_icc_path, msm_mdss);
+	if (ret)
+		return ERR_PTR(ret);
+
 	if (is_mdp5)
 		ret = mdp5_mdss_parse_clock(pdev, &msm_mdss->clocks);
 	else
commit 8caad14e722477807e9324fe0e85dd10151fb134
Author: Haowen Bai <baihaowen at meizu.com>
Date:   Mon May 30 10:19:55 2022 +0800

    drm/msm/dpu: Fix pointer dereferenced before checking
    
    The phys_enc->wb_idx is dereferencing before null checking, so move
    it after checking.
    
    Signed-off-by: Haowen Bai <baihaowen at meizu.com>
    Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov at linaro.org>
    Fixes: d7d0e73f7de33 ("drm/msm/dpu: introduce the dpu_encoder_phys_* for
    Reviewed-by: Abhinav Kumar <quic_abhinavk at quicinc.com>
    Patchwork: https://patchwork.freedesktop.org/patch/487606/
    Link: https://lore.kernel.org/r/1653877196-23114-1-git-send-email-baihaowen@meizu.com
    Signed-off-by: Abhinav Kumar <quic_abhinavk at quicinc.com>

diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c
index 4829d1ce0cf8..59da348ff339 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c
@@ -574,11 +574,11 @@ static void dpu_encoder_phys_wb_disable(struct dpu_encoder_phys *phys_enc)
  */
 static void dpu_encoder_phys_wb_destroy(struct dpu_encoder_phys *phys_enc)
 {
-	DPU_DEBUG("[wb:%d]\n", phys_enc->wb_idx - WB_0);
-
 	if (!phys_enc)
 		return;
 
+	DPU_DEBUG("[wb:%d]\n", phys_enc->wb_idx - WB_0);
+
 	kfree(phys_enc);
 }
 
commit fb0af2daaa04d69b9e66613b8721d23d41f387f6
Author: Jiapeng Chong <jiapeng.chong at linux.alibaba.com>
Date:   Tue May 24 16:14:13 2022 +0800

    drm/msm/dpu: Remove unused code
    
    Eliminate the follow clang warning:
    
    drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c:544:33: warning: variable
    ‘mode’ set but not used [-Wunused-but-set-variable].
    
    Reported-by: Abaci Robot <abaci at linux.alibaba.com>
    Signed-off-by: Jiapeng Chong <jiapeng.chong at linux.alibaba.com>
    Fixes: 3177589c6e93("drm/msm/dpu: encoder: drop unused mode_fixup callback")
    Reviewed-by: Abhinav Kumar <quic_abhinavk at quicinc.com>
    Patchwork: https://patchwork.freedesktop.org/patch/487136/
    Link: https://lore.kernel.org/r/20220524081413.37895-1-jiapeng.chong@linux.alibaba.com
    Signed-off-by: Abhinav Kumar <quic_abhinavk at quicinc.com>

diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
index 8da01d862b83..3a462e327e0e 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
@@ -541,7 +541,6 @@ static int dpu_encoder_virt_atomic_check(
 	struct dpu_encoder_virt *dpu_enc;
 	struct msm_drm_private *priv;
 	struct dpu_kms *dpu_kms;
-	const struct drm_display_mode *mode;
 	struct drm_display_mode *adj_mode;
 	struct msm_display_topology topology;
 	struct dpu_global_state *global_state;
@@ -559,7 +558,6 @@ static int dpu_encoder_virt_atomic_check(
 
 	priv = drm_enc->dev->dev_private;
 	dpu_kms = to_dpu_kms(priv->kms);
-	mode = &crtc_state->mode;
 	adj_mode = &crtc_state->adjusted_mode;
 	global_state = dpu_kms_get_global_state(crtc_state->state);
 	if (IS_ERR(global_state))
commit 6daf7e4aa91e05b3e138805613686fb5c46d8196
Author: Vinod Koul <vkoul at kernel.org>
Date:   Wed May 25 13:09:12 2022 +0530

    drm/msm/disp/dpu1: remove superfluous init
    
    Commit 58dca9810749 ("drm/msm/disp/dpu1: Add support for DSC in
    encoder") added dsc_common_mode variable which was set to zero but then
    again programmed, so drop the superfluous init.
    
    Fixes: 58dca9810749 ("drm/msm/disp/dpu1: Add support for DSC in encoder")
    Reported-by: kernel test robot <yujie.liu at intel.com>
    Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov at linaro.org>
    Reviewed-by: Abhinav Kumar <quic_abhinavk at quicinc.com>
    Signed-off-by: Vinod Koul <vkoul at kernel.org>
    Patchwork: https://patchwork.freedesktop.org/patch/487208/
    Link: https://lore.kernel.org/r/20220525073912.2706505-1-vkoul@kernel.org
    Signed-off-by: Abhinav Kumar <quic_abhinavk at quicinc.com>

diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
index 52516eb20cb8..8da01d862b83 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
@@ -1814,7 +1814,6 @@ static void dpu_encoder_prep_dsc(struct dpu_encoder_virt *dpu_enc,
 		}
 	}
 
-	dsc_common_mode = 0;
 	pic_width = dsc->drm->pic_width;
 
 	dsc_common_mode = DSC_MODE_MULTIPLEX | DSC_MODE_SPLIT_PANEL;
commit 993a2adc6e2e94a0a7b5bfc054eda90ac95f62c3
Author: Kuogee Hsieh <quic_khsieh at quicinc.com>
Date:   Tue May 17 09:21:34 2022 -0700

    drm/msm/dp: Always clear mask bits to disable interrupts at dp_ctrl_reset_irq_ctrl()
    
    dp_catalog_ctrl_reset() will software reset DP controller. But it will
    not reset programmable registers to default value. DP driver still have
    to clear mask bits to interrupt status registers to disable interrupts
    after software reset of controller.
    
    At current implementation, dp_ctrl_reset_irq_ctrl() will software reset dp
    controller but did not call dp_catalog_ctrl_enable_irq(false) to clear hpd
    related interrupt mask bits to disable hpd related interrupts due to it
    mistakenly think hpd related interrupt mask bits will be cleared by software
    reset of dp controller automatically. This mistake may cause system to crash
    during suspending procedure due to unexpected irq fired and trigger event
    thread to access dp controller registers with controller clocks are disabled.
    
    This patch fixes system crash during suspending problem by removing "enable"
    flag condition checking at dp_ctrl_reset_irq_ctrl() so that hpd related
    interrupt mask bits are cleared to prevent unexpected from happening.
    
    Changes in v2:
    -- add more details commit text
    
    Changes in v3:
    -- add synchrons_irq()
    -- add atomic_t suspended
    
    Changes in v4:
    -- correct Fixes's commit ID
    -- remove synchrons_irq()
    
    Changes in v5:
    -- revise commit text
    
    Changes in v6:
    -- add event_lock to protect "suspended"
    
    Changes in v7:
    -- delete "suspended" flag
    
    Fixes: 989ebe7bc446 ("drm/msm/dp: do not initialize phy until plugin interrupt received")
    Signed-off-by: Kuogee Hsieh <quic_khsieh at quicinc.com>
    Reviewed-by: Stephen Boyd <swboyd at chromium.org>
    Patchwork: https://patchwork.freedesktop.org/patch/486591/
    Link: https://lore.kernel.org/r/1652804494-19650-1-git-send-email-quic_khsieh@quicinc.com
    Signed-off-by: Abhinav Kumar <quic_abhinavk at quicinc.com>

diff --git a/drivers/gpu/drm/msm/dp/dp_ctrl.c b/drivers/gpu/drm/msm/dp/dp_ctrl.c
index af7a80c63610..f3e333eff32d 100644
--- a/drivers/gpu/drm/msm/dp/dp_ctrl.c
+++ b/drivers/gpu/drm/msm/dp/dp_ctrl.c
@@ -1389,8 +1389,13 @@ void dp_ctrl_reset_irq_ctrl(struct dp_ctrl *dp_ctrl, bool enable)
 
 	dp_catalog_ctrl_reset(ctrl->catalog);
 
-	if (enable)
-		dp_catalog_ctrl_enable_irq(ctrl->catalog, enable);
+	/*
+	 * all dp controller programmable registers will not
+	 * be reset to default value after DP_SW_RESET
+	 * therefore interrupt mask bits have to be updated
+	 * to enable/disable interrupts
+	 */
+	dp_catalog_ctrl_enable_irq(ctrl->catalog, enable);
 }
 
 void dp_ctrl_phy_init(struct dp_ctrl *dp_ctrl)
commit 597b89d30b42dcc8e6b262e6876b42dde66f97f0
Author: Mikko Perttunen <mperttunen at nvidia.com>
Date:   Mon May 16 11:52:51 2022 +0300

    gpu: host1x: Add context bus
    
    The context bus is a "dummy" bus that contains struct devices that
    correspond to IOMMU contexts assigned through Host1x to processes.
    
    Even when host1x itself is built as a module, the bus is registered
    in built-in code so that the built-in ARM SMMU driver is able to
    reference it.
    
    Signed-off-by: Mikko Perttunen <mperttunen at nvidia.com>
    Signed-off-by: Thierry Reding <treding at nvidia.com>

diff --git a/drivers/gpu/Makefile b/drivers/gpu/Makefile
index 835c88318cec..8997f0096545 100644
--- a/drivers/gpu/Makefile
+++ b/drivers/gpu/Makefile
@@ -2,7 +2,6 @@
 # drm/tegra depends on host1x, so if both drivers are built-in care must be
 # taken to initialize them in the correct order. Link order is the only way
 # to ensure this currently.
-obj-$(CONFIG_TEGRA_HOST1X)	+= host1x/
-obj-y			+= drm/ vga/
+obj-y			+= host1x/ drm/ vga/
 obj-$(CONFIG_IMX_IPUV3_CORE)	+= ipu-v3/
 obj-$(CONFIG_TRACE_GPU_MEM)		+= trace/
diff --git a/drivers/gpu/host1x/Kconfig b/drivers/gpu/host1x/Kconfig
index 6815b4db17c1..1861a8180d3f 100644
--- a/drivers/gpu/host1x/Kconfig
+++ b/drivers/gpu/host1x/Kconfig
@@ -1,8 +1,13 @@
 # SPDX-License-Identifier: GPL-2.0-only
+
+config TEGRA_HOST1X_CONTEXT_BUS
+	bool
+
 config TEGRA_HOST1X
 	tristate "NVIDIA Tegra host1x driver"
 	depends on ARCH_TEGRA || (ARM && COMPILE_TEST)
 	select DMA_SHARED_BUFFER
+	select TEGRA_HOST1X_CONTEXT_BUS
 	select IOMMU_IOVA
 	help
 	  Driver for the NVIDIA Tegra host1x hardware.
diff --git a/drivers/gpu/host1x/Makefile b/drivers/gpu/host1x/Makefile
index d2b6f7de0498..c891a3e33844 100644
--- a/drivers/gpu/host1x/Makefile
+++ b/drivers/gpu/host1x/Makefile
@@ -18,3 +18,4 @@ host1x-y = \
 	hw/host1x07.o
 
 obj-$(CONFIG_TEGRA_HOST1X) += host1x.o
+obj-$(CONFIG_TEGRA_HOST1X_CONTEXT_BUS) += context_bus.o
diff --git a/drivers/gpu/host1x/context_bus.c b/drivers/gpu/host1x/context_bus.c
new file mode 100644
index 000000000000..b0d35b2bbe89
--- /dev/null
+++ b/drivers/gpu/host1x/context_bus.c
@@ -0,0 +1,31 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2021, NVIDIA Corporation.
+ */
+
+#include <linux/device.h>
+#include <linux/of.h>
+
+struct bus_type host1x_context_device_bus_type = {
+	.name = "host1x-context",
+};
+EXPORT_SYMBOL_GPL(host1x_context_device_bus_type);
+
+static int __init host1x_context_device_bus_init(void)
+{
+	int err;
+
+	if (!of_machine_is_compatible("nvidia,tegra186") &&
+	    !of_machine_is_compatible("nvidia,tegra194") &&
+	    !of_machine_is_compatible("nvidia,tegra234"))
+		return 0;
+
+	err = bus_register(&host1x_context_device_bus_type);
+	if (err < 0) {
+		pr_err("bus type registration failed: %d\n", err);
+		return err;
+	}
+
+	return 0;
+}
+postcore_initcall(host1x_context_device_bus_init);
diff --git a/include/linux/host1x_context_bus.h b/include/linux/host1x_context_bus.h
new file mode 100644
index 000000000000..72462737a6db
--- /dev/null
+++ b/include/linux/host1x_context_bus.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (c) 2021, NVIDIA Corporation. All rights reserved.
+ */
+
+#ifndef __LINUX_HOST1X_CONTEXT_BUS_H
+#define __LINUX_HOST1X_CONTEXT_BUS_H
+
+#include <linux/device.h>
+
+#ifdef CONFIG_TEGRA_HOST1X_CONTEXT_BUS
+extern struct bus_type host1x_context_device_bus_type;
+#endif
+
+#endif
commit bf23729c7a5f44f0e863666b9364a64741fd3241
Merge: e573a3cd56d1 9bdc1992c925
Author: Dave Airlie <airlied at redhat.com>
Date:   Wed Jun 1 10:56:11 2022 +1000

    Merge tag 'amd-drm-next-5.19-2022-05-26-2' of https://gitlab.freedesktop.org/agd5f/linux into drm-next
    
    amd-drm-next-5.19-2022-05-26-2:
    
    amdgpu:
    - Update fdinfo to the common drm format
    
    UAPI:
    - Add VM_NOALLOC GPUVM attribute to prevent buffers for going into the MALL
      Add AMDGPU_GEM_CREATE_DISCARDABLE flag to create buffers that can be discarded on eviction
      Mesa code which uses these: https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16466
    
    Signed-off-by: Dave Airlie <airlied at redhat.com>
    From: Alex Deucher <alexander.deucher at amd.com>
    Link: https://patchwork.freedesktop.org/patch/msgid/20220526202546.66860-1-alexander.deucher@amd.com

commit e573a3cd56d10664a89f199606d3c827fd744fd5
Merge: c4955d9cd2fc 62e9bd20035b
Author: Dave Airlie <airlied at redhat.com>
Date:   Wed Jun 1 10:37:20 2022 +1000

    Merge tag 'amd-drm-next-5.19-2022-05-26' of https://gitlab.freedesktop.org/agd5f/linux into drm-next
    
    amd-drm-next-5.19-2022-05-26:
    
    amdgpu:
    - Link training fixes
    - DPIA fixes
    - Misc code cleanups
    - Aux fixes
    - Hotplug fixes
    - More FP clean up
    - Misc GFX9/10 fixes
    - Fix a possible memory leak in SMU shutdown
    - SMU 13 updates
    - RAS fixes
    - TMZ fixes
    - GC 11 updates
    - SMU 11 metrics fixes
    - Fix coverage blend mode for overlay plane
    - Note DDR vs LPDDR memory
    - Fuzz fix for CS IOCTL
    - Add new PCI DID
    
    amdkfd:
    - Clean up hive setup
    - Misc fixes
    
    radeon:
    - Fix a possible NULL pointer dereference
    
    Signed-off-by: Dave Airlie <airlied at redhat.com>
    From: Alex Deucher <alexander.deucher at amd.com>
    Link: https://patchwork.freedesktop.org/patch/msgid/20220526200641.64097-1-alexander.deucher@amd.com

commit 9bdc1992c925a35c6f7200e8abe54e3f00ce7719
Author: Christian König <christian.koenig at amd.com>
Date:   Tue May 10 18:29:35 2022 +0200

    drm/amdgpu: add drm-client-id to fdinfo v2
    
    This is enough to get gputop working :)
    
    v2: rebase and some addition cleanup
    
    Signed-off-by: Christian König <christian.koenig at amd.com>
    Reviewed-by: Shashank Sharma <shashank.sharma at amd.com> (v1)
    Signed-off-by: Alex Deucher <alexander.deucher at amd.com>

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c
index 4d453845235c..99a7855ab1bc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c
@@ -58,11 +58,11 @@ void amdgpu_show_fdinfo(struct seq_file *m, struct file *f)
 	struct drm_file *file = f->private_data;
 	struct amdgpu_device *adev = drm_to_adev(file->minor->dev);
 	struct amdgpu_fpriv *fpriv = file->driver_priv;
+	struct amdgpu_vm *vm = &fpriv->vm;
 
 	uint64_t vram_mem = 0, gtt_mem = 0, cpu_mem = 0;
 	ktime_t usage[AMDGPU_HW_IP_NUM];
 	uint32_t bus, dev, fn, domain;
-	struct amdgpu_bo *root;
 	unsigned int hw_ip;
 	int ret;
 
@@ -71,14 +71,12 @@ void amdgpu_show_fdinfo(struct seq_file *m, struct file *f)
 	dev = PCI_SLOT(adev->pdev->devfn);
 	fn = PCI_FUNC(adev->pdev->devfn);
 
-	root = fpriv->vm.root.bo;
-	ret = amdgpu_bo_reserve(root, false);
-	if (ret) {
-		DRM_ERROR("Fail to reserve bo\n");
+	ret = amdgpu_bo_reserve(vm->root.bo, false);
+	if (ret)
 		return;
-	}
-	amdgpu_vm_get_memory(&fpriv->vm, &vram_mem, &gtt_mem, &cpu_mem);
-	amdgpu_bo_unreserve(root);
+
+	amdgpu_vm_get_memory(vm, &vram_mem, &gtt_mem, &cpu_mem);
+	amdgpu_bo_unreserve(vm->root.bo);
 
 	amdgpu_ctx_mgr_usage(&fpriv->ctx_mgr, usage);
 
@@ -91,6 +89,7 @@ void amdgpu_show_fdinfo(struct seq_file *m, struct file *f)
 	seq_printf(m, "pasid:\t%u\n", fpriv->vm.pasid);
 	seq_printf(m, "drm-driver:\t%s\n", file->minor->dev->driver->name);
 	seq_printf(m, "drm-pdev:\t%04x:%02x:%02x.%d\n", domain, bus, dev, fn);
+	seq_printf(m, "drm-client-id:\t%Lu\n", vm->immediate.fence_context);
 	seq_printf(m, "drm-memory-vram:\t%llu KiB\n", vram_mem/1024UL);
 	seq_printf(m, "drm-memory-gtt: \t%llu KiB\n", gtt_mem/1024UL);
 	seq_printf(m, "drm-memory-cpu: \t%llu KiB\n", cpu_mem/1024UL);
commit af0b541670090e87996e0894bd0e457edf617541
Author: Christian König <christian.koenig at amd.com>
Date:   Wed May 11 11:06:26 2022 +0200

    drm/amdgpu: Convert to common fdinfo format v5
    
    Convert fdinfo format to one documented in drm-usage-stats.rst.
    
    It turned out that the existing implementation was actually completely
    nonsense. The calculated percentages indeed represented the usage of the
    engine, but with varying time slices.
    
    So 10% usage for application A could mean something completely different
    than 10% usage for application B.
    
    Completely nuke that and just use the now standardized nanosecond
    interface.
    
    v2: drop the documentation change for now, nuke percentage calculation
    v3: only account for each hw_ip, move the time_spend to the ctx mgr.
    v4: move general ctx changes into separate patch, rework the fdinfo to
        ctx_mgr interface so that all usages are calculated at once, drop
        some unecessary and dangerous refcount dance.
    v5: add one more comment how we calculate the time spend
    
    Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
    Signed-off-by: Christian König <christian.koenig at amd.com>
    Reviewed-by: Shashank Sharma <shashank.sharma at amd.com>
    Cc: Daniel Vetter <daniel at ffwll.ch>
    Signed-off-by: Alex Deucher <alexander.deucher at amd.com>

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index a61e4c83a545..7dc92ef36b2b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -162,17 +162,50 @@ static unsigned int amdgpu_ctx_get_hw_prio(struct amdgpu_ctx *ctx, u32 hw_ip)
 	return hw_prio;
 }
 
+/* Calculate the time spend on the hw */
+static ktime_t amdgpu_ctx_fence_time(struct dma_fence *fence)
+{
+	struct drm_sched_fence *s_fence;
+
+	if (!fence)
+		return ns_to_ktime(0);
+
+	/* When the fence is not even scheduled it can't have spend time */
+	s_fence = to_drm_sched_fence(fence);
+	if (!test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &s_fence->scheduled.flags))
+		return ns_to_ktime(0);
+
+	/* When it is still running account how much already spend */
+	if (!test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &s_fence->finished.flags))
+		return ktime_sub(ktime_get(), s_fence->scheduled.timestamp);
+
+	return ktime_sub(s_fence->finished.timestamp,
+			 s_fence->scheduled.timestamp);
+}
+
+static ktime_t amdgpu_ctx_entity_time(struct amdgpu_ctx *ctx,
+				      struct amdgpu_ctx_entity *centity)
+{
+	ktime_t res = ns_to_ktime(0);
+	uint32_t i;
+
+	spin_lock(&ctx->ring_lock);
+	for (i = 0; i < amdgpu_sched_jobs; i++) {
+		res = ktime_add(res, amdgpu_ctx_fence_time(centity->fences[i]));
+	}
+	spin_unlock(&ctx->ring_lock);
+	return res;
+}
 
 static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, u32 hw_ip,
 				  const u32 ring)
 {
+	struct drm_gpu_scheduler **scheds = NULL, *sched = NULL;
 	struct amdgpu_device *adev = ctx->mgr->adev;
 	struct amdgpu_ctx_entity *entity;
-	struct drm_gpu_scheduler **scheds = NULL, *sched = NULL;
-	unsigned num_scheds = 0;
-	int32_t ctx_prio;
-	unsigned int hw_prio;
 	enum drm_sched_priority drm_prio;
+	unsigned int hw_prio, num_scheds;
+	int32_t ctx_prio;
 	int r;
 
 	entity = kzalloc(struct_size(entity, fences, amdgpu_sched_jobs),
@@ -182,6 +215,7 @@ static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, u32 hw_ip,
 
 	ctx_prio = (ctx->override_priority == AMDGPU_CTX_PRIORITY_UNSET) ?
 			ctx->init_priority : ctx->override_priority;
+	entity->hw_ip = hw_ip;
 	entity->sequence = 1;
 	hw_prio = amdgpu_ctx_get_hw_prio(ctx, hw_ip);
 	drm_prio = amdgpu_ctx_to_drm_sched_prio(ctx_prio);
@@ -220,6 +254,23 @@ error_free_entity:
 	return r;
 }
 
+static ktime_t amdgpu_ctx_fini_entity(struct amdgpu_ctx_entity *entity)
+{
+	ktime_t res = ns_to_ktime(0);
+	int i;
+
+	if (!entity)
+		return res;
+
+	for (i = 0; i < amdgpu_sched_jobs; ++i) {
+		res = ktime_add(res, amdgpu_ctx_fence_time(entity->fences[i]));
+		dma_fence_put(entity->fences[i]);
+	}
+
+	kfree(entity);
+	return res;
+}
+
 static int amdgpu_ctx_init(struct amdgpu_ctx_mgr *mgr, int32_t priority,
 			   struct drm_file *filp, struct amdgpu_ctx *ctx)
 {
@@ -246,20 +297,6 @@ static int amdgpu_ctx_init(struct amdgpu_ctx_mgr *mgr, int32_t priority,
 	return 0;
 }
 
-static void amdgpu_ctx_fini_entity(struct amdgpu_ctx_entity *entity)
-{
-
-	int i;
-
-	if (!entity)
-		return;
-
-	for (i = 0; i < amdgpu_sched_jobs; ++i)
-		dma_fence_put(entity->fences[i]);
-
-	kfree(entity);
-}
-
 static int amdgpu_ctx_get_stable_pstate(struct amdgpu_ctx *ctx,
 					u32 *stable_pstate)
 {
@@ -351,8 +388,10 @@ static void amdgpu_ctx_fini(struct kref *ref)
 
 	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
 		for (j = 0; j < AMDGPU_MAX_ENTITY_NUM; ++j) {
-			amdgpu_ctx_fini_entity(ctx->entities[i][j]);
-			ctx->entities[i][j] = NULL;
+			ktime_t spend;
+
+			spend = amdgpu_ctx_fini_entity(ctx->entities[i][j]);
+			atomic64_add(ktime_to_ns(spend), &mgr->time_spend[i]);
 		}
 	}
 
@@ -689,6 +728,9 @@ uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx,
 	centity->sequence++;
 	spin_unlock(&ctx->ring_lock);
 
+	atomic64_add(ktime_to_ns(amdgpu_ctx_fence_time(other)),
+		     &ctx->mgr->time_spend[centity->hw_ip]);
+
 	dma_fence_put(other);
 	return seq;
 }
@@ -795,9 +837,14 @@ int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx,
 void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr,
 			 struct amdgpu_device *adev)
 {
+	unsigned int i;
+
 	mgr->adev = adev;
 	mutex_init(&mgr->lock);
 	idr_init(&mgr->ctx_handles);
+
+	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)
+		atomic64_set(&mgr->time_spend[i], 0);
 }
 
 long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout)
@@ -873,80 +920,38 @@ void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr)
 	mutex_destroy(&mgr->lock);
 }
 
-static void amdgpu_ctx_fence_time(struct amdgpu_ctx *ctx,
-		struct amdgpu_ctx_entity *centity, ktime_t *total, ktime_t *max)
-{
-	ktime_t now, t1;
-	uint32_t i;
-
-	*total = *max = 0;
-
-	now = ktime_get();
-	for (i = 0; i < amdgpu_sched_jobs; i++) {
-		struct dma_fence *fence;
-		struct drm_sched_fence *s_fence;
-
-		spin_lock(&ctx->ring_lock);
-		fence = dma_fence_get(centity->fences[i]);
-		spin_unlock(&ctx->ring_lock);
-		if (!fence)
-			continue;
-		s_fence = to_drm_sched_fence(fence);
-		if (!dma_fence_is_signaled(&s_fence->scheduled)) {
-			dma_fence_put(fence);
-			continue;
-		}
-		t1 = s_fence->scheduled.timestamp;
-		if (!ktime_before(t1, now)) {
-			dma_fence_put(fence);
-			continue;
-		}
-		if (dma_fence_is_signaled(&s_fence->finished) &&
-			s_fence->finished.timestamp < now)
-			*total += ktime_sub(s_fence->finished.timestamp, t1);
-		else
-			*total += ktime_sub(now, t1);
-		t1 = ktime_sub(now, t1);
-		dma_fence_put(fence);
-		*max = max(t1, *max);
-	}
-}
-
-ktime_t amdgpu_ctx_mgr_fence_usage(struct amdgpu_ctx_mgr *mgr, uint32_t hwip,
-		uint32_t idx, uint64_t *elapsed)
+void amdgpu_ctx_mgr_usage(struct amdgpu_ctx_mgr *mgr,
+			  ktime_t usage[AMDGPU_HW_IP_NUM])
 {
-	struct idr *idp;
 	struct amdgpu_ctx *ctx;
+	unsigned int hw_ip, i;
 	uint32_t id;
-	struct amdgpu_ctx_entity *centity;
-	ktime_t total = 0, max = 0;
 
-	if (idx >= AMDGPU_MAX_ENTITY_NUM)
-		return 0;
-	idp = &mgr->ctx_handles;
+	/*
+	 * This is a little bit racy because it can be that a ctx or a fence are
+	 * destroyed just in the moment we try to account them. But that is ok
+	 * since exactly that case is explicitely allowed by the interface.
+	 */
 	mutex_lock(&mgr->lock);
-	idr_for_each_entry(idp, ctx, id) {
-		ktime_t ttotal, tmax;
+	for (hw_ip = 0; hw_ip < AMDGPU_HW_IP_NUM; ++hw_ip) {
+		uint64_t ns = atomic64_read(&mgr->time_spend[hw_ip]);
 
-		if (!ctx->entities[hwip][idx])
-			continue;
+		usage[hw_ip] = ns_to_ktime(ns);
+	}
 
-		centity = ctx->entities[hwip][idx];
-		amdgpu_ctx_fence_time(ctx, centity, &ttotal, &tmax);
+	idr_for_each_entry(&mgr->ctx_handles, ctx, id) {
+		for (hw_ip = 0; hw_ip < AMDGPU_HW_IP_NUM; ++hw_ip) {
+			for (i = 0; i < amdgpu_ctx_num_entities[hw_ip]; ++i) {
+				struct amdgpu_ctx_entity *centity;
+				ktime_t spend;
 
-		/* Harmonic mean approximation diverges for very small
-		 * values. If ratio < 0.01% ignore
-		 */
-		if (AMDGPU_CTX_FENCE_USAGE_MIN_RATIO(tmax, ttotal))
-			continue;
-
-		total = ktime_add(total, ttotal);
-		max = ktime_after(tmax, max) ? tmax : max;
+				centity = ctx->entities[hw_ip][i];
+				if (!centity)
+					continue;
+				spend = amdgpu_ctx_entity_time(ctx, centity);
+				usage[hw_ip] = ktime_add(usage[hw_ip], spend);
+			}
+		}
 	}
-
 	mutex_unlock(&mgr->lock);
-	if (elapsed)
-		*elapsed = max;
-
-	return total;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
index 681050bc828c..cc7c8afff414 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
@@ -23,16 +23,20 @@
 #ifndef __AMDGPU_CTX_H__
 #define __AMDGPU_CTX_H__
 
+#include <linux/ktime.h>
+#include <linux/types.h>
+
 #include "amdgpu_ring.h"
 
 struct drm_device;
 struct drm_file;
 struct amdgpu_fpriv;
+struct amdgpu_ctx_mgr;
 
 #define AMDGPU_MAX_ENTITY_NUM 4
-#define AMDGPU_CTX_FENCE_USAGE_MIN_RATIO(max, total) ((max) > 16384ULL*(total))
 
 struct amdgpu_ctx_entity {
+	uint32_t		hw_ip;
 	uint64_t		sequence;
 	struct drm_sched_entity	entity;
 	struct dma_fence	*fences[];
@@ -61,6 +65,7 @@ struct amdgpu_ctx_mgr {
 	struct mutex		lock;
 	/* protected by lock */
 	struct idr		ctx_handles;
+	atomic64_t		time_spend[AMDGPU_HW_IP_NUM];
 };
 
 extern const unsigned int amdgpu_ctx_num_entities[AMDGPU_HW_IP_NUM];
@@ -90,6 +95,7 @@ void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr,
 void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr);
 long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout);
 void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr);
-ktime_t amdgpu_ctx_mgr_fence_usage(struct amdgpu_ctx_mgr *mgr, uint32_t hwip,
-		uint32_t idx, uint64_t *elapsed);
+void amdgpu_ctx_mgr_usage(struct amdgpu_ctx_mgr *mgr,
+			  ktime_t usage[AMDGPU_HW_IP_NUM]);
+
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c
index 5a6857c44bb6..4d453845235c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c
@@ -32,6 +32,7 @@
 
 #include <drm/amdgpu_drm.h>
 #include <drm/drm_debugfs.h>
+#include <drm/drm_drv.h>
 
 #include "amdgpu.h"
 #include "amdgpu_vm.h"
@@ -54,26 +55,23 @@ static const char *amdgpu_ip_name[AMDGPU_HW_IP_NUM] = {
 
 void amdgpu_show_fdinfo(struct seq_file *m, struct file *f)
 {
-	struct amdgpu_fpriv *fpriv;
-	uint32_t bus, dev, fn, i, domain;
-	uint64_t vram_mem = 0, gtt_mem = 0, cpu_mem = 0;
 	struct drm_file *file = f->private_data;
 	struct amdgpu_device *adev = drm_to_adev(file->minor->dev);
+	struct amdgpu_fpriv *fpriv = file->driver_priv;
+
+	uint64_t vram_mem = 0, gtt_mem = 0, cpu_mem = 0;
+	ktime_t usage[AMDGPU_HW_IP_NUM];
+	uint32_t bus, dev, fn, domain;
 	struct amdgpu_bo *root;
+	unsigned int hw_ip;
 	int ret;
 
-	ret = amdgpu_file_to_fpriv(f, &fpriv);
-	if (ret)
-		return;
 	bus = adev->pdev->bus->number;
 	domain = pci_domain_nr(adev->pdev->bus);
 	dev = PCI_SLOT(adev->pdev->devfn);
 	fn = PCI_FUNC(adev->pdev->devfn);
 
-	root = amdgpu_bo_ref(fpriv->vm.root.bo);
-	if (!root)
-		return;
-
+	root = fpriv->vm.root.bo;
 	ret = amdgpu_bo_reserve(root, false);
 	if (ret) {
 		DRM_ERROR("Fail to reserve bo\n");
@@ -81,31 +79,26 @@ void amdgpu_show_fdinfo(struct seq_file *m, struct file *f)
 	}
 	amdgpu_vm_get_memory(&fpriv->vm, &vram_mem, &gtt_mem, &cpu_mem);
 	amdgpu_bo_unreserve(root);
-	amdgpu_bo_unref(&root);
 
-	seq_printf(m, "pdev:\t%04x:%02x:%02x.%d\npasid:\t%u\n", domain, bus,
-			dev, fn, fpriv->vm.pasid);
-	seq_printf(m, "vram mem:\t%llu kB\n", vram_mem/1024UL);
-	seq_printf(m, "gtt mem:\t%llu kB\n", gtt_mem/1024UL);
-	seq_printf(m, "cpu mem:\t%llu kB\n", cpu_mem/1024UL);
-	for (i = 0; i < AMDGPU_HW_IP_NUM; i++) {
-		uint32_t count = amdgpu_ctx_num_entities[i];
-		int idx = 0;
-		uint64_t total = 0, min = 0;
-		uint32_t perc, frac;
+	amdgpu_ctx_mgr_usage(&fpriv->ctx_mgr, usage);
 
-		for (idx = 0; idx < count; idx++) {
-			total = amdgpu_ctx_mgr_fence_usage(&fpriv->ctx_mgr,
-				i, idx, &min);
-			if ((total == 0) || (min == 0))
-				continue;
+	/*
+	 * ******************************************************************
+	 * For text output format description please see drm-usage-stats.rst!
+	 * ******************************************************************
+	 */
 
-			perc = div64_u64(10000 * total, min);
-			frac = perc % 100;
+	seq_printf(m, "pasid:\t%u\n", fpriv->vm.pasid);
+	seq_printf(m, "drm-driver:\t%s\n", file->minor->dev->driver->name);
+	seq_printf(m, "drm-pdev:\t%04x:%02x:%02x.%d\n", domain, bus, dev, fn);
+	seq_printf(m, "drm-memory-vram:\t%llu KiB\n", vram_mem/1024UL);
+	seq_printf(m, "drm-memory-gtt: \t%llu KiB\n", gtt_mem/1024UL);
+	seq_printf(m, "drm-memory-cpu: \t%llu KiB\n", cpu_mem/1024UL);
+	for (hw_ip = 0; hw_ip < AMDGPU_HW_IP_NUM; ++hw_ip) {
+		if (!usage[hw_ip])
+			continue;
 
-			seq_printf(m, "%s%d:\t%d.%d%%\n",
-					amdgpu_ip_name[i],
-					idx, perc/100, frac);
-		}
+		seq_printf(m, "drm-engine-%s:\t%Ld ns\n", amdgpu_ip_name[hw_ip],
+			   ktime_to_ns(usage[hw_ip]));
 	}
 }
commit 08cffb3eb731fefd0dea12424cedbfa63c356ee0
Author: Christian König <christian.koenig at amd.com>
Date:   Fri May 6 13:21:28 2022 +0200

    drm/amdgpu: bump minor version number
    
    Increase the minor version number to indicate that the new flags are
    available.
    
    Signed-off-by: Christian König <christian.koenig at amd.com>
    Reviewed-by: Alex Deucher <alexander.deucher at amd.com>
    Reviewed-by: Marek Olšák <marek.olsak at amd.com>
    Signed-off-by: Alex Deucher <alexander.deucher at amd.com>

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index afabdbbb22c6..8890300766a5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -99,10 +99,11 @@
  * - 3.43.0 - Add device hot plug/unplug support
  * - 3.44.0 - DCN3 supports DCC independent block settings: !64B && 128B, 64B && 128B
  * - 3.45.0 - Add context ioctl stable pstate interface
- * * 3.46.0 - To enable hot plug amdgpu tests in libdrm
+ * - 3.46.0 - To enable hot plug amdgpu tests in libdrm
+ * * 3.47.0 - Add AMDGPU_GEM_CREATE_DISCARDABLE and AMDGPU_VM_NOALLOC flags
  */
 #define KMS_DRIVER_MAJOR	3
-#define KMS_DRIVER_MINOR	46
+#define KMS_DRIVER_MINOR	47
 #define KMS_DRIVER_PATCHLEVEL	0
 
 int amdgpu_vram_limit;
commit b6c65a2c92aa880e8050a91ca83288b85fc32575
Author: Christian König <christian.koenig at amd.com>
Date:   Fri May 6 13:11:41 2022 +0200

    drm/amdgpu: add AMDGPU_VM_NOALLOC v2
    
    Add the AMDGPU_VM_NOALLOC flag to let userspace control MALL allocation.
    
    v2: also add the flag to the allowed flags.
    
    Signed-off-by: Christian König <christian.koenig at amd.com>
    Reviewed-by: Alex Deucher <alexander.deucher at amd.com> (v1)
    Reviewed-by: Marek Olšák <marek.olsak at amd.com>
    Signed-off-by: Alex Deucher <alexander.deucher at amd.com>

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 56f4c9aa87c6..8ef31d687ef3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -645,6 +645,8 @@ uint64_t amdgpu_gem_va_map_flags(struct amdgpu_device *adev, uint32_t flags)
 		pte_flag |= AMDGPU_PTE_WRITEABLE;
 	if (flags & AMDGPU_VM_PAGE_PRT)
 		pte_flag |= AMDGPU_PTE_PRT;
+	if (flags & AMDGPU_VM_PAGE_NOALLOC)
+		pte_flag |= AMDGPU_PTE_NOALLOC;
 
 	if (adev->gmc.gmc_funcs->map_mtype)
 		pte_flag |= amdgpu_gmc_map_mtype(adev,
@@ -658,7 +660,8 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
 {
 	const uint32_t valid_flags = AMDGPU_VM_DELAY_UPDATE |
 		AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_WRITEABLE |
-		AMDGPU_VM_PAGE_EXECUTABLE | AMDGPU_VM_MTYPE_MASK;
+		AMDGPU_VM_PAGE_EXECUTABLE | AMDGPU_VM_MTYPE_MASK |
+		AMDGPU_VM_PAGE_NOALLOC;
 	const uint32_t prt_flags = AMDGPU_VM_DELAY_UPDATE |
 		AMDGPU_VM_PAGE_PRT;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index b8c79789e1e4..9077dfccaf3c 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -613,6 +613,9 @@ static void gmc_v10_0_get_vm_pte(struct amdgpu_device *adev,
 	*flags &= ~AMDGPU_PTE_MTYPE_NV10_MASK;
 	*flags |= (mapping->flags & AMDGPU_PTE_MTYPE_NV10_MASK);
 
+	*flags &= ~AMDGPU_PTE_NOALLOC;
+	*flags |= (mapping->flags & AMDGPU_PTE_NOALLOC);
+
 	if (mapping->flags & AMDGPU_PTE_PRT) {
 		*flags |= AMDGPU_PTE_PRT;
 		*flags |= AMDGPU_PTE_SNOOPED;
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
index 477f67d9b07c..a0c0b7d9f444 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
@@ -500,6 +500,9 @@ static void gmc_v11_0_get_vm_pte(struct amdgpu_device *adev,
 	*flags &= ~AMDGPU_PTE_MTYPE_NV10_MASK;
 	*flags |= (mapping->flags & AMDGPU_PTE_MTYPE_NV10_MASK);
 
+	*flags &= ~AMDGPU_PTE_NOALLOC;
+	*flags |= (mapping->flags & AMDGPU_PTE_NOALLOC);
+
 	if (mapping->flags & AMDGPU_PTE_PRT) {
 		*flags |= AMDGPU_PTE_PRT;
 		*flags |= AMDGPU_PTE_SNOOPED;
diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index a81bef5cfeaa..18d3246d636e 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -533,6 +533,8 @@ struct drm_amdgpu_gem_op {
 #define AMDGPU_VM_MTYPE_UC		(4 << 5)
 /* Use Read Write MTYPE instead of default MTYPE */
 #define AMDGPU_VM_MTYPE_RW		(5 << 5)
+/* don't allocate MALL */
+#define AMDGPU_VM_PAGE_NOALLOC		(1 << 9)
 
 struct drm_amdgpu_gem_va {
 	/** GEM object handle */
commit fab2cc8335839867a3db38f195441b9c7c6460f6
Author: Christian König <christian.koenig at amd.com>
Date:   Fri May 6 13:08:34 2022 +0200

    drm/amdgpu: add AMDGPU_GEM_CREATE_DISCARDABLE
    
    Add a AMDGPU_GEM_CREATE_DISCARDABLE flag to note that the content of a BO
    doesn't needs to be preserved during eviction.
    
    KFD was already using a similar functionality for SVM BOs so replace the
    internal flag with the new UAPI.
    
    Signed-off-by: Christian König <christian.koenig at amd.com>
    Reviewed-by: Alex Deucher <alexander.deucher at amd.com>
    Reviewed-by: Marek Olšák <marek.olsak at amd.com>
    Signed-off-by: Alex Deucher <alexander.deucher at amd.com>

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 652571267077..56f4c9aa87c6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -296,8 +296,8 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
 		      AMDGPU_GEM_CREATE_VRAM_CLEARED |
 		      AMDGPU_GEM_CREATE_VM_ALWAYS_VALID |
 		      AMDGPU_GEM_CREATE_EXPLICIT_SYNC |
-		      AMDGPU_GEM_CREATE_ENCRYPTED))
-
+		      AMDGPU_GEM_CREATE_ENCRYPTED |
+		      AMDGPU_GEM_CREATE_DISCARDABLE))
 		return -EINVAL;
 
 	/* reject invalid gem domains */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index e1e6441c475f..2c82b1d5a0d7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -567,6 +567,7 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
 		bp->domain;
 	bo->allowed_domains = bo->preferred_domains;
 	if (bp->type != ttm_bo_type_kernel &&
+	    !(bp->flags & AMDGPU_GEM_CREATE_DISCARDABLE) &&
 	    bo->allowed_domains == AMDGPU_GEM_DOMAIN_VRAM)
 		bo->allowed_domains |= AMDGPU_GEM_DOMAIN_GTT;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
index 4c9cbdc66995..147b79c10cbb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -41,7 +41,6 @@
 
 /* BO flag to indicate a KFD userptr BO */
 #define AMDGPU_AMDKFD_CREATE_USERPTR_BO	(1ULL << 63)
-#define AMDGPU_AMDKFD_CREATE_SVM_BO	(1ULL << 62)
 
 #define to_amdgpu_bo_user(abo) container_of((abo), struct amdgpu_bo_user, bo)
 #define to_amdgpu_bo_vm(abo) container_of((abo), struct amdgpu_bo_vm, bo)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index ec26edd4f4d8..be6f76a30ac6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -117,7 +117,7 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
 	}
 
 	abo = ttm_to_amdgpu_bo(bo);
-	if (abo->flags & AMDGPU_AMDKFD_CREATE_SVM_BO) {
+	if (abo->flags & AMDGPU_GEM_CREATE_DISCARDABLE) {
 		placement->num_placement = 0;
 		placement->num_busy_placement = 0;
 		return;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 29e9ebf6d8d5..2ebf0132c25b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -531,7 +531,7 @@ svm_range_vram_node_new(struct amdgpu_device *adev, struct svm_range *prange,
 	bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
 	bp.flags = AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
 	bp.flags |= clear ? AMDGPU_GEM_CREATE_VRAM_CLEARED : 0;
-	bp.flags |= AMDGPU_AMDKFD_CREATE_SVM_BO;
+	bp.flags |= AMDGPU_GEM_CREATE_DISCARDABLE;
 	bp.type = ttm_bo_type_device;
 	bp.resv = NULL;
 
diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index d9d475d65c76..a81bef5cfeaa 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -140,6 +140,10 @@ extern "C" {
  * not require GTT memory accounting
  */
 #define AMDGPU_GEM_CREATE_PREEMPTIBLE		(1 << 11)
+/* Flag that BO can be discarded under memory pressure without keeping the
+ * content.
+ */
+#define AMDGPU_GEM_CREATE_DISCARDABLE		(1 << 12)
 
 struct drm_amdgpu_gem_create_in  {
 	/** the requested memory size */
commit 62e9bd20035b53ff6c679499c08546d96c6c60a7
Author: Alex Deucher <alexander.deucher at amd.com>
Date:   Tue May 24 23:23:59 2022 -0400

    drm/amdgpu: add beige goby PCI ID
    
    Add a beige goby PCI ID.
    
    Reviewed-by: Guchun Chen <guchun.chen at amd.com>
    Signed-off-by: Alex Deucher <alexander.deucher at amd.com>
    Cc: stable at vger.kernel.org

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 8592d43a79b0..afabdbbb22c6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -1940,6 +1940,7 @@ static const struct pci_device_id pciidlist[] = {
 	{0x1002, 0x7421, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BEIGE_GOBY},
 	{0x1002, 0x7422, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BEIGE_GOBY},
 	{0x1002, 0x7423, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BEIGE_GOBY},
+	{0x1002, 0x7424, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BEIGE_GOBY},
 	{0x1002, 0x743F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BEIGE_GOBY},
 
 	{ PCI_DEVICE(0x1002, PCI_ANY_ID),
commit 39dbde650f9377f97ad985bfa16af93381766232
Author: Lijo Lazar <lijo.lazar at amd.com>
Date:   Tue May 24 17:35:55 2022 +0530

    drm/amd/pm: Return auto perf level, if unsupported
    
    When powerplay is not enabled, return AUTO as default level.
    
    Signed-off-by: Lijo Lazar <lijo.lazar at amd.com>
    Acked-by: Alex Deucher <alexander.deucher at amd.com>
    Signed-off-by: Alex Deucher <alexander.deucher at amd.com>

diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
index 5472f9936feb..d1bf073adf54 100644
--- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
+++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
@@ -770,6 +770,9 @@ enum amd_dpm_forced_level amdgpu_dpm_get_performance_level(struct amdgpu_device
 	const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
 	enum amd_dpm_forced_level level;
 
+	if (!pp_funcs)
+		return AMD_DPM_FORCED_LEVEL_AUTO;
+
 	mutex_lock(&adev->pm.mutex);
 	if (pp_funcs->get_performance_level)
 		level = pp_funcs->get_performance_level(adev->powerplay.pp_handle);
commit 6bd8d4b7d511f00a9e02f89b250fba3013200843
Author: Julia Lawall <Julia.Lawall at inria.fr>
Date:   Sat May 21 13:11:24 2022 +0200

    drm/amdkfd: fix typo in comment
    
    Spelling mistake (triple letters) in comment.
    Detected with the help of Coccinelle.
    
    Signed-off-by: Julia Lawall <Julia.Lawall at inria.fr>
    Signed-off-by: Alex Deucher <alexander.deucher at amd.com>

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 8b5452a8d330..67abf8dcd30a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -1621,7 +1621,7 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
 
 	mutex_lock(&mem->lock);
 
-	/* Unpin MMIO/DOORBELL BO's that were pinnned during allocation */
+	/* Unpin MMIO/DOORBELL BO's that were pinned during allocation */
 	if (mem->alloc_flags &
 	    (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
 	     KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) {
commit ab5a7fb6d2296b9486d17d1e24f4bde90822e644
Author: Julia Lawall <Julia.Lawall at inria.fr>
Date:   Sat May 21 13:11:14 2022 +0200

    drm/amdgpu/gfx: fix typos in comments
    
    Spelling mistakes (triple letters) in comments.
    Detected with the help of Coccinelle.
    
    Signed-off-by: Julia Lawall <Julia.Lawall at inria.fr>
    Signed-off-by: Alex Deucher <alexander.deucher at amd.com>

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 02754ee86c81..c5f46d264b23 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -5111,7 +5111,7 @@ static void gfx_v10_0_init_compute_vmid(struct amdgpu_device *adev)
 	mutex_unlock(&adev->srbm_mutex);
 
 	/* Initialize all compute VMIDs to have no GDS, GWS, or OA
-	   acccess. These should be enabled by FW for target VMIDs. */
+	   access. These should be enabled by FW for target VMIDs. */
 	for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0);
 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index fb9302910742..7f0b18b0d4c4 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -3714,7 +3714,7 @@ static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
 	mutex_unlock(&adev->srbm_mutex);
 
 	/* Initialize all compute VMIDs to have no GDS, GWS, or OA
-	   acccess. These should be enabled by FW for target VMIDs. */
+	   access. These should be enabled by FW for target VMIDs. */
 	for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
 		WREG32(amdgpu_gds_reg_offset[i].mem_base, 0);
 		WREG32(amdgpu_gds_reg_offset[i].mem_size, 0);
@@ -5815,7 +5815,7 @@ static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev
 		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
 		gfx_v8_0_wait_for_rlc_serdes(adev);
 
-		/* write cmd to Set CGCG Overrride */
+		/* write cmd to Set CGCG Override */
 		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);
 
 		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index f12ae6e2359a..5349ca4d19e3 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -2535,7 +2535,7 @@ static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
 	mutex_unlock(&adev->srbm_mutex);
 
 	/* Initialize all compute VMIDs to have no GDS, GWS, or OA
-	   acccess. These should be enabled by FW for target VMIDs. */
+	   access. These should be enabled by FW for target VMIDs. */
 	for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0);
 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0);
commit 31ab27b14daaa75541a415c6794d6f3567fea44a
Author: Dave Airlie <airlied at redhat.com>
Date:   Mon May 23 10:24:18 2022 +1000

    drm/amdgpu/cs: make commands with 0 chunks illegal behaviour.
    
    Submitting a cs with 0 chunks, causes an oops later, found trying
    to execute the wrong userspace driver.
    
    MESA_LOADER_DRIVER_OVERRIDE=v3d glxinfo
    
    [172536.665184] BUG: kernel NULL pointer dereference, address: 00000000000001d8
    [172536.665188] #PF: supervisor read access in kernel mode
    [172536.665189] #PF: error_code(0x0000) - not-present page
    [172536.665191] PGD 6712a0067 P4D 6712a0067 PUD 5af9ff067 PMD 0
    [172536.665195] Oops: 0000 [#1] SMP NOPTI
    [172536.665197] CPU: 7 PID: 2769838 Comm: glxinfo Tainted: P           O      5.10.81 #1-NixOS
    [172536.665199] Hardware name: To be filled by O.E.M. To be filled by O.E.M./CROSSHAIR V FORMULA-Z, BIOS 2201 03/23/2015
    [172536.665272] RIP: 0010:amdgpu_cs_ioctl+0x96/0x1ce0 [amdgpu]
    [172536.665274] Code: 75 18 00 00 4c 8b b2 88 00 00 00 8b 46 08 48 89 54 24 68 49 89 f7 4c 89 5c 24 60 31 d2 4c 89 74 24 30 85 c0 0f 85 c0 01 00 00 <48> 83 ba d8 01 00 00 00 48 8b b4 24 90 00 00 00 74 16 48 8b 46 10
    [172536.665276] RSP: 0018:ffffb47c0e81bbe0 EFLAGS: 00010246
    [172536.665277] RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000
    [172536.665278] RDX: 0000000000000000 RSI: ffffb47c0e81be28 RDI: ffffb47c0e81bd68
    [172536.665279] RBP: ffff936524080010 R08: 0000000000000000 R09: ffffb47c0e81be38
    [172536.665281] R10: ffff936524080010 R11: ffff936524080000 R12: ffffb47c0e81bc40
    [172536.665282] R13: ffffb47c0e81be28 R14: ffff9367bc410000 R15: ffffb47c0e81be28
    [172536.665283] FS:  00007fe35e05d740(0000) GS:ffff936c1edc0000(0000) knlGS:0000000000000000
    [172536.665284] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
    [172536.665286] CR2: 00000000000001d8 CR3: 0000000532e46000 CR4: 00000000000406e0
    [172536.665287] Call Trace:
    [172536.665322]  ? amdgpu_cs_find_mapping+0x110/0x110 [amdgpu]
    [172536.665332]  drm_ioctl_kernel+0xaa/0xf0 [drm]
    [172536.665338]  drm_ioctl+0x201/0x3b0 [drm]
    [172536.665369]  ? amdgpu_cs_find_mapping+0x110/0x110 [amdgpu]
    [172536.665372]  ? selinux_file_ioctl+0x135/0x230
    [172536.665399]  amdgpu_drm_ioctl+0x49/0x80 [amdgpu]
    [172536.665403]  __x64_sys_ioctl+0x83/0xb0
    [172536.665406]  do_syscall_64+0x33/0x40
    [172536.665409]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
    
    Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/2018
    Signed-off-by: Dave Airlie <airlied at redhat.com>
    Cc: stable at vger.kernel.org
    Reviewed-by: Alex Deucher <alexander.deucher at amd.com>
    Signed-off-by: Alex Deucher <alexander.deucher at amd.com>

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 84caab5e4d22..b28af04b0c3e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -116,7 +116,7 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs
 	int ret;
 
 	if (cs->in.num_chunks == 0)
-		return 0;
+		return -EINVAL;
 
 	chunk_array = kvmalloc_array(cs->in.num_chunks, sizeof(uint64_t), GFP_KERNEL);
 	if (!chunk_array)
commit d534ca7128d7bf681ed6d462c09b9d6ffb3bed91
Author: Alex Deucher <alexander.deucher at amd.com>
Date:   Mon May 23 11:24:31 2022 -0400

    drm/amdgpu: differentiate between LP and non-LP DDR memory
    
    Some applications want to know whether the memory is LP or
    not.
    
    Reviewed-by: Hawking Zhang <Hawking.Zhang at amd.com>
    Signed-off-by: Alex Deucher <alexander.deucher at amd.com>

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
index 63e0293edc5f..fd8f3731758e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
@@ -188,13 +188,17 @@ static int convert_atom_mem_type_to_vram_type(struct amdgpu_device *adev,
 			vram_type = AMDGPU_VRAM_TYPE_DDR3;
 			break;
 		case Ddr4MemType:
-		case LpDdr4MemType:
 			vram_type = AMDGPU_VRAM_TYPE_DDR4;
 			break;
+		case LpDdr4MemType:
+			vram_type = AMDGPU_VRAM_TYPE_LPDDR4;
+			break;
 		case Ddr5MemType:
-		case LpDdr5MemType:
 			vram_type = AMDGPU_VRAM_TYPE_DDR5;
 			break;
+		case LpDdr5MemType:
+			vram_type = AMDGPU_VRAM_TYPE_LPDDR5;
+			break;
 		default:
 			vram_type = AMDGPU_VRAM_TYPE_UNKNOWN;
 			break;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 5444515c1476..e1e6441c475f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -1018,7 +1018,9 @@ static const char *amdgpu_vram_names[] = {
 	"DDR3",
 	"DDR4",
 	"GDDR6",
-	"DDR5"
+	"DDR5",
+	"LPDDR4",
+	"LPDDR5"
 };
 
 /**
diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index 9a1d210d135d..d9d475d65c76 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -988,6 +988,8 @@ struct drm_amdgpu_info_vbios {
 #define AMDGPU_VRAM_TYPE_DDR4  8
 #define AMDGPU_VRAM_TYPE_GDDR6 9
 #define AMDGPU_VRAM_TYPE_DDR5  10
+#define AMDGPU_VRAM_TYPE_LPDDR4 11
+#define AMDGPU_VRAM_TYPE_LPDDR5 12
 
 struct drm_amdgpu_info_device {
 	/** PCI Device ID */
commit a5457087eb10322864dedb7768b7a95332393efe
Author: Candice Li <candice.li at amd.com>
Date:   Fri May 20 20:51:53 2022 +0800

    drm/amdgpu: Resolve pcie_bif RAS recovery bug
    
    Check shared buf instead of init flag for xgmi ta shared buf init
    during xgmi ta initialization.
    
    Signed-off-by: Candice Li <candice.li at amd.com>
    Reviewed-by: John Clements <john.clements at amd.com>
    Signed-off-by: Alex Deucher <alexander.deucher at amd.com>

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index 214e4e89a028..e9411c28d88b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -1177,7 +1177,7 @@ int psp_xgmi_initialize(struct psp_context *psp, bool set_extended_data, bool lo
 	psp->xgmi_context.context.mem_context.shared_mem_size = PSP_XGMI_SHARED_MEM_SIZE;
 	psp->xgmi_context.context.ta_load_type = GFX_CMD_ID_LOAD_TA;
 
-	if (!psp->xgmi_context.context.initialized) {
+	if (!psp->xgmi_context.context.mem_context.shared_buf) {
 		ret = psp_ta_init_shared_buf(psp, &psp->xgmi_context.context.mem_context);
 		if (ret)
 			return ret;
commit 1c755241463bab5d90404a782abf3baf7b7a3217
Author: Prike Liang <Prike.Liang at amd.com>
Date:   Fri May 20 11:04:35 2022 +0800

    drm/amdgpu: clean up asd on the ta_firmware_header_v2_0
    
    On the psp13 series use ta_firmware_header_v2_0 and the asd firmware
    was buildin ta, so needn't request asd firmware separately.
    
    Signed-off-by: Prike Liang <Prike.Liang at amd.com>
    Reviewed-by: Yifan Zhang <yifan1.zhang at amd.com>
    Acked-by: Huang Rui <ray.huang at amd.com>
    Signed-off-by: Alex Deucher <alexander.deucher at amd.com>

diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
index 18014ed0e853..9e1ef81933ff 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
@@ -32,13 +32,10 @@
 MODULE_FIRMWARE("amdgpu/aldebaran_sos.bin");
 MODULE_FIRMWARE("amdgpu/aldebaran_ta.bin");
 MODULE_FIRMWARE("amdgpu/aldebaran_cap.bin");
-MODULE_FIRMWARE("amdgpu/yellow_carp_asd.bin");
 MODULE_FIRMWARE("amdgpu/yellow_carp_toc.bin");
 MODULE_FIRMWARE("amdgpu/yellow_carp_ta.bin");
-MODULE_FIRMWARE("amdgpu/psp_13_0_5_asd.bin");
 MODULE_FIRMWARE("amdgpu/psp_13_0_5_toc.bin");
 MODULE_FIRMWARE("amdgpu/psp_13_0_5_ta.bin");
-MODULE_FIRMWARE("amdgpu/psp_13_0_8_asd.bin");
 MODULE_FIRMWARE("amdgpu/psp_13_0_8_toc.bin");
 MODULE_FIRMWARE("amdgpu/psp_13_0_8_ta.bin");
 MODULE_FIRMWARE("amdgpu/psp_13_0_0_sos.bin");
@@ -96,9 +93,6 @@ static int psp_v13_0_init_microcode(struct psp_context *psp)
 	case IP_VERSION(13, 0, 3):
 	case IP_VERSION(13, 0, 5):
 	case IP_VERSION(13, 0, 8):
-		err = psp_init_asd_microcode(psp, chip_name);
-		if (err)
-			return err;
 		err = psp_init_toc_microcode(psp, chip_name);
 		if (err)
 			return err;
commit a0ccc717c4ab3ef572f023fdceffb4b6df496a0d
Author: Alex Deucher <alexander.deucher at amd.com>
Date:   Mon May 16 14:12:33 2022 -0400

    drm/amdgpu/discovery: validate VCN and SDMA instances
    
    Validate the VCN and SDMA instances against the driver
    structure sizes to make sure we don't get into a
    situation where the firmware reports more instances than
    the driver supports.
    
    Reviewed-by: Guchun Chen <guchun.chen at amd.com>
    Acked-by: Christian König <christian.koenig at amd.com>
    Signed-off-by: Alex Deucher <alexander.deucher at amd.com>

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
index 16cdfb30b013..47f0344205ed 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
@@ -1130,13 +1130,24 @@ static int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev)
 				adev->vcn.vcn_config[adev->vcn.num_vcn_inst] =
 					ip->revision & 0xc0;
 				ip->revision &= ~0xc0;
-				adev->vcn.num_vcn_inst++;
+				if (adev->vcn.num_vcn_inst < AMDGPU_MAX_VCN_INSTANCES)
+					adev->vcn.num_vcn_inst++;
+				else
+					dev_err(adev->dev, "Too many VCN instances: %d vs %d\n",
+						adev->vcn.num_vcn_inst + 1,
+						AMDGPU_MAX_VCN_INSTANCES);
 			}
 			if (le16_to_cpu(ip->hw_id) == SDMA0_HWID ||
 			    le16_to_cpu(ip->hw_id) == SDMA1_HWID ||
 			    le16_to_cpu(ip->hw_id) == SDMA2_HWID ||
-			    le16_to_cpu(ip->hw_id) == SDMA3_HWID)
-				adev->sdma.num_instances++;
+			    le16_to_cpu(ip->hw_id) == SDMA3_HWID) {
+				if (adev->sdma.num_instances < AMDGPU_MAX_SDMA_INSTANCES)
+					adev->sdma.num_instances++;
+				else
+					dev_err(adev->dev, "Too many SDMA instances: %d vs %d\n",
+						adev->sdma.num_instances + 1,
+						AMDGPU_MAX_SDMA_INSTANCES);
+			}
 
 			if (le16_to_cpu(ip->hw_id) == UMC_HWID)
 				adev->gmc.num_umc++;
commit 76818cdd11a25ac6cb1d98875719935d8d0e2e51
Author: Sung Joon Kim <Sungjoon.Kim at amd.com>
Date:   Thu May 19 17:46:36 2022 -0400

    drm/amd/display: add Coverage blend mode for overlay plane
    
    According to the KMS man page, there is a
    "Coverage" alpha blend mode that assumes the
    pixel color values have NOT been pre-multiplied
    and will be done when the actual blending to
    the background color values happens.
    
    Previously, this mode hasn't been enabled
    in our driver and it was assumed that all
    normal overlay planes are pre-multiplied
    by default.
    
    When a 3rd party app is used to input a image
    in a specific format, e.g. PNG, as a source
    of a overlay plane to blend with the background
    primary plane, the pixel color values are not
    pre-multiplied. So by adding "Coverage" blend
    mode, our driver will support those cases.
    
    Issue fixed: Overlay plane alpha channel blending is incorrect
    Issue tracker: https://gitlab.freedesktop.org/drm/amd/-/issues/1769
    
    Reference:
    https://dri.freedesktop.org/docs/drm/gpu/drm-kms.html#plane-composition-properties
    
    Adding Coverage support also enables IGT
    kms_plane_alpha_blend Coverage subtests:
    1. coverage-7efc
    2. coverage-vs-premult-vs-constant
    
    Changes
    1. Add DRM_MODE_BLEND_COVERAGE blend mode capability
    2. Add "pre_multiplied_alpha" flag for Coverage case
    3. Read the correct flag and set the DCN MPCC
    pre_multiplied register bit (only on overlay plane)
    
    Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/1769
    Signed-off-by: Sung Joon Kim <Sungjoon.Kim at amd.com>
    Reviewed-by: Melissa Wen <mwen at igalia.com>
    Signed-off-by: Alex Deucher <alexander.deucher at amd.com>

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 5ea5e14b83c8..70be67a56673 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -5381,17 +5381,19 @@ fill_plane_buffer_attributes(struct amdgpu_device *adev,
 
 static void
 fill_blending_from_plane_state(const struct drm_plane_state *plane_state,
-			       bool *per_pixel_alpha, bool *global_alpha,
-			       int *global_alpha_value)
+			       bool *per_pixel_alpha, bool *pre_multiplied_alpha,
+			       bool *global_alpha, int *global_alpha_value)
 {
 	*per_pixel_alpha = false;
+	*pre_multiplied_alpha = true;
 	*global_alpha = false;
 	*global_alpha_value = 0xff;
 
 	if (plane_state->plane->type != DRM_PLANE_TYPE_OVERLAY)
 		return;
 
-	if (plane_state->pixel_blend_mode == DRM_MODE_BLEND_PREMULTI) {
+	if (plane_state->pixel_blend_mode == DRM_MODE_BLEND_PREMULTI ||
+		plane_state->pixel_blend_mode == DRM_MODE_BLEND_COVERAGE) {
 		static const uint32_t alpha_formats[] = {
 			DRM_FORMAT_ARGB8888,
 			DRM_FORMAT_RGBA8888,
@@ -5406,6 +5408,9 @@ fill_blending_from_plane_state(const struct drm_plane_state *plane_state,
 				break;
 			}
 		}
+
+		if (per_pixel_alpha && plane_state->pixel_blend_mode == DRM_MODE_BLEND_COVERAGE)
+			*pre_multiplied_alpha = false;
 	}
 
 	if (plane_state->alpha < 0xffff) {
@@ -5568,7 +5573,7 @@ fill_dc_plane_info_and_addr(struct amdgpu_device *adev,
 		return ret;
 
 	fill_blending_from_plane_state(
-		plane_state, &plane_info->per_pixel_alpha,
+		plane_state, &plane_info->per_pixel_alpha, &plane_info->pre_multiplied_alpha,
 		&plane_info->global_alpha, &plane_info->global_alpha_value);
 
 	return 0;
@@ -5615,6 +5620,7 @@ static int fill_dc_plane_attributes(struct amdgpu_device *adev,
 	dc_plane_state->tiling_info = plane_info.tiling_info;
 	dc_plane_state->visible = plane_info.visible;
 	dc_plane_state->per_pixel_alpha = plane_info.per_pixel_alpha;
+	dc_plane_state->pre_multiplied_alpha = plane_info.pre_multiplied_alpha;
 	dc_plane_state->global_alpha = plane_info.global_alpha;
 	dc_plane_state->global_alpha_value = plane_info.global_alpha_value;
 	dc_plane_state->dcc = plane_info.dcc;
@@ -7911,7 +7917,8 @@ static int amdgpu_dm_plane_init(struct amdgpu_display_manager *dm,
 	if (plane->type == DRM_PLANE_TYPE_OVERLAY &&
 	    plane_cap && plane_cap->per_pixel_alpha) {
 		unsigned int blend_caps = BIT(DRM_MODE_BLEND_PIXEL_NONE) |
-					  BIT(DRM_MODE_BLEND_PREMULTI);
+					  BIT(DRM_MODE_BLEND_PREMULTI) |
+					  BIT(DRM_MODE_BLEND_COVERAGE);
 
 		drm_plane_create_alpha_property(plane);
 		drm_plane_create_blend_mode_property(plane, blend_caps);
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_surface.c b/drivers/gpu/drm/amd/display/dc/core/dc_surface.c
index e6b9c6a71841..5bc6ff2fa73e 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_surface.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_surface.c
@@ -61,6 +61,8 @@ static void dc_plane_construct(struct dc_context *ctx, struct dc_plane_state *pl
 		plane_state->blend_tf->type = TF_TYPE_BYPASS;
 	}
 
+	plane_state->pre_multiplied_alpha = true;
+
 }
 
 static void dc_plane_destruct(struct dc_plane_state *plane_state)
diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h
index a31ea3644ec2..3960c74482be 100644
--- a/drivers/gpu/drm/amd/display/dc/dc.h
+++ b/drivers/gpu/drm/amd/display/dc/dc.h
@@ -1011,6 +1011,7 @@ struct dc_plane_state {
 
 	bool is_tiling_rotated;
 	bool per_pixel_alpha;
+	bool pre_multiplied_alpha;
 	bool global_alpha;
 	int  global_alpha_value;
 	bool visible;
@@ -1045,6 +1046,7 @@ struct dc_plane_info {
 	bool horizontal_mirror;
 	bool visible;
 	bool per_pixel_alpha;
+	bool pre_multiplied_alpha;
 	bool global_alpha;
 	int  global_alpha_value;
 	bool input_csc_enabled;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
index e02ac75afbf7..e3a62873c0e7 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
@@ -2550,12 +2550,21 @@ void dcn10_update_mpcc(struct dc *dc, struct pipe_ctx *pipe_ctx)
 	blnd_cfg.overlap_only = false;
 	blnd_cfg.global_gain = 0xff;
 
-	if (per_pixel_alpha && pipe_ctx->plane_state->global_alpha) {
-		blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA_COMBINED_GLOBAL_GAIN;
-		blnd_cfg.global_gain = pipe_ctx->plane_state->global_alpha_value;
-	} else if (per_pixel_alpha) {
-		blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA;
+	if (per_pixel_alpha) {
+		/* DCN1.0 has output CM before MPC which seems to screw with
+		 * pre-multiplied alpha.
+		 */
+		blnd_cfg.pre_multiplied_alpha = (is_rgb_cspace(
+				pipe_ctx->stream->output_color_space)
+						&& pipe_ctx->plane_state->pre_multiplied_alpha);
+		if (pipe_ctx->plane_state->global_alpha) {
+			blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA_COMBINED_GLOBAL_GAIN;
+			blnd_cfg.global_gain = pipe_ctx->plane_state->global_alpha_value;
+		} else {
+			blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA;
+		}
 	} else {
+		blnd_cfg.pre_multiplied_alpha = false;
 		blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_GLOBAL_ALPHA;
 	}
 
@@ -2564,14 +2573,6 @@ void dcn10_update_mpcc(struct dc *dc, struct pipe_ctx *pipe_ctx)
 	else
 		blnd_cfg.global_alpha = 0xff;
 
-	/* DCN1.0 has output CM before MPC which seems to screw with
-	 * pre-multiplied alpha.
-	 */
-	blnd_cfg.pre_multiplied_alpha = is_rgb_cspace(
-			pipe_ctx->stream->output_color_space)
-					&& per_pixel_alpha;
-
-
 	/*
 	 * TODO: remove hack
 	 * Note: currently there is a bug in init_hw such that
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c
index 0da024912dbe..ec6aa8d8b251 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c
@@ -2345,12 +2345,16 @@ void dcn20_update_mpcc(struct dc *dc, struct pipe_ctx *pipe_ctx)
 	blnd_cfg.overlap_only = false;
 	blnd_cfg.global_gain = 0xff;
 
-	if (per_pixel_alpha && pipe_ctx->plane_state->global_alpha) {
-		blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA_COMBINED_GLOBAL_GAIN;
-		blnd_cfg.global_gain = pipe_ctx->plane_state->global_alpha_value;
-	} else if (per_pixel_alpha) {
-		blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA;
+	if (per_pixel_alpha) {
+		blnd_cfg.pre_multiplied_alpha = pipe_ctx->plane_state->pre_multiplied_alpha;
+		if (pipe_ctx->plane_state->global_alpha) {
+			blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA_COMBINED_GLOBAL_GAIN;
+			blnd_cfg.global_gain = pipe_ctx->plane_state->global_alpha_value;
+		} else {
+			blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA;
+		}
 	} else {
+		blnd_cfg.pre_multiplied_alpha = false;
 		blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_GLOBAL_ALPHA;
 	}
 
@@ -2364,7 +2368,7 @@ void dcn20_update_mpcc(struct dc *dc, struct pipe_ctx *pipe_ctx)
 	blnd_cfg.top_gain = 0x1f000;
 	blnd_cfg.bottom_inside_gain = 0x1f000;
 	blnd_cfg.bottom_outside_gain = 0x1f000;
-	blnd_cfg.pre_multiplied_alpha = per_pixel_alpha;
+
 	if (pipe_ctx->plane_state->format
 			== SURFACE_PIXEL_FORMAT_GRPH_RGBE_ALPHA)
 		blnd_cfg.pre_multiplied_alpha = false;
commit a35faec3db0e13aac8ea720bc1a3503081dd5a3d
Author: Dan Carpenter <dan.carpenter at oracle.com>
Date:   Mon May 16 10:05:48 2022 +0300

    drm/amdgpu: Off by one in dm_dmub_outbox1_low_irq()
    
    The > ARRAY_SIZE() should be >= ARRAY_SIZE() to prevent an out of bounds
    access.
    
    Fixes: e27c41d5b068 ("drm/amd/display: Support for DMUB HPD interrupt handling")
    Reviewed-by: Harry Wentland <harry.wentland at amd.com>
    Signed-off-by: Dan Carpenter <dan.carpenter at oracle.com>
    Signed-off-by: Alex Deucher <alexander.deucher at amd.com>

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index a92cfb055c15..5ea5e14b83c8 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -769,7 +769,7 @@ static void dm_dmub_outbox1_low_irq(void *interrupt_params)
 
 		do {
 			dc_stat_get_dmub_notification(adev->dm.dc, &notify);
-			if (notify.type > ARRAY_SIZE(dm->dmub_thread_offload)) {
+			if (notify.type >= ARRAY_SIZE(dm->dmub_thread_offload)) {
 				DRM_ERROR("DM: notify type %d invalid!", notify.type);
 				continue;
 			}
commit caa5eadc140ca3748b2ae187da36383edc779300
Author: Evan Quan <evan.quan at amd.com>
Date:   Thu May 19 17:28:12 2022 +0800

    drm/amdgpu: suppress some compile warnings
    
    Suppress two compile warnings about "no previous prototype".
    
    Reported-by: kernel test robot <lkp at intel.com>
    Signed-off-by: Evan Quan <evan.quan at amd.com>
    Reviewed-by: Alex Deucher <alexander.deucher at amd.com>
    Signed-off-by: Alex Deucher <alexander.deucher at amd.com>

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
index 881570dced41..16cdfb30b013 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
@@ -1361,7 +1361,7 @@ union mall_info {
 	struct mall_info_v1_0 v1;
 };
 
-int amdgpu_discovery_get_mall_info(struct amdgpu_device *adev)
+static int amdgpu_discovery_get_mall_info(struct amdgpu_device *adev)
 {
 	struct binary_header *bhdr;
 	union mall_info *mall_info;
diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c
index c6a8520053bb..9e18a2b22607 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc21.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc21.c
@@ -42,6 +42,7 @@
 
 #include "soc15.h"
 #include "soc15_common.h"
+#include "soc21.h"
 
 static const struct amd_ip_funcs soc21_common_ip_funcs;
 
commit 396beb91a9eb86cbfa404e4220cca8f3ada70777
Author: Evan Quan <evan.quan at amd.com>
Date:   Wed Apr 6 14:14:50 2022 +0800

    drm/amd/pm: correct the metrics version for SMU 11.0.11/12/13
    
    Correct the metrics version used for SMU 11.0.11/12/13.
    Fixes misreported GPU metrics (e.g., fan speed, etc.) depending
    on which version of SMU firmware is loaded.
    
    Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/1925
    Signed-off-by: Evan Quan <evan.quan at amd.com>
    Signed-off-by: Alex Deucher <alexander.deucher at amd.com>

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
index d68be8f8850e..78f3d9e722bb 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
@@ -697,12 +697,28 @@ static int sienna_cichlid_get_smu_metrics_data(struct smu_context *smu,
 	uint32_t apu_percent = 0;
 	uint32_t dgpu_percent = 0;
 
-	if ((smu->adev->ip_versions[MP1_HWIP][0] == IP_VERSION(11, 0, 7)) &&
-		(smu->smc_fw_version >= 0x3A4900))
-		use_metrics_v3 = true;
-	else if ((smu->adev->ip_versions[MP1_HWIP][0] == IP_VERSION(11, 0, 7)) &&
-		(smu->smc_fw_version >= 0x3A4300))
-		use_metrics_v2 =  true;
+	switch (smu->adev->ip_versions[MP1_HWIP][0]) {
+	case IP_VERSION(11, 0, 7):
+		if (smu->smc_fw_version >= 0x3A4900)
+			use_metrics_v3 = true;
+		else if (smu->smc_fw_version >= 0x3A4300)
+			use_metrics_v2 = true;
+		break;
+	case IP_VERSION(11, 0, 11):
+		if (smu->smc_fw_version >= 0x412D00)
+			use_metrics_v2 = true;
+		break;
+	case IP_VERSION(11, 0, 12):
+		if (smu->smc_fw_version >= 0x3B2300)
+			use_metrics_v2 = true;
+		break;
+	case IP_VERSION(11, 0, 13):
+		if (smu->smc_fw_version >= 0x491100)
+			use_metrics_v2 = true;
+		break;
+	default:
+		break;
+	}
 
 	ret = smu_cmn_get_metrics_table(smu,
 					NULL,
@@ -3833,13 +3849,28 @@ static ssize_t sienna_cichlid_get_gpu_metrics(struct smu_context *smu,
 	uint16_t average_gfx_activity;
 	int ret = 0;
 
-	if ((adev->ip_versions[MP1_HWIP][0] == IP_VERSION(11, 0, 7)) &&
-		(smu->smc_fw_version >= 0x3A4900))
-		use_metrics_v3 = true;
-	else if ((adev->ip_versions[MP1_HWIP][0] == IP_VERSION(11, 0, 7)) &&
-		(smu->smc_fw_version >= 0x3A4300))
-		use_metrics_v2 = true;
-
+	switch (smu->adev->ip_versions[MP1_HWIP][0]) {
+	case IP_VERSION(11, 0, 7):
+		if (smu->smc_fw_version >= 0x3A4900)
+			use_metrics_v3 = true;
+		else if (smu->smc_fw_version >= 0x3A4300)
+			use_metrics_v2 = true;
+		break;
+	case IP_VERSION(11, 0, 11):
+		if (smu->smc_fw_version >= 0x412D00)
+			use_metrics_v2 = true;
+		break;
+	case IP_VERSION(11, 0, 12):
+		if (smu->smc_fw_version >= 0x3B2300)
+			use_metrics_v2 = true;
+		break;
+	case IP_VERSION(11, 0, 13):
+		if (smu->smc_fw_version >= 0x491100)
+			use_metrics_v2 = true;
+		break;
+	default:
+		break;
+	}
 
 	ret = smu_cmn_get_metrics_table(smu,
 					&metrics_external,
commit 6a8170383c7acdf6fb8da1f3774fa2bc9191d628
Author: Jay Cornwall <jay.cornwall at amd.com>
Date:   Thu Dec 30 21:32:06 2021 +0800

    drm/amdkfd: Add gfx11 trap handler
    
    Based on gfx10 with following changes:
    
    - GPR_ALLOC.VGPR_SIZE field moved (and size corrected in gfx10)
    - s_sendmsg_rtn_b64 replaces some s_sendmsg/s_getreg
    - Buffer instructions no longer have direct-to-LDS modifier
    
    Signed-off-by: Jay Cornwall <jay.cornwall at amd.com>
    Reviewed-by: Laurent Morichetti <laurent.morichetti at amd.com>
    Signed-off-by: Eric Huang <jinhuieric.huang at amd.com>
    Acked-by: Alex Deucher <alexander.deucher at amd.com>
    Signed-off-by: Alex Deucher <alexander.deucher at amd.com>

diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
index 8cbdc7f519c6..60a81649cf12 100644
--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
@@ -776,7 +776,7 @@ static const uint32_t cwsr_trap_nv1x_hex[] = {
 	0xe0704100, 0x705d0100,
 	0xe0704200, 0x705d0200,
 	0xe0704300, 0x705d0300,
-	0xb9702a05, 0x80708170,
+	0xb9703a05, 0x80708170,
 	0xbf0d9973, 0xbf850002,
 	0x8f708970, 0xbf820001,
 	0x8f708a70, 0xb97a1e06,
@@ -855,7 +855,7 @@ static const uint32_t cwsr_trap_nv1x_hex[] = {
 	0x877aff6d, 0x80000000,
 	0xbf840040, 0x8f7b867b,
 	0x8f7b827b, 0xbef6037b,
-	0xb9702a05, 0x80708170,
+	0xb9703a05, 0x80708170,
 	0xbf0d9973, 0xbf850002,
 	0x8f708970, 0xbf820001,
 	0x8f708a70, 0xb97a1e06,
@@ -891,7 +891,7 @@ static const uint32_t cwsr_trap_nv1x_hex[] = {
 	0xbef003ff, 0x00000200,
 	0xbeff0380, 0xbf820003,
 	0xbef003ff, 0x00000400,
-	0xbeff03c1, 0xb97b2a05,
+	0xbeff03c1, 0xb97b3a05,
 	0x807b817b, 0x8f7b827b,
 	0x907c9973, 0x877c817c,
 	0xbf06817c, 0xbf850017,
@@ -939,7 +939,7 @@ static const uint32_t cwsr_trap_nv1x_hex[] = {
 	0xb96f4306, 0x876fc16f,
 	0xbf840029, 0x8f6f866f,
 	0x8f6f826f, 0xbef6036f,
-	0xb9782a05, 0x80788178,
+	0xb9783a05, 0x80788178,
 	0xbf0d9972, 0xbf850002,
 	0x8f788978, 0xbf820001,
 	0x8f788a78, 0xb96e1e06,
@@ -962,7 +962,7 @@ static const uint32_t cwsr_trap_nv1x_hex[] = {
 	0x907c9972, 0x877c817c,
 	0xbf06817c, 0xbf850002,
 	0xbeff0380, 0xbf820001,
-	0xbeff03c1, 0xb96f2a05,
+	0xbeff03c1, 0xb96f3a05,
 	0x806f816f, 0x8f6f826f,
 	0x907c9972, 0x877c817c,
 	0xbf06817c, 0xbf850024,
@@ -1010,7 +1010,7 @@ static const uint32_t cwsr_trap_nv1x_hex[] = {
 	0x6e5d0100, 0xe0304200,
 	0x6e5d0200, 0xe0304300,
 	0x6e5d0300, 0xbf8c3f70,
-	0xb9782a05, 0x80788178,
+	0xb9783a05, 0x80788178,
 	0xbf0d9972, 0xbf850002,
 	0x8f788978, 0xbf820001,
 	0x8f788a78, 0xb96e1e06,
@@ -1037,7 +1037,7 @@ static const uint32_t cwsr_trap_nv1x_hex[] = {
 	0xbe8c310c, 0xbe8e310e,
 	0xbf06807c, 0xbf84fff0,
 	0xba80f801, 0x00000000,
-	0xbf8a0000, 0xb9782a05,
+	0xbf8a0000, 0xb9783a05,
 	0x80788178, 0xbf0d9972,
 	0xbf850002, 0x8f788978,
 	0xbf820001, 0x8f788a78,
@@ -2261,7 +2261,7 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
 	0xbf8a0000, 0x877aff6d,
 	0x80000000, 0xbf840040,
 	0x8f7b867b, 0x8f7b827b,
-	0xbef6037b, 0xb9702a05,
+	0xbef6037b, 0xb9703a05,
 	0x80708170, 0xbf0d9973,
 	0xbf850002, 0x8f708970,
 	0xbf820001, 0x8f708a70,
@@ -2298,7 +2298,7 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
 	0x00000200, 0xbeff0380,
 	0xbf820003, 0xbef003ff,
 	0x00000400, 0xbeff03c1,
-	0xb97b2a05, 0x807b817b,
+	0xb97b3a05, 0x807b817b,
 	0x8f7b827b, 0x907c9973,
 	0x877c817c, 0xbf06817c,
 	0xbf850017, 0xbef603ff,
@@ -2345,7 +2345,7 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
 	0xbeff03c1, 0xb96f4306,
 	0x876fc16f, 0xbf840029,
 	0x8f6f866f, 0x8f6f826f,
-	0xbef6036f, 0xb9782a05,
+	0xbef6036f, 0xb9783a05,
 	0x80788178, 0xbf0d9972,
 	0xbf850002, 0x8f788978,
 	0xbf820001, 0x8f788a78,
@@ -2369,7 +2369,7 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
 	0x877c817c, 0xbf06817c,
 	0xbf850002, 0xbeff0380,
 	0xbf820001, 0xbeff03c1,
-	0xb96f2a05, 0x806f816f,
+	0xb96f3a05, 0x806f816f,
 	0x8f6f826f, 0x907c9972,
 	0x877c817c, 0xbf06817c,
 	0xbf850024, 0xbef603ff,
@@ -2416,7 +2416,7 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
 	0xe0304100, 0x6e5d0100,
 	0xe0304200, 0x6e5d0200,
 	0xe0304300, 0x6e5d0300,
-	0xbf8c3f70, 0xb9782a05,
+	0xbf8c3f70, 0xb9783a05,
 	0x80788178, 0xbf0d9972,
 	0xbf850002, 0x8f788978,
 	0xbf820001, 0x8f788a78,
@@ -2444,7 +2444,7 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
 	0xbe8e310e, 0xbf06807c,
 	0xbf84fff0, 0xba80f801,
 	0x00000000, 0xbf8a0000,
-	0xb9782a05, 0x80788178,
+	0xb9783a05, 0x80788178,
 	0xbf0d9972, 0xbf850002,
 	0x8f788978, 0xbf820001,
 	0x8f788a78, 0xb96e1e06,
@@ -2494,3 +2494,440 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
 	0xbf9f0000, 0xbf9f0000,
 	0xbf9f0000, 0x00000000,
 };
+
+static const uint32_t cwsr_trap_gfx11_hex[] = {
+	0xbfa00001, 0xbfa0021b,
+	0xb0804006, 0xb8f8f802,
+	0x91788678, 0xb8fbf803,
+	0x8b6eff78, 0x00002000,
+	0xbfa10009, 0x8b6eff6d,
+	0x00ff0000, 0xbfa2001e,
+	0x8b6eff7b, 0x00000400,
+	0xbfa20041, 0xbf830010,
+	0xb8fbf803, 0xbfa0fffa,
+	0x8b6eff7b, 0x00000900,
+	0xbfa20015, 0x8b6eff7b,
+	0x000071ff, 0xbfa10008,
+	0x8b6fff7b, 0x00007080,
+	0xbfa10001, 0xbeee1287,
+	0xb8eff801, 0x846e8c6e,
+	0x8b6e6f6e, 0xbfa2000a,
+	0x8b6eff6d, 0x00ff0000,
+	0xbfa20007, 0xb8eef801,
+	0x8b6eff6e, 0x00000800,
+	0xbfa20003, 0x8b6eff7b,
+	0x00000400, 0xbfa20026,
+	0xbefa4d82, 0xbf89fc07,
+	0x84fa887a, 0xf4005bbd,
+	0xf8000010, 0xbf89fc07,
+	0x846e976e, 0x9177ff77,
+	0x00800000, 0x8c776e77,
+	0xf4045bbd, 0xf8000000,
+	0xbf89fc07, 0xf4045ebd,
+	0xf8000008, 0xbf89fc07,
+	0x8bee6e6e, 0xbfa10001,
+	0xbe80486e, 0x8b6eff6d,
+	0x01ff0000, 0xbfa20005,
+	0x8c78ff78, 0x00002000,
+	0x80ec886c, 0x82ed806d,
+	0xbfa00005, 0x8b6eff6d,
+	0x01000000, 0xbfa20002,
+	0x806c846c, 0x826d806d,
+	0x8b6dff6d, 0x0000ffff,
+	0x8bfe7e7e, 0x8bea6a6a,
+	0xb978f802, 0xbe804a6c,
+	0x8b6dff6d, 0x0000ffff,
+	0xbefa0080, 0xb97a0283,
+	0xbeee007e, 0xbeef007f,
+	0xbefe0180, 0xbefe4d84,
+	0xbf89fc07, 0x8b7aff7f,
+	0x04000000, 0x847a857a,
+	0x8c6d7a6d, 0xbefa007e,
+	0x8b7bff7f, 0x0000ffff,
+	0xbefe00c1, 0xbeff00c1,
+	0xdca6c000, 0x007a0000,
+	0x7e000280, 0xbefe007a,
+	0xbeff007b, 0xb8fb02dc,
+	0x847b997b, 0xb8fa3b05,
+	0x807a817a, 0xbf0d997b,
+	0xbfa20002, 0x847a897a,
+	0xbfa00001, 0x847a8a7a,
+	0xb8fb1e06, 0x847b8a7b,
+	0x807a7b7a, 0x8b7bff7f,
+	0x0000ffff, 0x807aff7a,
+	0x00000200, 0x807a7e7a,
+	0x827b807b, 0xd7610000,
+	0x00010870, 0xd7610000,
+	0x00010a71, 0xd7610000,
+	0x00010c72, 0xd7610000,
+	0x00010e73, 0xd7610000,
+	0x00011074, 0xd7610000,
+	0x00011275, 0xd7610000,
+	0x00011476, 0xd7610000,
+	0x00011677, 0xd7610000,
+	0x00011a79, 0xd7610000,
+	0x00011c7e, 0xd7610000,
+	0x00011e7f, 0xbefe00ff,
+	0x00003fff, 0xbeff0080,
+	0xdca6c040, 0x007a0000,
+	0xd760007a, 0x00011d00,
+	0xd760007b, 0x00011f00,
+	0xbefe007a, 0xbeff007b,
+	0xbef4007e, 0x8b75ff7f,
+	0x0000ffff, 0x8c75ff75,
+	0x00040000, 0xbef60080,
+	0xbef700ff, 0x10807fac,
+	0xbef1007d, 0xbef00080,
+	0xb8f302dc, 0x84739973,
+	0xbefe00c1, 0x857d9973,
+	0x8b7d817d, 0xbf06817d,
+	0xbfa20002, 0xbeff0080,
+	0xbfa00002, 0xbeff00c1,
+	0xbfa00009, 0xbef600ff,
+	0x01000000, 0xe0685080,
+	0x701d0100, 0xe0685100,
+	0x701d0200, 0xe0685180,
+	0x701d0300, 0xbfa00008,
+	0xbef600ff, 0x01000000,
+	0xe0685100, 0x701d0100,
+	0xe0685200, 0x701d0200,
+	0xe0685300, 0x701d0300,
+	0xb8f03b05, 0x80708170,
+	0xbf0d9973, 0xbfa20002,
+	0x84708970, 0xbfa00001,
+	0x84708a70, 0xb8fa1e06,
+	0x847a8a7a, 0x80707a70,
+	0x8070ff70, 0x00000200,
+	0xbef600ff, 0x01000000,
+	0x7e000280, 0x7e020280,
+	0x7e040280, 0xbefd0080,
+	0xd7610002, 0x0000fa71,
+	0x807d817d, 0xd7610002,
+	0x0000fa6c, 0x807d817d,
+	0x917aff6d, 0x80000000,
+	0xd7610002, 0x0000fa7a,
+	0x807d817d, 0xd7610002,
+	0x0000fa6e, 0x807d817d,
+	0xd7610002, 0x0000fa6f,
+	0x807d817d, 0xd7610002,
+	0x0000fa78, 0x807d817d,
+	0xb8faf803, 0xd7610002,
+	0x0000fa7a, 0x807d817d,
+	0xd7610002, 0x0000fa7b,
+	0x807d817d, 0xb8f1f801,
+	0xd7610002, 0x0000fa71,
+	0x807d817d, 0xb8f1f814,
+	0xd7610002, 0x0000fa71,
+	0x807d817d, 0xb8f1f815,
+	0xd7610002, 0x0000fa71,
+	0x807d817d, 0xbefe00ff,
+	0x0000ffff, 0xbeff0080,
+	0xe0685000, 0x701d0200,
+	0xbefe00c1, 0xb8f03b05,
+	0x80708170, 0xbf0d9973,
+	0xbfa20002, 0x84708970,
+	0xbfa00001, 0x84708a70,
+	0xb8fa1e06, 0x847a8a7a,
+	0x80707a70, 0xbef600ff,
+	0x01000000, 0xbef90080,
+	0xbefd0080, 0xbf800000,
+	0xbe804100, 0xbe824102,
+	0xbe844104, 0xbe864106,
+	0xbe884108, 0xbe8a410a,
+	0xbe8c410c, 0xbe8e410e,
+	0xd7610002, 0x0000f200,
+	0x80798179, 0xd7610002,
+	0x0000f201, 0x80798179,
+	0xd7610002, 0x0000f202,
+	0x80798179, 0xd7610002,
+	0x0000f203, 0x80798179,
+	0xd7610002, 0x0000f204,
+	0x80798179, 0xd7610002,
+	0x0000f205, 0x80798179,
+	0xd7610002, 0x0000f206,
+	0x80798179, 0xd7610002,
+	0x0000f207, 0x80798179,
+	0xd7610002, 0x0000f208,
+	0x80798179, 0xd7610002,
+	0x0000f209, 0x80798179,
+	0xd7610002, 0x0000f20a,
+	0x80798179, 0xd7610002,
+	0x0000f20b, 0x80798179,
+	0xd7610002, 0x0000f20c,
+	0x80798179, 0xd7610002,
+	0x0000f20d, 0x80798179,
+	0xd7610002, 0x0000f20e,
+	0x80798179, 0xd7610002,
+	0x0000f20f, 0x80798179,
+	0xbf06a079, 0xbfa10006,
+	0xe0685000, 0x701d0200,
+	0x8070ff70, 0x00000080,
+	0xbef90080, 0x7e040280,
+	0x807d907d, 0xbf0aff7d,
+	0x00000060, 0xbfa2ffbc,
+	0xbe804100, 0xbe824102,
+	0xbe844104, 0xbe864106,
+	0xbe884108, 0xbe8a410a,
+	0xd7610002, 0x0000f200,
+	0x80798179, 0xd7610002,
+	0x0000f201, 0x80798179,
+	0xd7610002, 0x0000f202,
+	0x80798179, 0xd7610002,
+	0x0000f203, 0x80798179,
+	0xd7610002, 0x0000f204,
+	0x80798179, 0xd7610002,
+	0x0000f205, 0x80798179,
+	0xd7610002, 0x0000f206,
+	0x80798179, 0xd7610002,
+	0x0000f207, 0x80798179,
+	0xd7610002, 0x0000f208,
+	0x80798179, 0xd7610002,
+	0x0000f209, 0x80798179,
+	0xd7610002, 0x0000f20a,
+	0x80798179, 0xd7610002,
+	0x0000f20b, 0x80798179,
+	0xe0685000, 0x701d0200,
+	0xbefe00c1, 0x857d9973,
+	0x8b7d817d, 0xbf06817d,
+	0xbfa20002, 0xbeff0080,
+	0xbfa00001, 0xbeff00c1,
+	0xb8fb4306, 0x8b7bc17b,
+	0xbfa10044, 0xbfbd0000,
+	0x8b7aff6d, 0x80000000,
+	0xbfa10040, 0x847b867b,
+	0x847b827b, 0xbef6007b,
+	0xb8f03b05, 0x80708170,
+	0xbf0d9973, 0xbfa20002,
+	0x84708970, 0xbfa00001,
+	0x84708a70, 0xb8fa1e06,
+	0x847a8a7a, 0x80707a70,
+	0x8070ff70, 0x00000200,
+	0x8070ff70, 0x00000080,
+	0xbef600ff, 0x01000000,
+	0xd71f0000, 0x000100c1,
+	0xd7200000, 0x000200c1,
+	0x16000084, 0x857d9973,
+	0x8b7d817d, 0xbf06817d,
+	0xbefd0080, 0xbfa20012,
+	0xbe8300ff, 0x00000080,
+	0xbf800000, 0xbf800000,
+	0xbf800000, 0xd8d80000,
+	0x01000000, 0xbf890000,
+	0xe0685000, 0x701d0100,
+	0x807d037d, 0x80700370,
+	0xd5250000, 0x0001ff00,
+	0x00000080, 0xbf0a7b7d,
+	0xbfa2fff4, 0xbfa00011,
+	0xbe8300ff, 0x00000100,
+	0xbf800000, 0xbf800000,
+	0xbf800000, 0xd8d80000,
+	0x01000000, 0xbf890000,
+	0xe0685000, 0x701d0100,
+	0x807d037d, 0x80700370,
+	0xd5250000, 0x0001ff00,
+	0x00000100, 0xbf0a7b7d,
+	0xbfa2fff4, 0xbefe00c1,
+	0x857d9973, 0x8b7d817d,
+	0xbf06817d, 0xbfa20004,
+	0xbef000ff, 0x00000200,
+	0xbeff0080, 0xbfa00003,
+	0xbef000ff, 0x00000400,
+	0xbeff00c1, 0xb8fb3b05,
+	0x807b817b, 0x847b827b,
+	0x857d9973, 0x8b7d817d,
+	0xbf06817d, 0xbfa20017,
+	0xbef600ff, 0x01000000,
+	0xbefd0084, 0xbf0a7b7d,
+	0xbfa10037, 0x7e008700,
+	0x7e028701, 0x7e048702,
+	0x7e068703, 0xe0685000,
+	0x701d0000, 0xe0685080,
+	0x701d0100, 0xe0685100,
+	0x701d0200, 0xe0685180,
+	0x701d0300, 0x807d847d,
+	0x8070ff70, 0x00000200,
+	0xbf0a7b7d, 0xbfa2ffef,
+	0xbfa00025, 0xbef600ff,
+	0x01000000, 0xbefd0084,
+	0xbf0a7b7d, 0xbfa10011,
+	0x7e008700, 0x7e028701,
+	0x7e048702, 0x7e068703,
+	0xe0685000, 0x701d0000,
+	0xe0685100, 0x701d0100,
+	0xe0685200, 0x701d0200,
+	0xe0685300, 0x701d0300,
+	0x807d847d, 0x8070ff70,
+	0x00000400, 0xbf0a7b7d,
+	0xbfa2ffef, 0xb8fb1e06,
+	0x8b7bc17b, 0xbfa1000c,
+	0x847b837b, 0x807b7d7b,
+	0xbefe00c1, 0xbeff0080,
+	0x7e008700, 0xe0685000,
+	0x701d0000, 0x807d817d,
+	0x8070ff70, 0x00000080,
+	0xbf0a7b7d, 0xbfa2fff8,
+	0xbfa00141, 0xbef4007e,
+	0x8b75ff7f, 0x0000ffff,
+	0x8c75ff75, 0x00040000,
+	0xbef60080, 0xbef700ff,
+	0x10807fac, 0xb8f202dc,
+	0x84729972, 0x8b6eff7f,
+	0x04000000, 0xbfa1003a,
+	0xbefe00c1, 0x857d9972,
+	0x8b7d817d, 0xbf06817d,
+	0xbfa20002, 0xbeff0080,
+	0xbfa00001, 0xbeff00c1,
+	0xb8ef4306, 0x8b6fc16f,
+	0xbfa1002f, 0x846f866f,
+	0x846f826f, 0xbef6006f,
+	0xb8f83b05, 0x80788178,
+	0xbf0d9972, 0xbfa20002,
+	0x84788978, 0xbfa00001,
+	0x84788a78, 0xb8ee1e06,
+	0x846e8a6e, 0x80786e78,
+	0x8078ff78, 0x00000200,
+	0x8078ff78, 0x00000080,
+	0xbef600ff, 0x01000000,
+	0x857d9972, 0x8b7d817d,
+	0xbf06817d, 0xbefd0080,
+	0xbfa2000c, 0xe0500000,
+	0x781d0000, 0xbf8903f7,
+	0xdac00000, 0x00000000,
+	0x807dff7d, 0x00000080,
+	0x8078ff78, 0x00000080,
+	0xbf0a6f7d, 0xbfa2fff5,
+	0xbfa0000b, 0xe0500000,
+	0x781d0000, 0xbf8903f7,
+	0xdac00000, 0x00000000,
+	0x807dff7d, 0x00000100,
+	0x8078ff78, 0x00000100,
+	0xbf0a6f7d, 0xbfa2fff5,
+	0xbef80080, 0xbefe00c1,
+	0x857d9972, 0x8b7d817d,
+	0xbf06817d, 0xbfa20002,
+	0xbeff0080, 0xbfa00001,
+	0xbeff00c1, 0xb8ef3b05,
+	0x806f816f, 0x846f826f,
+	0x857d9972, 0x8b7d817d,
+	0xbf06817d, 0xbfa20024,
+	0xbef600ff, 0x01000000,
+	0xbeee0078, 0x8078ff78,
+	0x00000200, 0xbefd0084,
+	0xbf0a6f7d, 0xbfa10050,
+	0xe0505000, 0x781d0000,
+	0xe0505080, 0x781d0100,
+	0xe0505100, 0x781d0200,
+	0xe0505180, 0x781d0300,
+	0xbf8903f7, 0x7e008500,
+	0x7e028501, 0x7e048502,
+	0x7e068503, 0x807d847d,
+	0x8078ff78, 0x00000200,
+	0xbf0a6f7d, 0xbfa2ffee,
+	0xe0505000, 0x6e1d0000,
+	0xe0505080, 0x6e1d0100,
+	0xe0505100, 0x6e1d0200,
+	0xe0505180, 0x6e1d0300,
+	0xbf8903f7, 0xbfa00034,
+	0xbef600ff, 0x01000000,
+	0xbeee0078, 0x8078ff78,
+	0x00000400, 0xbefd0084,
+	0xbf0a6f7d, 0xbfa10012,
+	0xe0505000, 0x781d0000,
+	0xe0505100, 0x781d0100,
+	0xe0505200, 0x781d0200,
+	0xe0505300, 0x781d0300,
+	0xbf8903f7, 0x7e008500,
+	0x7e028501, 0x7e048502,
+	0x7e068503, 0x807d847d,
+	0x8078ff78, 0x00000400,
+	0xbf0a6f7d, 0xbfa2ffee,
+	0xb8ef1e06, 0x8b6fc16f,
+	0xbfa1000e, 0x846f836f,
+	0x806f7d6f, 0xbefe00c1,
+	0xbeff0080, 0xe0505000,
+	0x781d0000, 0xbf8903f7,
+	0x7e008500, 0x807d817d,
+	0x8078ff78, 0x00000080,
+	0xbf0a6f7d, 0xbfa2fff7,
+	0xbeff00c1, 0xe0505000,
+	0x6e1d0000, 0xe0505100,
+	0x6e1d0100, 0xe0505200,
+	0x6e1d0200, 0xe0505300,
+	0x6e1d0300, 0xbf8903f7,
+	0xb8f83b05, 0x80788178,
+	0xbf0d9972, 0xbfa20002,
+	0x84788978, 0xbfa00001,
+	0x84788a78, 0xb8ee1e06,
+	0x846e8a6e, 0x80786e78,
+	0x8078ff78, 0x00000200,
+	0x80f8ff78, 0x00000050,
+	0xbef600ff, 0x01000000,
+	0xbefd00ff, 0x0000006c,
+	0x80f89078, 0xf428403a,
+	0xf0000000, 0xbf89fc07,
+	0x80fd847d, 0xbf800000,
+	0xbe804300, 0xbe824302,
+	0x80f8a078, 0xf42c403a,
+	0xf0000000, 0xbf89fc07,
+	0x80fd887d, 0xbf800000,
+	0xbe804300, 0xbe824302,
+	0xbe844304, 0xbe864306,
+	0x80f8c078, 0xf430403a,
+	0xf0000000, 0xbf89fc07,
+	0x80fd907d, 0xbf800000,
+	0xbe804300, 0xbe824302,
+	0xbe844304, 0xbe864306,
+	0xbe884308, 0xbe8a430a,
+	0xbe8c430c, 0xbe8e430e,
+	0xbf06807d, 0xbfa1fff0,
+	0xb980f801, 0x00000000,
+	0xbfbd0000, 0xb8f83b05,
+	0x80788178, 0xbf0d9972,
+	0xbfa20002, 0x84788978,
+	0xbfa00001, 0x84788a78,
+	0xb8ee1e06, 0x846e8a6e,
+	0x80786e78, 0x8078ff78,
+	0x00000200, 0xbef600ff,
+	0x01000000, 0xf4205bfa,
+	0xf0000000, 0x80788478,
+	0xf4205b3a, 0xf0000000,
+	0x80788478, 0xf4205b7a,
+	0xf0000000, 0x80788478,
+	0xf4205c3a, 0xf0000000,
+	0x80788478, 0xf4205c7a,
+	0xf0000000, 0x80788478,
+	0xf4205eba, 0xf0000000,
+	0x80788478, 0xf4205efa,
+	0xf0000000, 0x80788478,
+	0xf4205e7a, 0xf0000000,
+	0x80788478, 0xf4205cfa,
+	0xf0000000, 0x80788478,
+	0xf4205bba, 0xf0000000,
+	0x80788478, 0xbf89fc07,
+	0xb96ef814, 0xf4205bba,
+	0xf0000000, 0x80788478,
+	0xbf89fc07, 0xb96ef815,
+	0xbefd006f, 0xbefe0070,
+	0xbeff0071, 0x8b6f7bff,
+	0x000003ff, 0xb96f4803,
+	0x8b6f7bff, 0xfffff800,
+	0x856f8b6f, 0xb96fa2c3,
+	0xb973f801, 0xb8ee3b05,
+	0x806e816e, 0xbf0d9972,
+	0xbfa20002, 0x846e896e,
+	0xbfa00001, 0x846e8a6e,
+	0xb8ef1e06, 0x846f8a6f,
+	0x806e6f6e, 0x806eff6e,
+	0x00000200, 0x806e746e,
+	0x826f8075, 0x8b6fff6f,
+	0x0000ffff, 0xf4085c37,
+	0xf8000050, 0xf4085d37,
+	0xf8000060, 0xf4005e77,
+	0xf8000074, 0xbf89fc07,
+	0x8b6dff6d, 0x0000ffff,
+	0x8bfe7e7e, 0x8bea6a6a,
+	0xb97af802, 0xbe804a6c,
+	0xbfb00000, 0xbf9f0000,
+	0xbf9f0000, 0xbf9f0000,
+	0xbf9f0000, 0xbf9f0000,
+};
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm
index 0348191e8592..250ab007399b 100644
--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm
@@ -23,15 +23,26 @@
 /* To compile this assembly code:
  *
  * Navi1x:
- *   cpp -DASIC_TARGET_NAVI1X=1 cwsr_trap_handler_gfx10.asm -P -o nv1x.sp3
- *   sp3-nv1x nv1x.sp3 -hex nv1x.hex
+ *   cpp -DASIC_FAMILY=CHIP_NAVI10 cwsr_trap_handler_gfx10.asm -P -o nv1x.sp3
+ *   sp3 nv1x.sp3 -hex nv1x.hex
  *
- * Others:
- *   cpp -DASIC_TARGET_NAVI1X=0 cwsr_trap_handler_gfx10.asm -P -o gfx10.sp3
- *   sp3-gfx10 gfx10.sp3 -hex gfx10.hex
+ * gfx10:
+ *   cpp -DASIC_FAMILY=CHIP_SIENNA_CICHLID cwsr_trap_handler_gfx10.asm -P -o gfx10.sp3
+ *   sp3 gfx10.sp3 -hex gfx10.hex
+ *
+ * gfx11:
+ *   cpp -DASIC_FAMILY=CHIP_PLUM_BONITO cwsr_trap_handler_gfx10.asm -P -o gfx11.sp3
+ *   sp3 gfx11.sp3 -hex gfx11.hex
  */
 
-#define NO_SQC_STORE !ASIC_TARGET_NAVI1X
+#define CHIP_NAVI10 26
+#define CHIP_SIENNA_CICHLID 30
+#define CHIP_PLUM_BONITO 36
+
+#define NO_SQC_STORE (ASIC_FAMILY >= CHIP_SIENNA_CICHLID)
+#define HAVE_XNACK (ASIC_FAMILY < CHIP_SIENNA_CICHLID)
+#define HAVE_SENDMSG_RTN (ASIC_FAMILY >= CHIP_PLUM_BONITO)
+#define HAVE_BUFFER_LDS_LOAD (ASIC_FAMILY < CHIP_PLUM_BONITO)
 
 var SINGLE_STEP_MISSED_WORKAROUND		= 1	//workaround for lost MODE.DEBUG_EN exception when SAVECTX raised
 
@@ -41,15 +52,18 @@ var SQ_WAVE_STATUS_ECC_ERR_MASK			= 0x20000
 
 var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT		= 12
 var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE		= 9
-var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT		= 8
-var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE		= 6
-var SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SHIFT		= 24
-var SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SIZE		= 4
+var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE		= 8
 var SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SHIFT	= 24
 var SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SIZE	= 4
 var SQ_WAVE_IB_STS2_WAVE64_SHIFT		= 11
 var SQ_WAVE_IB_STS2_WAVE64_SIZE			= 1
 
+#if ASIC_FAMILY < CHIP_PLUM_BONITO
+var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT		= 8
+#else
+var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT		= 12
+#endif
+
 var SQ_WAVE_TRAPSTS_SAVECTX_MASK		= 0x400
 var SQ_WAVE_TRAPSTS_EXCP_MASK			= 0x1FF
 var SQ_WAVE_TRAPSTS_SAVECTX_SHIFT		= 10
@@ -231,15 +245,20 @@ end
 	s_cbranch_scc1	L_SAVE
 
 L_FETCH_2ND_TRAP:
-#if ASIC_TARGET_NAVI1X
+#if HAVE_XNACK
 	save_and_clear_ib_sts(ttmp14, ttmp15)
 #endif
 
 	// Read second-level TBA/TMA from first-level TMA and jump if available.
 	// ttmp[2:5] and ttmp12 can be used (others hold SPI-initialized debug data)
 	// ttmp12 holds SQ_WAVE_STATUS
+#if HAVE_SENDMSG_RTN
+	s_sendmsg_rtn_b64       [ttmp14, ttmp15], sendmsg(MSG_RTN_GET_TMA)
+	s_waitcnt       lgkmcnt(0)
+#else
 	s_getreg_b32	ttmp14, hwreg(HW_REG_SHADER_TMA_LO)
 	s_getreg_b32	ttmp15, hwreg(HW_REG_SHADER_TMA_HI)
+#endif
 	s_lshl_b64	[ttmp14, ttmp15], [ttmp14, ttmp15], 0x8
 
 	s_load_dword    ttmp2, [ttmp14, ttmp15], 0x10 glc:1			// debug trap enabled flag
@@ -282,7 +301,7 @@ L_TRAP_CASE:
 L_EXIT_TRAP:
 	s_and_b32	ttmp1, ttmp1, 0xFFFF
 
-#if ASIC_TARGET_NAVI1X
+#if HAVE_XNACK
 	restore_ib_sts(ttmp14, ttmp15)
 #endif
 
@@ -298,7 +317,7 @@ L_SAVE:
 	s_mov_b32	s_save_tmp, 0
 	s_setreg_b32	hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_SAVECTX_SHIFT, 1), s_save_tmp	//clear saveCtx bit
 
-#if ASIC_TARGET_NAVI1X
+#if HAVE_XNACK
 	save_and_clear_ib_sts(s_save_tmp, s_save_trapsts)
 #endif
 
@@ -307,9 +326,13 @@ L_SAVE:
 	s_mov_b32	s_save_exec_hi, exec_hi
 	s_mov_b64	exec, 0x0						//clear EXEC to get ready to receive
 
+#if HAVE_SENDMSG_RTN
+	s_sendmsg_rtn_b64       [exec_lo, exec_hi], sendmsg(MSG_RTN_SAVE_WAVE)
+#else
 	s_sendmsg	sendmsg(MSG_SAVEWAVE)					//send SPI a message and wait for SPI's write to EXEC
+#endif
 
-#if ASIC_TARGET_NAVI1X
+#if ASIC_FAMILY < CHIP_SIENNA_CICHLID
 L_SLEEP:
 	// sleep 1 (64clk) is not enough for 8 waves per SIMD, which will cause
 	// SQ hang, since the 7,8th wave could not get arbit to exec inst, while
@@ -389,7 +412,7 @@ L_SLEEP:
 	s_mov_b32	s_save_mem_offset, 0x0
 	get_wave_size(s_wave_size)
 
-#if ASIC_TARGET_NAVI1X
+#if HAVE_XNACK
 	// Save and clear vector XNACK state late to free up SGPRs.
 	s_getreg_b32	s_save_xnack_mask, hwreg(HW_REG_SHADER_XNACK_MASK)
 	s_setreg_imm32_b32	hwreg(HW_REG_SHADER_XNACK_MASK), 0x0
@@ -777,7 +800,13 @@ L_RESTORE_LDS_NORMAL:
 	s_cbranch_scc1	L_RESTORE_LDS_LOOP_W64
 
 L_RESTORE_LDS_LOOP_W32:
+#if HAVE_BUFFER_LDS_LOAD
 	buffer_load_dword	v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset lds:1	// first 64DW
+#else
+	buffer_load_dword       v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset
+	s_waitcnt	vmcnt(0)
+	ds_store_addtid_b32     v0
+#endif
 	s_add_u32	m0, m0, 128						// 128 DW
 	s_add_u32	s_restore_mem_offset, s_restore_mem_offset, 128		//mem offset increased by 128DW
 	s_cmp_lt_u32	m0, s_restore_alloc_size				//scc=(m0 < s_restore_alloc_size) ? 1 : 0
@@ -785,7 +814,13 @@ L_RESTORE_LDS_LOOP_W32:
 	s_branch	L_RESTORE_VGPR
 
 L_RESTORE_LDS_LOOP_W64:
+#if HAVE_BUFFER_LDS_LOAD
 	buffer_load_dword	v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset lds:1	// first 64DW
+#else
+	buffer_load_dword       v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset
+	s_waitcnt	vmcnt(0)
+	ds_store_addtid_b32     v0
+#endif
 	s_add_u32	m0, m0, 256						// 256 DW
 	s_add_u32	s_restore_mem_offset, s_restore_mem_offset, 256		//mem offset increased by 256DW
 	s_cmp_lt_u32	m0, s_restore_alloc_size				//scc=(m0 < s_restore_alloc_size) ? 1 : 0
@@ -996,7 +1031,7 @@ L_RESTORE_HWREG:
 	s_and_b32	s_restore_m0, SQ_WAVE_TRAPSTS_PRE_SAVECTX_MASK, s_restore_trapsts
 	s_setreg_b32	hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_PRE_SAVECTX_SHIFT, SQ_WAVE_TRAPSTS_PRE_SAVECTX_SIZE), s_restore_m0
 
-#if ASIC_TARGET_NAVI1X
+#if HAVE_XNACK
 	s_setreg_b32	hwreg(HW_REG_SHADER_XNACK_MASK), s_restore_xnack_mask
 #endif
 
@@ -1019,7 +1054,7 @@ L_RESTORE_HWREG:
 	s_load_dword	ttmp13, [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x74 glc:1
 	s_waitcnt	lgkmcnt(0)
 
-#if ASIC_TARGET_NAVI1X
+#if HAVE_XNACK
 	restore_ib_sts(s_restore_tmp, s_restore_m0)
 #endif
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index f1a225a20719..8667e3df2d0b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -441,10 +441,14 @@ static void kfd_cwsr_init(struct kfd_dev *kfd)
 			BUILD_BUG_ON(sizeof(cwsr_trap_nv1x_hex) > PAGE_SIZE);
 			kfd->cwsr_isa = cwsr_trap_nv1x_hex;
 			kfd->cwsr_isa_size = sizeof(cwsr_trap_nv1x_hex);
-		} else {
+		} else if (KFD_GC_VERSION(kfd) < IP_VERSION(11, 0, 0)) {
 			BUILD_BUG_ON(sizeof(cwsr_trap_gfx10_hex) > PAGE_SIZE);
 			kfd->cwsr_isa = cwsr_trap_gfx10_hex;
 			kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx10_hex);
+		} else {
+			BUILD_BUG_ON(sizeof(cwsr_trap_gfx11_hex) > PAGE_SIZE);
+			kfd->cwsr_isa = cwsr_trap_gfx11_hex;
+			kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx11_hex);
 		}
 
 		kfd->cwsr_enabled = true;
commit 5e613723f804658feb689be1b3cb88ceeed234d3
Author: Eric Huang <JinhuiEric.Huang at amd.com>
Date:   Mon May 16 14:22:38 2022 -0400

    drm/amdkfd: port cwsr trap handler from dkms branch
    
    Most of changes are for debugger feature, and it is
    to simplify trap handler support for new asics in the
    future.
    
    Signed-off-by: Eric Huang <jinhuieric.huang at amd.com>
    Acked-by: Alex Deucher <alexander.deucher at amd.com>
    Signed-off-by: Alex Deucher <alexander.deucher at amd.com>

diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
index 475f89700c74..8cbdc7f519c6 100644
--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
@@ -166,7 +166,7 @@ static const uint32_t cwsr_trap_gfx8_hex[] = {
 	0x807c847c, 0x806eff6e,
 	0x00000400, 0xbf0a757c,
 	0xbf85ffef, 0xbf9c0000,
-	0xbf8200cd, 0xbef8007e,
+	0xbf8200ce, 0xbef8007e,
 	0x8679ff7f, 0x0000ffff,
 	0x8779ff79, 0x00040000,
 	0xbefa0080, 0xbefb00ff,
@@ -212,304 +212,310 @@ static const uint32_t cwsr_trap_gfx8_hex[] = {
 	0x761e0000, 0xe0524100,
 	0x761e0100, 0xe0524200,
 	0x761e0200, 0xe0524300,
-	0x761e0300, 0xb8f22a05,
-	0x80728172, 0x8e728a72,
-	0xb8f61605, 0x80768176,
-	0x8e768676, 0x80727672,
-	0x80f2c072, 0xb8f31605,
-	0x80738173, 0x8e738473,
-	0x8e7a8273, 0xbefa00ff,
-	0x01000000, 0xbefc0073,
-	0xc031003c, 0x00000072,
-	0x80f2c072, 0xbf8c007f,
-	0x80fc907c, 0xbe802d00,
-	0xbe822d02, 0xbe842d04,
-	0xbe862d06, 0xbe882d08,
-	0xbe8a2d0a, 0xbe8c2d0c,
-	0xbe8e2d0e, 0xbf06807c,
-	0xbf84fff1, 0xb8f22a05,
-	0x80728172, 0x8e728a72,
-	0xb8f61605, 0x80768176,
-	0x8e768676, 0x80727672,
-	0xbefa0084, 0xbefa00ff,
-	0x01000000, 0xc0211cfc,
+	0x761e0300, 0xbf8c0f70,
+	0xb8f22a05, 0x80728172,
+	0x8e728a72, 0xb8f61605,
+	0x80768176, 0x8e768676,
+	0x80727672, 0x80f2c072,
+	0xb8f31605, 0x80738173,
+	0x8e738473, 0x8e7a8273,
+	0xbefa00ff, 0x01000000,
+	0xbefc0073, 0xc031003c,
+	0x00000072, 0x80f2c072,
+	0xbf8c007f, 0x80fc907c,
+	0xbe802d00, 0xbe822d02,
+	0xbe842d04, 0xbe862d06,
+	0xbe882d08, 0xbe8a2d0a,
+	0xbe8c2d0c, 0xbe8e2d0e,
+	0xbf06807c, 0xbf84fff1,
+	0xb8f22a05, 0x80728172,
+	0x8e728a72, 0xb8f61605,
+	0x80768176, 0x8e768676,
+	0x80727672, 0xbefa0084,
+	0xbefa00ff, 0x01000000,
+	0xc0211cfc, 0x00000072,
+	0x80728472, 0xc0211c3c,
 	0x00000072, 0x80728472,
-	0xc0211c3c, 0x00000072,
-	0x80728472, 0xc0211c7c,
+	0xc0211c7c, 0x00000072,
+	0x80728472, 0xc0211bbc,
 	0x00000072, 0x80728472,
-	0xc0211bbc, 0x00000072,
-	0x80728472, 0xc0211bfc,
+	0xc0211bfc, 0x00000072,
+	0x80728472, 0xc0211d3c,
 	0x00000072, 0x80728472,
-	0xc0211d3c, 0x00000072,
-	0x80728472, 0xc0211d7c,
+	0xc0211d7c, 0x00000072,
+	0x80728472, 0xc0211a3c,
 	0x00000072, 0x80728472,
-	0xc0211a3c, 0x00000072,
-	0x80728472, 0xc0211a7c,
+	0xc0211a7c, 0x00000072,
+	0x80728472, 0xc0211dfc,
 	0x00000072, 0x80728472,
-	0xc0211dfc, 0x00000072,
-	0x80728472, 0xc0211b3c,
+	0xc0211b3c, 0x00000072,
+	0x80728472, 0xc0211b7c,
 	0x00000072, 0x80728472,
-	0xc0211b7c, 0x00000072,
-	0x80728472, 0xbf8c007f,
-	0xbefc0073, 0xbefe006e,
-	0xbeff006f, 0x867375ff,
-	0x000003ff, 0xb9734803,
-	0x867375ff, 0xfffff800,
-	0x8f738b73, 0xb973a2c3,
-	0xb977f801, 0x8673ff71,
-	0xf0000000, 0x8f739c73,
-	0x8e739073, 0xbef60080,
-	0x87767376, 0x8673ff71,
-	0x08000000, 0x8f739b73,
-	0x8e738f73, 0x87767376,
-	0x8673ff74, 0x00800000,
-	0x8f739773, 0xb976f807,
-	0x8671ff71, 0x0000ffff,
-	0x86fe7e7e, 0x86ea6a6a,
-	0x8f768374, 0xb976e0c2,
-	0xbf800002, 0xb9740002,
-	0xbf8a0000, 0x95807370,
-	0xbf810000, 0x00000000,
+	0xbf8c007f, 0xbefc0073,
+	0xbefe006e, 0xbeff006f,
+	0x867375ff, 0x000003ff,
+	0xb9734803, 0x867375ff,
+	0xfffff800, 0x8f738b73,
+	0xb973a2c3, 0xb977f801,
+	0x8673ff71, 0xf0000000,
+	0x8f739c73, 0x8e739073,
+	0xbef60080, 0x87767376,
+	0x8673ff71, 0x08000000,
+	0x8f739b73, 0x8e738f73,
+	0x87767376, 0x8673ff74,
+	0x00800000, 0x8f739773,
+	0xb976f807, 0x8671ff71,
+	0x0000ffff, 0x86fe7e7e,
+	0x86ea6a6a, 0x8f768374,
+	0xb976e0c2, 0xbf800002,
+	0xb9740002, 0xbf8a0000,
+	0x95807370, 0xbf810000,
 };
 
 
 static const uint32_t cwsr_trap_gfx9_hex[] = {
-	0xbf820001, 0xbf820248,
-	0xb8f8f802, 0x89788678,
-	0xb8eef801, 0x866eff6e,
-	0x00000800, 0xbf840003,
+	0xbf820001, 0xbf820254,
+	0xb8f8f802, 0x8978ff78,
+	0x00020006, 0xb8fbf803,
 	0x866eff78, 0x00002000,
-	0xbf840016, 0xb8fbf803,
+	0xbf840009, 0x866eff6d,
+	0x00ff0000, 0xbf85001e,
 	0x866eff7b, 0x00000400,
-	0xbf85003b, 0x866eff7b,
-	0x00000800, 0xbf850003,
-	0x866eff7b, 0x00000100,
-	0xbf84000c, 0x866eff78,
-	0x00002000, 0xbf840005,
-	0xbf8e0010, 0xb8eef803,
-	0x866eff6e, 0x00000400,
-	0xbf84fffb, 0x8778ff78,
-	0x00002000, 0x80ec886c,
-	0x82ed806d, 0xb8eef807,
-	0x866fff6e, 0x001f8000,
-	0x8e6f8b6f, 0x8977ff77,
-	0xfc000000, 0x87776f77,
-	0x896eff6e, 0x001f8000,
-	0xb96ef807, 0xb8faf812,
+	0xbf850051, 0xbf8e0010,
+	0xb8fbf803, 0xbf82fffa,
+	0x866eff7b, 0x00000900,
+	0xbf850015, 0x866eff7b,
+	0x000071ff, 0xbf840008,
+	0x866fff7b, 0x00007080,
+	0xbf840001, 0xbeee1a87,
+	0xb8eff801, 0x8e6e8c6e,
+	0x866e6f6e, 0xbf85000a,
+	0x866eff6d, 0x00ff0000,
+	0xbf850007, 0xb8eef801,
+	0x866eff6e, 0x00000800,
+	0xbf850003, 0x866eff7b,
+	0x00000400, 0xbf850036,
+	0xb8faf807, 0x867aff7a,
+	0x001f8000, 0x8e7a8b7a,
+	0x8977ff77, 0xfc000000,
+	0x87777a77, 0xba7ff807,
+	0x00000000, 0xb8faf812,
 	0xb8fbf813, 0x8efa887a,
-	0xc0071bbd, 0x00000000,
-	0xbf8cc07f, 0xc0071ebd,
-	0x00000008, 0xbf8cc07f,
-	0x86ee6e6e, 0xbf840001,
-	0xbe801d6e, 0xb8fbf803,
-	0x867bff7b, 0x000001ff,
+	0xc0031bbd, 0x00000010,
+	0xbf8cc07f, 0x8e6e976e,
+	0x8977ff77, 0x00800000,
+	0x87776e77, 0xc0071bbd,
+	0x00000000, 0xbf8cc07f,
+	0xc0071ebd, 0x00000008,
+	0xbf8cc07f, 0x86ee6e6e,
+	0xbf840001, 0xbe801d6e,
+	0x866eff6d, 0x01ff0000,
+	0xbf850005, 0x8778ff78,
+	0x00002000, 0x80ec886c,
+	0x82ed806d, 0xbf820005,
+	0x866eff6d, 0x01000000,
 	0xbf850002, 0x806c846c,
 	0x826d806d, 0x866dff6d,
-	0x0000ffff, 0x8f6e8b77,
-	0x866eff6e, 0x001f8000,
-	0xb96ef807, 0x86fe7e7e,
+	0x0000ffff, 0x8f7a8b77,
+	0x867aff7a, 0x001f8000,
+	0xb97af807, 0x86fe7e7e,
 	0x86ea6a6a, 0x8f6e8378,
 	0xb96ee0c2, 0xbf800002,
 	0xb9780002, 0xbe801f6c,
 	0x866dff6d, 0x0000ffff,
 	0xbefa0080, 0xb97a0283,
-	0xb8fa2407, 0x8e7a9b7a,
-	0x876d7a6d, 0xb8fa03c7,
-	0x8e7a9a7a, 0x876d7a6d,
 	0xb8faf807, 0x867aff7a,
-	0x00007fff, 0xb97af807,
-	0xbeee007e, 0xbeef007f,
-	0xbefe0180, 0xbf900004,
-	0x877a8478, 0xb97af802,
-	0xbf8e0002, 0xbf88fffe,
-	0xb8fa2a05, 0x807a817a,
-	0x8e7a8a7a, 0xb8fb1605,
-	0x807b817b, 0x8e7b867b,
-	0x807a7b7a, 0x807a7e7a,
-	0x827b807f, 0x867bff7b,
-	0x0000ffff, 0xc04b1c3d,
-	0x00000050, 0xbf8cc07f,
-	0xc04b1d3d, 0x00000060,
-	0xbf8cc07f, 0xc0431e7d,
-	0x00000074, 0xbf8cc07f,
-	0xbef4007e, 0x8675ff7f,
-	0x0000ffff, 0x8775ff75,
-	0x00040000, 0xbef60080,
-	0xbef700ff, 0x00807fac,
-	0x867aff7f, 0x08000000,
-	0x8f7a837a, 0x87777a77,
-	0x867aff7f, 0x70000000,
-	0x8f7a817a, 0x87777a77,
-	0xbef1007c, 0xbef00080,
-	0xb8f02a05, 0x80708170,
-	0x8e708a70, 0xb8fa1605,
-	0x807a817a, 0x8e7a867a,
-	0x80707a70, 0xbef60084,
-	0xbef600ff, 0x01000000,
-	0xbefe007c, 0xbefc0070,
-	0xc0611c7a, 0x0000007c,
-	0xbf8cc07f, 0x80708470,
-	0xbefc007e, 0xbefe007c,
-	0xbefc0070, 0xc0611b3a,
+	0x001f8000, 0x8e7a8b7a,
+	0x8977ff77, 0xfc000000,
+	0x87777a77, 0xba7ff807,
+	0x00000000, 0xbeee007e,
+	0xbeef007f, 0xbefe0180,
+	0xbf900004, 0x877a8478,
+	0xb97af802, 0xbf8e0002,
+	0xbf88fffe, 0xb8fa2a05,
+	0x807a817a, 0x8e7a8a7a,
+	0xb8fb1605, 0x807b817b,
+	0x8e7b867b, 0x807a7b7a,
+	0x807a7e7a, 0x827b807f,
+	0x867bff7b, 0x0000ffff,
+	0xc04b1c3d, 0x00000050,
+	0xbf8cc07f, 0xc04b1d3d,
+	0x00000060, 0xbf8cc07f,
+	0xc0431e7d, 0x00000074,
+	0xbf8cc07f, 0xbef4007e,
+	0x8675ff7f, 0x0000ffff,
+	0x8775ff75, 0x00040000,
+	0xbef60080, 0xbef700ff,
+	0x00807fac, 0xbef1007c,
+	0xbef00080, 0xb8f02a05,
+	0x80708170, 0x8e708a70,
+	0xb8fa1605, 0x807a817a,
+	0x8e7a867a, 0x80707a70,
+	0xbef60084, 0xbef600ff,
+	0x01000000, 0xbefe007c,
+	0xbefc0070, 0xc0611c7a,
 	0x0000007c, 0xbf8cc07f,
 	0x80708470, 0xbefc007e,
 	0xbefe007c, 0xbefc0070,
-	0xc0611b7a, 0x0000007c,
+	0xc0611b3a, 0x0000007c,
 	0xbf8cc07f, 0x80708470,
 	0xbefc007e, 0xbefe007c,
-	0xbefc0070, 0xc0611bba,
+	0xbefc0070, 0xc0611b7a,
 	0x0000007c, 0xbf8cc07f,
 	0x80708470, 0xbefc007e,
 	0xbefe007c, 0xbefc0070,
-	0xc0611bfa, 0x0000007c,
+	0xc0611bba, 0x0000007c,
 	0xbf8cc07f, 0x80708470,
 	0xbefc007e, 0xbefe007c,
-	0xbefc0070, 0xc0611e3a,
-	0x0000007c, 0xbf8cc07f,
-	0x80708470, 0xbefc007e,
-	0xb8fbf803, 0xbefe007c,
-	0xbefc0070, 0xc0611efa,
+	0xbefc0070, 0xc0611bfa,
 	0x0000007c, 0xbf8cc07f,
 	0x80708470, 0xbefc007e,
 	0xbefe007c, 0xbefc0070,
-	0xc0611a3a, 0x0000007c,
+	0xc0611e3a, 0x0000007c,
+	0xbf8cc07f, 0x80708470,
+	0xbefc007e, 0xb8fbf803,
+	0xbefe007c, 0xbefc0070,
+	0xc0611efa, 0x0000007c,
 	0xbf8cc07f, 0x80708470,
 	0xbefc007e, 0xbefe007c,
-	0xbefc0070, 0xc0611a7a,
-	0x0000007c, 0xbf8cc07f,
-	0x80708470, 0xbefc007e,
-	0xb8f1f801, 0xbefe007c,
-	0xbefc0070, 0xc0611c7a,
+	0xbefc0070, 0xc0611a3a,
 	0x0000007c, 0xbf8cc07f,
 	0x80708470, 0xbefc007e,
-	0x867aff7f, 0x04000000,
-	0xbeef0080, 0x876f6f7a,
-	0xb8f02a05, 0x80708170,
-	0x8e708a70, 0xb8fb1605,
-	0x807b817b, 0x8e7b847b,
-	0x8e76827b, 0xbef600ff,
-	0x01000000, 0xbef20174,
-	0x80747074, 0x82758075,
-	0xbefc0080, 0xbf800000,
-	0xbe802b00, 0xbe822b02,
-	0xbe842b04, 0xbe862b06,
-	0xbe882b08, 0xbe8a2b0a,
-	0xbe8c2b0c, 0xbe8e2b0e,
-	0xc06b003a, 0x00000000,
-	0xbf8cc07f, 0xc06b013a,
-	0x00000010, 0xbf8cc07f,
-	0xc06b023a, 0x00000020,
-	0xbf8cc07f, 0xc06b033a,
-	0x00000030, 0xbf8cc07f,
-	0x8074c074, 0x82758075,
-	0x807c907c, 0xbf0a7b7c,
-	0xbf85ffe7, 0xbef40172,
-	0xbef00080, 0xbefe00c1,
-	0xbeff00c1, 0xbee80080,
-	0xbee90080, 0xbef600ff,
-	0x01000000, 0x867aff78,
-	0x00400000, 0xbf850003,
-	0xb8faf803, 0x897a7aff,
-	0x10000000, 0xbf85004d,
-	0xbe840080, 0xd2890000,
-	0x00000900, 0x80048104,
-	0xd2890001, 0x00000900,
-	0x80048104, 0xd2890002,
-	0x00000900, 0x80048104,
-	0xd2890003, 0x00000900,
-	0x80048104, 0xc069003a,
-	0x00000070, 0xbf8cc07f,
-	0x80709070, 0xbf06c004,
-	0xbf84ffee, 0xbe840080,
-	0xd2890000, 0x00000901,
+	0xbefe007c, 0xbefc0070,
+	0xc0611a7a, 0x0000007c,
+	0xbf8cc07f, 0x80708470,
+	0xbefc007e, 0xb8f1f801,
+	0xbefe007c, 0xbefc0070,
+	0xc0611c7a, 0x0000007c,
+	0xbf8cc07f, 0x80708470,
+	0xbefc007e, 0x867aff7f,
+	0x04000000, 0xbeef0080,
+	0x876f6f7a, 0xb8f02a05,
+	0x80708170, 0x8e708a70,
+	0xb8fb1605, 0x807b817b,
+	0x8e7b847b, 0x8e76827b,
+	0xbef600ff, 0x01000000,
+	0xbef20174, 0x80747074,
+	0x82758075, 0xbefc0080,
+	0xbf800000, 0xbe802b00,
+	0xbe822b02, 0xbe842b04,
+	0xbe862b06, 0xbe882b08,
+	0xbe8a2b0a, 0xbe8c2b0c,
+	0xbe8e2b0e, 0xc06b003a,
+	0x00000000, 0xbf8cc07f,
+	0xc06b013a, 0x00000010,
+	0xbf8cc07f, 0xc06b023a,
+	0x00000020, 0xbf8cc07f,
+	0xc06b033a, 0x00000030,
+	0xbf8cc07f, 0x8074c074,
+	0x82758075, 0x807c907c,
+	0xbf0a7b7c, 0xbf85ffe7,
+	0xbef40172, 0xbef00080,
+	0xbefe00c1, 0xbeff00c1,
+	0xbee80080, 0xbee90080,
+	0xbef600ff, 0x01000000,
+	0x867aff78, 0x00400000,
+	0xbf850003, 0xb8faf803,
+	0x897a7aff, 0x10000000,
+	0xbf85004d, 0xbe840080,
+	0xd2890000, 0x00000900,
 	0x80048104, 0xd2890001,
-	0x00000901, 0x80048104,
-	0xd2890002, 0x00000901,
+	0x00000900, 0x80048104,
+	0xd2890002, 0x00000900,
 	0x80048104, 0xd2890003,
-	0x00000901, 0x80048104,
+	0x00000900, 0x80048104,
 	0xc069003a, 0x00000070,
 	0xbf8cc07f, 0x80709070,
 	0xbf06c004, 0xbf84ffee,
 	0xbe840080, 0xd2890000,
-	0x00000902, 0x80048104,
-	0xd2890001, 0x00000902,
+	0x00000901, 0x80048104,
+	0xd2890001, 0x00000901,
 	0x80048104, 0xd2890002,
-	0x00000902, 0x80048104,
-	0xd2890003, 0x00000902,
+	0x00000901, 0x80048104,
+	0xd2890003, 0x00000901,
 	0x80048104, 0xc069003a,
 	0x00000070, 0xbf8cc07f,
 	0x80709070, 0xbf06c004,
 	0xbf84ffee, 0xbe840080,
-	0xd2890000, 0x00000903,
+	0xd2890000, 0x00000902,
 	0x80048104, 0xd2890001,
-	0x00000903, 0x80048104,
-	0xd2890002, 0x00000903,
+	0x00000902, 0x80048104,
+	0xd2890002, 0x00000902,
 	0x80048104, 0xd2890003,
-	0x00000903, 0x80048104,
+	0x00000902, 0x80048104,
 	0xc069003a, 0x00000070,
 	0xbf8cc07f, 0x80709070,
 	0xbf06c004, 0xbf84ffee,
-	0xbf820008, 0xe0724000,
-	0x701d0000, 0xe0724100,
-	0x701d0100, 0xe0724200,
-	0x701d0200, 0xe0724300,
-	0x701d0300, 0xbefe00c1,
-	0xbeff00c1, 0xb8fb4306,
-	0x867bc17b, 0xbf840063,
-	0xbf8a0000, 0x867aff6f,
-	0x04000000, 0xbf84005f,
-	0x8e7b867b, 0x8e7b827b,
-	0xbef6007b, 0xb8f02a05,
-	0x80708170, 0x8e708a70,
-	0xb8fa1605, 0x807a817a,
-	0x8e7a867a, 0x80707a70,
-	0x8070ff70, 0x00000080,
-	0xbef600ff, 0x01000000,
-	0xbefc0080, 0xd28c0002,
-	0x000100c1, 0xd28d0003,
-	0x000204c1, 0x867aff78,
-	0x00400000, 0xbf850003,
-	0xb8faf803, 0x897a7aff,
-	0x10000000, 0xbf850030,
-	0x24040682, 0xd86e4000,
-	0x00000002, 0xbf8cc07f,
 	0xbe840080, 0xd2890000,
-	0x00000900, 0x80048104,
-	0xd2890001, 0x00000900,
+	0x00000903, 0x80048104,
+	0xd2890001, 0x00000903,
 	0x80048104, 0xd2890002,
-	0x00000900, 0x80048104,
-	0xd2890003, 0x00000900,
+	0x00000903, 0x80048104,
+	0xd2890003, 0x00000903,
 	0x80048104, 0xc069003a,
 	0x00000070, 0xbf8cc07f,
 	0x80709070, 0xbf06c004,
-	0xbf84ffee, 0xbe840080,
-	0xd2890000, 0x00000901,
+	0xbf84ffee, 0xbf820008,
+	0xe0724000, 0x701d0000,
+	0xe0724100, 0x701d0100,
+	0xe0724200, 0x701d0200,
+	0xe0724300, 0x701d0300,
+	0xbefe00c1, 0xbeff00c1,
+	0xb8fb4306, 0x867bc17b,
+	0xbf840063, 0xbf8a0000,
+	0x867aff6f, 0x04000000,
+	0xbf84005f, 0x8e7b867b,
+	0x8e7b827b, 0xbef6007b,
+	0xb8f02a05, 0x80708170,
+	0x8e708a70, 0xb8fa1605,
+	0x807a817a, 0x8e7a867a,
+	0x80707a70, 0x8070ff70,
+	0x00000080, 0xbef600ff,
+	0x01000000, 0xbefc0080,
+	0xd28c0002, 0x000100c1,
+	0xd28d0003, 0x000204c1,
+	0x867aff78, 0x00400000,
+	0xbf850003, 0xb8faf803,
+	0x897a7aff, 0x10000000,
+	0xbf850030, 0x24040682,
+	0xd86e4000, 0x00000002,
+	0xbf8cc07f, 0xbe840080,
+	0xd2890000, 0x00000900,
 	0x80048104, 0xd2890001,
-	0x00000901, 0x80048104,
-	0xd2890002, 0x00000901,
+	0x00000900, 0x80048104,
+	0xd2890002, 0x00000900,
 	0x80048104, 0xd2890003,
-	0x00000901, 0x80048104,
+	0x00000900, 0x80048104,
 	0xc069003a, 0x00000070,
 	0xbf8cc07f, 0x80709070,
 	0xbf06c004, 0xbf84ffee,
-	0x680404ff, 0x00000200,
+	0xbe840080, 0xd2890000,
+	0x00000901, 0x80048104,
+	0xd2890001, 0x00000901,
+	0x80048104, 0xd2890002,
+	0x00000901, 0x80048104,
+	0xd2890003, 0x00000901,
+	0x80048104, 0xc069003a,
+	0x00000070, 0xbf8cc07f,
+	0x80709070, 0xbf06c004,
+	0xbf84ffee, 0x680404ff,
+	0x00000200, 0xd0c9006a,
+	0x0000f702, 0xbf87ffd2,
+	0xbf820015, 0xd1060002,
+	0x00011103, 0x7e0602ff,
+	0x00000200, 0xbefc00ff,
+	0x00010000, 0xbe800077,
+	0x8677ff77, 0xff7fffff,
+	0x8777ff77, 0x00058000,
+	0xd8ec0000, 0x00000002,
+	0xbf8cc07f, 0xe0765000,
+	0x701d0002, 0x68040702,
 	0xd0c9006a, 0x0000f702,
-	0xbf87ffd2, 0xbf820015,
-	0xd1060002, 0x00011103,
-	0x7e0602ff, 0x00000200,
-	0xbefc00ff, 0x00010000,
-	0xbe800077, 0x8677ff77,
-	0xff7fffff, 0x8777ff77,
-	0x00058000, 0xd8ec0000,
-	0x00000002, 0xbf8cc07f,
-	0xe0765000, 0x701d0002,
-	0x68040702, 0xd0c9006a,
-	0x0000f702, 0xbf87fff7,
-	0xbef70000, 0xbef000ff,
-	0x00000400, 0xbefe00c1,
-	0xbeff00c1, 0xb8fb2a05,
-	0x807b817b, 0x8e7b827b,
-	0x8e76887b, 0xbef600ff,
+	0xbf87fff7, 0xbef70000,
+	0xbef000ff, 0x00000400,
+	0xbefe00c1, 0xbeff00c1,
+	0xb8fb2a05, 0x807b817b,
+	0x8e7b827b, 0xbef600ff,
 	0x01000000, 0xbefc0084,
 	0xbf0a7b7c, 0xbf84006d,
 	0xbf11017c, 0x807bff7b,
@@ -566,15 +572,11 @@ static const uint32_t cwsr_trap_gfx9_hex[] = {
 	0x701d0300, 0x807c847c,
 	0x8070ff70, 0x00000400,
 	0xbf0a7b7c, 0xbf85ffef,
-	0xbf9c0000, 0xbf8200da,
+	0xbf9c0000, 0xbf8200c7,
 	0xbef4007e, 0x8675ff7f,
 	0x0000ffff, 0x8775ff75,
 	0x00040000, 0xbef60080,
 	0xbef700ff, 0x00807fac,
-	0x866eff7f, 0x08000000,
-	0x8f6e836e, 0x87776e77,
-	0x866eff7f, 0x70000000,
-	0x8f6e816e, 0x87776e77,
 	0x866eff7f, 0x04000000,
 	0xbf84001e, 0xbefe00c1,
 	0xbeff00c1, 0xb8ef4306,
@@ -591,34 +593,34 @@ static const uint32_t cwsr_trap_gfx9_hex[] = {
 	0x781d0000, 0x807cff7c,
 	0x00000200, 0x8078ff78,
 	0x00000200, 0xbf0a6f7c,
-	0xbf85fff6, 0xbef80080,
-	0xbefe00c1, 0xbeff00c1,
-	0xb8ef2a05, 0x806f816f,
-	0x8e6f826f, 0x8e76886f,
-	0xbef600ff, 0x01000000,
-	0xbeee0078, 0x8078ff78,
-	0x00000400, 0xbefc0084,
-	0xbf11087c, 0x806fff6f,
-	0x00008000, 0xe0524000,
-	0x781d0000, 0xe0524100,
-	0x781d0100, 0xe0524200,
-	0x781d0200, 0xe0524300,
-	0x781d0300, 0xbf8c0f70,
-	0x7e000300, 0x7e020301,
-	0x7e040302, 0x7e060303,
-	0x807c847c, 0x8078ff78,
-	0x00000400, 0xbf0a6f7c,
-	0xbf85ffee, 0xbf9c0000,
-	0xe0524000, 0x6e1d0000,
-	0xe0524100, 0x6e1d0100,
-	0xe0524200, 0x6e1d0200,
-	0xe0524300, 0x6e1d0300,
-	0xb8f82a05, 0x80788178,
-	0x8e788a78, 0xb8ee1605,
-	0x806e816e, 0x8e6e866e,
-	0x80786e78, 0x80f8c078,
-	0xb8ef1605, 0x806f816f,
-	0x8e6f846f, 0x8e76826f,
+	0xbf85fff6, 0xbefe00c1,
+	0xbeff00c1, 0xbef600ff,
+	0x01000000, 0xb8ef2a05,
+	0x806f816f, 0x8e6f826f,
+	0x806fff6f, 0x00008000,
+	0xbef80080, 0xbeee0078,
+	0x8078ff78, 0x00000400,
+	0xbefc0084, 0xbf11087c,
+	0xe0524000, 0x781d0000,
+	0xe0524100, 0x781d0100,
+	0xe0524200, 0x781d0200,
+	0xe0524300, 0x781d0300,
+	0xbf8c0f70, 0x7e000300,
+	0x7e020301, 0x7e040302,
+	0x7e060303, 0x807c847c,
+	0x8078ff78, 0x00000400,
+	0xbf0a6f7c, 0xbf85ffee,
+	0xbf9c0000, 0xe0524000,
+	0x6e1d0000, 0xe0524100,
+	0x6e1d0100, 0xe0524200,
+	0x6e1d0200, 0xe0524300,
+	0x6e1d0300, 0xbf8c0f70,
+	0xb8f82a05, 0x80788178,
+	0x8e788a78, 0xb8ee1605,
+	0x806e816e, 0x8e6e866e,
+	0x80786e78, 0x80f8c078,
+	0xb8ef1605, 0x806f816f,
+	0x8e6f846f, 0x8e76826f,
 	0xbef600ff, 0x01000000,
 	0xbefc006f, 0xc031003a,
 	0x00000078, 0x80f8c078,
@@ -663,90 +665,101 @@ static const uint32_t cwsr_trap_gfx9_hex[] = {
 	0xc00b1c37, 0x00000050,
 	0xc00b1d37, 0x00000060,
 	0xc0031e77, 0x00000074,
-	0xbf8cc07f, 0x866fff6d,
-	0xf8000000, 0x8f6f9b6f,
-	0x8e6f906f, 0xbeee0080,
-	0x876e6f6e, 0x866fff6d,
-	0x04000000, 0x8f6f9a6f,
-	0x8e6f8f6f, 0x876e6f6e,
-	0x866fff7a, 0x00800000,
-	0x8f6f976f, 0xb96ef807,
-	0x866dff6d, 0x0000ffff,
-	0x86fe7e7e, 0x86ea6a6a,
-	0x8f6e837a, 0xb96ee0c2,
-	0xbf800002, 0xb97a0002,
-	0xbf8a0000, 0x95806f6c,
-	0xbf810000, 0x00000000,
+	0xbf8cc07f, 0x8f6e8b77,
+	0x866eff6e, 0x001f8000,
+	0xb96ef807, 0x866dff6d,
+	0x0000ffff, 0x86fe7e7e,
+	0x86ea6a6a, 0x8f6e837a,
+	0xb96ee0c2, 0xbf800002,
+	0xb97a0002, 0xbf8a0000,
+	0xbe801f6c, 0xbf810000,
 };
 
 static const uint32_t cwsr_trap_nv1x_hex[] = {
-	0xbf820001, 0xbf8201cd,
+	0xbf820001, 0xbf8201f1,
 	0xb0804004, 0xb978f802,
-	0x8a788678, 0xb96ef801,
-	0x876eff6e, 0x00000800,
-	0xbf840003, 0x876eff78,
+	0x8a78ff78, 0x00020006,
+	0xb97bf803, 0x876eff78,
 	0x00002000, 0xbf840009,
-	0xb97bf803, 0x876eff7b,
-	0x00000400, 0xbf850033,
-	0x876eff7b, 0x00000100,
-	0xbf840002, 0x8878ff78,
-	0x00002000, 0x8a77ff77,
-	0xff000000, 0xb96ef807,
-	0x876fff6e, 0x02000000,
-	0x8f6f866f, 0x88776f77,
-	0x876fff6e, 0x003f8000,
-	0x8f6f896f, 0x88776f77,
-	0x8a6eff6e, 0x023f8000,
-	0xb9eef807, 0xb97af812,
+	0x876eff6d, 0x00ff0000,
+	0xbf85001e, 0x876eff7b,
+	0x00000400, 0xbf850057,
+	0xbf8e0010, 0xb97bf803,
+	0xbf82fffa, 0x876eff7b,
+	0x00000900, 0xbf850015,
+	0x876eff7b, 0x000071ff,
+	0xbf840008, 0x876fff7b,
+	0x00007080, 0xbf840001,
+	0xbeee1d87, 0xb96ff801,
+	0x8f6e8c6e, 0x876e6f6e,
+	0xbf85000a, 0x876eff6d,
+	0x00ff0000, 0xbf850007,
+	0xb96ef801, 0x876eff6e,
+	0x00000800, 0xbf850003,
+	0x876eff7b, 0x00000400,
+	0xbf85003c, 0x8a77ff77,
+	0xff000000, 0xb97af807,
+	0x877bff7a, 0x02000000,
+	0x8f7b867b, 0x88777b77,
+	0x877bff7a, 0x003f8000,
+	0x8f7b897b, 0x88777b77,
+	0x8a7aff7a, 0x023f8000,
+	0xb9faf807, 0xb97af812,
 	0xb97bf813, 0x8ffa887a,
-	0xf4051bbd, 0xfa000000,
-	0xbf8cc07f, 0xf4051ebd,
-	0xfa000008, 0xbf8cc07f,
-	0x87ee6e6e, 0xbf840001,
-	0xbe80206e, 0xb97bf803,
-	0x877bff7b, 0x000001ff,
+	0xf4011bbd, 0xfa000010,
+	0xbf8cc07f, 0x8f6e976e,
+	0x8a77ff77, 0x00800000,
+	0x88776e77, 0xf4051bbd,
+	0xfa000000, 0xbf8cc07f,
+	0xf4051ebd, 0xfa000008,
+	0xbf8cc07f, 0x87ee6e6e,
+	0xbf840001, 0xbe80206e,
+	0x876eff6d, 0x01ff0000,
+	0xbf850005, 0x8878ff78,
+	0x00002000, 0x80ec886c,
+	0x82ed806d, 0xbf820005,
+	0x876eff6d, 0x01000000,
 	0xbf850002, 0x806c846c,
 	0x826d806d, 0x876dff6d,
-	0x0000ffff, 0x906e8977,
-	0x876fff6e, 0x003f8000,
-	0x906e8677, 0x876eff6e,
-	0x02000000, 0x886e6f6e,
-	0xb9eef807, 0x87fe7e7e,
+	0x0000ffff, 0x907a8977,
+	0x877bff7a, 0x003f8000,
+	0x907a8677, 0x877aff7a,
+	0x02000000, 0x887a7b7a,
+	0xb9faf807, 0x87fe7e7e,
 	0x87ea6a6a, 0xb9f8f802,
 	0xbe80226c, 0x876dff6d,
 	0x0000ffff, 0xbefa0380,
-	0xb9fa0283, 0xb97a2c07,
-	0x8f7a9a7a, 0x886d7a6d,
-	0xb97a03c7, 0x8f7a997a,
-	0x886d7a6d, 0xb97a0647,
-	0x8f7a987a, 0x886d7a6d,
-	0xb97af807, 0x877aff7a,
-	0x00007fff, 0xb9faf807,
-	0xbeee037e, 0xbeef037f,
-	0xbefe0480, 0xbf900004,
-	0xbf8e0002, 0xbf88fffe,
-	0xb97b02dc, 0x8f7b997b,
-	0x887b7b7f, 0xb97a2a05,
+	0xb9fa0283, 0x8a77ff77,
+	0xff000000, 0xb97af807,
+	0x877bff7a, 0x02000000,
+	0x8f7b867b, 0x88777b77,
+	0x877bff7a, 0x003f8000,
+	0x8f7b897b, 0x88777b77,
+	0x8a7aff7a, 0x023f8000,
+	0xb9faf807, 0xbeee037e,
+	0xbeef037f, 0xbefe0480,
+	0xbf900004, 0xbf8e0002,
+	0xbf88fffe, 0x877aff7f,
+	0x04000000, 0x8f7a857a,
+	0x886d7a6d, 0xb97b02dc,
+	0x8f7b997b, 0xb97a2a05,
 	0x807a817a, 0xbf0d997b,
 	0xbf850002, 0x8f7a897a,
 	0xbf820001, 0x8f7a8a7a,
-	0x877bff7f, 0x0000ffff,
-	0x807aff7a, 0x00000200,
-	0x807a7e7a, 0x827b807b,
-	0xf4491c3d, 0xfa000050,
-	0xf4491d3d, 0xfa000060,
-	0xf4411e7d, 0xfa000074,
-	0xbef4037e, 0x8775ff7f,
-	0x0000ffff, 0x8875ff75,
-	0x00040000, 0xbef60380,
-	0xbef703ff, 0x10807fac,
-	0x877aff7f, 0x08000000,
-	0x907a837a, 0x88777a77,
-	0x877aff7f, 0x70000000,
-	0x907a817a, 0x88777a77,
-	0xbef1037c, 0xbef00380,
-	0xb97302dc, 0x8f739973,
-	0x8873737f, 0xb97bf816,
+	0xb97b1e06, 0x8f7b8a7b,
+	0x807a7b7a, 0x877bff7f,
+	0x0000ffff, 0x807aff7a,
+	0x00000200, 0x807a7e7a,
+	0x827b807b, 0xf4491c3d,
+	0xfa000050, 0xf4491d3d,
+	0xfa000060, 0xf4411e7d,
+	0xfa000074, 0xbef4037e,
+	0x8775ff7f, 0x0000ffff,
+	0x8875ff75, 0x00040000,
+	0xbef60380, 0xbef703ff,
+	0x10807fac, 0xbef1037c,
+	0xbef00380, 0xb97302dc,
+	0x8f739973, 0xb97bf816,
 	0xba80f816, 0x00000000,
 	0xbefe03c1, 0x907c9973,
 	0x877c817c, 0xbf06817c,
@@ -776,8 +789,9 @@ static const uint32_t cwsr_trap_nv1x_hex[] = {
 	0xbefe037c, 0xbefc0370,
 	0xf4611b3a, 0xf8000000,
 	0x80708470, 0xbefc037e,
+	0x8a7aff6d, 0x80000000,
 	0xbefe037c, 0xbefc0370,
-	0xf4611b7a, 0xf8000000,
+	0xf4611eba, 0xf8000000,
 	0x80708470, 0xbefc037e,
 	0xbefe037c, 0xbefc0370,
 	0xf4611bba, 0xf8000000,
@@ -838,7 +852,7 @@ static const uint32_t cwsr_trap_nv1x_hex[] = {
 	0xbf820001, 0xbeff03c1,
 	0xb97b4306, 0x877bc17b,
 	0xbf840044, 0xbf8a0000,
-	0x877aff73, 0x04000000,
+	0x877aff6d, 0x80000000,
 	0xbf840040, 0x8f7b867b,
 	0x8f7b827b, 0xbef6037b,
 	0xb9702a05, 0x80708170,
@@ -894,7 +908,7 @@ static const uint32_t cwsr_trap_nv1x_hex[] = {
 	0xbf0a7b7c, 0xbf85ffef,
 	0xbf820025, 0xbef603ff,
 	0x01000000, 0xbefc0384,
-	0xbf0a7b7c, 0xbf840020,
+	0xbf0a7b7c, 0xbf840011,
 	0x7e008700, 0x7e028701,
 	0x7e048702, 0x7e068703,
 	0xe0704000, 0x705d0000,
@@ -911,71 +925,69 @@ static const uint32_t cwsr_trap_nv1x_hex[] = {
 	0x705d0000, 0x807c817c,
 	0x8070ff70, 0x00000080,
 	0xbf0a7b7c, 0xbf85fff8,
-	0xbf820151, 0xbef4037e,
+	0xbf820144, 0xbef4037e,
 	0x8775ff7f, 0x0000ffff,
 	0x8875ff75, 0x00040000,
 	0xbef60380, 0xbef703ff,
-	0x10807fac, 0x876eff7f,
-	0x08000000, 0x906e836e,
-	0x88776e77, 0x876eff7f,
-	0x70000000, 0x906e816e,
-	0x88776e77, 0xb97202dc,
-	0x8f729972, 0x8872727f,
-	0x876eff7f, 0x04000000,
-	0xbf840034, 0xbefe03c1,
-	0x907c9972, 0x877c817c,
-	0xbf06817c, 0xbf850002,
-	0xbeff0380, 0xbf820001,
-	0xbeff03c1, 0xb96f4306,
-	0x876fc16f, 0xbf840029,
-	0x8f6f866f, 0x8f6f826f,
-	0xbef6036f, 0xb9782a05,
-	0x80788178, 0xbf0d9972,
-	0xbf850002, 0x8f788978,
-	0xbf820001, 0x8f788a78,
-	0xb96e1e06, 0x8f6e8a6e,
-	0x80786e78, 0x8078ff78,
-	0x00000200, 0x8078ff78,
-	0x00000080, 0xbef603ff,
-	0x01000000, 0x907c9972,
-	0x877c817c, 0xbf06817c,
-	0xbefc0380, 0xbf850009,
-	0xe0310000, 0x781d0000,
-	0x807cff7c, 0x00000080,
-	0x8078ff78, 0x00000080,
-	0xbf0a6f7c, 0xbf85fff8,
-	0xbf820008, 0xe0310000,
-	0x781d0000, 0x807cff7c,
-	0x00000100, 0x8078ff78,
-	0x00000100, 0xbf0a6f7c,
-	0xbf85fff8, 0xbef80380,
+	0x10807fac, 0xb97202dc,
+	0x8f729972, 0x876eff7f,
+	0x04000000, 0xbf840034,
 	0xbefe03c1, 0x907c9972,
 	0x877c817c, 0xbf06817c,
 	0xbf850002, 0xbeff0380,
 	0xbf820001, 0xbeff03c1,
-	0xb96f2a05, 0x806f816f,
-	0x8f6f826f, 0x907c9972,
-	0x877c817c, 0xbf06817c,
-	0xbf850021, 0xbef603ff,
-	0x01000000, 0xbeee0378,
+	0xb96f4306, 0x876fc16f,
+	0xbf840029, 0x8f6f866f,
+	0x8f6f826f, 0xbef6036f,
+	0xb9782a05, 0x80788178,
+	0xbf0d9972, 0xbf850002,
+	0x8f788978, 0xbf820001,
+	0x8f788a78, 0xb96e1e06,
+	0x8f6e8a6e, 0x80786e78,
 	0x8078ff78, 0x00000200,
-	0xbefc0384, 0xe0304000,
-	0x785d0000, 0xe0304080,
-	0x785d0100, 0xe0304100,
-	0x785d0200, 0xe0304180,
-	0x785d0300, 0xbf8c3f70,
-	0x7e008500, 0x7e028501,
-	0x7e048502, 0x7e068503,
-	0x807c847c, 0x8078ff78,
-	0x00000200, 0xbf0a6f7c,
-	0xbf85ffee, 0xe0304000,
-	0x6e5d0000, 0xe0304080,
-	0x6e5d0100, 0xe0304100,
-	0x6e5d0200, 0xe0304180,
-	0x6e5d0300, 0xbf820032,
+	0x8078ff78, 0x00000080,
+	0xbef603ff, 0x01000000,
+	0x907c9972, 0x877c817c,
+	0xbf06817c, 0xbefc0380,
+	0xbf850009, 0xe0310000,
+	0x781d0000, 0x807cff7c,
+	0x00000080, 0x8078ff78,
+	0x00000080, 0xbf0a6f7c,
+	0xbf85fff8, 0xbf820008,
+	0xe0310000, 0x781d0000,
+	0x807cff7c, 0x00000100,
+	0x8078ff78, 0x00000100,
+	0xbf0a6f7c, 0xbf85fff8,
+	0xbef80380, 0xbefe03c1,
+	0x907c9972, 0x877c817c,
+	0xbf06817c, 0xbf850002,
+	0xbeff0380, 0xbf820001,
+	0xbeff03c1, 0xb96f2a05,
+	0x806f816f, 0x8f6f826f,
+	0x907c9972, 0x877c817c,
+	0xbf06817c, 0xbf850024,
+	0xbef603ff, 0x01000000,
+	0xbeee0378, 0x8078ff78,
+	0x00000200, 0xbefc0384,
+	0xbf0a6f7c, 0xbf840050,
+	0xe0304000, 0x785d0000,
+	0xe0304080, 0x785d0100,
+	0xe0304100, 0x785d0200,
+	0xe0304180, 0x785d0300,
+	0xbf8c3f70, 0x7e008500,
+	0x7e028501, 0x7e048502,
+	0x7e068503, 0x807c847c,
+	0x8078ff78, 0x00000200,
+	0xbf0a6f7c, 0xbf85ffee,
+	0xe0304000, 0x6e5d0000,
+	0xe0304080, 0x6e5d0100,
+	0xe0304100, 0x6e5d0200,
+	0xe0304180, 0x6e5d0300,
+	0xbf8c3f70, 0xbf820034,
 	0xbef603ff, 0x01000000,
 	0xbeee0378, 0x8078ff78,
 	0x00000400, 0xbefc0384,
+	0xbf0a6f7c, 0xbf840012,
 	0xe0304000, 0x785d0000,
 	0xe0304100, 0x785d0100,
 	0xe0304200, 0x785d0200,
@@ -1060,270 +1072,272 @@ static const uint32_t cwsr_trap_nv1x_hex[] = {
 	0xb96e2a05, 0x806e816e,
 	0xbf0d9972, 0xbf850002,
 	0x8f6e896e, 0xbf820001,
-	0x8f6e8a6e, 0x806eff6e,
-	0x00000200, 0x806e746e,
-	0x826f8075, 0x876fff6f,
-	0x0000ffff, 0xf4091c37,
-	0xfa000050, 0xf4091d37,
-	0xfa000060, 0xf4011e77,
-	0xfa000074, 0xbf8cc07f,
-	0x876fff6d, 0xfc000000,
-	0x906f9a6f, 0x8f6f906f,
-	0xbeee0380, 0x886e6f6e,
-	0x876fff6d, 0x02000000,
-	0x906f996f, 0x8f6f8f6f,
-	0x886e6f6e, 0x876fff6d,
-	0x01000000, 0x906f986f,
-	0x8f6f996f, 0x886e6f6e,
-	0x876fff7a, 0x00800000,
-	0x906f976f, 0xb9eef807,
-	0x876dff6d, 0x0000ffff,
-	0x87fe7e7e, 0x87ea6a6a,
-	0xb9faf802, 0xbe80226c,
-	0xbf810000, 0xbf9f0000,
+	0x8f6e8a6e, 0xb96f1e06,
+	0x8f6f8a6f, 0x806e6f6e,
+	0x806eff6e, 0x00000200,
+	0x806e746e, 0x826f8075,
+	0x876fff6f, 0x0000ffff,
+	0xf4091c37, 0xfa000050,
+	0xf4091d37, 0xfa000060,
+	0xf4011e77, 0xfa000074,
+	0xbf8cc07f, 0x906e8977,
+	0x876fff6e, 0x003f8000,
+	0x906e8677, 0x876eff6e,
+	0x02000000, 0x886e6f6e,
+	0xb9eef807, 0x876dff6d,
+	0x0000ffff, 0x87fe7e7e,
+	0x87ea6a6a, 0xb9faf802,
+	0xbe80226c, 0xbf810000,
 	0xbf9f0000, 0xbf9f0000,
 	0xbf9f0000, 0xbf9f0000,
+	0xbf9f0000, 0x00000000,
 };
 
 static const uint32_t cwsr_trap_arcturus_hex[] = {
-	0xbf820001, 0xbf8202c4,
-	0xb8f8f802, 0x89788678,
-	0xb8eef801, 0x866eff6e,
-	0x00000800, 0xbf840003,
+	0xbf820001, 0xbf8202d0,
+	0xb8f8f802, 0x8978ff78,
+	0x00020006, 0xb8fbf803,
 	0x866eff78, 0x00002000,
-	0xbf840016, 0xb8fbf803,
+	0xbf840009, 0x866eff6d,
+	0x00ff0000, 0xbf85001e,
 	0x866eff7b, 0x00000400,
-	0xbf85003b, 0x866eff7b,
-	0x00000800, 0xbf850003,
-	0x866eff7b, 0x00000100,
-	0xbf84000c, 0x866eff78,
-	0x00002000, 0xbf840005,
-	0xbf8e0010, 0xb8eef803,
-	0x866eff6e, 0x00000400,
-	0xbf84fffb, 0x8778ff78,
-	0x00002000, 0x80ec886c,
-	0x82ed806d, 0xb8eef807,
-	0x866fff6e, 0x001f8000,
-	0x8e6f8b6f, 0x8977ff77,
-	0xfc000000, 0x87776f77,
-	0x896eff6e, 0x001f8000,
-	0xb96ef807, 0xb8faf812,
+	0xbf850051, 0xbf8e0010,
+	0xb8fbf803, 0xbf82fffa,
+	0x866eff7b, 0x00000900,
+	0xbf850015, 0x866eff7b,
+	0x000071ff, 0xbf840008,
+	0x866fff7b, 0x00007080,
+	0xbf840001, 0xbeee1a87,
+	0xb8eff801, 0x8e6e8c6e,
+	0x866e6f6e, 0xbf85000a,
+	0x866eff6d, 0x00ff0000,
+	0xbf850007, 0xb8eef801,
+	0x866eff6e, 0x00000800,
+	0xbf850003, 0x866eff7b,
+	0x00000400, 0xbf850036,
+	0xb8faf807, 0x867aff7a,
+	0x001f8000, 0x8e7a8b7a,
+	0x8977ff77, 0xfc000000,
+	0x87777a77, 0xba7ff807,
+	0x00000000, 0xb8faf812,
 	0xb8fbf813, 0x8efa887a,
-	0xc0071bbd, 0x00000000,
-	0xbf8cc07f, 0xc0071ebd,
-	0x00000008, 0xbf8cc07f,
-	0x86ee6e6e, 0xbf840001,
-	0xbe801d6e, 0xb8fbf803,
-	0x867bff7b, 0x000001ff,
+	0xc0031bbd, 0x00000010,
+	0xbf8cc07f, 0x8e6e976e,
+	0x8977ff77, 0x00800000,
+	0x87776e77, 0xc0071bbd,
+	0x00000000, 0xbf8cc07f,
+	0xc0071ebd, 0x00000008,
+	0xbf8cc07f, 0x86ee6e6e,
+	0xbf840001, 0xbe801d6e,
+	0x866eff6d, 0x01ff0000,
+	0xbf850005, 0x8778ff78,
+	0x00002000, 0x80ec886c,
+	0x82ed806d, 0xbf820005,
+	0x866eff6d, 0x01000000,
 	0xbf850002, 0x806c846c,
 	0x826d806d, 0x866dff6d,
-	0x0000ffff, 0x8f6e8b77,
-	0x866eff6e, 0x001f8000,
-	0xb96ef807, 0x86fe7e7e,
+	0x0000ffff, 0x8f7a8b77,
+	0x867aff7a, 0x001f8000,
+	0xb97af807, 0x86fe7e7e,
 	0x86ea6a6a, 0x8f6e8378,
 	0xb96ee0c2, 0xbf800002,
 	0xb9780002, 0xbe801f6c,
 	0x866dff6d, 0x0000ffff,
 	0xbefa0080, 0xb97a0283,
-	0xb8fa2407, 0x8e7a9b7a,
-	0x876d7a6d, 0xb8fa03c7,
-	0x8e7a9a7a, 0x876d7a6d,
 	0xb8faf807, 0x867aff7a,
-	0x00007fff, 0xb97af807,
-	0xbeee007e, 0xbeef007f,
-	0xbefe0180, 0xbf900004,
-	0x877a8478, 0xb97af802,
-	0xbf8e0002, 0xbf88fffe,
-	0xb8fa2a05, 0x807a817a,
-	0x8e7a8a7a, 0x8e7a817a,
-	0xb8fb1605, 0x807b817b,
-	0x8e7b867b, 0x807a7b7a,
-	0x807a7e7a, 0x827b807f,
-	0x867bff7b, 0x0000ffff,
-	0xc04b1c3d, 0x00000050,
-	0xbf8cc07f, 0xc04b1d3d,
-	0x00000060, 0xbf8cc07f,
-	0xc0431e7d, 0x00000074,
-	0xbf8cc07f, 0xbef4007e,
-	0x8675ff7f, 0x0000ffff,
-	0x8775ff75, 0x00040000,
-	0xbef60080, 0xbef700ff,
-	0x00807fac, 0x867aff7f,
-	0x08000000, 0x8f7a837a,
-	0x87777a77, 0x867aff7f,
-	0x70000000, 0x8f7a817a,
-	0x87777a77, 0xbef1007c,
-	0xbef00080, 0xb8f02a05,
-	0x80708170, 0x8e708a70,
-	0x8e708170, 0xb8fa1605,
-	0x807a817a, 0x8e7a867a,
-	0x80707a70, 0xbef60084,
-	0xbef600ff, 0x01000000,
-	0xbefe007c, 0xbefc0070,
-	0xc0611c7a, 0x0000007c,
-	0xbf8cc07f, 0x80708470,
-	0xbefc007e, 0xbefe007c,
-	0xbefc0070, 0xc0611b3a,
+	0x001f8000, 0x8e7a8b7a,
+	0x8977ff77, 0xfc000000,
+	0x87777a77, 0xba7ff807,
+	0x00000000, 0xbeee007e,
+	0xbeef007f, 0xbefe0180,
+	0xbf900004, 0x877a8478,
+	0xb97af802, 0xbf8e0002,
+	0xbf88fffe, 0xb8fa2a05,
+	0x807a817a, 0x8e7a8a7a,
+	0x8e7a817a, 0xb8fb1605,
+	0x807b817b, 0x8e7b867b,
+	0x807a7b7a, 0x807a7e7a,
+	0x827b807f, 0x867bff7b,
+	0x0000ffff, 0xc04b1c3d,
+	0x00000050, 0xbf8cc07f,
+	0xc04b1d3d, 0x00000060,
+	0xbf8cc07f, 0xc0431e7d,
+	0x00000074, 0xbf8cc07f,
+	0xbef4007e, 0x8675ff7f,
+	0x0000ffff, 0x8775ff75,
+	0x00040000, 0xbef60080,
+	0xbef700ff, 0x00807fac,
+	0xbef1007c, 0xbef00080,
+	0xb8f02a05, 0x80708170,
+	0x8e708a70, 0x8e708170,
+	0xb8fa1605, 0x807a817a,
+	0x8e7a867a, 0x80707a70,
+	0xbef60084, 0xbef600ff,
+	0x01000000, 0xbefe007c,
+	0xbefc0070, 0xc0611c7a,
 	0x0000007c, 0xbf8cc07f,
 	0x80708470, 0xbefc007e,
 	0xbefe007c, 0xbefc0070,
-	0xc0611b7a, 0x0000007c,
+	0xc0611b3a, 0x0000007c,
 	0xbf8cc07f, 0x80708470,
 	0xbefc007e, 0xbefe007c,
-	0xbefc0070, 0xc0611bba,
+	0xbefc0070, 0xc0611b7a,
 	0x0000007c, 0xbf8cc07f,
 	0x80708470, 0xbefc007e,
 	0xbefe007c, 0xbefc0070,
-	0xc0611bfa, 0x0000007c,
+	0xc0611bba, 0x0000007c,
 	0xbf8cc07f, 0x80708470,
 	0xbefc007e, 0xbefe007c,
-	0xbefc0070, 0xc0611e3a,
-	0x0000007c, 0xbf8cc07f,
-	0x80708470, 0xbefc007e,
-	0xb8fbf803, 0xbefe007c,
-	0xbefc0070, 0xc0611efa,
+	0xbefc0070, 0xc0611bfa,
 	0x0000007c, 0xbf8cc07f,
 	0x80708470, 0xbefc007e,
 	0xbefe007c, 0xbefc0070,
-	0xc0611a3a, 0x0000007c,
+	0xc0611e3a, 0x0000007c,
+	0xbf8cc07f, 0x80708470,
+	0xbefc007e, 0xb8fbf803,
+	0xbefe007c, 0xbefc0070,
+	0xc0611efa, 0x0000007c,
 	0xbf8cc07f, 0x80708470,
 	0xbefc007e, 0xbefe007c,
-	0xbefc0070, 0xc0611a7a,
+	0xbefc0070, 0xc0611a3a,
 	0x0000007c, 0xbf8cc07f,
 	0x80708470, 0xbefc007e,
-	0xb8f1f801, 0xbefe007c,
-	0xbefc0070, 0xc0611c7a,
-	0x0000007c, 0xbf8cc07f,
-	0x80708470, 0xbefc007e,
-	0x867aff7f, 0x04000000,
-	0xbeef0080, 0x876f6f7a,
-	0xb8f02a05, 0x80708170,
-	0x8e708a70, 0x8e708170,
-	0xb8fb1605, 0x807b817b,
-	0x8e7b847b, 0x8e76827b,
-	0xbef600ff, 0x01000000,
-	0xbef20174, 0x80747074,
-	0x82758075, 0xbefc0080,
-	0xbf800000, 0xbe802b00,
-	0xbe822b02, 0xbe842b04,
-	0xbe862b06, 0xbe882b08,
-	0xbe8a2b0a, 0xbe8c2b0c,
-	0xbe8e2b0e, 0xc06b003a,
-	0x00000000, 0xbf8cc07f,
-	0xc06b013a, 0x00000010,
-	0xbf8cc07f, 0xc06b023a,
-	0x00000020, 0xbf8cc07f,
-	0xc06b033a, 0x00000030,
-	0xbf8cc07f, 0x8074c074,
-	0x82758075, 0x807c907c,
-	0xbf0a7b7c, 0xbf85ffe7,
-	0xbef40172, 0xbef00080,
-	0xbefe00c1, 0xbeff00c1,
-	0xbee80080, 0xbee90080,
-	0xbef600ff, 0x01000000,
-	0x867aff78, 0x00400000,
-	0xbf850003, 0xb8faf803,
-	0x897a7aff, 0x10000000,
-	0xbf85004d, 0xbe840080,
-	0xd2890000, 0x00000900,
-	0x80048104, 0xd2890001,
+	0xbefe007c, 0xbefc0070,
+	0xc0611a7a, 0x0000007c,
+	0xbf8cc07f, 0x80708470,
+	0xbefc007e, 0xb8f1f801,
+	0xbefe007c, 0xbefc0070,
+	0xc0611c7a, 0x0000007c,
+	0xbf8cc07f, 0x80708470,
+	0xbefc007e, 0x867aff7f,
+	0x04000000, 0xbeef0080,
+	0x876f6f7a, 0xb8f02a05,
+	0x80708170, 0x8e708a70,
+	0x8e708170, 0xb8fb1605,
+	0x807b817b, 0x8e7b847b,
+	0x8e76827b, 0xbef600ff,
+	0x01000000, 0xbef20174,
+	0x80747074, 0x82758075,
+	0xbefc0080, 0xbf800000,
+	0xbe802b00, 0xbe822b02,
+	0xbe842b04, 0xbe862b06,
+	0xbe882b08, 0xbe8a2b0a,
+	0xbe8c2b0c, 0xbe8e2b0e,
+	0xc06b003a, 0x00000000,
+	0xbf8cc07f, 0xc06b013a,
+	0x00000010, 0xbf8cc07f,
+	0xc06b023a, 0x00000020,
+	0xbf8cc07f, 0xc06b033a,
+	0x00000030, 0xbf8cc07f,
+	0x8074c074, 0x82758075,
+	0x807c907c, 0xbf0a7b7c,
+	0xbf85ffe7, 0xbef40172,
+	0xbef00080, 0xbefe00c1,
+	0xbeff00c1, 0xbee80080,
+	0xbee90080, 0xbef600ff,
+	0x01000000, 0x867aff78,
+	0x00400000, 0xbf850003,
+	0xb8faf803, 0x897a7aff,
+	0x10000000, 0xbf85004d,
+	0xbe840080, 0xd2890000,
 	0x00000900, 0x80048104,
-	0xd2890002, 0x00000900,
-	0x80048104, 0xd2890003,
+	0xd2890001, 0x00000900,
+	0x80048104, 0xd2890002,
 	0x00000900, 0x80048104,
-	0xc069003a, 0x00000070,
-	0xbf8cc07f, 0x80709070,
-	0xbf06c004, 0xbf84ffee,
-	0xbe840080, 0xd2890000,
-	0x00000901, 0x80048104,
-	0xd2890001, 0x00000901,
-	0x80048104, 0xd2890002,
-	0x00000901, 0x80048104,
-	0xd2890003, 0x00000901,
+	0xd2890003, 0x00000900,
 	0x80048104, 0xc069003a,
 	0x00000070, 0xbf8cc07f,
 	0x80709070, 0xbf06c004,
 	0xbf84ffee, 0xbe840080,
-	0xd2890000, 0x00000902,
+	0xd2890000, 0x00000901,
 	0x80048104, 0xd2890001,
-	0x00000902, 0x80048104,
-	0xd2890002, 0x00000902,
+	0x00000901, 0x80048104,
+	0xd2890002, 0x00000901,
 	0x80048104, 0xd2890003,
-	0x00000902, 0x80048104,
+	0x00000901, 0x80048104,
 	0xc069003a, 0x00000070,
 	0xbf8cc07f, 0x80709070,
 	0xbf06c004, 0xbf84ffee,
 	0xbe840080, 0xd2890000,
-	0x00000903, 0x80048104,
-	0xd2890001, 0x00000903,
-	0x80048104, 0xd2890002,
-	0x00000903, 0x80048104,
-	0xd2890003, 0x00000903,
-	0x80048104, 0xc069003a,
-	0x00000070, 0xbf8cc07f,
-	0x80709070, 0xbf06c004,
-	0xbf84ffee, 0xbf820008,
-	0xe0724000, 0x701d0000,
-	0xe0724100, 0x701d0100,
-	0xe0724200, 0x701d0200,
-	0xe0724300, 0x701d0300,
-	0xbefe00c1, 0xbeff00c1,
-	0xb8fb4306, 0x867bc17b,
-	0xbf840064, 0xbf8a0000,
-	0x867aff6f, 0x04000000,
-	0xbf840060, 0x8e7b867b,
-	0x8e7b827b, 0xbef6007b,
-	0xb8f02a05, 0x80708170,
-	0x8e708a70, 0x8e708170,
-	0xb8fa1605, 0x807a817a,
-	0x8e7a867a, 0x80707a70,
-	0x8070ff70, 0x00000080,
-	0xbef600ff, 0x01000000,
-	0xbefc0080, 0xd28c0002,
-	0x000100c1, 0xd28d0003,
-	0x000204c1, 0x867aff78,
-	0x00400000, 0xbf850003,
-	0xb8faf803, 0x897a7aff,
-	0x10000000, 0xbf850030,
-	0x24040682, 0xd86e4000,
-	0x00000002, 0xbf8cc07f,
-	0xbe840080, 0xd2890000,
-	0x00000900, 0x80048104,
-	0xd2890001, 0x00000900,
+	0x00000902, 0x80048104,
+	0xd2890001, 0x00000902,
 	0x80048104, 0xd2890002,
-	0x00000900, 0x80048104,
-	0xd2890003, 0x00000900,
+	0x00000902, 0x80048104,
+	0xd2890003, 0x00000902,
 	0x80048104, 0xc069003a,
 	0x00000070, 0xbf8cc07f,
 	0x80709070, 0xbf06c004,
 	0xbf84ffee, 0xbe840080,
-	0xd2890000, 0x00000901,
+	0xd2890000, 0x00000903,
 	0x80048104, 0xd2890001,
-	0x00000901, 0x80048104,
-	0xd2890002, 0x00000901,
+	0x00000903, 0x80048104,
+	0xd2890002, 0x00000903,
 	0x80048104, 0xd2890003,
-	0x00000901, 0x80048104,
+	0x00000903, 0x80048104,
+	0xc069003a, 0x00000070,
+	0xbf8cc07f, 0x80709070,
+	0xbf06c004, 0xbf84ffee,
+	0xbf820008, 0xe0724000,
+	0x701d0000, 0xe0724100,
+	0x701d0100, 0xe0724200,
+	0x701d0200, 0xe0724300,
+	0x701d0300, 0xbefe00c1,
+	0xbeff00c1, 0xb8fb4306,
+	0x867bc17b, 0xbf840064,
+	0xbf8a0000, 0x867aff6f,
+	0x04000000, 0xbf840060,
+	0x8e7b867b, 0x8e7b827b,
+	0xbef6007b, 0xb8f02a05,
+	0x80708170, 0x8e708a70,
+	0x8e708170, 0xb8fa1605,
+	0x807a817a, 0x8e7a867a,
+	0x80707a70, 0x8070ff70,
+	0x00000080, 0xbef600ff,
+	0x01000000, 0xbefc0080,
+	0xd28c0002, 0x000100c1,
+	0xd28d0003, 0x000204c1,
+	0x867aff78, 0x00400000,
+	0xbf850003, 0xb8faf803,
+	0x897a7aff, 0x10000000,
+	0xbf850030, 0x24040682,
+	0xd86e4000, 0x00000002,
+	0xbf8cc07f, 0xbe840080,
+	0xd2890000, 0x00000900,
+	0x80048104, 0xd2890001,
+	0x00000900, 0x80048104,
+	0xd2890002, 0x00000900,
+	0x80048104, 0xd2890003,
+	0x00000900, 0x80048104,
 	0xc069003a, 0x00000070,
 	0xbf8cc07f, 0x80709070,
 	0xbf06c004, 0xbf84ffee,
-	0x680404ff, 0x00000200,
+	0xbe840080, 0xd2890000,
+	0x00000901, 0x80048104,
+	0xd2890001, 0x00000901,
+	0x80048104, 0xd2890002,
+	0x00000901, 0x80048104,
+	0xd2890003, 0x00000901,
+	0x80048104, 0xc069003a,
+	0x00000070, 0xbf8cc07f,
+	0x80709070, 0xbf06c004,
+	0xbf84ffee, 0x680404ff,
+	0x00000200, 0xd0c9006a,
+	0x0000f702, 0xbf87ffd2,
+	0xbf820015, 0xd1060002,
+	0x00011103, 0x7e0602ff,
+	0x00000200, 0xbefc00ff,
+	0x00010000, 0xbe800077,
+	0x8677ff77, 0xff7fffff,
+	0x8777ff77, 0x00058000,
+	0xd8ec0000, 0x00000002,
+	0xbf8cc07f, 0xe0765000,
+	0x701d0002, 0x68040702,
 	0xd0c9006a, 0x0000f702,
-	0xbf87ffd2, 0xbf820015,
-	0xd1060002, 0x00011103,
-	0x7e0602ff, 0x00000200,
-	0xbefc00ff, 0x00010000,
-	0xbe800077, 0x8677ff77,
-	0xff7fffff, 0x8777ff77,
-	0x00058000, 0xd8ec0000,
-	0x00000002, 0xbf8cc07f,
-	0xe0765000, 0x701d0002,
-	0x68040702, 0xd0c9006a,
-	0x0000f702, 0xbf87fff7,
-	0xbef70000, 0xbef000ff,
-	0x00000400, 0xbefe00c1,
-	0xbeff00c1, 0xb8fb2a05,
-	0x807b817b, 0x8e7b827b,
-	0x8e76887b, 0xbef600ff,
+	0xbf87fff7, 0xbef70000,
+	0xbef000ff, 0x00000400,
+	0xbefe00c1, 0xbeff00c1,
+	0xb8fb2a05, 0x807b817b,
+	0x8e7b827b, 0xbef600ff,
 	0x01000000, 0xbefc0084,
 	0xbf0a7b7c, 0xbf84006d,
 	0xbf11017c, 0x807bff7b,
@@ -1440,15 +1454,11 @@ static const uint32_t cwsr_trap_arcturus_hex[] = {
 	0x701d0300, 0x807c847c,
 	0x8070ff70, 0x00000400,
 	0xbf0a7b7c, 0xbf85ffeb,
-	0xbf9c0000, 0xbf820106,
+	0xbf9c0000, 0xbf8200e3,
 	0xbef4007e, 0x8675ff7f,
 	0x0000ffff, 0x8775ff75,
 	0x00040000, 0xbef60080,
 	0xbef700ff, 0x00807fac,
-	0x866eff7f, 0x08000000,
-	0x8f6e836e, 0x87776e77,
-	0x866eff7f, 0x70000000,
-	0x8f6e816e, 0x87776e77,
 	0x866eff7f, 0x04000000,
 	0xbf84001f, 0xbefe00c1,
 	0xbeff00c1, 0xb8ef4306,
@@ -1466,26 +1476,14 @@ static const uint32_t cwsr_trap_arcturus_hex[] = {
 	0x807cff7c, 0x00000200,
 	0x8078ff78, 0x00000200,
 	0xbf0a6f7c, 0xbf85fff6,
-	0xbef80080, 0xbefe00c1,
-	0xbeff00c1, 0xb8ef2a05,
-	0x806f816f, 0x8e6f826f,
-	0x8e76886f, 0xbef90076,
+	0xbefe00c1, 0xbeff00c1,
 	0xbef600ff, 0x01000000,
+	0xb8ef2a05, 0x806f816f,
+	0x8e6f826f, 0x806fff6f,
+	0x00008000, 0xbef80080,
 	0xbeee0078, 0x8078ff78,
-	0x00000400, 0xbef30079,
-	0x8079ff79, 0x00000400,
-	0xbefc0084, 0xbf11087c,
-	0x806fff6f, 0x00008000,
-	0xe0524000, 0x791d0000,
-	0xe0524100, 0x791d0100,
-	0xe0524200, 0x791d0200,
-	0xe0524300, 0x791d0300,
-	0x8079ff79, 0x00000400,
-	0xbf8c0f70, 0xd3d94000,
-	0x18000100, 0xd3d94001,
-	0x18000101, 0xd3d94002,
-	0x18000102, 0xd3d94003,
-	0x18000103, 0xe0524000,
+	0x00000400, 0xbefc0084,
+	0xbf11087c, 0xe0524000,
 	0x781d0000, 0xe0524100,
 	0x781d0100, 0xe0524200,
 	0x781d0200, 0xe0524300,
@@ -1494,20 +1492,24 @@ static const uint32_t cwsr_trap_arcturus_hex[] = {
 	0x7e040302, 0x7e060303,
 	0x807c847c, 0x8078ff78,
 	0x00000400, 0xbf0a6f7c,
-	0xbf85ffdb, 0xbf9c0000,
-	0xe0524000, 0x731d0000,
-	0xe0524100, 0x731d0100,
-	0xe0524200, 0x731d0200,
-	0xe0524300, 0x731d0300,
-	0xbf8c0f70, 0xd3d94000,
-	0x18000100, 0xd3d94001,
-	0x18000101, 0xd3d94002,
-	0x18000102, 0xd3d94003,
-	0x18000103, 0xe0524000,
-	0x6e1d0000, 0xe0524100,
-	0x6e1d0100, 0xe0524200,
-	0x6e1d0200, 0xe0524300,
-	0x6e1d0300, 0xb8f82a05,
+	0xbf85ffee, 0xbefc0080,
+	0xbf11087c, 0xe0524000,
+	0x781d0000, 0xe0524100,
+	0x781d0100, 0xe0524200,
+	0x781d0200, 0xe0524300,
+	0x781d0300, 0xbf8c0f70,
+	0xd3d94000, 0x18000100,
+	0xd3d94001, 0x18000101,
+	0xd3d94002, 0x18000102,
+	0xd3d94003, 0x18000103,
+	0x807c847c, 0x8078ff78,
+	0x00000400, 0xbf0a6f7c,
+	0xbf85ffea, 0xbf9c0000,
+	0xe0524000, 0x6e1d0000,
+	0xe0524100, 0x6e1d0100,
+	0xe0524200, 0x6e1d0200,
+	0xe0524300, 0x6e1d0300,
+	0xbf8c0f70, 0xb8f82a05,
 	0x80788178, 0x8e788a78,
 	0x8e788178, 0xb8ee1605,
 	0x806e816e, 0x8e6e866e,
@@ -1559,268 +1561,225 @@ static const uint32_t cwsr_trap_arcturus_hex[] = {
 	0xc00b1c37, 0x00000050,
 	0xc00b1d37, 0x00000060,
 	0xc0031e77, 0x00000074,
-	0xbf8cc07f, 0x866fff6d,
-	0xf8000000, 0x8f6f9b6f,
-	0x8e6f906f, 0xbeee0080,
-	0x876e6f6e, 0x866fff6d,
-	0x04000000, 0x8f6f9a6f,
-	0x8e6f8f6f, 0x876e6f6e,
-	0x866fff7a, 0x00800000,
-	0x8f6f976f, 0xb96ef807,
-	0x866dff6d, 0x0000ffff,
-	0x86fe7e7e, 0x86ea6a6a,
-	0x8f6e837a, 0xb96ee0c2,
-	0xbf800002, 0xb97a0002,
-	0xbf8a0000, 0x95806f6c,
-	0xbf810000, 0x00000000,
+	0xbf8cc07f, 0x8f6e8b77,
+	0x866eff6e, 0x001f8000,
+	0xb96ef807, 0x866dff6d,
+	0x0000ffff, 0x86fe7e7e,
+	0x86ea6a6a, 0x8f6e837a,
+	0xb96ee0c2, 0xbf800002,
+	0xb97a0002, 0xbf8a0000,
+	0xbe801f6c, 0xbf810000,
 };
 
 static const uint32_t cwsr_trap_aldebaran_hex[] = {
-	0xbf820001, 0xbf8202ce,
-	0xb8f8f802, 0x89788678,
-	0xb8eef801, 0x866eff6e,
-	0x00000800, 0xbf840003,
+	0xbf820001, 0xbf8202db,
+	0xb8f8f802, 0x8978ff78,
+	0x00020006, 0xb8fbf803,
 	0x866eff78, 0x00002000,
-	0xbf840016, 0xb8fbf803,
+	0xbf840009, 0x866eff6d,
+	0x00ff0000, 0xbf85001e,
 	0x866eff7b, 0x00000400,
-	0xbf85003b, 0x866eff7b,
-	0x00000800, 0xbf850003,
-	0x866eff7b, 0x00000100,
-	0xbf84000c, 0x866eff78,
-	0x00002000, 0xbf840005,
-	0xbf8e0010, 0xb8eef803,
-	0x866eff6e, 0x00000400,
-	0xbf84fffb, 0x8778ff78,
-	0x00002000, 0x80ec886c,
-	0x82ed806d, 0xb8eef807,
-	0x866fff6e, 0x001f8000,
-	0x8e6f8b6f, 0x8977ff77,
-	0xfc000000, 0x87776f77,
-	0x896eff6e, 0x001f8000,
-	0xb96ef807, 0xb8faf812,
+	0xbf850051, 0xbf8e0010,
+	0xb8fbf803, 0xbf82fffa,
+	0x866eff7b, 0x00000900,
+	0xbf850015, 0x866eff7b,
+	0x000071ff, 0xbf840008,
+	0x866fff7b, 0x00007080,
+	0xbf840001, 0xbeee1a87,
+	0xb8eff801, 0x8e6e8c6e,
+	0x866e6f6e, 0xbf85000a,
+	0x866eff6d, 0x00ff0000,
+	0xbf850007, 0xb8eef801,
+	0x866eff6e, 0x00000800,
+	0xbf850003, 0x866eff7b,
+	0x00000400, 0xbf850036,
+	0xb8faf807, 0x867aff7a,
+	0x001f8000, 0x8e7a8b7a,
+	0x8977ff77, 0xfc000000,
+	0x87777a77, 0xba7ff807,
+	0x00000000, 0xb8faf812,
 	0xb8fbf813, 0x8efa887a,
-	0xc0071bbd, 0x00000000,
-	0xbf8cc07f, 0xc0071ebd,
-	0x00000008, 0xbf8cc07f,
-	0x86ee6e6e, 0xbf840001,
-	0xbe801d6e, 0xb8fbf803,
-	0x867bff7b, 0x000001ff,
+	0xc0031bbd, 0x00000010,
+	0xbf8cc07f, 0x8e6e976e,
+	0x8977ff77, 0x00800000,
+	0x87776e77, 0xc0071bbd,
+	0x00000000, 0xbf8cc07f,
+	0xc0071ebd, 0x00000008,
+	0xbf8cc07f, 0x86ee6e6e,
+	0xbf840001, 0xbe801d6e,
+	0x866eff6d, 0x01ff0000,
+	0xbf850005, 0x8778ff78,
+	0x00002000, 0x80ec886c,
+	0x82ed806d, 0xbf820005,
+	0x866eff6d, 0x01000000,
 	0xbf850002, 0x806c846c,
 	0x826d806d, 0x866dff6d,
-	0x0000ffff, 0x8f6e8b77,
-	0x866eff6e, 0x001f8000,
-	0xb96ef807, 0x86fe7e7e,
+	0x0000ffff, 0x8f7a8b77,
+	0x867aff7a, 0x001f8000,
+	0xb97af807, 0x86fe7e7e,
 	0x86ea6a6a, 0x8f6e8378,
 	0xb96ee0c2, 0xbf800002,
 	0xb9780002, 0xbe801f6c,
 	0x866dff6d, 0x0000ffff,
 	0xbefa0080, 0xb97a0283,
-	0xb8fa2407, 0x8e7a9b7a,
-	0x876d7a6d, 0xb8fa03c7,
-	0x8e7a9a7a, 0x876d7a6d,
 	0xb8faf807, 0x867aff7a,
-	0x00007fff, 0xb97af807,
-	0xbeee007e, 0xbeef007f,
-	0xbefe0180, 0xbf900004,
-	0x877a8478, 0xb97af802,
-	0xbf8e0002, 0xbf88fffe,
-	0xb8fa2985, 0x807a817a,
-	0x8e7a8a7a, 0x8e7a817a,
-	0xb8fb1605, 0x807b817b,
-	0x8e7b867b, 0x807a7b7a,
-	0x807a7e7a, 0x827b807f,
-	0x867bff7b, 0x0000ffff,
-	0xc04b1c3d, 0x00000050,
-	0xbf8cc07f, 0xc04b1d3d,
-	0x00000060, 0xbf8cc07f,
-	0xc0431e7d, 0x00000074,
-	0xbf8cc07f, 0xbef4007e,
-	0x8675ff7f, 0x0000ffff,
-	0x8775ff75, 0x00040000,
-	0xbef60080, 0xbef700ff,
-	0x00807fac, 0x867aff7f,
-	0x08000000, 0x8f7a837a,
-	0x87777a77, 0x867aff7f,
-	0x70000000, 0x8f7a817a,
-	0x87777a77, 0xbef1007c,
-	0xbef00080, 0xb8f02985,
-	0x80708170, 0x8e708a70,
-	0x8e708170, 0xb8fa1605,
-	0x807a817a, 0x8e7a867a,
-	0x80707a70, 0xbef60084,
-	0xbef600ff, 0x01000000,
-	0xbefe007c, 0xbefc0070,
-	0xc0611c7a, 0x0000007c,
-	0xbf8cc07f, 0x80708470,
-	0xbefc007e, 0xbefe007c,
-	0xbefc0070, 0xc0611b3a,
+	0x001f8000, 0x8e7a8b7a,
+	0x8977ff77, 0xfc000000,
+	0x87777a77, 0xba7ff807,
+	0x00000000, 0xbeee007e,
+	0xbeef007f, 0xbefe0180,
+	0xbf900004, 0x877a8478,
+	0xb97af802, 0xbf8e0002,
+	0xbf88fffe, 0xb8fa2985,
+	0x807a817a, 0x8e7a8a7a,
+	0x8e7a817a, 0xb8fb1605,
+	0x807b817b, 0x8e7b867b,
+	0x807a7b7a, 0x807a7e7a,
+	0x827b807f, 0x867bff7b,
+	0x0000ffff, 0xc04b1c3d,
+	0x00000050, 0xbf8cc07f,
+	0xc04b1d3d, 0x00000060,
+	0xbf8cc07f, 0xc0431e7d,
+	0x00000074, 0xbf8cc07f,
+	0xbef4007e, 0x8675ff7f,
+	0x0000ffff, 0x8775ff75,
+	0x00040000, 0xbef60080,
+	0xbef700ff, 0x00807fac,
+	0xbef1007c, 0xbef00080,
+	0xb8f02985, 0x80708170,
+	0x8e708a70, 0x8e708170,
+	0xb8fa1605, 0x807a817a,
+	0x8e7a867a, 0x80707a70,
+	0xbef60084, 0xbef600ff,
+	0x01000000, 0xbefe007c,
+	0xbefc0070, 0xc0611c7a,
 	0x0000007c, 0xbf8cc07f,
 	0x80708470, 0xbefc007e,
 	0xbefe007c, 0xbefc0070,
-	0xc0611b7a, 0x0000007c,
+	0xc0611b3a, 0x0000007c,
 	0xbf8cc07f, 0x80708470,
 	0xbefc007e, 0xbefe007c,
-	0xbefc0070, 0xc0611bba,
+	0xbefc0070, 0xc0611b7a,
 	0x0000007c, 0xbf8cc07f,
 	0x80708470, 0xbefc007e,
 	0xbefe007c, 0xbefc0070,
-	0xc0611bfa, 0x0000007c,
+	0xc0611bba, 0x0000007c,
 	0xbf8cc07f, 0x80708470,
 	0xbefc007e, 0xbefe007c,
-	0xbefc0070, 0xc0611e3a,
-	0x0000007c, 0xbf8cc07f,
-	0x80708470, 0xbefc007e,
-	0xb8fbf803, 0xbefe007c,
-	0xbefc0070, 0xc0611efa,
+	0xbefc0070, 0xc0611bfa,
 	0x0000007c, 0xbf8cc07f,
 	0x80708470, 0xbefc007e,
 	0xbefe007c, 0xbefc0070,
-	0xc0611a3a, 0x0000007c,
+	0xc0611e3a, 0x0000007c,
+	0xbf8cc07f, 0x80708470,
+	0xbefc007e, 0xb8fbf803,
+	0xbefe007c, 0xbefc0070,
+	0xc0611efa, 0x0000007c,
 	0xbf8cc07f, 0x80708470,
 	0xbefc007e, 0xbefe007c,
-	0xbefc0070, 0xc0611a7a,
+	0xbefc0070, 0xc0611a3a,
 	0x0000007c, 0xbf8cc07f,
 	0x80708470, 0xbefc007e,
-	0xb8f1f801, 0xbefe007c,
-	0xbefc0070, 0xc0611c7a,
-	0x0000007c, 0xbf8cc07f,
-	0x80708470, 0xbefc007e,
-	0x867aff7f, 0x04000000,
-	0xbeef0080, 0x876f6f7a,
-	0xb8f02985, 0x80708170,
-	0x8e708a70, 0x8e708170,
-	0xb8fb1605, 0x807b817b,
-	0x8e7b847b, 0x8e76827b,
-	0xbef600ff, 0x01000000,
-	0xbef20174, 0x80747074,
-	0x82758075, 0xbefc0080,
-	0xbf800000, 0xbe802b00,
-	0xbe822b02, 0xbe842b04,
-	0xbe862b06, 0xbe882b08,
-	0xbe8a2b0a, 0xbe8c2b0c,
-	0xbe8e2b0e, 0xc06b003a,
-	0x00000000, 0xbf8cc07f,
-	0xc06b013a, 0x00000010,
-	0xbf8cc07f, 0xc06b023a,
-	0x00000020, 0xbf8cc07f,
-	0xc06b033a, 0x00000030,
-	0xbf8cc07f, 0x8074c074,
-	0x82758075, 0x807c907c,
-	0xbf0a7b7c, 0xbf85ffe7,
-	0xbef40172, 0xbef00080,
-	0xbefe00c1, 0xbeff00c1,
-	0xbee80080, 0xbee90080,
-	0xbef600ff, 0x01000000,
-	0x867aff78, 0x00400000,
-	0xbf850003, 0xb8faf803,
-	0x897a7aff, 0x10000000,
-	0xbf85004d, 0xbe840080,
-	0xd2890000, 0x00000900,
-	0x80048104, 0xd2890001,
+	0xbefe007c, 0xbefc0070,
+	0xc0611a7a, 0x0000007c,
+	0xbf8cc07f, 0x80708470,
+	0xbefc007e, 0xb8f1f801,
+	0xbefe007c, 0xbefc0070,
+	0xc0611c7a, 0x0000007c,
+	0xbf8cc07f, 0x80708470,
+	0xbefc007e, 0x867aff7f,
+	0x04000000, 0xbeef0080,
+	0x876f6f7a, 0xb8f02985,
+	0x80708170, 0x8e708a70,
+	0x8e708170, 0xb8fb1605,
+	0x807b817b, 0x8e7b847b,
+	0x8e76827b, 0xbef600ff,
+	0x01000000, 0xbef20174,
+	0x80747074, 0x82758075,
+	0xbefc0080, 0xbf800000,
+	0xbe802b00, 0xbe822b02,
+	0xbe842b04, 0xbe862b06,
+	0xbe882b08, 0xbe8a2b0a,
+	0xbe8c2b0c, 0xbe8e2b0e,
+	0xc06b003a, 0x00000000,
+	0xbf8cc07f, 0xc06b013a,
+	0x00000010, 0xbf8cc07f,
+	0xc06b023a, 0x00000020,
+	0xbf8cc07f, 0xc06b033a,
+	0x00000030, 0xbf8cc07f,
+	0x8074c074, 0x82758075,
+	0x807c907c, 0xbf0a7b7c,
+	0xbf85ffe7, 0xbef40172,
+	0xbef00080, 0xbefe00c1,
+	0xbeff00c1, 0xbee80080,
+	0xbee90080, 0xbef600ff,
+	0x01000000, 0x867aff78,
+	0x00400000, 0xbf850003,
+	0xb8faf803, 0x897a7aff,
+	0x10000000, 0xbf85004d,
+	0xbe840080, 0xd2890000,
 	0x00000900, 0x80048104,
-	0xd2890002, 0x00000900,
-	0x80048104, 0xd2890003,
+	0xd2890001, 0x00000900,
+	0x80048104, 0xd2890002,
 	0x00000900, 0x80048104,
-	0xc069003a, 0x00000070,
-	0xbf8cc07f, 0x80709070,
-	0xbf06c004, 0xbf84ffee,
-	0xbe840080, 0xd2890000,
-	0x00000901, 0x80048104,
-	0xd2890001, 0x00000901,
-	0x80048104, 0xd2890002,
-	0x00000901, 0x80048104,
-	0xd2890003, 0x00000901,
+	0xd2890003, 0x00000900,
 	0x80048104, 0xc069003a,
 	0x00000070, 0xbf8cc07f,
 	0x80709070, 0xbf06c004,
 	0xbf84ffee, 0xbe840080,
-	0xd2890000, 0x00000902,
+	0xd2890000, 0x00000901,
 	0x80048104, 0xd2890001,
-	0x00000902, 0x80048104,
-	0xd2890002, 0x00000902,
+	0x00000901, 0x80048104,
+	0xd2890002, 0x00000901,
 	0x80048104, 0xd2890003,
-	0x00000902, 0x80048104,
+	0x00000901, 0x80048104,
 	0xc069003a, 0x00000070,
 	0xbf8cc07f, 0x80709070,
 	0xbf06c004, 0xbf84ffee,
 	0xbe840080, 0xd2890000,
-	0x00000903, 0x80048104,
-	0xd2890001, 0x00000903,
-	0x80048104, 0xd2890002,
-	0x00000903, 0x80048104,
-	0xd2890003, 0x00000903,
-	0x80048104, 0xc069003a,
-	0x00000070, 0xbf8cc07f,
-	0x80709070, 0xbf06c004,
-	0xbf84ffee, 0xbf820008,
-	0xe0724000, 0x701d0000,
-	0xe0724100, 0x701d0100,
-	0xe0724200, 0x701d0200,
-	0xe0724300, 0x701d0300,
-	0xbefe00c1, 0xbeff00c1,
-	0xb8fb4306, 0x867bc17b,
-	0xbf840064, 0xbf8a0000,
-	0x867aff6f, 0x04000000,
-	0xbf840060, 0x8e7b867b,
-	0x8e7b827b, 0xbef6007b,
-	0xb8f02985, 0x80708170,
-	0x8e708a70, 0x8e708170,
-	0xb8fa1605, 0x807a817a,
-	0x8e7a867a, 0x80707a70,
-	0x8070ff70, 0x00000080,
-	0xbef600ff, 0x01000000,
-	0xbefc0080, 0xd28c0002,
-	0x000100c1, 0xd28d0003,
-	0x000204c1, 0x867aff78,
-	0x00400000, 0xbf850003,
-	0xb8faf803, 0x897a7aff,
-	0x10000000, 0xbf850030,
-	0x24040682, 0xd86e4000,
-	0x00000002, 0xbf8cc07f,
-	0xbe840080, 0xd2890000,
-	0x00000900, 0x80048104,
-	0xd2890001, 0x00000900,
+	0x00000902, 0x80048104,
+	0xd2890001, 0x00000902,
 	0x80048104, 0xd2890002,
-	0x00000900, 0x80048104,
-	0xd2890003, 0x00000900,
+	0x00000902, 0x80048104,
+	0xd2890003, 0x00000902,
 	0x80048104, 0xc069003a,
 	0x00000070, 0xbf8cc07f,
 	0x80709070, 0xbf06c004,
 	0xbf84ffee, 0xbe840080,
-	0xd2890000, 0x00000901,
+	0xd2890000, 0x00000903,
 	0x80048104, 0xd2890001,
-	0x00000901, 0x80048104,
-	0xd2890002, 0x00000901,
+	0x00000903, 0x80048104,
+	0xd2890002, 0x00000903,
 	0x80048104, 0xd2890003,
-	0x00000901, 0x80048104,
+	0x00000903, 0x80048104,
 	0xc069003a, 0x00000070,
 	0xbf8cc07f, 0x80709070,
 	0xbf06c004, 0xbf84ffee,
-	0x680404ff, 0x00000200,
-	0xd0c9006a, 0x0000f702,
-	0xbf87ffd2, 0xbf820015,
-	0xd1060002, 0x00011103,
-	0x7e0602ff, 0x00000200,
-	0xbefc00ff, 0x00010000,
-	0xbe800077, 0x8677ff77,
-	0xff7fffff, 0x8777ff77,
-	0x00058000, 0xd8ec0000,
-	0x00000002, 0xbf8cc07f,
-	0xe0765000, 0x701d0002,
-	0x68040702, 0xd0c9006a,
-	0x0000f702, 0xbf87fff7,
-	0xbef70000, 0xbef000ff,
-	0x00000400, 0xbefe00c1,
-	0xbeff00c1, 0xb8fb2b05,
-	0x807b817b, 0x8e7b827b,
-	0xbef600ff, 0x01000000,
-	0xbefc0084, 0xbf0a7b7c,
-	0xbf84006d, 0xbf11017c,
-	0x807bff7b, 0x00001000,
+	0xbf820008, 0xe0724000,
+	0x701d0000, 0xe0724100,
+	0x701d0100, 0xe0724200,
+	0x701d0200, 0xe0724300,
+	0x701d0300, 0xbefe00c1,
+	0xbeff00c1, 0xb8fb4306,
+	0x867bc17b, 0xbf840064,
+	0xbf8a0000, 0x867aff6f,
+	0x04000000, 0xbf840060,
+	0x8e7b867b, 0x8e7b827b,
+	0xbef6007b, 0xb8f02985,
+	0x80708170, 0x8e708a70,
+	0x8e708170, 0xb8fa1605,
+	0x807a817a, 0x8e7a867a,
+	0x80707a70, 0x8070ff70,
+	0x00000080, 0xbef600ff,
+	0x01000000, 0xbefc0080,
+	0xd28c0002, 0x000100c1,
+	0xd28d0003, 0x000204c1,
 	0x867aff78, 0x00400000,
 	0xbf850003, 0xb8faf803,
 	0x897a7aff, 0x10000000,
-	0xbf850051, 0xbe840080,
+	0xbf850030, 0x24040682,
+	0xd86e4000, 0x00000002,
+	0xbf8cc07f, 0xbe840080,
 	0xd2890000, 0x00000900,
 	0x80048104, 0xd2890001,
 	0x00000900, 0x80048104,
@@ -1839,51 +1798,31 @@ static const uint32_t cwsr_trap_aldebaran_hex[] = {
 	0x80048104, 0xc069003a,
 	0x00000070, 0xbf8cc07f,
 	0x80709070, 0xbf06c004,
-	0xbf84ffee, 0xbe840080,
-	0xd2890000, 0x00000902,
-	0x80048104, 0xd2890001,
-	0x00000902, 0x80048104,
-	0xd2890002, 0x00000902,
-	0x80048104, 0xd2890003,
-	0x00000902, 0x80048104,
-	0xc069003a, 0x00000070,
-	0xbf8cc07f, 0x80709070,
-	0xbf06c004, 0xbf84ffee,
-	0xbe840080, 0xd2890000,
-	0x00000903, 0x80048104,
-	0xd2890001, 0x00000903,
-	0x80048104, 0xd2890002,
-	0x00000903, 0x80048104,
-	0xd2890003, 0x00000903,
-	0x80048104, 0xc069003a,
-	0x00000070, 0xbf8cc07f,
-	0x80709070, 0xbf06c004,
-	0xbf84ffee, 0x807c847c,
-	0xbf0a7b7c, 0xbf85ffb1,
-	0xbf9c0000, 0xbf820012,
-	0x7e000300, 0x7e020301,
-	0x7e040302, 0x7e060303,
-	0xe0724000, 0x701d0000,
-	0xe0724100, 0x701d0100,
-	0xe0724200, 0x701d0200,
-	0xe0724300, 0x701d0300,
-	0x807c847c, 0x8070ff70,
-	0x00000400, 0xbf0a7b7c,
-	0xbf85ffef, 0xbf9c0000,
-	0xb8fb2985, 0x807b817b,
-	0x8e7b837b, 0xb8fa2b05,
-	0x807a817a, 0x8e7a827a,
-	0x80fb7a7b, 0x867b7b7b,
-	0xbf84007a, 0x807bff7b,
-	0x00001000, 0xbefc0080,
-	0xbf11017c, 0x867aff78,
+	0xbf84ffee, 0x680404ff,
+	0x00000200, 0xd0c9006a,
+	0x0000f702, 0xbf87ffd2,
+	0xbf820015, 0xd1060002,
+	0x00011103, 0x7e0602ff,
+	0x00000200, 0xbefc00ff,
+	0x00010000, 0xbe800077,
+	0x8677ff77, 0xff7fffff,
+	0x8777ff77, 0x00058000,
+	0xd8ec0000, 0x00000002,
+	0xbf8cc07f, 0xe0765000,
+	0x701d0002, 0x68040702,
+	0xd0c9006a, 0x0000f702,
+	0xbf87fff7, 0xbef70000,
+	0xbef000ff, 0x00000400,
+	0xbefe00c1, 0xbeff00c1,
+	0xb8fb2b05, 0x807b817b,
+	0x8e7b827b, 0xbef600ff,
+	0x01000000, 0xbefc0084,
+	0xbf0a7b7c, 0xbf84006d,
+	0xbf11017c, 0x807bff7b,
+	0x00001000, 0x867aff78,
 	0x00400000, 0xbf850003,
 	0xb8faf803, 0x897a7aff,
-	0x10000000, 0xbf850059,
-	0xd3d84000, 0x18000100,
-	0xd3d84001, 0x18000101,
-	0xd3d84002, 0x18000102,
-	0xd3d84003, 0x18000103,
+	0x10000000, 0xbf850051,
 	0xbe840080, 0xd2890000,
 	0x00000900, 0x80048104,
 	0xd2890001, 0x00000900,
@@ -1923,169 +1862,241 @@ static const uint32_t cwsr_trap_aldebaran_hex[] = {
 	0xbf8cc07f, 0x80709070,
 	0xbf06c004, 0xbf84ffee,
 	0x807c847c, 0xbf0a7b7c,
-	0xbf85ffa9, 0xbf9c0000,
-	0xbf820016, 0xd3d84000,
-	0x18000100, 0xd3d84001,
-	0x18000101, 0xd3d84002,
-	0x18000102, 0xd3d84003,
-	0x18000103, 0xe0724000,
+	0xbf85ffb1, 0xbf9c0000,
+	0xbf820012, 0x7e000300,
+	0x7e020301, 0x7e040302,
+	0x7e060303, 0xe0724000,
 	0x701d0000, 0xe0724100,
 	0x701d0100, 0xe0724200,
 	0x701d0200, 0xe0724300,
 	0x701d0300, 0x807c847c,
 	0x8070ff70, 0x00000400,
-	0xbf0a7b7c, 0xbf85ffeb,
-	0xbf9c0000, 0xbf820101,
-	0xbef4007e, 0x8675ff7f,
-	0x0000ffff, 0x8775ff75,
-	0x00040000, 0xbef60080,
-	0xbef700ff, 0x00807fac,
-	0x866eff7f, 0x08000000,
-	0x8f6e836e, 0x87776e77,
-	0x866eff7f, 0x70000000,
-	0x8f6e816e, 0x87776e77,
-	0x866eff7f, 0x04000000,
-	0xbf84001f, 0xbefe00c1,
-	0xbeff00c1, 0xb8ef4306,
-	0x866fc16f, 0xbf84001a,
-	0x8e6f866f, 0x8e6f826f,
-	0xbef6006f, 0xb8f82985,
-	0x80788178, 0x8e788a78,
-	0x8e788178, 0xb8ee1605,
-	0x806e816e, 0x8e6e866e,
-	0x80786e78, 0x8078ff78,
-	0x00000080, 0xbef600ff,
-	0x01000000, 0xbefc0080,
-	0xe0510000, 0x781d0000,
-	0xe0510100, 0x781d0000,
-	0x807cff7c, 0x00000200,
-	0x8078ff78, 0x00000200,
-	0xbf0a6f7c, 0xbf85fff6,
+	0xbf0a7b7c, 0xbf85ffef,
+	0xbf9c0000, 0xb8fb2985,
+	0x807b817b, 0x8e7b837b,
+	0xb8fa2b05, 0x807a817a,
+	0x8e7a827a, 0x80fb7a7b,
+	0x867b7b7b, 0xbf84007a,
+	0x807bff7b, 0x00001000,
+	0xbefc0080, 0xbf11017c,
+	0x867aff78, 0x00400000,
+	0xbf850003, 0xb8faf803,
+	0x897a7aff, 0x10000000,
+	0xbf850059, 0xd3d84000,
+	0x18000100, 0xd3d84001,
+	0x18000101, 0xd3d84002,
+	0x18000102, 0xd3d84003,
+	0x18000103, 0xbe840080,
+	0xd2890000, 0x00000900,
+	0x80048104, 0xd2890001,
+	0x00000900, 0x80048104,
+	0xd2890002, 0x00000900,
+	0x80048104, 0xd2890003,
+	0x00000900, 0x80048104,
+	0xc069003a, 0x00000070,
+	0xbf8cc07f, 0x80709070,
+	0xbf06c004, 0xbf84ffee,
+	0xbe840080, 0xd2890000,
+	0x00000901, 0x80048104,
+	0xd2890001, 0x00000901,
+	0x80048104, 0xd2890002,
+	0x00000901, 0x80048104,
+	0xd2890003, 0x00000901,
+	0x80048104, 0xc069003a,
+	0x00000070, 0xbf8cc07f,
+	0x80709070, 0xbf06c004,
+	0xbf84ffee, 0xbe840080,
+	0xd2890000, 0x00000902,
+	0x80048104, 0xd2890001,
+	0x00000902, 0x80048104,
+	0xd2890002, 0x00000902,
+	0x80048104, 0xd2890003,
+	0x00000902, 0x80048104,
+	0xc069003a, 0x00000070,
+	0xbf8cc07f, 0x80709070,
+	0xbf06c004, 0xbf84ffee,
+	0xbe840080, 0xd2890000,
+	0x00000903, 0x80048104,
+	0xd2890001, 0x00000903,
+	0x80048104, 0xd2890002,
+	0x00000903, 0x80048104,
+	0xd2890003, 0x00000903,
+	0x80048104, 0xc069003a,
+	0x00000070, 0xbf8cc07f,
+	0x80709070, 0xbf06c004,
+	0xbf84ffee, 0x807c847c,
+	0xbf0a7b7c, 0xbf85ffa9,
+	0xbf9c0000, 0xbf820016,
+	0xd3d84000, 0x18000100,
+	0xd3d84001, 0x18000101,
+	0xd3d84002, 0x18000102,
+	0xd3d84003, 0x18000103,
+	0xe0724000, 0x701d0000,
+	0xe0724100, 0x701d0100,
+	0xe0724200, 0x701d0200,
+	0xe0724300, 0x701d0300,
+	0x807c847c, 0x8070ff70,
+	0x00000400, 0xbf0a7b7c,
+	0xbf85ffeb, 0xbf9c0000,
+	0xbf8200ee, 0xbef4007e,
+	0x8675ff7f, 0x0000ffff,
+	0x8775ff75, 0x00040000,
+	0xbef60080, 0xbef700ff,
+	0x00807fac, 0x866eff7f,
+	0x04000000, 0xbf84001f,
 	0xbefe00c1, 0xbeff00c1,
+	0xb8ef4306, 0x866fc16f,
+	0xbf84001a, 0x8e6f866f,
+	0x8e6f826f, 0xbef6006f,
+	0xb8f82985, 0x80788178,
+	0x8e788a78, 0x8e788178,
+	0xb8ee1605, 0x806e816e,
+	0x8e6e866e, 0x80786e78,
+	0x8078ff78, 0x00000080,
 	0xbef600ff, 0x01000000,
-	0xb8ef2b05, 0x806f816f,
-	0x8e6f826f, 0x806fff6f,
-	0x00008000, 0xbef80080,
-	0xbeee0078, 0x8078ff78,
-	0x00000400, 0xbefc0084,
+	0xbefc0080, 0xe0510000,
+	0x781d0000, 0xe0510100,
+	0x781d0000, 0x807cff7c,
+	0x00000200, 0x8078ff78,
+	0x00000200, 0xbf0a6f7c,
+	0xbf85fff6, 0xbefe00c1,
+	0xbeff00c1, 0xbef600ff,
+	0x01000000, 0xb8ef2b05,
+	0x806f816f, 0x8e6f826f,
+	0x806fff6f, 0x00008000,
+	0xbef80080, 0xbeee0078,
+	0x8078ff78, 0x00000400,
+	0xbefc0084, 0xbf11087c,
+	0xe0524000, 0x781d0000,
+	0xe0524100, 0x781d0100,
+	0xe0524200, 0x781d0200,
+	0xe0524300, 0x781d0300,
+	0xbf8c0f70, 0x7e000300,
+	0x7e020301, 0x7e040302,
+	0x7e060303, 0x807c847c,
+	0x8078ff78, 0x00000400,
+	0xbf0a6f7c, 0xbf85ffee,
+	0xb8ef2985, 0x806f816f,
+	0x8e6f836f, 0xb8f92b05,
+	0x80798179, 0x8e798279,
+	0x80ef796f, 0x866f6f6f,
+	0xbf84001a, 0x806fff6f,
+	0x00008000, 0xbefc0080,
 	0xbf11087c, 0xe0524000,
 	0x781d0000, 0xe0524100,
 	0x781d0100, 0xe0524200,
 	0x781d0200, 0xe0524300,
 	0x781d0300, 0xbf8c0f70,
-	0x7e000300, 0x7e020301,
-	0x7e040302, 0x7e060303,
+	0xd3d94000, 0x18000100,
+	0xd3d94001, 0x18000101,
+	0xd3d94002, 0x18000102,
+	0xd3d94003, 0x18000103,
 	0x807c847c, 0x8078ff78,
 	0x00000400, 0xbf0a6f7c,
-	0xbf85ffee, 0xb8ef2985,
-	0x806f816f, 0x8e6f836f,
-	0xb8f92b05, 0x80798179,
-	0x8e798279, 0x80ef796f,
-	0x866f6f6f, 0xbf84001a,
-	0x806fff6f, 0x00008000,
-	0xbefc0080, 0xbf11087c,
-	0xe0524000, 0x781d0000,
-	0xe0524100, 0x781d0100,
-	0xe0524200, 0x781d0200,
-	0xe0524300, 0x781d0300,
-	0xbf8c0f70, 0xd3d94000,
-	0x18000100, 0xd3d94001,
-	0x18000101, 0xd3d94002,
-	0x18000102, 0xd3d94003,
-	0x18000103, 0x807c847c,
-	0x8078ff78, 0x00000400,
-	0xbf0a6f7c, 0xbf85ffea,
-	0xbf9c0000, 0xe0524000,
-	0x6e1d0000, 0xe0524100,
-	0x6e1d0100, 0xe0524200,
-	0x6e1d0200, 0xe0524300,
-	0x6e1d0300, 0xbf8c0f70,
-	0xb8f82985, 0x80788178,
-	0x8e788a78, 0x8e788178,
-	0xb8ee1605, 0x806e816e,
-	0x8e6e866e, 0x80786e78,
-	0x80f8c078, 0xb8ef1605,
-	0x806f816f, 0x8e6f846f,
-	0x8e76826f, 0xbef600ff,
-	0x01000000, 0xbefc006f,
-	0xc031003a, 0x00000078,
-	0x80f8c078, 0xbf8cc07f,
-	0x80fc907c, 0xbf800000,
-	0xbe802d00, 0xbe822d02,
-	0xbe842d04, 0xbe862d06,
-	0xbe882d08, 0xbe8a2d0a,
-	0xbe8c2d0c, 0xbe8e2d0e,
-	0xbf06807c, 0xbf84fff0,
-	0xb8f82985, 0x80788178,
-	0x8e788a78, 0x8e788178,
-	0xb8ee1605, 0x806e816e,
-	0x8e6e866e, 0x80786e78,
-	0xbef60084, 0xbef600ff,
-	0x01000000, 0xc0211bfa,
+	0xbf85ffea, 0xbf9c0000,
+	0xe0524000, 0x6e1d0000,
+	0xe0524100, 0x6e1d0100,
+	0xe0524200, 0x6e1d0200,
+	0xe0524300, 0x6e1d0300,
+	0xbf8c0f70, 0xb8f82985,
+	0x80788178, 0x8e788a78,
+	0x8e788178, 0xb8ee1605,
+	0x806e816e, 0x8e6e866e,
+	0x80786e78, 0x80f8c078,
+	0xb8ef1605, 0x806f816f,
+	0x8e6f846f, 0x8e76826f,
+	0xbef600ff, 0x01000000,
+	0xbefc006f, 0xc031003a,
+	0x00000078, 0x80f8c078,
+	0xbf8cc07f, 0x80fc907c,
+	0xbf800000, 0xbe802d00,
+	0xbe822d02, 0xbe842d04,
+	0xbe862d06, 0xbe882d08,
+	0xbe8a2d0a, 0xbe8c2d0c,
+	0xbe8e2d0e, 0xbf06807c,
+	0xbf84fff0, 0xb8f82985,
+	0x80788178, 0x8e788a78,
+	0x8e788178, 0xb8ee1605,
+	0x806e816e, 0x8e6e866e,
+	0x80786e78, 0xbef60084,
+	0xbef600ff, 0x01000000,
+	0xc0211bfa, 0x00000078,
+	0x80788478, 0xc0211b3a,
 	0x00000078, 0x80788478,
-	0xc0211b3a, 0x00000078,
-	0x80788478, 0xc0211b7a,
+	0xc0211b7a, 0x00000078,
+	0x80788478, 0xc0211c3a,
 	0x00000078, 0x80788478,
-	0xc0211c3a, 0x00000078,
-	0x80788478, 0xc0211c7a,
+	0xc0211c7a, 0x00000078,
+	0x80788478, 0xc0211eba,
 	0x00000078, 0x80788478,
-	0xc0211eba, 0x00000078,
-	0x80788478, 0xc0211efa,
+	0xc0211efa, 0x00000078,
+	0x80788478, 0xc0211a3a,
 	0x00000078, 0x80788478,
-	0xc0211a3a, 0x00000078,
-	0x80788478, 0xc0211a7a,
+	0xc0211a7a, 0x00000078,
+	0x80788478, 0xc0211cfa,
 	0x00000078, 0x80788478,
-	0xc0211cfa, 0x00000078,
-	0x80788478, 0xbf8cc07f,
-	0xbefc006f, 0xbefe0070,
-	0xbeff0071, 0x866f7bff,
-	0x000003ff, 0xb96f4803,
-	0x866f7bff, 0xfffff800,
-	0x8f6f8b6f, 0xb96fa2c3,
-	0xb973f801, 0xb8ee2985,
-	0x806e816e, 0x8e6e8a6e,
-	0x8e6e816e, 0xb8ef1605,
-	0x806f816f, 0x8e6f866f,
-	0x806e6f6e, 0x806e746e,
-	0x826f8075, 0x866fff6f,
-	0x0000ffff, 0xc00b1c37,
-	0x00000050, 0xc00b1d37,
-	0x00000060, 0xc0031e77,
-	0x00000074, 0xbf8cc07f,
-	0x866fff6d, 0xf8000000,
-	0x8f6f9b6f, 0x8e6f906f,
-	0xbeee0080, 0x876e6f6e,
-	0x866fff6d, 0x04000000,
-	0x8f6f9a6f, 0x8e6f8f6f,
-	0x876e6f6e, 0x866fff7a,
-	0x00800000, 0x8f6f976f,
+	0xbf8cc07f, 0xbefc006f,
+	0xbefe0070, 0xbeff0071,
+	0x866f7bff, 0x000003ff,
+	0xb96f4803, 0x866f7bff,
+	0xfffff800, 0x8f6f8b6f,
+	0xb96fa2c3, 0xb973f801,
+	0xb8ee2985, 0x806e816e,
+	0x8e6e8a6e, 0x8e6e816e,
+	0xb8ef1605, 0x806f816f,
+	0x8e6f866f, 0x806e6f6e,
+	0x806e746e, 0x826f8075,
+	0x866fff6f, 0x0000ffff,
+	0xc00b1c37, 0x00000050,
+	0xc00b1d37, 0x00000060,
+	0xc0031e77, 0x00000074,
+	0xbf8cc07f, 0x8f6e8b77,
+	0x866eff6e, 0x001f8000,
 	0xb96ef807, 0x866dff6d,
 	0x0000ffff, 0x86fe7e7e,
 	0x86ea6a6a, 0x8f6e837a,
 	0xb96ee0c2, 0xbf800002,
 	0xb97a0002, 0xbf8a0000,
-	0x95806f6c, 0xbf810000,
+	0xbe801f6c, 0xbf810000,
 };
 
 static const uint32_t cwsr_trap_gfx10_hex[] = {
-	0xbf820001, 0xbf8201cf,
+	0xbf820001, 0xbf82021c,
 	0xb0804004, 0xb978f802,
-	0x8a788678, 0xb96ef801,
-	0x876eff6e, 0x00000800,
-	0xbf840003, 0x876eff78,
+	0x8a78ff78, 0x00020006,
+	0xb97bf803, 0x876eff78,
 	0x00002000, 0xbf840009,
-	0xb97bf803, 0x876eff7b,
-	0x00000400, 0xbf85001d,
-	0x876eff7b, 0x00000100,
-	0xbf840002, 0x8878ff78,
-	0x00002000, 0xb97af812,
+	0x876eff6d, 0x00ff0000,
+	0xbf85001e, 0x876eff7b,
+	0x00000400, 0xbf850041,
+	0xbf8e0010, 0xb97bf803,
+	0xbf82fffa, 0x876eff7b,
+	0x00000900, 0xbf850015,
+	0x876eff7b, 0x000071ff,
+	0xbf840008, 0x876fff7b,
+	0x00007080, 0xbf840001,
+	0xbeee1d87, 0xb96ff801,
+	0x8f6e8c6e, 0x876e6f6e,
+	0xbf85000a, 0x876eff6d,
+	0x00ff0000, 0xbf850007,
+	0xb96ef801, 0x876eff6e,
+	0x00000800, 0xbf850003,
+	0x876eff7b, 0x00000400,
+	0xbf850026, 0xb97af812,
 	0xb97bf813, 0x8ffa887a,
-	0xf4051bbd, 0xfa000000,
-	0xbf8cc07f, 0xf4051ebd,
-	0xfa000008, 0xbf8cc07f,
-	0x87ee6e6e, 0xbf840001,
-	0xbe80206e, 0xb97bf803,
-	0x877bff7b, 0x000001ff,
+	0xf4011bbd, 0xfa000010,
+	0xbf8cc07f, 0x8f6e976e,
+	0x8a77ff77, 0x00800000,
+	0x88776e77, 0xf4051bbd,
+	0xfa000000, 0xbf8cc07f,
+	0xf4051ebd, 0xfa000008,
+	0xbf8cc07f, 0x87ee6e6e,
+	0xbf840001, 0xbe80206e,
+	0x876eff6d, 0x01ff0000,
+	0xbf850005, 0x8878ff78,
+	0x00002000, 0x80ec886c,
+	0x82ed806d, 0xbf820005,
+	0x876eff6d, 0x01000000,
 	0xbf850002, 0x806c846c,
 	0x826d806d, 0x876dff6d,
 	0x0000ffff, 0x87fe7e7e,
@@ -2095,37 +2106,55 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
 	0xb9fa0283, 0xbeee037e,
 	0xbeef037f, 0xbefe0480,
 	0xbf900004, 0xbf8cc07f,
+	0x877aff7f, 0x04000000,
+	0x8f7a857a, 0x886d7a6d,
+	0xbefa037e, 0x877bff7f,
+	0x0000ffff, 0xbefe03c1,
+	0xbeff03c1, 0xdc5f8000,
+	0x007a0000, 0x7e000280,
+	0xbefe037a, 0xbeff037b,
 	0xb97b02dc, 0x8f7b997b,
-	0x887b7b7f, 0xb97a2a05,
-	0x807a817a, 0xbf0d997b,
-	0xbf850002, 0x8f7a897a,
-	0xbf820001, 0x8f7a8a7a,
+	0xb97a2a05, 0x807a817a,
+	0xbf0d997b, 0xbf850002,
+	0x8f7a897a, 0xbf820001,
+	0x8f7a8a7a, 0xb97b1e06,
+	0x8f7b8a7b, 0x807a7b7a,
 	0x877bff7f, 0x0000ffff,
 	0x807aff7a, 0x00000200,
 	0x807a7e7a, 0x827b807b,
-	0xbef4037e, 0x8775ff7f,
-	0x0000ffff, 0x8875ff75,
-	0x00040000, 0xbef60380,
-	0xbef703ff, 0x10807fac,
-	0x877aff7f, 0x08000000,
-	0x907a837a, 0x88777a77,
-	0x877aff7f, 0x70000000,
-	0x907a817a, 0x88777a77,
-	0xbef1037c, 0xbef00380,
-	0xb97302dc, 0x8f739973,
-	0x8873737f, 0xbefe03c1,
+	0xd7610000, 0x00010870,
+	0xd7610000, 0x00010a71,
+	0xd7610000, 0x00010c72,
+	0xd7610000, 0x00010e73,
+	0xd7610000, 0x00011074,
+	0xd7610000, 0x00011275,
+	0xd7610000, 0x00011476,
+	0xd7610000, 0x00011677,
+	0xd7610000, 0x00011a79,
+	0xd7610000, 0x00011c7e,
+	0xd7610000, 0x00011e7f,
+	0xbefe03ff, 0x00003fff,
+	0xbeff0380, 0xdc5f8040,
+	0x007a0000, 0xd760007a,
+	0x00011d00, 0xd760007b,
+	0x00011f00, 0xbefe037a,
+	0xbeff037b, 0xbef4037e,
+	0x8775ff7f, 0x0000ffff,
+	0x8875ff75, 0x00040000,
+	0xbef60380, 0xbef703ff,
+	0x10807fac, 0xbef1037c,
+	0xbef00380, 0xb97302dc,
+	0x8f739973, 0xbefe03c1,
 	0x907c9973, 0x877c817c,
 	0xbf06817c, 0xbf850002,
 	0xbeff0380, 0xbf820002,
-	0xbeff03c1, 0xbf82000b,
+	0xbeff03c1, 0xbf820009,
 	0xbef603ff, 0x01000000,
-	0xe0704000, 0x705d0000,
 	0xe0704080, 0x705d0100,
 	0xe0704100, 0x705d0200,
 	0xe0704180, 0x705d0300,
-	0xbf82000a, 0xbef603ff,
-	0x01000000, 0xe0704000,
-	0x705d0000, 0xe0704100,
+	0xbf820008, 0xbef603ff,
+	0x01000000, 0xe0704100,
 	0x705d0100, 0xe0704200,
 	0x705d0200, 0xe0704300,
 	0x705d0300, 0xb9702a05,
@@ -2140,8 +2169,9 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
 	0xbefc0380, 0xd7610002,
 	0x0000f871, 0x807c817c,
 	0xd7610002, 0x0000f86c,
-	0x807c817c, 0xd7610002,
-	0x0000f86d, 0x807c817c,
+	0x807c817c, 0x8a7aff6d,
+	0x80000000, 0xd7610002,
+	0x0000f87a, 0x807c817c,
 	0xd7610002, 0x0000f86e,
 	0x807c817c, 0xd7610002,
 	0x0000f86f, 0x807c817c,
@@ -2156,160 +2186,157 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
 	0x0000f871, 0x807c817c,
 	0xb971f815, 0xd7610002,
 	0x0000f871, 0x807c817c,
+	0xbefe03ff, 0x0000ffff,
 	0xbeff0380, 0xe0704000,
-	0x705d0200, 0xb9702a05,
-	0x80708170, 0xbf0d9973,
-	0xbf850002, 0x8f708970,
-	0xbf820001, 0x8f708a70,
-	0xb97a1e06, 0x8f7a8a7a,
-	0x80707a70, 0xbef603ff,
-	0x01000000, 0xbef90380,
-	0xbefc0380, 0xbf800000,
-	0xbe802f00, 0xbe822f02,
-	0xbe842f04, 0xbe862f06,
-	0xbe882f08, 0xbe8a2f0a,
-	0xbe8c2f0c, 0xbe8e2f0e,
-	0xd7610002, 0x0000f200,
-	0x80798179, 0xd7610002,
-	0x0000f201, 0x80798179,
-	0xd7610002, 0x0000f202,
-	0x80798179, 0xd7610002,
-	0x0000f203, 0x80798179,
-	0xd7610002, 0x0000f204,
+	0x705d0200, 0xbefe03c1,
+	0xb9702a05, 0x80708170,
+	0xbf0d9973, 0xbf850002,
+	0x8f708970, 0xbf820001,
+	0x8f708a70, 0xb97a1e06,
+	0x8f7a8a7a, 0x80707a70,
+	0xbef603ff, 0x01000000,
+	0xbef90380, 0xbefc0380,
+	0xbf800000, 0xbe802f00,
+	0xbe822f02, 0xbe842f04,
+	0xbe862f06, 0xbe882f08,
+	0xbe8a2f0a, 0xbe8c2f0c,
+	0xbe8e2f0e, 0xd7610002,
+	0x0000f200, 0x80798179,
+	0xd7610002, 0x0000f201,
 	0x80798179, 0xd7610002,
-	0x0000f205, 0x80798179,
-	0xd7610002, 0x0000f206,
+	0x0000f202, 0x80798179,
+	0xd7610002, 0x0000f203,
 	0x80798179, 0xd7610002,
-	0x0000f207, 0x80798179,
-	0xd7610002, 0x0000f208,
+	0x0000f204, 0x80798179,
+	0xd7610002, 0x0000f205,
 	0x80798179, 0xd7610002,
-	0x0000f209, 0x80798179,
-	0xd7610002, 0x0000f20a,
+	0x0000f206, 0x80798179,
+	0xd7610002, 0x0000f207,
 	0x80798179, 0xd7610002,
-	0x0000f20b, 0x80798179,
-	0xd7610002, 0x0000f20c,
+	0x0000f208, 0x80798179,
+	0xd7610002, 0x0000f209,
 	0x80798179, 0xd7610002,
-	0x0000f20d, 0x80798179,
-	0xd7610002, 0x0000f20e,
+	0x0000f20a, 0x80798179,
+	0xd7610002, 0x0000f20b,
 	0x80798179, 0xd7610002,
-	0x0000f20f, 0x80798179,
-	0xbf06a079, 0xbf840006,
-	0xe0704000, 0x705d0200,
-	0x8070ff70, 0x00000080,
-	0xbef90380, 0x7e040280,
-	0x807c907c, 0xbf0aff7c,
-	0x00000060, 0xbf85ffbc,
-	0xbe802f00, 0xbe822f02,
-	0xbe842f04, 0xbe862f06,
-	0xbe882f08, 0xbe8a2f0a,
-	0xd7610002, 0x0000f200,
+	0x0000f20c, 0x80798179,
+	0xd7610002, 0x0000f20d,
 	0x80798179, 0xd7610002,
-	0x0000f201, 0x80798179,
-	0xd7610002, 0x0000f202,
+	0x0000f20e, 0x80798179,
+	0xd7610002, 0x0000f20f,
+	0x80798179, 0xbf06a079,
+	0xbf840006, 0xe0704000,
+	0x705d0200, 0x8070ff70,
+	0x00000080, 0xbef90380,
+	0x7e040280, 0x807c907c,
+	0xbf0aff7c, 0x00000060,
+	0xbf85ffbc, 0xbe802f00,
+	0xbe822f02, 0xbe842f04,
+	0xbe862f06, 0xbe882f08,
+	0xbe8a2f0a, 0xd7610002,
+	0x0000f200, 0x80798179,
+	0xd7610002, 0x0000f201,
 	0x80798179, 0xd7610002,
-	0x0000f203, 0x80798179,
-	0xd7610002, 0x0000f204,
+	0x0000f202, 0x80798179,
+	0xd7610002, 0x0000f203,
 	0x80798179, 0xd7610002,
-	0x0000f205, 0x80798179,
-	0xd7610002, 0x0000f206,
+	0x0000f204, 0x80798179,
+	0xd7610002, 0x0000f205,
 	0x80798179, 0xd7610002,
-	0x0000f207, 0x80798179,
-	0xd7610002, 0x0000f208,
+	0x0000f206, 0x80798179,
+	0xd7610002, 0x0000f207,
 	0x80798179, 0xd7610002,
-	0x0000f209, 0x80798179,
-	0xd7610002, 0x0000f20a,
+	0x0000f208, 0x80798179,
+	0xd7610002, 0x0000f209,
 	0x80798179, 0xd7610002,
-	0x0000f20b, 0x80798179,
-	0xe0704000, 0x705d0200,
+	0x0000f20a, 0x80798179,
+	0xd7610002, 0x0000f20b,
+	0x80798179, 0xe0704000,
+	0x705d0200, 0xbefe03c1,
+	0x907c9973, 0x877c817c,
+	0xbf06817c, 0xbf850002,
+	0xbeff0380, 0xbf820001,
+	0xbeff03c1, 0xb97b4306,
+	0x877bc17b, 0xbf840044,
+	0xbf8a0000, 0x877aff6d,
+	0x80000000, 0xbf840040,
+	0x8f7b867b, 0x8f7b827b,
+	0xbef6037b, 0xb9702a05,
+	0x80708170, 0xbf0d9973,
+	0xbf850002, 0x8f708970,
+	0xbf820001, 0x8f708a70,
+	0xb97a1e06, 0x8f7a8a7a,
+	0x80707a70, 0x8070ff70,
+	0x00000200, 0x8070ff70,
+	0x00000080, 0xbef603ff,
+	0x01000000, 0xd7650000,
+	0x000100c1, 0xd7660000,
+	0x000200c1, 0x16000084,
+	0x907c9973, 0x877c817c,
+	0xbf06817c, 0xbefc0380,
+	0xbf850012, 0xbe8303ff,
+	0x00000080, 0xbf800000,
+	0xbf800000, 0xbf800000,
+	0xd8d80000, 0x01000000,
+	0xbf8c0000, 0xe0704000,
+	0x705d0100, 0x807c037c,
+	0x80700370, 0xd5250000,
+	0x0001ff00, 0x00000080,
+	0xbf0a7b7c, 0xbf85fff4,
+	0xbf820011, 0xbe8303ff,
+	0x00000100, 0xbf800000,
+	0xbf800000, 0xbf800000,
+	0xd8d80000, 0x01000000,
+	0xbf8c0000, 0xe0704000,
+	0x705d0100, 0x807c037c,
+	0x80700370, 0xd5250000,
+	0x0001ff00, 0x00000100,
+	0xbf0a7b7c, 0xbf85fff4,
 	0xbefe03c1, 0x907c9973,
 	0x877c817c, 0xbf06817c,
-	0xbf850002, 0xbeff0380,
-	0xbf820001, 0xbeff03c1,
-	0xb97b4306, 0x877bc17b,
-	0xbf840044, 0xbf8a0000,
-	0x877aff73, 0x04000000,
-	0xbf840040, 0x8f7b867b,
-	0x8f7b827b, 0xbef6037b,
-	0xb9702a05, 0x80708170,
-	0xbf0d9973, 0xbf850002,
-	0x8f708970, 0xbf820001,
-	0x8f708a70, 0xb97a1e06,
-	0x8f7a8a7a, 0x80707a70,
-	0x8070ff70, 0x00000200,
-	0x8070ff70, 0x00000080,
-	0xbef603ff, 0x01000000,
-	0xd7650000, 0x000100c1,
-	0xd7660000, 0x000200c1,
-	0x16000084, 0x907c9973,
+	0xbf850004, 0xbef003ff,
+	0x00000200, 0xbeff0380,
+	0xbf820003, 0xbef003ff,
+	0x00000400, 0xbeff03c1,
+	0xb97b2a05, 0x807b817b,
+	0x8f7b827b, 0x907c9973,
 	0x877c817c, 0xbf06817c,
-	0xbefc0380, 0xbf850012,
-	0xbe8303ff, 0x00000080,
-	0xbf800000, 0xbf800000,
-	0xbf800000, 0xd8d80000,
-	0x01000000, 0xbf8c0000,
-	0xe0704000, 0x705d0100,
-	0x807c037c, 0x80700370,
-	0xd5250000, 0x0001ff00,
-	0x00000080, 0xbf0a7b7c,
-	0xbf85fff4, 0xbf820011,
-	0xbe8303ff, 0x00000100,
-	0xbf800000, 0xbf800000,
-	0xbf800000, 0xd8d80000,
-	0x01000000, 0xbf8c0000,
-	0xe0704000, 0x705d0100,
-	0x807c037c, 0x80700370,
-	0xd5250000, 0x0001ff00,
-	0x00000100, 0xbf0a7b7c,
-	0xbf85fff4, 0xbefe03c1,
-	0x907c9973, 0x877c817c,
-	0xbf06817c, 0xbf850004,
-	0xbef003ff, 0x00000200,
-	0xbeff0380, 0xbf820003,
-	0xbef003ff, 0x00000400,
-	0xbeff03c1, 0xb97b2a05,
-	0x807b817b, 0x8f7b827b,
-	0x907c9973, 0x877c817c,
-	0xbf06817c, 0xbf850017,
+	0xbf850017, 0xbef603ff,
+	0x01000000, 0xbefc0384,
+	0xbf0a7b7c, 0xbf840037,
+	0x7e008700, 0x7e028701,
+	0x7e048702, 0x7e068703,
+	0xe0704000, 0x705d0000,
+	0xe0704080, 0x705d0100,
+	0xe0704100, 0x705d0200,
+	0xe0704180, 0x705d0300,
+	0x807c847c, 0x8070ff70,
+	0x00000200, 0xbf0a7b7c,
+	0xbf85ffef, 0xbf820025,
 	0xbef603ff, 0x01000000,
 	0xbefc0384, 0xbf0a7b7c,
-	0xbf840037, 0x7e008700,
+	0xbf840011, 0x7e008700,
 	0x7e028701, 0x7e048702,
 	0x7e068703, 0xe0704000,
-	0x705d0000, 0xe0704080,
-	0x705d0100, 0xe0704100,
-	0x705d0200, 0xe0704180,
+	0x705d0000, 0xe0704100,
+	0x705d0100, 0xe0704200,
+	0x705d0200, 0xe0704300,
 	0x705d0300, 0x807c847c,
-	0x8070ff70, 0x00000200,
+	0x8070ff70, 0x00000400,
 	0xbf0a7b7c, 0xbf85ffef,
-	0xbf820025, 0xbef603ff,
-	0x01000000, 0xbefc0384,
-	0xbf0a7b7c, 0xbf840020,
-	0x7e008700, 0x7e028701,
-	0x7e048702, 0x7e068703,
+	0xb97b1e06, 0x877bc17b,
+	0xbf84000c, 0x8f7b837b,
+	0x807b7c7b, 0xbefe03c1,
+	0xbeff0380, 0x7e008700,
 	0xe0704000, 0x705d0000,
-	0xe0704100, 0x705d0100,
-	0xe0704200, 0x705d0200,
-	0xe0704300, 0x705d0300,
-	0x807c847c, 0x8070ff70,
-	0x00000400, 0xbf0a7b7c,
-	0xbf85ffef, 0xb97b1e06,
-	0x877bc17b, 0xbf84000c,
-	0x8f7b837b, 0x807b7c7b,
-	0xbefe03c1, 0xbeff0380,
-	0x7e008700, 0xe0704000,
-	0x705d0000, 0x807c817c,
-	0x8070ff70, 0x00000080,
-	0xbf0a7b7c, 0xbf85fff8,
-	0xbf82013c, 0xbef4037e,
-	0x8775ff7f, 0x0000ffff,
-	0x8875ff75, 0x00040000,
-	0xbef60380, 0xbef703ff,
-	0x10807fac, 0x876eff7f,
-	0x08000000, 0x906e836e,
-	0x88776e77, 0x876eff7f,
-	0x70000000, 0x906e816e,
-	0x88776e77, 0xb97202dc,
-	0x8f729972, 0x8872727f,
+	0x807c817c, 0x8070ff70,
+	0x00000080, 0xbf0a7b7c,
+	0xbf85fff8, 0xbf82013b,
+	0xbef4037e, 0x8775ff7f,
+	0x0000ffff, 0x8875ff75,
+	0x00040000, 0xbef60380,
+	0xbef703ff, 0x10807fac,
+	0xb97202dc, 0x8f729972,
 	0x876eff7f, 0x04000000,
 	0xbf840034, 0xbefe03c1,
 	0x907c9972, 0x877c817c,
@@ -2345,10 +2372,11 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
 	0xb96f2a05, 0x806f816f,
 	0x8f6f826f, 0x907c9972,
 	0x877c817c, 0xbf06817c,
-	0xbf850021, 0xbef603ff,
+	0xbf850024, 0xbef603ff,
 	0x01000000, 0xbeee0378,
 	0x8078ff78, 0x00000200,
-	0xbefc0384, 0xe0304000,
+	0xbefc0384, 0xbf0a6f7c,
+	0xbf840050, 0xe0304000,
 	0x785d0000, 0xe0304080,
 	0x785d0100, 0xe0304100,
 	0x785d0200, 0xe0304180,
@@ -2361,94 +2389,97 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
 	0x6e5d0000, 0xe0304080,
 	0x6e5d0100, 0xe0304100,
 	0x6e5d0200, 0xe0304180,
-	0x6e5d0300, 0xbf820032,
-	0xbef603ff, 0x01000000,
-	0xbeee0378, 0x8078ff78,
-	0x00000400, 0xbefc0384,
+	0x6e5d0300, 0xbf8c3f70,
+	0xbf820034, 0xbef603ff,
+	0x01000000, 0xbeee0378,
+	0x8078ff78, 0x00000400,
+	0xbefc0384, 0xbf0a6f7c,
+	0xbf840012, 0xe0304000,
+	0x785d0000, 0xe0304100,
+	0x785d0100, 0xe0304200,
+	0x785d0200, 0xe0304300,
+	0x785d0300, 0xbf8c3f70,
+	0x7e008500, 0x7e028501,
+	0x7e048502, 0x7e068503,
+	0x807c847c, 0x8078ff78,
+	0x00000400, 0xbf0a6f7c,
+	0xbf85ffee, 0xb96f1e06,
+	0x876fc16f, 0xbf84000e,
+	0x8f6f836f, 0x806f7c6f,
+	0xbefe03c1, 0xbeff0380,
 	0xe0304000, 0x785d0000,
-	0xe0304100, 0x785d0100,
-	0xe0304200, 0x785d0200,
-	0xe0304300, 0x785d0300,
 	0xbf8c3f70, 0x7e008500,
-	0x7e028501, 0x7e048502,
-	0x7e068503, 0x807c847c,
-	0x8078ff78, 0x00000400,
-	0xbf0a6f7c, 0xbf85ffee,
-	0xb96f1e06, 0x876fc16f,
-	0xbf84000e, 0x8f6f836f,
-	0x806f7c6f, 0xbefe03c1,
-	0xbeff0380, 0xe0304000,
-	0x785d0000, 0xbf8c3f70,
-	0x7e008500, 0x807c817c,
-	0x8078ff78, 0x00000080,
-	0xbf0a6f7c, 0xbf85fff7,
-	0xbeff03c1, 0xe0304000,
-	0x6e5d0000, 0xe0304100,
-	0x6e5d0100, 0xe0304200,
-	0x6e5d0200, 0xe0304300,
-	0x6e5d0300, 0xbf8c3f70,
+	0x807c817c, 0x8078ff78,
+	0x00000080, 0xbf0a6f7c,
+	0xbf85fff7, 0xbeff03c1,
+	0xe0304000, 0x6e5d0000,
+	0xe0304100, 0x6e5d0100,
+	0xe0304200, 0x6e5d0200,
+	0xe0304300, 0x6e5d0300,
+	0xbf8c3f70, 0xb9782a05,
+	0x80788178, 0xbf0d9972,
+	0xbf850002, 0x8f788978,
+	0xbf820001, 0x8f788a78,
+	0xb96e1e06, 0x8f6e8a6e,
+	0x80786e78, 0x8078ff78,
+	0x00000200, 0x80f8ff78,
+	0x00000050, 0xbef603ff,
+	0x01000000, 0xbefc03ff,
+	0x0000006c, 0x80f89078,
+	0xf429003a, 0xf0000000,
+	0xbf8cc07f, 0x80fc847c,
+	0xbf800000, 0xbe803100,
+	0xbe823102, 0x80f8a078,
+	0xf42d003a, 0xf0000000,
+	0xbf8cc07f, 0x80fc887c,
+	0xbf800000, 0xbe803100,
+	0xbe823102, 0xbe843104,
+	0xbe863106, 0x80f8c078,
+	0xf431003a, 0xf0000000,
+	0xbf8cc07f, 0x80fc907c,
+	0xbf800000, 0xbe803100,
+	0xbe823102, 0xbe843104,
+	0xbe863106, 0xbe883108,
+	0xbe8a310a, 0xbe8c310c,
+	0xbe8e310e, 0xbf06807c,
+	0xbf84fff0, 0xba80f801,
+	0x00000000, 0xbf8a0000,
 	0xb9782a05, 0x80788178,
 	0xbf0d9972, 0xbf850002,
 	0x8f788978, 0xbf820001,
 	0x8f788a78, 0xb96e1e06,
 	0x8f6e8a6e, 0x80786e78,
 	0x8078ff78, 0x00000200,
-	0x80f8ff78, 0x00000050,
 	0xbef603ff, 0x01000000,
-	0xbefc03ff, 0x0000006c,
-	0x80f89078, 0xf429003a,
-	0xf0000000, 0xbf8cc07f,
-	0x80fc847c, 0xbf800000,
-	0xbe803100, 0xbe823102,
-	0x80f8a078, 0xf42d003a,
-	0xf0000000, 0xbf8cc07f,
-	0x80fc887c, 0xbf800000,
-	0xbe803100, 0xbe823102,
-	0xbe843104, 0xbe863106,
-	0x80f8c078, 0xf431003a,
-	0xf0000000, 0xbf8cc07f,
-	0x80fc907c, 0xbf800000,
-	0xbe803100, 0xbe823102,
-	0xbe843104, 0xbe863106,
-	0xbe883108, 0xbe8a310a,
-	0xbe8c310c, 0xbe8e310e,
-	0xbf06807c, 0xbf84fff0,
-	0xba80f801, 0x00000000,
-	0xbf8a0000, 0xb9782a05,
-	0x80788178, 0xbf0d9972,
-	0xbf850002, 0x8f788978,
-	0xbf820001, 0x8f788a78,
-	0xb96e1e06, 0x8f6e8a6e,
-	0x80786e78, 0x8078ff78,
-	0x00000200, 0xbef603ff,
-	0x01000000, 0xf4211bfa,
+	0xf4211bfa, 0xf0000000,
+	0x80788478, 0xf4211b3a,
 	0xf0000000, 0x80788478,
-	0xf4211b3a, 0xf0000000,
-	0x80788478, 0xf4211b7a,
+	0xf4211b7a, 0xf0000000,
+	0x80788478, 0xf4211c3a,
 	0xf0000000, 0x80788478,
-	0xf4211c3a, 0xf0000000,
-	0x80788478, 0xf4211c7a,
+	0xf4211c7a, 0xf0000000,
+	0x80788478, 0xf4211eba,
 	0xf0000000, 0x80788478,
-	0xf4211eba, 0xf0000000,
-	0x80788478, 0xf4211efa,
+	0xf4211efa, 0xf0000000,
+	0x80788478, 0xf4211e7a,
 	0xf0000000, 0x80788478,
-	0xf4211e7a, 0xf0000000,
-	0x80788478, 0xf4211cfa,
+	0xf4211cfa, 0xf0000000,
+	0x80788478, 0xf4211bba,
 	0xf0000000, 0x80788478,
+	0xbf8cc07f, 0xb9eef814,
 	0xf4211bba, 0xf0000000,
 	0x80788478, 0xbf8cc07f,
-	0xb9eef814, 0xf4211bba,
-	0xf0000000, 0x80788478,
-	0xbf8cc07f, 0xb9eef815,
-	0xbefc036f, 0xbefe0370,
-	0xbeff0371, 0x876f7bff,
-	0x000003ff, 0xb9ef4803,
-	0x876f7bff, 0xfffff800,
-	0x906f8b6f, 0xb9efa2c3,
-	0xb9f3f801, 0xb96e2a05,
-	0x806e816e, 0xbf0d9972,
-	0xbf850002, 0x8f6e896e,
-	0xbf820001, 0x8f6e8a6e,
+	0xb9eef815, 0xbefc036f,
+	0xbefe0370, 0xbeff0371,
+	0x876f7bff, 0x000003ff,
+	0xb9ef4803, 0x876f7bff,
+	0xfffff800, 0x906f8b6f,
+	0xb9efa2c3, 0xb9f3f801,
+	0xb96e2a05, 0x806e816e,
+	0xbf0d9972, 0xbf850002,
+	0x8f6e896e, 0xbf820001,
+	0x8f6e8a6e, 0xb96f1e06,
+	0x8f6f8a6f, 0x806e6f6e,
 	0x806eff6e, 0x00000200,
 	0x806e746e, 0x826f8075,
 	0x876fff6f, 0x0000ffff,
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm
index 5081f91190b8..0348191e8592 100644
--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm
@@ -35,10 +35,9 @@
 
 var SINGLE_STEP_MISSED_WORKAROUND		= 1	//workaround for lost MODE.DEBUG_EN exception when SAVECTX raised
 
-var SQ_WAVE_STATUS_INST_ATC_SHIFT		= 23
-var SQ_WAVE_STATUS_INST_ATC_MASK		= 0x00800000
 var SQ_WAVE_STATUS_SPI_PRIO_MASK		= 0x00000006
 var SQ_WAVE_STATUS_HALT_MASK			= 0x2000
+var SQ_WAVE_STATUS_ECC_ERR_MASK			= 0x20000
 
 var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT		= 12
 var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE		= 9
@@ -52,8 +51,10 @@ var SQ_WAVE_IB_STS2_WAVE64_SHIFT		= 11
 var SQ_WAVE_IB_STS2_WAVE64_SIZE			= 1
 
 var SQ_WAVE_TRAPSTS_SAVECTX_MASK		= 0x400
-var SQ_WAVE_TRAPSTS_EXCE_MASK			= 0x1FF
+var SQ_WAVE_TRAPSTS_EXCP_MASK			= 0x1FF
 var SQ_WAVE_TRAPSTS_SAVECTX_SHIFT		= 10
+var SQ_WAVE_TRAPSTS_ADDR_WATCH_MASK		= 0x80
+var SQ_WAVE_TRAPSTS_ADDR_WATCH_SHIFT		= 7
 var SQ_WAVE_TRAPSTS_MEM_VIOL_MASK		= 0x100
 var SQ_WAVE_TRAPSTS_MEM_VIOL_SHIFT		= 8
 var SQ_WAVE_TRAPSTS_PRE_SAVECTX_MASK		= 0x3FF
@@ -63,46 +64,37 @@ var SQ_WAVE_TRAPSTS_POST_SAVECTX_MASK		= 0xFFFFF800
 var SQ_WAVE_TRAPSTS_POST_SAVECTX_SHIFT		= 11
 var SQ_WAVE_TRAPSTS_POST_SAVECTX_SIZE		= 21
 var SQ_WAVE_TRAPSTS_ILLEGAL_INST_MASK		= 0x800
+var SQ_WAVE_TRAPSTS_EXCP_HI_MASK		= 0x7000
+
+var SQ_WAVE_MODE_EXCP_EN_SHIFT			= 12
+var SQ_WAVE_MODE_EXCP_EN_ADDR_WATCH_SHIFT	= 19
 
-var SQ_WAVE_IB_STS_RCNT_SHIFT			= 16
 var SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT		= 15
 var SQ_WAVE_IB_STS_REPLAY_W64H_SHIFT		= 25
-var SQ_WAVE_IB_STS_REPLAY_W64H_SIZE		= 1
 var SQ_WAVE_IB_STS_REPLAY_W64H_MASK		= 0x02000000
-var SQ_WAVE_IB_STS_FIRST_REPLAY_SIZE		= 1
-var SQ_WAVE_IB_STS_RCNT_SIZE			= 6
 var SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK	= 0x003F8000
-var SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK_NEG	= 0x00007FFF
 
 var SQ_WAVE_MODE_DEBUG_EN_MASK			= 0x800
 
-var SQ_BUF_RSRC_WORD1_ATC_SHIFT			= 24
-var SQ_BUF_RSRC_WORD3_MTYPE_SHIFT		= 27
-
 // bits [31:24] unused by SPI debug data
 var TTMP11_SAVE_REPLAY_W64H_SHIFT		= 31
 var TTMP11_SAVE_REPLAY_W64H_MASK		= 0x80000000
 var TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT		= 24
 var TTMP11_SAVE_RCNT_FIRST_REPLAY_MASK		= 0x7F000000
+var TTMP11_DEBUG_TRAP_ENABLED_SHIFT		= 23
+var TTMP11_DEBUG_TRAP_ENABLED_MASK		= 0x800000
 
 // SQ_SEL_X/Y/Z/W, BUF_NUM_FORMAT_FLOAT, (0 for MUBUF stride[17:14]
 // when ADD_TID_ENABLE and BUF_DATA_FORMAT_32 for MTBUF), ADD_TID_ENABLE
 var S_SAVE_BUF_RSRC_WORD1_STRIDE		= 0x00040000
 var S_SAVE_BUF_RSRC_WORD3_MISC			= 0x10807FAC
-
-var S_SAVE_SPI_INIT_ATC_MASK			= 0x08000000
-var S_SAVE_SPI_INIT_ATC_SHIFT			= 27
-var S_SAVE_SPI_INIT_MTYPE_MASK			= 0x70000000
-var S_SAVE_SPI_INIT_MTYPE_SHIFT			= 28
+var S_SAVE_PC_HI_TRAP_ID_MASK			= 0x00FF0000
+var S_SAVE_PC_HI_HT_MASK			= 0x01000000
 var S_SAVE_SPI_INIT_FIRST_WAVE_MASK		= 0x04000000
 var S_SAVE_SPI_INIT_FIRST_WAVE_SHIFT		= 26
 
-var S_SAVE_PC_HI_RCNT_SHIFT			= 26
-var S_SAVE_PC_HI_RCNT_MASK			= 0xFC000000
-var S_SAVE_PC_HI_FIRST_REPLAY_SHIFT		= 25
-var S_SAVE_PC_HI_FIRST_REPLAY_MASK		= 0x02000000
-var S_SAVE_PC_HI_REPLAY_W64H_SHIFT		= 24
-var S_SAVE_PC_HI_REPLAY_W64H_MASK		= 0x01000000
+var S_SAVE_PC_HI_FIRST_WAVE_MASK		= 0x80000000
+var S_SAVE_PC_HI_FIRST_WAVE_SHIFT		= 31
 
 var s_sgpr_save_num				= 108
 
@@ -130,19 +122,10 @@ var s_save_ttmps_hi				= s_save_trapsts
 var S_RESTORE_BUF_RSRC_WORD1_STRIDE		= S_SAVE_BUF_RSRC_WORD1_STRIDE
 var S_RESTORE_BUF_RSRC_WORD3_MISC		= S_SAVE_BUF_RSRC_WORD3_MISC
 
-var S_RESTORE_SPI_INIT_ATC_MASK			= 0x08000000
-var S_RESTORE_SPI_INIT_ATC_SHIFT		= 27
-var S_RESTORE_SPI_INIT_MTYPE_MASK		= 0x70000000
-var S_RESTORE_SPI_INIT_MTYPE_SHIFT		= 28
 var S_RESTORE_SPI_INIT_FIRST_WAVE_MASK		= 0x04000000
 var S_RESTORE_SPI_INIT_FIRST_WAVE_SHIFT		= 26
 var S_WAVE_SIZE					= 25
 
-var S_RESTORE_PC_HI_RCNT_SHIFT			= S_SAVE_PC_HI_RCNT_SHIFT
-var S_RESTORE_PC_HI_RCNT_MASK			= S_SAVE_PC_HI_RCNT_MASK
-var S_RESTORE_PC_HI_FIRST_REPLAY_SHIFT		= S_SAVE_PC_HI_FIRST_REPLAY_SHIFT
-var S_RESTORE_PC_HI_FIRST_REPLAY_MASK		= S_SAVE_PC_HI_FIRST_REPLAY_MASK
-
 var s_restore_spi_init_lo			= exec_lo
 var s_restore_spi_init_hi			= exec_hi
 var s_restore_mem_offset			= ttmp12
@@ -179,51 +162,77 @@ L_JUMP_TO_RESTORE:
 
 L_SKIP_RESTORE:
 	s_getreg_b32	s_save_status, hwreg(HW_REG_STATUS)			//save STATUS since we will change SCC
-	s_andn2_b32	s_save_status, s_save_status, SQ_WAVE_STATUS_SPI_PRIO_MASK
 
-if SINGLE_STEP_MISSED_WORKAROUND
-	// No single step exceptions if MODE.DEBUG_EN=0.
-	s_getreg_b32    ttmp2, hwreg(HW_REG_MODE)
-	s_and_b32       ttmp2, ttmp2, SQ_WAVE_MODE_DEBUG_EN_MASK
-	s_cbranch_scc0  L_NO_SINGLE_STEP_WORKAROUND
+	// Clear SPI_PRIO: do not save with elevated priority.
+	// Clear ECC_ERR: prevents SQC store and triggers FATAL_HALT if setreg'd.
+	s_andn2_b32	s_save_status, s_save_status, SQ_WAVE_STATUS_SPI_PRIO_MASK|SQ_WAVE_STATUS_ECC_ERR_MASK
+
+	s_getreg_b32	s_save_trapsts, hwreg(HW_REG_TRAPSTS)
 
-	// Second-level trap already handled exception if STATUS.HALT=1.
 	s_and_b32       ttmp2, s_save_status, SQ_WAVE_STATUS_HALT_MASK
+	s_cbranch_scc0	L_NOT_HALTED
+
+L_HALTED:
+	// Host trap may occur while wave is halted.
+	s_and_b32	ttmp2, s_save_pc_hi, S_SAVE_PC_HI_TRAP_ID_MASK
+	s_cbranch_scc1	L_FETCH_2ND_TRAP
 
+L_CHECK_SAVE:
+	s_and_b32	ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_SAVECTX_MASK
+	s_cbranch_scc1	L_SAVE
+
+	// Wave is halted but neither host trap nor SAVECTX is raised.
+	// Caused by instruction fetch memory violation.
+	// Spin wait until context saved to prevent interrupt storm.
+	s_sleep		0x10
+	s_getreg_b32	s_save_trapsts, hwreg(HW_REG_TRAPSTS)
+	s_branch	L_CHECK_SAVE
+
+L_NOT_HALTED:
+	// Let second-level handle non-SAVECTX exception or trap.
+	// Any concurrent SAVECTX will be handled upon re-entry once halted.
+
+	// Check non-maskable exceptions. memory_violation, illegal_instruction
+	// and xnack_error exceptions always cause the wave to enter the trap
+	// handler.
+	s_and_b32	ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_MEM_VIOL_MASK|SQ_WAVE_TRAPSTS_ILLEGAL_INST_MASK
+	s_cbranch_scc1	L_FETCH_2ND_TRAP
+
+	// Check for maskable exceptions in trapsts.excp and trapsts.excp_hi.
+	// Maskable exceptions only cause the wave to enter the trap handler if
+	// their respective bit in mode.excp_en is set.
+	s_and_b32	ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_EXCP_MASK|SQ_WAVE_TRAPSTS_EXCP_HI_MASK
+	s_cbranch_scc0	L_CHECK_TRAP_ID
+
+	s_and_b32	ttmp3, s_save_trapsts, SQ_WAVE_TRAPSTS_ADDR_WATCH_MASK|SQ_WAVE_TRAPSTS_EXCP_HI_MASK
+	s_cbranch_scc0	L_NOT_ADDR_WATCH
+	s_bitset1_b32	ttmp2, SQ_WAVE_TRAPSTS_ADDR_WATCH_SHIFT // Check all addr_watch[123] exceptions against excp_en.addr_watch
+
+L_NOT_ADDR_WATCH:
+	s_getreg_b32	ttmp3, hwreg(HW_REG_MODE)
+	s_lshl_b32	ttmp2, ttmp2, SQ_WAVE_MODE_EXCP_EN_SHIFT
+	s_and_b32	ttmp2, ttmp2, ttmp3
+	s_cbranch_scc1	L_FETCH_2ND_TRAP
+
+L_CHECK_TRAP_ID:
+	// Check trap_id != 0
+	s_and_b32	ttmp2, s_save_pc_hi, S_SAVE_PC_HI_TRAP_ID_MASK
+	s_cbranch_scc1	L_FETCH_2ND_TRAP
+
+if SINGLE_STEP_MISSED_WORKAROUND
 	// Prioritize single step exception over context save.
 	// Second-level trap will halt wave and RFE, re-entering for SAVECTX.
-	s_cbranch_scc0  L_FETCH_2ND_TRAP
-
-L_NO_SINGLE_STEP_WORKAROUND:
+	s_getreg_b32	ttmp2, hwreg(HW_REG_MODE)
+	s_and_b32	ttmp2, ttmp2, SQ_WAVE_MODE_DEBUG_EN_MASK
+	s_cbranch_scc1	L_FETCH_2ND_TRAP
 end
 
-
-	s_getreg_b32	s_save_trapsts, hwreg(HW_REG_TRAPSTS)
-	s_and_b32	ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_SAVECTX_MASK	//check whether this is for save
+	s_and_b32	ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_SAVECTX_MASK
 	s_cbranch_scc1	L_SAVE
 
-	// If STATUS.MEM_VIOL is asserted then halt the wave to prevent
-	// the exception raising again and blocking context save.
-	s_and_b32	ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_MEM_VIOL_MASK
-	s_cbranch_scc0	L_FETCH_2ND_TRAP
-	s_or_b32	s_save_status, s_save_status, SQ_WAVE_STATUS_HALT_MASK
-
 L_FETCH_2ND_TRAP:
-
 #if ASIC_TARGET_NAVI1X
-	// Preserve and clear scalar XNACK state before issuing scalar loads.
-	// Save IB_STS.REPLAY_W64H[25], RCNT[21:16], FIRST_REPLAY[15] into
-	// unused space ttmp11[31:24].
-	s_andn2_b32	ttmp11, ttmp11, (TTMP11_SAVE_REPLAY_W64H_MASK | TTMP11_SAVE_RCNT_FIRST_REPLAY_MASK)
-	s_getreg_b32	ttmp2, hwreg(HW_REG_IB_STS)
-	s_and_b32	ttmp3, ttmp2, SQ_WAVE_IB_STS_REPLAY_W64H_MASK
-	s_lshl_b32	ttmp3, ttmp3, (TTMP11_SAVE_REPLAY_W64H_SHIFT - SQ_WAVE_IB_STS_REPLAY_W64H_SHIFT)
-	s_or_b32	ttmp11, ttmp11, ttmp3
-	s_and_b32	ttmp3, ttmp2, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK
-	s_lshl_b32	ttmp3, ttmp3, (TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT - SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT)
-	s_or_b32	ttmp11, ttmp11, ttmp3
-	s_andn2_b32	ttmp2, ttmp2, (SQ_WAVE_IB_STS_REPLAY_W64H_MASK | SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK)
-	s_setreg_b32	hwreg(HW_REG_IB_STS), ttmp2
+	save_and_clear_ib_sts(ttmp14, ttmp15)
 #endif
 
 	// Read second-level TBA/TMA from first-level TMA and jump if available.
@@ -232,31 +241,49 @@ L_FETCH_2ND_TRAP:
 	s_getreg_b32	ttmp14, hwreg(HW_REG_SHADER_TMA_LO)
 	s_getreg_b32	ttmp15, hwreg(HW_REG_SHADER_TMA_HI)
 	s_lshl_b64	[ttmp14, ttmp15], [ttmp14, ttmp15], 0x8
+
+	s_load_dword    ttmp2, [ttmp14, ttmp15], 0x10 glc:1			// debug trap enabled flag
+	s_waitcnt       lgkmcnt(0)
+	s_lshl_b32      ttmp2, ttmp2, TTMP11_DEBUG_TRAP_ENABLED_SHIFT
+	s_andn2_b32     ttmp11, ttmp11, TTMP11_DEBUG_TRAP_ENABLED_MASK
+	s_or_b32        ttmp11, ttmp11, ttmp2
+
 	s_load_dwordx2	[ttmp2, ttmp3], [ttmp14, ttmp15], 0x0 glc:1		// second-level TBA
 	s_waitcnt	lgkmcnt(0)
 	s_load_dwordx2	[ttmp14, ttmp15], [ttmp14, ttmp15], 0x8 glc:1		// second-level TMA
 	s_waitcnt	lgkmcnt(0)
+
 	s_and_b64	[ttmp2, ttmp3], [ttmp2, ttmp3], [ttmp2, ttmp3]
 	s_cbranch_scc0	L_NO_NEXT_TRAP						// second-level trap handler not been set
 	s_setpc_b64	[ttmp2, ttmp3]						// jump to second-level trap handler
 
 L_NO_NEXT_TRAP:
-	s_getreg_b32	s_save_trapsts, hwreg(HW_REG_TRAPSTS)
-	s_and_b32	s_save_trapsts, s_save_trapsts, SQ_WAVE_TRAPSTS_EXCE_MASK
-	s_cbranch_scc1	L_EXCP_CASE						// Exception, jump back to the shader program directly.
-	s_add_u32	ttmp0, ttmp0, 4						// S_TRAP case, add 4 to ttmp0
-	s_addc_u32	ttmp1, ttmp1, 0
-L_EXCP_CASE:
+	// If not caused by trap then halt wave to prevent re-entry.
+	s_and_b32	ttmp2, s_save_pc_hi, (S_SAVE_PC_HI_TRAP_ID_MASK|S_SAVE_PC_HI_HT_MASK)
+	s_cbranch_scc1	L_TRAP_CASE
+	s_or_b32	s_save_status, s_save_status, SQ_WAVE_STATUS_HALT_MASK
+
+	// If the PC points to S_ENDPGM then context save will fail if STATUS.HALT is set.
+	// Rewind the PC to prevent this from occurring.
+	s_sub_u32	ttmp0, ttmp0, 0x8
+	s_subb_u32	ttmp1, ttmp1, 0x0
+
+	s_branch	L_EXIT_TRAP
+
+L_TRAP_CASE:
+	// Host trap will not cause trap re-entry.
+	s_and_b32	ttmp2, s_save_pc_hi, S_SAVE_PC_HI_HT_MASK
+	s_cbranch_scc1	L_EXIT_TRAP
+
+	// Advance past trap instruction to prevent re-entry.
+	s_add_u32	ttmp0, ttmp0, 0x4
+	s_addc_u32	ttmp1, ttmp1, 0x0
+
+L_EXIT_TRAP:
 	s_and_b32	ttmp1, ttmp1, 0xFFFF
 
 #if ASIC_TARGET_NAVI1X
-	// Restore SQ_WAVE_IB_STS.
-	s_lshr_b32	ttmp2, ttmp11, (TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT - SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT)
-	s_and_b32	ttmp3, ttmp2, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK
-	s_lshr_b32	ttmp2, ttmp11, (TTMP11_SAVE_REPLAY_W64H_SHIFT - SQ_WAVE_IB_STS_REPLAY_W64H_SHIFT)
-	s_and_b32	ttmp2, ttmp2, SQ_WAVE_IB_STS_REPLAY_W64H_MASK
-	s_or_b32	ttmp2, ttmp2, ttmp3
-	s_setreg_b32	hwreg(HW_REG_IB_STS), ttmp2
+	restore_ib_sts(ttmp14, ttmp15)
 #endif
 
 	// Restore SQ_WAVE_STATUS.
@@ -272,19 +299,7 @@ L_SAVE:
 	s_setreg_b32	hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_SAVECTX_SHIFT, 1), s_save_tmp	//clear saveCtx bit
 
 #if ASIC_TARGET_NAVI1X
-	s_getreg_b32	s_save_tmp, hwreg(HW_REG_IB_STS, SQ_WAVE_IB_STS_RCNT_SHIFT, SQ_WAVE_IB_STS_RCNT_SIZE)
-	s_lshl_b32	s_save_tmp, s_save_tmp, S_SAVE_PC_HI_RCNT_SHIFT
-	s_or_b32	s_save_pc_hi, s_save_pc_hi, s_save_tmp
-	s_getreg_b32	s_save_tmp, hwreg(HW_REG_IB_STS, SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT, SQ_WAVE_IB_STS_FIRST_REPLAY_SIZE)
-	s_lshl_b32	s_save_tmp, s_save_tmp, S_SAVE_PC_HI_FIRST_REPLAY_SHIFT
-	s_or_b32	s_save_pc_hi, s_save_pc_hi, s_save_tmp
-	s_getreg_b32	s_save_tmp, hwreg(HW_REG_IB_STS, SQ_WAVE_IB_STS_REPLAY_W64H_SHIFT, SQ_WAVE_IB_STS_REPLAY_W64H_SIZE)
-	s_lshl_b32	s_save_tmp, s_save_tmp, S_SAVE_PC_HI_REPLAY_W64H_SHIFT
-	s_or_b32	s_save_pc_hi, s_save_pc_hi, s_save_tmp
-	s_getreg_b32	s_save_tmp, hwreg(HW_REG_IB_STS)			//clear RCNT and FIRST_REPLAY and REPLAY_W64H in IB_STS
-	s_and_b32	s_save_tmp, s_save_tmp, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK_NEG
-
-	s_setreg_b32	hwreg(HW_REG_IB_STS), s_save_tmp
+	save_and_clear_ib_sts(s_save_tmp, s_save_trapsts)
 #endif
 
 	/* inform SPI the readiness and wait for SPI's go signal */
@@ -305,16 +320,57 @@ L_SLEEP:
 	s_waitcnt	lgkmcnt(0)
 #endif
 
+	// Save first_wave flag so we can clear high bits of save address.
+	s_and_b32	s_save_tmp, s_save_spi_init_hi, S_SAVE_SPI_INIT_FIRST_WAVE_MASK
+	s_lshl_b32	s_save_tmp, s_save_tmp, (S_SAVE_PC_HI_FIRST_WAVE_SHIFT - S_SAVE_SPI_INIT_FIRST_WAVE_SHIFT)
+	s_or_b32	s_save_pc_hi, s_save_pc_hi, s_save_tmp
+
+#if NO_SQC_STORE
+	// Trap temporaries must be saved via VGPR but all VGPRs are in use.
+	// There is no ttmp space to hold the resource constant for VGPR save.
+	// Save v0 by itself since it requires only two SGPRs.
+	s_mov_b32	s_save_ttmps_lo, exec_lo
+	s_and_b32	s_save_ttmps_hi, exec_hi, 0xFFFF
+	s_mov_b32	exec_lo, 0xFFFFFFFF
+	s_mov_b32	exec_hi, 0xFFFFFFFF
+	global_store_dword_addtid	v0, [s_save_ttmps_lo, s_save_ttmps_hi] slc:1 glc:1
+	v_mov_b32	v0, 0x0
+	s_mov_b32	exec_lo, s_save_ttmps_lo
+	s_mov_b32	exec_hi, s_save_ttmps_hi
+#endif
+
 	// Save trap temporaries 4-11, 13 initialized by SPI debug dispatch logic
-	// ttmp SR memory offset : size(VGPR)+size(SGPR)+0x40
+	// ttmp SR memory offset : size(VGPR)+size(SVGPR)+size(SGPR)+0x40
 	get_wave_size(s_save_ttmps_hi)
 	get_vgpr_size_bytes(s_save_ttmps_lo, s_save_ttmps_hi)
+	get_svgpr_size_bytes(s_save_ttmps_hi)
+	s_add_u32	s_save_ttmps_lo, s_save_ttmps_lo, s_save_ttmps_hi
 	s_and_b32	s_save_ttmps_hi, s_save_spi_init_hi, 0xFFFF
 	s_add_u32	s_save_ttmps_lo, s_save_ttmps_lo, get_sgpr_size_bytes()
 	s_add_u32	s_save_ttmps_lo, s_save_ttmps_lo, s_save_spi_init_lo
 	s_addc_u32	s_save_ttmps_hi, s_save_ttmps_hi, 0x0
 
-#if ASIC_TARGET_NAVI1X
+#if NO_SQC_STORE
+	v_writelane_b32	v0, ttmp4, 0x4
+	v_writelane_b32	v0, ttmp5, 0x5
+	v_writelane_b32	v0, ttmp6, 0x6
+	v_writelane_b32	v0, ttmp7, 0x7
+	v_writelane_b32	v0, ttmp8, 0x8
+	v_writelane_b32	v0, ttmp9, 0x9
+	v_writelane_b32	v0, ttmp10, 0xA
+	v_writelane_b32	v0, ttmp11, 0xB
+	v_writelane_b32	v0, ttmp13, 0xD
+	v_writelane_b32	v0, exec_lo, 0xE
+	v_writelane_b32	v0, exec_hi, 0xF
+
+	s_mov_b32	exec_lo, 0x3FFF
+	s_mov_b32	exec_hi, 0x0
+	global_store_dword_addtid	v0, [s_save_ttmps_lo, s_save_ttmps_hi] inst_offset:0x40 slc:1 glc:1
+	v_readlane_b32	ttmp14, v0, 0xE
+	v_readlane_b32	ttmp15, v0, 0xF
+	s_mov_b32	exec_lo, ttmp14
+	s_mov_b32	exec_hi, ttmp15
+#else
 	s_store_dwordx4	[ttmp4, ttmp5, ttmp6, ttmp7], [s_save_ttmps_lo, s_save_ttmps_hi], 0x50 glc:1
 	s_store_dwordx4	[ttmp8, ttmp9, ttmp10, ttmp11], [s_save_ttmps_lo, s_save_ttmps_hi], 0x60 glc:1
 	s_store_dword   ttmp13, [s_save_ttmps_lo, s_save_ttmps_hi], 0x74 glc:1
@@ -326,12 +382,6 @@ L_SLEEP:
 	s_or_b32	s_save_buf_rsrc1, s_save_buf_rsrc1, S_SAVE_BUF_RSRC_WORD1_STRIDE
 	s_mov_b32	s_save_buf_rsrc2, 0					//NUM_RECORDS initial value = 0 (in bytes) although not neccessarily inited
 	s_mov_b32	s_save_buf_rsrc3, S_SAVE_BUF_RSRC_WORD3_MISC
-	s_and_b32	s_save_tmp, s_save_spi_init_hi, S_SAVE_SPI_INIT_ATC_MASK
-	s_lshr_b32	s_save_tmp, s_save_tmp, (S_SAVE_SPI_INIT_ATC_SHIFT-SQ_BUF_RSRC_WORD1_ATC_SHIFT)
-	s_or_b32	s_save_buf_rsrc3, s_save_buf_rsrc3, s_save_tmp		//or ATC
-	s_and_b32	s_save_tmp, s_save_spi_init_hi, S_SAVE_SPI_INIT_MTYPE_MASK
-	s_lshr_b32	s_save_tmp, s_save_tmp, (S_SAVE_SPI_INIT_MTYPE_SHIFT-SQ_BUF_RSRC_WORD3_MTYPE_SHIFT)
-	s_or_b32	s_save_buf_rsrc3, s_save_buf_rsrc3, s_save_tmp		//or MTYPE
 
 	s_mov_b32	s_save_m0, m0
 
@@ -361,7 +411,9 @@ L_SAVE_4VGPR_WAVE32:
 
 	// VGPR Allocated in 4-GPR granularity
 
+#if !NO_SQC_STORE
 	buffer_store_dword	v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
+#endif
 	buffer_store_dword	v1, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:128
 	buffer_store_dword	v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:128*2
 	buffer_store_dword	v3, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:128*3
@@ -372,7 +424,9 @@ L_SAVE_4VGPR_WAVE64:
 
 	// VGPR Allocated in 4-GPR granularity
 
+#if !NO_SQC_STORE
 	buffer_store_dword	v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
+#endif
 	buffer_store_dword	v1, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256
 	buffer_store_dword	v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*2
 	buffer_store_dword	v3, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*3
@@ -397,7 +451,8 @@ L_SAVE_HWREG:
 
 	write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset)
 	write_hwreg_to_mem(s_save_pc_lo, s_save_buf_rsrc0, s_save_mem_offset)
-	write_hwreg_to_mem(s_save_pc_hi, s_save_buf_rsrc0, s_save_mem_offset)
+	s_andn2_b32	s_save_tmp, s_save_pc_hi, S_SAVE_PC_HI_FIRST_WAVE_MASK
+	write_hwreg_to_mem(s_save_tmp, s_save_buf_rsrc0, s_save_mem_offset)
 	write_hwreg_to_mem(s_save_exec_lo, s_save_buf_rsrc0, s_save_mem_offset)
 	write_hwreg_to_mem(s_save_exec_hi, s_save_buf_rsrc0, s_save_mem_offset)
 	write_hwreg_to_mem(s_save_status, s_save_buf_rsrc0, s_save_mem_offset)
@@ -418,9 +473,13 @@ L_SAVE_HWREG:
 	write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset)
 
 #if NO_SQC_STORE
-	// Write HWREG/SGPRs with 32 VGPR lanes, wave32 is common case.
+	// Write HWREGs with 16 VGPR lanes. TTMPs occupy space after this.
+	s_mov_b32       exec_lo, 0xFFFF
 	s_mov_b32	exec_hi, 0x0
 	buffer_store_dword	v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
+
+	// Write SGPRs with 32 VGPR lanes. This works in wave32 and wave64 mode.
+	s_mov_b32       exec_lo, 0xFFFFFFFF
 #endif
 
 	/* save SGPRs */
@@ -506,7 +565,7 @@ L_SAVE_LDS_NORMAL:
 	s_cbranch_scc0	L_SAVE_LDS_DONE						//no lds used? jump to L_SAVE_DONE
 
 	s_barrier								//LDS is used? wait for other waves in the same TG
-	s_and_b32	s_save_tmp, s_wave_size, S_SAVE_SPI_INIT_FIRST_WAVE_MASK
+	s_and_b32	s_save_tmp, s_save_pc_hi, S_SAVE_PC_HI_FIRST_WAVE_MASK
 	s_cbranch_scc0	L_SAVE_LDS_DONE
 
 	// first wave do LDS save;
@@ -628,7 +687,7 @@ L_SAVE_VGPR_WAVE64:
 	// VGPR store using dw burst
 	s_mov_b32	m0, 0x4							//VGPR initial index value =4
 	s_cmp_lt_u32	m0, s_save_alloc_size
-	s_cbranch_scc0	L_SAVE_VGPR_END
+	s_cbranch_scc0	L_SAVE_SHARED_VGPR
 
 L_SAVE_VGPR_W64_LOOP:
 	v_movrels_b32	v0, v0							//v0 = v[0+m0]
@@ -646,6 +705,7 @@ L_SAVE_VGPR_W64_LOOP:
 	s_cmp_lt_u32	m0, s_save_alloc_size					//scc = (m0 < s_save_alloc_size) ? 1 : 0
 	s_cbranch_scc1	L_SAVE_VGPR_W64_LOOP					//VGPR save is complete?
 
+L_SAVE_SHARED_VGPR:
 	//Below part will be the save shared vgpr part (new for gfx10)
 	s_getreg_b32	s_save_alloc_size, hwreg(HW_REG_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SIZE)
 	s_and_b32	s_save_alloc_size, s_save_alloc_size, 0xFFFFFFFF	//shared_vgpr_size is zero?
@@ -674,12 +734,7 @@ L_RESTORE:
 	s_or_b32	s_restore_buf_rsrc1, s_restore_buf_rsrc1, S_RESTORE_BUF_RSRC_WORD1_STRIDE
 	s_mov_b32	s_restore_buf_rsrc2, 0					//NUM_RECORDS initial value = 0 (in bytes)
 	s_mov_b32	s_restore_buf_rsrc3, S_RESTORE_BUF_RSRC_WORD3_MISC
-	s_and_b32	s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_ATC_MASK
-	s_lshr_b32	s_restore_tmp, s_restore_tmp, (S_RESTORE_SPI_INIT_ATC_SHIFT-SQ_BUF_RSRC_WORD1_ATC_SHIFT)
-	s_or_b32	s_restore_buf_rsrc3, s_restore_buf_rsrc3, s_restore_tmp	//or ATC
-	s_and_b32	s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_MTYPE_MASK
-	s_lshr_b32	s_restore_tmp, s_restore_tmp, (S_RESTORE_SPI_INIT_MTYPE_SHIFT-SQ_BUF_RSRC_WORD3_MTYPE_SHIFT)
-	s_or_b32	s_restore_buf_rsrc3, s_restore_buf_rsrc3, s_restore_tmp	//or MTYPE
+
 	//determine it is wave32 or wave64
 	get_wave_size(s_restore_size)
 
@@ -765,6 +820,8 @@ L_RESTORE_VGPR_NORMAL:
 	s_mov_b32	s_restore_mem_offset_save, s_restore_mem_offset		// restore start with v1, v0 will be the last
 	s_add_u32	s_restore_mem_offset, s_restore_mem_offset, 128*4
 	s_mov_b32	m0, 4							//VGPR initial index value = 4
+	s_cmp_lt_u32	m0, s_restore_alloc_size
+	s_cbranch_scc0	L_RESTORE_SGPR
 
 L_RESTORE_VGPR_WAVE32_LOOP:
 	buffer_load_dword	v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1
@@ -786,6 +843,7 @@ L_RESTORE_VGPR_WAVE32_LOOP:
 	buffer_load_dword	v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:128
 	buffer_load_dword	v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:128*2
 	buffer_load_dword	v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:128*3
+	s_waitcnt	vmcnt(0)
 
 	s_branch	L_RESTORE_SGPR
 
@@ -796,6 +854,8 @@ L_RESTORE_VGPR_WAVE64:
 	s_mov_b32	s_restore_mem_offset_save, s_restore_mem_offset		// restore start with v4, v0 will be the last
 	s_add_u32	s_restore_mem_offset, s_restore_mem_offset, 256*4
 	s_mov_b32	m0, 4							//VGPR initial index value = 4
+	s_cmp_lt_u32	m0, s_restore_alloc_size
+	s_cbranch_scc0	L_RESTORE_SHARED_VGPR
 
 L_RESTORE_VGPR_WAVE64_LOOP:
 	buffer_load_dword	v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1
@@ -812,6 +872,7 @@ L_RESTORE_VGPR_WAVE64_LOOP:
 	s_cmp_lt_u32	m0, s_restore_alloc_size				//scc = (m0 < s_restore_alloc_size) ? 1 : 0
 	s_cbranch_scc1	L_RESTORE_VGPR_WAVE64_LOOP				//VGPR restore (except v0) is complete?
 
+L_RESTORE_SHARED_VGPR:
 	//Below part will be the restore shared vgpr part (new for gfx10)
 	s_getreg_b32	s_restore_alloc_size, hwreg(HW_REG_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SIZE)	//shared_vgpr_size
 	s_and_b32	s_restore_alloc_size, s_restore_alloc_size, 0xFFFFFFFF	//shared_vgpr_size is zero?
@@ -945,8 +1006,10 @@ L_RESTORE_HWREG:
 	s_setreg_b32	hwreg(HW_REG_MODE), s_restore_mode
 
 	// Restore trap temporaries 4-11, 13 initialized by SPI debug dispatch logic
-	// ttmp SR memory offset : size(VGPR)+size(SGPR)+0x40
+	// ttmp SR memory offset : size(VGPR)+size(SVGPR)+size(SGPR)+0x40
 	get_vgpr_size_bytes(s_restore_ttmps_lo, s_restore_size)
+	get_svgpr_size_bytes(s_restore_ttmps_hi)
+	s_add_u32	s_restore_ttmps_lo, s_restore_ttmps_lo, s_restore_ttmps_hi
 	s_add_u32	s_restore_ttmps_lo, s_restore_ttmps_lo, get_sgpr_size_bytes()
 	s_add_u32	s_restore_ttmps_lo, s_restore_ttmps_lo, s_restore_buf_rsrc0
 	s_addc_u32	s_restore_ttmps_hi, s_restore_buf_rsrc1, 0x0
@@ -957,23 +1020,7 @@ L_RESTORE_HWREG:
 	s_waitcnt	lgkmcnt(0)
 
 #if ASIC_TARGET_NAVI1X
-	s_and_b32	s_restore_m0, s_restore_pc_hi, S_SAVE_PC_HI_RCNT_MASK
-	s_lshr_b32	s_restore_m0, s_restore_m0, S_SAVE_PC_HI_RCNT_SHIFT
-	s_lshl_b32	s_restore_m0, s_restore_m0, SQ_WAVE_IB_STS_RCNT_SHIFT
-	s_mov_b32	s_restore_tmp, 0x0
-	s_or_b32	s_restore_tmp, s_restore_tmp, s_restore_m0
-	s_and_b32	s_restore_m0, s_restore_pc_hi, S_SAVE_PC_HI_FIRST_REPLAY_MASK
-	s_lshr_b32	s_restore_m0, s_restore_m0, S_SAVE_PC_HI_FIRST_REPLAY_SHIFT
-	s_lshl_b32	s_restore_m0, s_restore_m0, SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT
-	s_or_b32	s_restore_tmp, s_restore_tmp, s_restore_m0
-	s_and_b32	s_restore_m0, s_restore_pc_hi, S_SAVE_PC_HI_REPLAY_W64H_MASK
-	s_lshr_b32	s_restore_m0, s_restore_m0, S_SAVE_PC_HI_REPLAY_W64H_SHIFT
-	s_lshl_b32	s_restore_m0, s_restore_m0, SQ_WAVE_IB_STS_REPLAY_W64H_SHIFT
-	s_or_b32	s_restore_tmp, s_restore_tmp, s_restore_m0
-
-	s_and_b32	s_restore_m0, s_restore_status, SQ_WAVE_STATUS_INST_ATC_MASK
-	s_lshr_b32	s_restore_m0, s_restore_m0, SQ_WAVE_STATUS_INST_ATC_SHIFT
-	s_setreg_b32 	hwreg(HW_REG_IB_STS), s_restore_tmp
+	restore_ib_sts(s_restore_tmp, s_restore_m0)
 #endif
 
 	s_and_b32	s_restore_pc_hi, s_restore_pc_hi, 0x0000ffff		//pc[47:32] //Do it here in order not to affect STATUS
@@ -1089,5 +1136,29 @@ end
 function get_wave_size(s_reg)
 	s_getreg_b32	s_reg, hwreg(HW_REG_IB_STS2,SQ_WAVE_IB_STS2_WAVE64_SHIFT,SQ_WAVE_IB_STS2_WAVE64_SIZE)
 	s_lshl_b32	s_reg, s_reg, S_WAVE_SIZE
-	s_or_b32	s_reg, s_save_spi_init_hi, s_reg			//share with exec_hi, it's at bit25
+end
+
+function save_and_clear_ib_sts(tmp1, tmp2)
+	// Preserve and clear scalar XNACK state before issuing scalar loads.
+	// Save IB_STS.REPLAY_W64H[25], RCNT[21:16], FIRST_REPLAY[15] into
+	// unused space ttmp11[31:24].
+	s_andn2_b32	ttmp11, ttmp11, (TTMP11_SAVE_REPLAY_W64H_MASK | TTMP11_SAVE_RCNT_FIRST_REPLAY_MASK)
+	s_getreg_b32	tmp1, hwreg(HW_REG_IB_STS)
+	s_and_b32	tmp2, tmp1, SQ_WAVE_IB_STS_REPLAY_W64H_MASK
+	s_lshl_b32	tmp2, tmp2, (TTMP11_SAVE_REPLAY_W64H_SHIFT - SQ_WAVE_IB_STS_REPLAY_W64H_SHIFT)
+	s_or_b32	ttmp11, ttmp11, tmp2
+	s_and_b32	tmp2, tmp1, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK
+	s_lshl_b32	tmp2, tmp2, (TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT - SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT)
+	s_or_b32	ttmp11, ttmp11, tmp2
+	s_andn2_b32	tmp1, tmp1, (SQ_WAVE_IB_STS_REPLAY_W64H_MASK | SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK)
+	s_setreg_b32	hwreg(HW_REG_IB_STS), tmp1
+end
+
+function restore_ib_sts(tmp1, tmp2)
+	s_lshr_b32	tmp1, ttmp11, (TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT - SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT)
+	s_and_b32	tmp2, tmp1, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK
+	s_lshr_b32	tmp1, ttmp11, (TTMP11_SAVE_REPLAY_W64H_SHIFT - SQ_WAVE_IB_STS_REPLAY_W64H_SHIFT)
+	s_and_b32	tmp1, tmp1, SQ_WAVE_IB_STS_REPLAY_W64H_MASK
+	s_or_b32	tmp1, tmp1, tmp2
+	s_setreg_b32	hwreg(HW_REG_IB_STS), tmp1
 end
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm
index eed78a04e7c7..6770cbe3250a 100644
--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm
@@ -46,8 +46,6 @@ var SINGLE_STEP_MISSED_WORKAROUND   =	1		    //workaround for lost MODE.DEBUG_EN
 /**************************************************************************/
 /*			variables					  */
 /**************************************************************************/
-var SQ_WAVE_STATUS_INST_ATC_SHIFT  = 23
-var SQ_WAVE_STATUS_INST_ATC_MASK   = 0x00800000
 var SQ_WAVE_STATUS_SPI_PRIO_SHIFT  = 1
 var SQ_WAVE_STATUS_SPI_PRIO_MASK   = 0x00000006
 var SQ_WAVE_STATUS_HALT_MASK       = 0x2000
@@ -56,6 +54,7 @@ var SQ_WAVE_STATUS_PRE_SPI_PRIO_SIZE    = 1
 var SQ_WAVE_STATUS_POST_SPI_PRIO_SHIFT  = 3
 var SQ_WAVE_STATUS_POST_SPI_PRIO_SIZE   = 29
 var SQ_WAVE_STATUS_ALLOW_REPLAY_MASK    = 0x400000
+var SQ_WAVE_STATUS_ECC_ERR_MASK         = 0x20000
 
 var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT	= 12
 var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE	= 9
@@ -72,8 +71,10 @@ var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT	= 8
 #endif
 
 var SQ_WAVE_TRAPSTS_SAVECTX_MASK    =	0x400
-var SQ_WAVE_TRAPSTS_EXCE_MASK	    =	0x1FF			// Exception mask
+var SQ_WAVE_TRAPSTS_EXCP_MASK	    =	0x1FF
 var SQ_WAVE_TRAPSTS_SAVECTX_SHIFT   =	10
+var SQ_WAVE_TRAPSTS_ADDR_WATCH_MASK =	0x80
+var SQ_WAVE_TRAPSTS_ADDR_WATCH_SHIFT =	7
 var SQ_WAVE_TRAPSTS_MEM_VIOL_MASK   =	0x100
 var SQ_WAVE_TRAPSTS_MEM_VIOL_SHIFT  =	8
 var SQ_WAVE_TRAPSTS_PRE_SAVECTX_MASK	=   0x3FF
@@ -83,37 +84,30 @@ var SQ_WAVE_TRAPSTS_POST_SAVECTX_MASK	=   0xFFFFF800
 var SQ_WAVE_TRAPSTS_POST_SAVECTX_SHIFT	=   11
 var SQ_WAVE_TRAPSTS_POST_SAVECTX_SIZE	=   21
 var SQ_WAVE_TRAPSTS_ILLEGAL_INST_MASK	=   0x800
+var SQ_WAVE_TRAPSTS_EXCP_HI_MASK	=   0x7000
 var SQ_WAVE_TRAPSTS_XNACK_ERROR_MASK	=   0x10000000
 
-var SQ_WAVE_IB_STS_RCNT_SHIFT		=   16			//FIXME
+var SQ_WAVE_MODE_EXCP_EN_SHIFT		=   12
+var SQ_WAVE_MODE_EXCP_EN_ADDR_WATCH_SHIFT	= 19
+
 var SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT	=   15			//FIXME
 var SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK	= 0x1F8000
-var SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK_NEG	= 0x00007FFF	//FIXME
 
 var SQ_WAVE_MODE_DEBUG_EN_MASK		=   0x800
 
-var SQ_BUF_RSRC_WORD1_ATC_SHIFT	    =	24
-var SQ_BUF_RSRC_WORD3_MTYPE_SHIFT   =	27
-
 var TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT	=   26			// bits [31:26] unused by SPI debug data
 var TTMP11_SAVE_RCNT_FIRST_REPLAY_MASK	=   0xFC000000
+var TTMP11_DEBUG_TRAP_ENABLED_SHIFT	=   23
+var TTMP11_DEBUG_TRAP_ENABLED_MASK	=   0x800000
 
 /*	Save	    */
 var S_SAVE_BUF_RSRC_WORD1_STRIDE	=   0x00040000		//stride is 4 bytes
 var S_SAVE_BUF_RSRC_WORD3_MISC		=   0x00807FAC		//SQ_SEL_X/Y/Z/W, BUF_NUM_FORMAT_FLOAT, (0 for MUBUF stride[17:14] when ADD_TID_ENABLE and BUF_DATA_FORMAT_32 for MTBUF), ADD_TID_ENABLE
-
-var S_SAVE_SPI_INIT_ATC_MASK		=   0x08000000		//bit[27]: ATC bit
-var S_SAVE_SPI_INIT_ATC_SHIFT		=   27
-var S_SAVE_SPI_INIT_MTYPE_MASK		=   0x70000000		//bit[30:28]: Mtype
-var S_SAVE_SPI_INIT_MTYPE_SHIFT		=   28
+var S_SAVE_PC_HI_TRAP_ID_MASK		=   0x00FF0000
+var S_SAVE_PC_HI_HT_MASK		=   0x01000000
 var S_SAVE_SPI_INIT_FIRST_WAVE_MASK	=   0x04000000		//bit[26]: FirstWaveInTG
 var S_SAVE_SPI_INIT_FIRST_WAVE_SHIFT	=   26
 
-var S_SAVE_PC_HI_RCNT_SHIFT		=   27			//FIXME	 check with Brian to ensure all fields other than PC[47:0] can be used
-var S_SAVE_PC_HI_RCNT_MASK		=   0xF8000000		//FIXME
-var S_SAVE_PC_HI_FIRST_REPLAY_SHIFT	=   26			//FIXME
-var S_SAVE_PC_HI_FIRST_REPLAY_MASK	=   0x04000000		//FIXME
-
 var s_save_spi_init_lo		    =	exec_lo
 var s_save_spi_init_hi		    =	exec_hi
 
@@ -140,18 +134,9 @@ var s_save_ttmps_hi	    =	s_save_trapsts		//no conflict
 var S_RESTORE_BUF_RSRC_WORD1_STRIDE	    =	S_SAVE_BUF_RSRC_WORD1_STRIDE
 var S_RESTORE_BUF_RSRC_WORD3_MISC	    =	S_SAVE_BUF_RSRC_WORD3_MISC
 
-var S_RESTORE_SPI_INIT_ATC_MASK		    =	0x08000000	    //bit[27]: ATC bit
-var S_RESTORE_SPI_INIT_ATC_SHIFT	    =	27
-var S_RESTORE_SPI_INIT_MTYPE_MASK	    =	0x70000000	    //bit[30:28]: Mtype
-var S_RESTORE_SPI_INIT_MTYPE_SHIFT	    =	28
 var S_RESTORE_SPI_INIT_FIRST_WAVE_MASK	    =	0x04000000	    //bit[26]: FirstWaveInTG
 var S_RESTORE_SPI_INIT_FIRST_WAVE_SHIFT	    =	26
 
-var S_RESTORE_PC_HI_RCNT_SHIFT		    =	S_SAVE_PC_HI_RCNT_SHIFT
-var S_RESTORE_PC_HI_RCNT_MASK		    =	S_SAVE_PC_HI_RCNT_MASK
-var S_RESTORE_PC_HI_FIRST_REPLAY_SHIFT	    =	S_SAVE_PC_HI_FIRST_REPLAY_SHIFT
-var S_RESTORE_PC_HI_FIRST_REPLAY_MASK	    =	S_SAVE_PC_HI_FIRST_REPLAY_MASK
-
 var s_restore_spi_init_lo		    =	exec_lo
 var s_restore_spi_init_hi		    =	exec_hi
 
@@ -199,71 +184,77 @@ L_JUMP_TO_RESTORE:
 L_SKIP_RESTORE:
 
     s_getreg_b32    s_save_status, hwreg(HW_REG_STATUS)				    //save STATUS since we will change SCC
-    s_andn2_b32	    s_save_status, s_save_status, SQ_WAVE_STATUS_SPI_PRIO_MASK	    //check whether this is for save
 
-if SINGLE_STEP_MISSED_WORKAROUND
-    // No single step exceptions if MODE.DEBUG_EN=0.
-    s_getreg_b32    ttmp2, hwreg(HW_REG_MODE)
-    s_and_b32       ttmp2, ttmp2, SQ_WAVE_MODE_DEBUG_EN_MASK
-    s_cbranch_scc0  L_NO_SINGLE_STEP_WORKAROUND
+    // Clear SPI_PRIO: do not save with elevated priority.
+    // Clear ECC_ERR: prevents SQC store and triggers FATAL_HALT if setreg'd.
+    s_andn2_b32     s_save_status, s_save_status, SQ_WAVE_STATUS_SPI_PRIO_MASK|SQ_WAVE_STATUS_ECC_ERR_MASK
 
-    // Second-level trap already handled exception if STATUS.HALT=1.
-    s_and_b32       ttmp2, s_save_status, SQ_WAVE_STATUS_HALT_MASK
+    s_getreg_b32    s_save_trapsts, hwreg(HW_REG_TRAPSTS)
 
-    // Prioritize single step exception over context save.
-    // Second-level trap will halt wave and RFE, re-entering for SAVECTX.
-    s_cbranch_scc0  L_FETCH_2ND_TRAP
+    s_and_b32       ttmp2, s_save_status, SQ_WAVE_STATUS_HALT_MASK
+    s_cbranch_scc0  L_NOT_HALTED
 
-L_NO_SINGLE_STEP_WORKAROUND:
-end
+L_HALTED:
+    // Host trap may occur while wave is halted.
+    s_and_b32       ttmp2, s_save_pc_hi, S_SAVE_PC_HI_TRAP_ID_MASK
+    s_cbranch_scc1  L_FETCH_2ND_TRAP
 
-    s_getreg_b32    s_save_trapsts, hwreg(HW_REG_TRAPSTS)
+L_CHECK_SAVE:
     s_and_b32       ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_SAVECTX_MASK    //check whether this is for save
     s_cbranch_scc1  L_SAVE					//this is the operation for save
 
-    // *********    Handle non-CWSR traps	*******************
-
-    // Illegal instruction is a non-maskable exception which blocks context save.
-    // Halt the wavefront and return from the trap.
-    s_and_b32       ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_ILLEGAL_INST_MASK
-    s_cbranch_scc1  L_HALT_WAVE
-
-    // If STATUS.MEM_VIOL is asserted then we cannot fetch from the TMA.
-    // Instead, halt the wavefront and return from the trap.
-    s_and_b32       ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_MEM_VIOL_MASK
-    s_cbranch_scc0  L_FETCH_2ND_TRAP
-
-L_HALT_WAVE:
-    // If STATUS.HALT is set then this fault must come from SQC instruction fetch.
-    // We cannot prevent further faults. Spin wait until context saved.
-    s_and_b32       ttmp2, s_save_status, SQ_WAVE_STATUS_HALT_MASK
-    s_cbranch_scc0  L_NOT_ALREADY_HALTED
-
-L_WAIT_CTX_SAVE:
+    // Wave is halted but neither host trap nor SAVECTX is raised.
+    // Caused by instruction fetch memory violation.
+    // Spin wait until context saved to prevent interrupt storm.
     s_sleep         0x10
-    s_getreg_b32    ttmp2, hwreg(HW_REG_TRAPSTS)
-    s_and_b32       ttmp2, ttmp2, SQ_WAVE_TRAPSTS_SAVECTX_MASK
-    s_cbranch_scc0  L_WAIT_CTX_SAVE
+    s_getreg_b32    s_save_trapsts, hwreg(HW_REG_TRAPSTS)
+    s_branch        L_CHECK_SAVE
+
+L_NOT_HALTED:
+    // Let second-level handle non-SAVECTX exception or trap.
+    // Any concurrent SAVECTX will be handled upon re-entry once halted.
+
+    // Check non-maskable exceptions. memory_violation, illegal_instruction
+    // and xnack_error exceptions always cause the wave to enter the trap
+    // handler.
+    s_and_b32       ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_MEM_VIOL_MASK|SQ_WAVE_TRAPSTS_ILLEGAL_INST_MASK
+    s_cbranch_scc1  L_FETCH_2ND_TRAP
+
+    // Check for maskable exceptions in trapsts.excp and trapsts.excp_hi.
+    // Maskable exceptions only cause the wave to enter the trap handler if
+    // their respective bit in mode.excp_en is set.
+    s_and_b32       ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_EXCP_MASK|SQ_WAVE_TRAPSTS_EXCP_HI_MASK
+    s_cbranch_scc0  L_CHECK_TRAP_ID
+
+    s_and_b32       ttmp3, s_save_trapsts, SQ_WAVE_TRAPSTS_ADDR_WATCH_MASK|SQ_WAVE_TRAPSTS_EXCP_HI_MASK
+    s_cbranch_scc0  L_NOT_ADDR_WATCH
+    s_bitset1_b32   ttmp2, SQ_WAVE_TRAPSTS_ADDR_WATCH_SHIFT // Check all addr_watch[123] exceptions against excp_en.addr_watch
+
+L_NOT_ADDR_WATCH:
+    s_getreg_b32    ttmp3, hwreg(HW_REG_MODE)
+    s_lshl_b32      ttmp2, ttmp2, SQ_WAVE_MODE_EXCP_EN_SHIFT
+    s_and_b32       ttmp2, ttmp2, ttmp3
+    s_cbranch_scc1  L_FETCH_2ND_TRAP
+
+L_CHECK_TRAP_ID:
+    // Check trap_id != 0
+    s_and_b32       ttmp2, s_save_pc_hi, S_SAVE_PC_HI_TRAP_ID_MASK
+    s_cbranch_scc1  L_FETCH_2ND_TRAP
 
-L_NOT_ALREADY_HALTED:
-    s_or_b32        s_save_status, s_save_status, SQ_WAVE_STATUS_HALT_MASK
+if SINGLE_STEP_MISSED_WORKAROUND
+    // Prioritize single step exception over context save.
+    // Second-level trap will halt wave and RFE, re-entering for SAVECTX.
+    s_getreg_b32    ttmp2, hwreg(HW_REG_MODE)
+    s_and_b32       ttmp2, ttmp2, SQ_WAVE_MODE_DEBUG_EN_MASK
+    s_cbranch_scc1  L_FETCH_2ND_TRAP
+end
 
-    // If the PC points to S_ENDPGM then context save will fail if STATUS.HALT is set.
-    // Rewind the PC to prevent this from occurring. The debugger compensates for this.
-    s_sub_u32       ttmp0, ttmp0, 0x8
-    s_subb_u32      ttmp1, ttmp1, 0x0
+    s_and_b32       ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_SAVECTX_MASK
+    s_cbranch_scc1  L_SAVE
 
 L_FETCH_2ND_TRAP:
     // Preserve and clear scalar XNACK state before issuing scalar reads.
-    // Save IB_STS.FIRST_REPLAY[15] and IB_STS.RCNT[20:16] into unused space ttmp11[31:26].
-    s_getreg_b32    ttmp2, hwreg(HW_REG_IB_STS)
-    s_and_b32       ttmp3, ttmp2, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK
-    s_lshl_b32      ttmp3, ttmp3, (TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT - SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT)
-    s_andn2_b32     ttmp11, ttmp11, TTMP11_SAVE_RCNT_FIRST_REPLAY_MASK
-    s_or_b32        ttmp11, ttmp11, ttmp3
-
-    s_andn2_b32     ttmp2, ttmp2, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK
-    s_setreg_b32    hwreg(HW_REG_IB_STS), ttmp2
+    save_and_clear_ib_sts(ttmp14)
 
     // Read second-level TBA/TMA from first-level TMA and jump if available.
     // ttmp[2:5] and ttmp12 can be used (others hold SPI-initialized debug data)
@@ -271,27 +262,48 @@ L_FETCH_2ND_TRAP:
     s_getreg_b32    ttmp14, hwreg(HW_REG_SQ_SHADER_TMA_LO)
     s_getreg_b32    ttmp15, hwreg(HW_REG_SQ_SHADER_TMA_HI)
     s_lshl_b64      [ttmp14, ttmp15], [ttmp14, ttmp15], 0x8
+
+    s_load_dword    ttmp2, [ttmp14, ttmp15], 0x10 glc:1 // debug trap enabled flag
+    s_waitcnt       lgkmcnt(0)
+    s_lshl_b32      ttmp2, ttmp2, TTMP11_DEBUG_TRAP_ENABLED_SHIFT
+    s_andn2_b32     ttmp11, ttmp11, TTMP11_DEBUG_TRAP_ENABLED_MASK
+    s_or_b32        ttmp11, ttmp11, ttmp2
+
     s_load_dwordx2  [ttmp2, ttmp3], [ttmp14, ttmp15], 0x0 glc:1 // second-level TBA
     s_waitcnt       lgkmcnt(0)
     s_load_dwordx2  [ttmp14, ttmp15], [ttmp14, ttmp15], 0x8 glc:1 // second-level TMA
     s_waitcnt       lgkmcnt(0)
+
     s_and_b64       [ttmp2, ttmp3], [ttmp2, ttmp3], [ttmp2, ttmp3]
     s_cbranch_scc0  L_NO_NEXT_TRAP // second-level trap handler not been set
     s_setpc_b64     [ttmp2, ttmp3] // jump to second-level trap handler
 
 L_NO_NEXT_TRAP:
-    s_getreg_b32    s_save_trapsts, hwreg(HW_REG_TRAPSTS)
-    s_and_b32	    s_save_trapsts, s_save_trapsts, SQ_WAVE_TRAPSTS_EXCE_MASK // Check whether it is an exception
-    s_cbranch_scc1  L_EXCP_CASE	  // Exception, jump back to the shader program directly.
-    s_add_u32	    ttmp0, ttmp0, 4   // S_TRAP case, add 4 to ttmp0
-    s_addc_u32	ttmp1, ttmp1, 0
-L_EXCP_CASE:
+    // If not caused by trap then halt wave to prevent re-entry.
+    s_and_b32       ttmp2, s_save_pc_hi, (S_SAVE_PC_HI_TRAP_ID_MASK|S_SAVE_PC_HI_HT_MASK)
+    s_cbranch_scc1  L_TRAP_CASE
+    s_or_b32        s_save_status, s_save_status, SQ_WAVE_STATUS_HALT_MASK
+
+    // If the PC points to S_ENDPGM then context save will fail if STATUS.HALT is set.
+    // Rewind the PC to prevent this from occurring.
+    s_sub_u32       ttmp0, ttmp0, 0x8
+    s_subb_u32      ttmp1, ttmp1, 0x0
+
+    s_branch        L_EXIT_TRAP
+
+L_TRAP_CASE:
+    // Host trap will not cause trap re-entry.
+    s_and_b32       ttmp2, s_save_pc_hi, S_SAVE_PC_HI_HT_MASK
+    s_cbranch_scc1  L_EXIT_TRAP
+
+    // Advance past trap instruction to prevent re-entry.
+    s_add_u32       ttmp0, ttmp0, 0x4
+    s_addc_u32      ttmp1, ttmp1, 0x0
+
+L_EXIT_TRAP:
     s_and_b32	ttmp1, ttmp1, 0xFFFF
 
-    // Restore SQ_WAVE_IB_STS.
-    s_lshr_b32      ttmp2, ttmp11, (TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT - SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT)
-    s_and_b32       ttmp2, ttmp2, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK
-    s_setreg_b32    hwreg(HW_REG_IB_STS), ttmp2
+    restore_ib_sts(ttmp14)
 
     // Restore SQ_WAVE_STATUS.
     s_and_b64       exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32
@@ -312,16 +324,7 @@ L_SAVE:
     s_mov_b32	    s_save_tmp, 0							    //clear saveCtx bit
     s_setreg_b32    hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_SAVECTX_SHIFT, 1), s_save_tmp	    //clear saveCtx bit
 
-    s_getreg_b32    s_save_tmp, hwreg(HW_REG_IB_STS, SQ_WAVE_IB_STS_RCNT_SHIFT, SQ_WAVE_IB_STS_RCNT_SIZE)		    //save RCNT
-    s_lshl_b32	    s_save_tmp, s_save_tmp, S_SAVE_PC_HI_RCNT_SHIFT
-    s_or_b32	    s_save_pc_hi, s_save_pc_hi, s_save_tmp
-    s_getreg_b32    s_save_tmp, hwreg(HW_REG_IB_STS, SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT, SQ_WAVE_IB_STS_FIRST_REPLAY_SIZE)   //save FIRST_REPLAY
-    s_lshl_b32	    s_save_tmp, s_save_tmp, S_SAVE_PC_HI_FIRST_REPLAY_SHIFT
-    s_or_b32	    s_save_pc_hi, s_save_pc_hi, s_save_tmp
-    s_getreg_b32    s_save_tmp, hwreg(HW_REG_IB_STS)					    //clear RCNT and FIRST_REPLAY in IB_STS
-    s_and_b32	    s_save_tmp, s_save_tmp, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK_NEG
-
-    s_setreg_b32    hwreg(HW_REG_IB_STS), s_save_tmp
+    save_and_clear_ib_sts(s_save_tmp)
 
     /*	    inform SPI the readiness and wait for SPI's go signal */
     s_mov_b32	    s_save_exec_lo, exec_lo						    //save EXEC and use EXEC for the go signal from SPI
@@ -360,12 +363,6 @@ L_SAVE:
     s_or_b32	    s_save_buf_rsrc1,	s_save_buf_rsrc1,  S_SAVE_BUF_RSRC_WORD1_STRIDE
     s_mov_b32	    s_save_buf_rsrc2,	0									//NUM_RECORDS initial value = 0 (in bytes) although not neccessarily inited
     s_mov_b32	    s_save_buf_rsrc3,	S_SAVE_BUF_RSRC_WORD3_MISC
-    s_and_b32	    s_save_tmp,		s_save_spi_init_hi, S_SAVE_SPI_INIT_ATC_MASK
-    s_lshr_b32	    s_save_tmp,		s_save_tmp, (S_SAVE_SPI_INIT_ATC_SHIFT-SQ_BUF_RSRC_WORD1_ATC_SHIFT)	    //get ATC bit into position
-    s_or_b32	    s_save_buf_rsrc3,	s_save_buf_rsrc3,  s_save_tmp						//or ATC
-    s_and_b32	    s_save_tmp,		s_save_spi_init_hi, S_SAVE_SPI_INIT_MTYPE_MASK
-    s_lshr_b32	    s_save_tmp,		s_save_tmp, (S_SAVE_SPI_INIT_MTYPE_SHIFT-SQ_BUF_RSRC_WORD3_MTYPE_SHIFT)	    //get MTYPE bits into position
-    s_or_b32	    s_save_buf_rsrc3,	s_save_buf_rsrc3,  s_save_tmp						//or MTYPE
 
     //FIXME  right now s_save_m0/s_save_mem_offset use tma_lo/tma_hi  (might need to save them before using them?)
     s_mov_b32	    s_save_m0,		m0								    //save M0
@@ -690,12 +687,6 @@ L_RESTORE:
     s_or_b32	    s_restore_buf_rsrc1,    s_restore_buf_rsrc1,  S_RESTORE_BUF_RSRC_WORD1_STRIDE
     s_mov_b32	    s_restore_buf_rsrc2,    0										    //NUM_RECORDS initial value = 0 (in bytes)
     s_mov_b32	    s_restore_buf_rsrc3,    S_RESTORE_BUF_RSRC_WORD3_MISC
-    s_and_b32	    s_restore_tmp,	    s_restore_spi_init_hi, S_RESTORE_SPI_INIT_ATC_MASK
-    s_lshr_b32	    s_restore_tmp,	    s_restore_tmp, (S_RESTORE_SPI_INIT_ATC_SHIFT-SQ_BUF_RSRC_WORD1_ATC_SHIFT)	    //get ATC bit into position
-    s_or_b32	    s_restore_buf_rsrc3,    s_restore_buf_rsrc3,  s_restore_tmp						    //or ATC
-    s_and_b32	    s_restore_tmp,	    s_restore_spi_init_hi, S_RESTORE_SPI_INIT_MTYPE_MASK
-    s_lshr_b32	    s_restore_tmp,	    s_restore_tmp, (S_RESTORE_SPI_INIT_MTYPE_SHIFT-SQ_BUF_RSRC_WORD3_MTYPE_SHIFT)   //get MTYPE bits into position
-    s_or_b32	    s_restore_buf_rsrc3,    s_restore_buf_rsrc3,  s_restore_tmp						    //or MTYPE
 
     /*	    global mem offset		*/
 //  s_mov_b32	    s_restore_mem_offset, 0x0				    //mem offset initial value = 0
@@ -889,19 +880,7 @@ L_RESTORE:
     s_load_dword    ttmp13, [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x74 glc:1
     s_waitcnt	    lgkmcnt(0)
 
-    //reuse s_restore_m0 as a temp register
-    s_and_b32	    s_restore_m0, s_restore_pc_hi, S_SAVE_PC_HI_RCNT_MASK
-    s_lshr_b32	    s_restore_m0, s_restore_m0, S_SAVE_PC_HI_RCNT_SHIFT
-    s_lshl_b32	    s_restore_m0, s_restore_m0, SQ_WAVE_IB_STS_RCNT_SHIFT
-    s_mov_b32	    s_restore_tmp, 0x0										    //IB_STS is zero
-    s_or_b32	    s_restore_tmp, s_restore_tmp, s_restore_m0
-    s_and_b32	    s_restore_m0, s_restore_pc_hi, S_SAVE_PC_HI_FIRST_REPLAY_MASK
-    s_lshr_b32	    s_restore_m0, s_restore_m0, S_SAVE_PC_HI_FIRST_REPLAY_SHIFT
-    s_lshl_b32	    s_restore_m0, s_restore_m0, SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT
-    s_or_b32	    s_restore_tmp, s_restore_tmp, s_restore_m0
-    s_and_b32	    s_restore_m0, s_restore_status, SQ_WAVE_STATUS_INST_ATC_MASK
-    s_lshr_b32	    s_restore_m0, s_restore_m0, SQ_WAVE_STATUS_INST_ATC_SHIFT
-    s_setreg_b32    hwreg(HW_REG_IB_STS),   s_restore_tmp
+    restore_ib_sts(s_restore_tmp)
 
     s_and_b32 s_restore_pc_hi, s_restore_pc_hi, 0x0000ffff	//pc[47:32]	   //Do it here in order not to affect STATUS
     s_and_b64	 exec, exec, exec  // Restore STATUS.EXECZ, not writable by s_setreg_b32
@@ -910,8 +889,7 @@ L_RESTORE:
 
     s_barrier							//barrier to ensure the readiness of LDS before access attempts from any other wave in the same TG //FIXME not performance-optimal at this time
 
-//  s_rfe_b64 s_restore_pc_lo					//Return to the main shader program and resume execution
-    s_rfe_restore_b64  s_restore_pc_lo, s_restore_m0		// s_restore_m0[0] is used to set STATUS.inst_atc
+    s_rfe_b64 s_restore_pc_lo					//Return to the main shader program and resume execution
 
 
 /**************************************************************************/
@@ -1078,3 +1056,19 @@ function set_status_without_spi_prio(status, tmp)
     s_nop           0x2 // avoid S_SETREG => S_SETREG hazard
     s_setreg_b32    hwreg(HW_REG_STATUS, SQ_WAVE_STATUS_PRE_SPI_PRIO_SHIFT, SQ_WAVE_STATUS_PRE_SPI_PRIO_SIZE), status
 end
+
+function save_and_clear_ib_sts(tmp)
+    // Save IB_STS.FIRST_REPLAY[15] and IB_STS.RCNT[20:16] into unused space ttmp11[31:26].
+    s_getreg_b32    tmp, hwreg(HW_REG_IB_STS)
+    s_and_b32       tmp, tmp, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK
+    s_lshl_b32      tmp, tmp, (TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT - SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT)
+    s_andn2_b32     ttmp11, ttmp11, TTMP11_SAVE_RCNT_FIRST_REPLAY_MASK
+    s_or_b32        ttmp11, ttmp11, tmp
+    s_setreg_imm32_b32 hwreg(HW_REG_IB_STS), 0x0
+end
+
+function restore_ib_sts(tmp)
+    s_lshr_b32      tmp, ttmp11, (TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT - SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT)
+    s_and_b32       tmp, tmp, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK
+    s_setreg_b32    hwreg(HW_REG_IB_STS), tmp
+end
commit 6880ed280edf292c542aa87567547ffb9c222597
Author: Alan Liu <HaoPing.Liu at amd.com>
Date:   Tue May 17 22:30:00 2022 +0800

    drm/amd/display: Add HDMI_ACP_SEND register
    
    Define HDMI_ACP_SEND register shift/mask.
    
    Signed-off-by: Alan Liu <HaoPing.Liu at amd.com>
    Reviewed-by: Harry Wentland <harry.wentland at amd.com>
    Signed-off-by: Alex Deucher <alexander.deucher at amd.com>

diff --git a/drivers/gpu/drm/amd/include/asic_reg/dce/dce_10_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/dce/dce_10_0_sh_mask.h
index c755f43aaaf8..7a2c6b12c249 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/dce/dce_10_0_sh_mask.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/dce/dce_10_0_sh_mask.h
@@ -6070,6 +6070,8 @@
 #define HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_SEND__SHIFT 0x8
 #define HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_CONT_MASK 0x200
 #define HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_CONT__SHIFT 0x9
+#define HDMI_VBI_PACKET_CONTROL__HDMI_ACP_SEND_MASK 0x1000
+#define HDMI_VBI_PACKET_CONTROL__HDMI_ACP_SEND__SHIFT 0xc
 #define HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_LINE_MASK 0x3f0000
 #define HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_LINE__SHIFT 0x10
 #define HDMI_INFOFRAME_CONTROL0__HDMI_AVI_INFO_SEND_MASK 0x1
diff --git a/drivers/gpu/drm/amd/include/asic_reg/dce/dce_11_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/dce/dce_11_0_sh_mask.h
index 14a3bacfcfd1..fa1f4374fafe 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/dce/dce_11_0_sh_mask.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/dce/dce_11_0_sh_mask.h
@@ -6058,6 +6058,8 @@
 #define HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_SEND__SHIFT 0x8
 #define HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_CONT_MASK 0x200
 #define HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_CONT__SHIFT 0x9
+#define HDMI_VBI_PACKET_CONTROL__HDMI_ACP_SEND_MASK 0x1000
+#define HDMI_VBI_PACKET_CONTROL__HDMI_ACP_SEND__SHIFT 0xc
 #define HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_LINE_MASK 0x3f0000
 #define HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_LINE__SHIFT 0x10
 #define HDMI_INFOFRAME_CONTROL0__HDMI_AVI_INFO_SEND_MASK 0x1
diff --git a/drivers/gpu/drm/amd/include/asic_reg/dce/dce_11_2_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/dce/dce_11_2_sh_mask.h
index 106094ed0661..39f6fde6db1d 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/dce/dce_11_2_sh_mask.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/dce/dce_11_2_sh_mask.h
@@ -7142,6 +7142,8 @@
 #define HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_SEND__SHIFT 0x8
 #define HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_CONT_MASK 0x200
 #define HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_CONT__SHIFT 0x9
+#define HDMI_VBI_PACKET_CONTROL__HDMI_ACP_SEND_MASK 0x1000
+#define HDMI_VBI_PACKET_CONTROL__HDMI_ACP_SEND__SHIFT 0xc
 #define HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_LINE_MASK 0x3f0000
 #define HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_LINE__SHIFT 0x10
 #define HDMI_INFOFRAME_CONTROL0__HDMI_AVI_INFO_SEND_MASK 0x1
diff --git a/drivers/gpu/drm/amd/include/asic_reg/dce/dce_12_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/dce/dce_12_0_sh_mask.h
index bcd190a3fcdd..c5f4afac3b39 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/dce/dce_12_0_sh_mask.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/dce/dce_12_0_sh_mask.h
@@ -37285,12 +37285,14 @@
 #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_GC_CONT__SHIFT                                                     0x5
 #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_SEND__SHIFT                                                   0x8
 #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_CONT__SHIFT                                                   0x9
+#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ACP_SEND__SHIFT                                                    0xc
 #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_LINE__SHIFT                                                   0x10
 #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_NULL_SEND_MASK                                                     0x00000001L
 #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_GC_SEND_MASK                                                       0x00000010L
 #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_GC_CONT_MASK                                                       0x00000020L
 #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_SEND_MASK                                                     0x00000100L
 #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_CONT_MASK                                                     0x00000200L
+#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ACP_SEND_MASK                                                      0x00001000L
 #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_LINE_MASK                                                     0x003F0000L
 //DIG0_HDMI_INFOFRAME_CONTROL0
 #define DIG0_HDMI_INFOFRAME_CONTROL0__HDMI_AVI_INFO_SEND__SHIFT                                               0x0
diff --git a/drivers/gpu/drm/amd/include/asic_reg/dce/dce_8_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/dce/dce_8_0_sh_mask.h
index 9b6825b74cc1..23580907663b 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/dce/dce_8_0_sh_mask.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/dce/dce_8_0_sh_mask.h
@@ -5584,6 +5584,8 @@
 #define HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_SEND__SHIFT 0x8
 #define HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_CONT_MASK 0x200
 #define HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_CONT__SHIFT 0x9
+#define HDMI_VBI_PACKET_CONTROL__HDMI_ACP_SEND_MASK 0x1000
+#define HDMI_VBI_PACKET_CONTROL__HDMI_ACP_SEND__SHIFT 0xc
 #define HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_LINE_MASK 0x3f0000
 #define HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_LINE__SHIFT 0x10
 #define HDMI_INFOFRAME_CONTROL0__HDMI_AVI_INFO_SEND_MASK 0x1
diff --git a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_1_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_1_0_sh_mask.h
index e7c0cad41081..a788ff3b68c0 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_1_0_sh_mask.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_1_0_sh_mask.h
@@ -30357,12 +30357,14 @@
 #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_GC_CONT__SHIFT                                                     0x5
 #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_SEND__SHIFT                                                   0x8
 #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_CONT__SHIFT                                                   0x9
+#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ACP_SEND__SHIFT                                                    0xc
 #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_LINE__SHIFT                                                   0x10
 #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_NULL_SEND_MASK                                                     0x00000001L
 #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_GC_SEND_MASK                                                       0x00000010L
 #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_GC_CONT_MASK                                                       0x00000020L
 #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_SEND_MASK                                                     0x00000100L
 #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_CONT_MASK                                                     0x00000200L
+#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ACP_SEND_MASK                                                      0x00001000L
 #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_LINE_MASK                                                     0x003F0000L
 //DIG0_HDMI_INFOFRAME_CONTROL0
 #define DIG0_HDMI_INFOFRAME_CONTROL0__HDMI_AUDIO_INFO_SEND__SHIFT                                             0x4
diff --git a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_0_sh_mask.h
index dc8ce7aaa0cf..c70f7ba94d8f 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_0_sh_mask.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_0_sh_mask.h
@@ -39439,12 +39439,14 @@
 #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_GC_CONT__SHIFT                                                     0x5
 #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_SEND__SHIFT                                                   0x8
 #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_CONT__SHIFT                                                   0x9
+#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ACP_SEND__SHIFT                                                    0xc
 #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_LINE__SHIFT                                                   0x10
 #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_NULL_SEND_MASK                                                     0x00000001L
 #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_GC_SEND_MASK                                                       0x00000010L
 #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_GC_CONT_MASK                                                       0x00000020L
 #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_SEND_MASK                                                     0x00000100L
 #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_CONT_MASK                                                     0x00000200L
+#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ACP_SEND_MASK                                                      0x00001000L
 #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_LINE_MASK                                                     0x003F0000L
 //DIG0_HDMI_INFOFRAME_CONTROL0
 #define DIG0_HDMI_INFOFRAME_CONTROL0__HDMI_AUDIO_INFO_SEND__SHIFT                                             0x4
diff --git a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_3_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_3_sh_mask.h
index 91969554e36a..ca1e1eb39256 100755
--- a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_3_sh_mask.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_3_sh_mask.h
@@ -16956,7 +16956,7 @@
 #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_GC_CONT__SHIFT                                                     0x5
 #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_SEND__SHIFT                                                   0x8
 #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_CONT__SHIFT                                                   0x9
-
+#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ACP_SEND__SHIFT                                                    0xc
 #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_LINE__SHIFT                                                   0x10
 
 #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_NULL_SEND_MASK                                                     0x00000001L
@@ -16964,7 +16964,7 @@
 #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_GC_CONT_MASK                                                       0x00000020L
 #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_SEND_MASK                                                     0x00000100L
 #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_CONT_MASK                                                     0x00000200L
-
+#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ACP_SEND_MASK                                                      0x00001000L
 #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_LINE_MASK                                                     0x003F0000L
 
 //DIG0_HDMI_INFOFRAME_CONTROL0
diff --git a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_1_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_1_0_sh_mask.h
index 2f780aefc722..6104ae304099 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_1_0_sh_mask.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_1_0_sh_mask.h
@@ -35487,12 +35487,14 @@
 #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_GC_CONT__SHIFT                                                     0x5
 #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_SEND__SHIFT                                                   0x8
 #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_CONT__SHIFT                                                   0x9
+#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ACP_SEND__SHIFT                                                    0xc
 #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_LINE__SHIFT                                                   0x10
 #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_NULL_SEND_MASK                                                     0x00000001L
 #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_GC_SEND_MASK                                                       0x00000010L
 #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_GC_CONT_MASK                                                       0x00000020L
 #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_SEND_MASK                                                     0x00000100L
 #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_CONT_MASK                                                     0x00000200L
+#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ACP_SEND_MASK                                                      0x00001000L
 #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_LINE_MASK                                                     0x003F0000L
 //DIG0_HDMI_INFOFRAME_CONTROL0
 #define DIG0_HDMI_INFOFRAME_CONTROL0__HDMI_AUDIO_INFO_SEND__SHIFT                                             0x4
commit b0f4d663fce6a4232d3c20ce820f919111b1c60b
Author: Lijo Lazar <lijo.lazar at amd.com>
Date:   Thu May 19 10:50:25 2022 +0530

    drm/amd/pm: Fix missing thermal throttler status
    
    On aldebaran, when thermal throttling happens due to excessive GPU
    temperature, the reason for throttling event is missed in warning
    message. This patch fixes it.
    
    Signed-off-by: Lijo Lazar <lijo.lazar at amd.com>
    Reviewed-by: Yang Wang <kevinyang.wang at amd.com>
    Signed-off-by: Alex Deucher <alexander.deucher at amd.com>

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c
index 38af648cb857..fb130409309c 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c
@@ -1666,6 +1666,7 @@ static const struct throttling_logging_label {
 	uint32_t feature_mask;
 	const char *label;
 } logging_label[] = {
+	{(1U << THROTTLER_TEMP_GPU_BIT), "GPU"},
 	{(1U << THROTTLER_TEMP_MEM_BIT), "HBM"},
 	{(1U << THROTTLER_TEMP_VR_GFX_BIT), "VR of GFX rail"},
 	{(1U << THROTTLER_TEMP_VR_MEM_BIT), "VR of HBM rail"},
commit 49b74d12d1e02fc67b2854a593e589372d894e62
Author: Sunil Khatri <sunil.khatri at amd.com>
Date:   Tue May 17 11:33:45 2022 +0530

    drm/amdgpu: add support of tmz for GC 10.3.7
    
    Add support of IP GC 10.3.7 in amdgpu_gmc_tmz_set.
    
    Signed-off-by: Sunil Khatri <sunil.khatri at amd.com>
    Reviewed-by: Alexander Deucher <Alexander.Deucher at amd.com>
    Signed-off-by: Alex Deucher <alexander.deucher at amd.com>

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index 7e55ee61f84c..798c56214a23 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -540,6 +540,8 @@ void amdgpu_gmc_tmz_set(struct amdgpu_device *adev)
 	case IP_VERSION(10, 3, 1):
 	/* YELLOW_CARP*/
 	case IP_VERSION(10, 3, 3):
+	/* GC 10.3.7 */
+	case IP_VERSION(10, 3, 7):
 		/* Don't enable it by default yet.
 		 */
 		if (amdgpu_tmz < 1) {
commit 0ef3dc7e97884a861db4cb3dfd721db71edb0236
Author: Sunil Khatri <sunil.khatri at amd.com>
Date:   Tue May 17 11:28:46 2022 +0530

    drm/amdgpu: change code name to ip version for tmz set
    
    Use IP version rather then code name of IPs for
    tmz set.
    
    Signed-off-by: Sunil Khatri <sunil.khatri at amd.com>
    Reviewed-by: Alexander Deucher <Alexander.Deucher at amd.com>
    Signed-off-by: Alex Deucher <alexander.deucher at amd.com>

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index 88b852b3a2cb..7e55ee61f84c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -512,9 +512,12 @@ int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev)
  */
 void amdgpu_gmc_tmz_set(struct amdgpu_device *adev)
 {
-	switch (adev->asic_type) {
-	case CHIP_RAVEN:
-	case CHIP_RENOIR:
+	switch (adev->ip_versions[GC_HWIP][0]) {
+	/* RAVEN */
+	case IP_VERSION(9, 2, 2):
+	case IP_VERSION(9, 1, 0):
+	/* RENOIR looks like RAVEN */
+	case IP_VERSION(9, 3, 0):
 		if (amdgpu_tmz == 0) {
 			adev->gmc.tmz_enabled = false;
 			dev_info(adev->dev,
@@ -525,12 +528,18 @@ void amdgpu_gmc_tmz_set(struct amdgpu_device *adev)
 				 "Trusted Memory Zone (TMZ) feature enabled\n");
 		}
 		break;
-	case CHIP_NAVI10:
-	case CHIP_NAVI14:
-	case CHIP_NAVI12:
-	case CHIP_VANGOGH:
-	case CHIP_YELLOW_CARP:
-	case CHIP_IP_DISCOVERY:
+	case IP_VERSION(10, 1, 10):
+	case IP_VERSION(10, 1, 1):
+	case IP_VERSION(10, 1, 2):
+	case IP_VERSION(10, 1, 3):
+	case IP_VERSION(10, 3, 0):
+	case IP_VERSION(10, 3, 2):
+	case IP_VERSION(10, 3, 4):
+	case IP_VERSION(10, 3, 5):
+	/* VANGOGH */
+	case IP_VERSION(10, 3, 1):
+	/* YELLOW_CARP*/
+	case IP_VERSION(10, 3, 3):
 		/* Don't enable it by default yet.
 		 */
 		if (amdgpu_tmz < 1) {
commit 4d33e7040d70b50f1fb564f7020644ec5b45d6b7
Author: Sunil Khatri <sunil.khatri at amd.com>
Date:   Tue May 17 11:27:11 2022 +0530

    drm/amdgpu: move amdgpu_gmc_tmz_set after ip_version populated
    
    To enable TMZ feature based on IP version needs adev->ip_version
    populated but its empty. Move amdgpu_gmc_tmz_set to a place where
    ip_version is populated.
    
    Signed-off-by: Sunil Khatri <sunil.khatri at amd.com>
    Reviewed-by: Alexander Deucher <Alexander.Deucher at amd.com>
    Signed-off-by: Alex Deucher <alexander.deucher at amd.com>

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index c45736a902fb..625424f3082b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1556,9 +1556,6 @@ static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
 
 	adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
 
-	amdgpu_gmc_tmz_set(adev);
-
-
 	return 0;
 }
 
@@ -3701,6 +3698,9 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	if (r)
 		return r;
 
+	/* Enable TMZ based on IP_VERSION */
+	amdgpu_gmc_tmz_set(adev);
+
 	amdgpu_gmc_noretry_set(adev);
 	/* Need to get xgmi info early to decide the reset behavior*/
 	if (adev->gmc.xgmi.supported) {
commit 950d64250fba5fbb8c290f692de8ffdee380726c
Author: Stanley.Yang <Stanley.Yang at amd.com>
Date:   Wed Apr 27 12:16:51 2022 +0800

    drm/amdgpu: support ras on SRIOV
    
    support umc/gfx/sdma ras on guest side
    
    Changed from V1:
        move sriov judgment in amdgpu_ras_interrupt_fatal_error_handler
    
    Signed-off-by: Stanley.Yang <Stanley.Yang at amd.com>
    Reviewed-by: Tao Zhou <tao.zhou1 at amd.com>
    Signed-off-by: Alex Deucher <alexander.deucher at amd.com>

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 9af8d7a1d011..c45736a902fb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -5219,6 +5219,10 @@ retry:	/* Rest of adevs pre asic reset from XGMI hive. */
 		r = amdgpu_device_reset_sriov(adev, job ? false : true);
 		if (r)
 			adev->asic_reset_res = r;
+
+		/* Aldebaran supports ras in SRIOV, so need resume ras during reset */
+		if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
+			amdgpu_ras_resume(adev);
 	} else {
 		r = amdgpu_do_asic_reset(device_list_handle, &reset_context);
 		if (r && r == -EAGAIN)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 035891ec59d5..2de9309a4193 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -726,7 +726,9 @@ int amdgpu_ras_feature_enable(struct amdgpu_device *adev,
 	/* Do not enable if it is not allowed. */
 	WARN_ON(enable && !amdgpu_ras_is_feature_allowed(adev, head));
 
-	if (!amdgpu_ras_intr_triggered()) {
+	/* Only enable ras feature operation handle on host side */
+	if (!amdgpu_sriov_vf(adev) &&
+		!amdgpu_ras_intr_triggered()) {
 		ret = psp_ras_enable_features(&adev->psp, info, enable);
 		if (ret) {
 			dev_err(adev->dev, "ras %s %s failed poison:%d ret:%d\n",
@@ -1523,7 +1525,9 @@ static int amdgpu_ras_fs_fini(struct amdgpu_device *adev)
  */
 void amdgpu_ras_interrupt_fatal_error_handler(struct amdgpu_device *adev)
 {
-	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__PCIE_BIF))
+	/* Fatal error events are handled on host side */
+	if (amdgpu_sriov_vf(adev) ||
+		!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__PCIE_BIF))
 		return;
 
 	if (adev->nbio.ras &&
@@ -2270,10 +2274,14 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev)
 {
 	adev->ras_hw_enabled = adev->ras_enabled = 0;
 
-	if (amdgpu_sriov_vf(adev) || !adev->is_atom_fw ||
+	if (!adev->is_atom_fw ||
 	    !amdgpu_ras_asic_supported(adev))
 		return;
 
+	if (!(amdgpu_sriov_vf(adev) &&
+		(adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 2))))
+		return;
+
 	if (!adev->gmc.xgmi.connected_to_cpu) {
 		if (amdgpu_atomfirmware_mem_ecc_supported(adev)) {
 			dev_info(adev->dev, "MEM ECC is active.\n");
@@ -2285,15 +2293,21 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev)
 
 		if (amdgpu_atomfirmware_sram_ecc_supported(adev)) {
 			dev_info(adev->dev, "SRAM ECC is active.\n");
-			adev->ras_hw_enabled |= ~(1 << AMDGPU_RAS_BLOCK__UMC |
-						    1 << AMDGPU_RAS_BLOCK__DF);
-
-			if (adev->ip_versions[VCN_HWIP][0] == IP_VERSION(2, 6, 0))
-				adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__VCN |
-						1 << AMDGPU_RAS_BLOCK__JPEG);
-			else
-				adev->ras_hw_enabled &= ~(1 << AMDGPU_RAS_BLOCK__VCN |
-						1 << AMDGPU_RAS_BLOCK__JPEG);
+			if (!amdgpu_sriov_vf(adev)) {
+				adev->ras_hw_enabled |= ~(1 << AMDGPU_RAS_BLOCK__UMC |
+							    1 << AMDGPU_RAS_BLOCK__DF);
+
+				if (adev->ip_versions[VCN_HWIP][0] == IP_VERSION(2, 6, 0))
+					adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__VCN |
+							1 << AMDGPU_RAS_BLOCK__JPEG);
+				else
+					adev->ras_hw_enabled &= ~(1 << AMDGPU_RAS_BLOCK__VCN |
+							1 << AMDGPU_RAS_BLOCK__JPEG);
+			} else {
+				adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__PCIE_BIF |
+								1 << AMDGPU_RAS_BLOCK__SDMA |
+								1 << AMDGPU_RAS_BLOCK__GFX);
+			}
 		} else {
 			dev_info(adev->dev, "SRAM ECC is not presented.\n");
 		}
@@ -2637,6 +2651,10 @@ int amdgpu_ras_late_init(struct amdgpu_device *adev)
 	struct amdgpu_ras_block_object *obj;
 	int r;
 
+	/* Guest side doesn't need init ras feature */
+	if (amdgpu_sriov_vf(adev))
+		return 0;
+
 	list_for_each_entry_safe(node, tmp, &adev->ras_list, node) {
 		if (!node->ras_obj) {
 			dev_warn(adev->dev, "Warning: abnormal ras list node.\n");
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
index 8e221a1ba937..42c1f050542f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
@@ -124,6 +124,10 @@ int amdgpu_sdma_process_ras_data_cb(struct amdgpu_device *adev,
 		struct amdgpu_iv_entry *entry)
 {
 	kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
+
+	if (amdgpu_sriov_vf(adev))
+		return AMDGPU_RAS_SUCCESS;
+
 	amdgpu_ras_reset_gpu(adev);
 
 	return AMDGPU_RAS_SUCCESS;
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
index d6d79e97def9..18014ed0e853 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
@@ -85,9 +85,12 @@ static int psp_v13_0_init_microcode(struct psp_context *psp)
 		err = psp_init_sos_microcode(psp, chip_name);
 		if (err)
 			return err;
-		err = psp_init_ta_microcode(&adev->psp, chip_name);
-		if (err)
-			return err;
+		/* It's not necessary to load ras ta on Guest side */
+		if (!amdgpu_sriov_vf(adev)) {
+			err = psp_init_ta_microcode(&adev->psp, chip_name);
+			if (err)
+				return err;
+		}
 		break;
 	case IP_VERSION(13, 0, 1):
 	case IP_VERSION(13, 0, 3):
commit 2c270d3e71ed0b68b2f75c0b15645fb023b4032c
Author: Dan Carpenter <dan.carpenter at oracle.com>
Date:   Wed May 18 20:38:36 2022 +0300

    drm/amdgpu/pm: smu_v13_0_4: delete duplicate condition
    
    There is no need to check if "clock_ranges' is non-NULL.  It is checked
    already on the line before.
    
    Signed-off-by: Dan Carpenter <dan.carpenter at oracle.com>
    Signed-off-by: Alex Deucher <alexander.deucher at amd.com>

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c
index 7d6ff141b43f..5a17b51aa0f9 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c
@@ -644,42 +644,40 @@ static int smu_v13_0_4_set_watermarks_table(struct smu_context *smu,
 	if (!table || !clock_ranges)
 		return -EINVAL;
 
-	if (clock_ranges) {
-		if (clock_ranges->num_reader_wm_sets > NUM_WM_RANGES ||
-			clock_ranges->num_writer_wm_sets > NUM_WM_RANGES)
-			return -EINVAL;
-
-		for (i = 0; i < clock_ranges->num_reader_wm_sets; i++) {
-			table->WatermarkRow[WM_DCFCLK][i].MinClock =
-				clock_ranges->reader_wm_sets[i].min_drain_clk_mhz;
-			table->WatermarkRow[WM_DCFCLK][i].MaxClock =
-				clock_ranges->reader_wm_sets[i].max_drain_clk_mhz;
-			table->WatermarkRow[WM_DCFCLK][i].MinMclk =
-				clock_ranges->reader_wm_sets[i].min_fill_clk_mhz;
-			table->WatermarkRow[WM_DCFCLK][i].MaxMclk =
-				clock_ranges->reader_wm_sets[i].max_fill_clk_mhz;
-
-			table->WatermarkRow[WM_DCFCLK][i].WmSetting =
-				clock_ranges->reader_wm_sets[i].wm_inst;
-		}
+	if (clock_ranges->num_reader_wm_sets > NUM_WM_RANGES ||
+		clock_ranges->num_writer_wm_sets > NUM_WM_RANGES)
+		return -EINVAL;
 
-		for (i = 0; i < clock_ranges->num_writer_wm_sets; i++) {
-			table->WatermarkRow[WM_SOCCLK][i].MinClock =
-				clock_ranges->writer_wm_sets[i].min_fill_clk_mhz;
-			table->WatermarkRow[WM_SOCCLK][i].MaxClock =
-				clock_ranges->writer_wm_sets[i].max_fill_clk_mhz;
-			table->WatermarkRow[WM_SOCCLK][i].MinMclk =
-				clock_ranges->writer_wm_sets[i].min_drain_clk_mhz;
-			table->WatermarkRow[WM_SOCCLK][i].MaxMclk =
-				clock_ranges->writer_wm_sets[i].max_drain_clk_mhz;
-
-			table->WatermarkRow[WM_SOCCLK][i].WmSetting =
-				clock_ranges->writer_wm_sets[i].wm_inst;
-		}
+	for (i = 0; i < clock_ranges->num_reader_wm_sets; i++) {
+		table->WatermarkRow[WM_DCFCLK][i].MinClock =
+			clock_ranges->reader_wm_sets[i].min_drain_clk_mhz;
+		table->WatermarkRow[WM_DCFCLK][i].MaxClock =
+			clock_ranges->reader_wm_sets[i].max_drain_clk_mhz;
+		table->WatermarkRow[WM_DCFCLK][i].MinMclk =
+			clock_ranges->reader_wm_sets[i].min_fill_clk_mhz;
+		table->WatermarkRow[WM_DCFCLK][i].MaxMclk =
+			clock_ranges->reader_wm_sets[i].max_fill_clk_mhz;
+
+		table->WatermarkRow[WM_DCFCLK][i].WmSetting =
+			clock_ranges->reader_wm_sets[i].wm_inst;
+	}
 
-		smu->watermarks_bitmap |= WATERMARKS_EXIST;
+	for (i = 0; i < clock_ranges->num_writer_wm_sets; i++) {
+		table->WatermarkRow[WM_SOCCLK][i].MinClock =
+			clock_ranges->writer_wm_sets[i].min_fill_clk_mhz;
+		table->WatermarkRow[WM_SOCCLK][i].MaxClock =
+			clock_ranges->writer_wm_sets[i].max_fill_clk_mhz;
+		table->WatermarkRow[WM_SOCCLK][i].MinMclk =
+			clock_ranges->writer_wm_sets[i].min_drain_clk_mhz;
+		table->WatermarkRow[WM_SOCCLK][i].MaxMclk =
+			clock_ranges->writer_wm_sets[i].max_drain_clk_mhz;
+
+		table->WatermarkRow[WM_SOCCLK][i].WmSetting =
+			clock_ranges->writer_wm_sets[i].wm_inst;
 	}
 
+	smu->watermarks_bitmap |= WATERMARKS_EXIST;
+
 	/* pass data to smu controller */
 	if ((smu->watermarks_bitmap & WATERMARKS_EXIST) &&
 	     !(smu->watermarks_bitmap & WATERMARKS_LOADED)) {
commit 3670c46f07d13ba42bb66948fde68495078457ec
Author: Evan Quan <evan.quan at amd.com>
Date:   Fri May 13 11:03:07 2022 +0800

    drm/amd/pm: enable memory temp reading for SMU 13.0.0
    
    With the latest vbios, the memory temp reading is working.
    
    Signed-off-by: Evan Quan <evan.quan at amd.com>
    Reviewed-by: Hawking Zhang <Hawking.Zhang at amd.com>
    Signed-off-by: Alex Deucher <alexander.deucher at amd.com>

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
index 7bb2923eb819..7432b3e76d3d 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
@@ -275,9 +275,7 @@ smu_v13_0_0_get_allowed_feature_mask(struct smu_context *smu,
 		*(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_VDDIO_MEM_SCALING_BIT);
 	}
 
-#if 0
 	*(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_MEM_TEMP_READ_BIT);
-#endif
 
 	if (adev->pm.pp_feature & PP_SCLK_DEEP_SLEEP_MASK)
 		*(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_DS_GFXCLK_BIT);
commit 0aceb728f4e4790d80ce64e83bb8fad40693db13
Author: Evan Quan <evan.quan at amd.com>
Date:   Tue May 10 15:00:43 2022 +0800

    drm/amd/pm: enable more dpm features for SMU 13.0.0
    
    Enable OOB Monitor and SOC CG which are ready since 78.38.0.
    
    Signed-off-by: Evan Quan <evan.quan at amd.com>
    Reviewed-by: Hawking Zhang <Hawking.Zhang at amd.com>
    Acked-by: Alex Deucher <alexander.deucher at amd.com>
    Signed-off-by: Alex Deucher <alexander.deucher at amd.com>

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
index 7bfceca246ae..7bb2923eb819 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
@@ -299,6 +299,9 @@ smu_v13_0_0_get_allowed_feature_mask(struct smu_context *smu,
 	*(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_DPM_MP0CLK_BIT);
 	*(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_FW_DSTATE_BIT);
 
+	*(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_OUT_OF_BAND_MONITOR_BIT);
+	*(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_SOC_CG_BIT);
+
 	return 0;
 }
 
commit 6fd693817dcf07aed021b4196993822fad225664
Author: Evan Quan <evan.quan at amd.com>
Date:   Tue May 10 11:04:06 2022 +0800

    drm/amd/pm: correct the softpptable ids used for SMU 13.0.0
    
    To better match with the pptable_id settings from VBIOS.
    
    Signed-off-by: Evan Quan <evan.quan at amd.com>
    Reviewed-by: Hawking Zhang <Hawking.Zhang at amd.com>
    Acked-by: Alex Deucher <alexander.deucher at amd.com>
    Signed-off-by: Alex Deucher <alexander.deucher at amd.com>

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
index ae6321af9d88..7be4f6875a7b 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
@@ -218,13 +218,25 @@ int smu_v13_0_init_pptable_microcode(struct smu_context *smu)
 			pptable_id == 3688)
 			pptable_id = 36881;
 		/*
-		 * Temporary solution for SMU V13.0.0:
-		 *   - use 99991 signed pptable when SCPM enabled
-		 * TODO: drop this when the pptable carried in vbios
-		 * is ready.
+		 * Temporary solution for SMU V13.0.0 with SCPM enabled:
+		 *   - use 36831 signed pptable when pp_table_id is 3683
+		 *   - use 36641 signed pptable when pp_table_id is 3664 or 0
+		 * TODO: drop these when the pptable carried in vbios is ready.
 		 */
-		if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 0))
-			pptable_id = 99991;
+		if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 0)) {
+			switch (pptable_id) {
+			case 0:
+			case 3664:
+				pptable_id = 36641;
+				break;
+			case 3683:
+				pptable_id = 36831;
+				break;
+			default:
+				dev_err(adev->dev, "Unsupported pptable id %d\n", pptable_id);
+				return -EINVAL;
+			}
+		}
 	}
 
 	/* "pptable_id == 0" means vbios carries the pptable. */
@@ -448,13 +460,24 @@ int smu_v13_0_setup_pptable(struct smu_context *smu)
 		pptable_id = smu->smu_table.boot_values.pp_table_id;
 
 		/*
-		 * Temporary solution for SMU V13.0.0:
-		 *   - use 9999 unsigned pptable when SCPM disabled
-		 * TODO: drop this when the pptable carried in vbios
-		 * is ready.
+		 * Temporary solution for SMU V13.0.0 with SCPM disabled:
+		 *   - use 3664 or 3683 on request
+		 *   - use 3664 when pptable_id is 0
+		 * TODO: drop these when the pptable carried in vbios is ready.
 		 */
-		if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 0))
-			pptable_id = 9999;
+		if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 0)) {
+			switch (pptable_id) {
+			case 0:
+				pptable_id = 3664;
+				break;
+			case 3664:
+			case 3683:
+				break;
+			default:
+				dev_err(adev->dev, "Unsupported pptable id %d\n", pptable_id);
+				return -EINVAL;
+			}
+		}
 	}
 
 	/* force using vbios pptable in sriov mode */
commit 1c65e54881f3a56f16783b0b772501a8ddeb8c10
Author: Evan Quan <evan.quan at amd.com>
Date:   Mon May 9 11:42:23 2022 +0800

    drm/amd/pm: update SMU 13.0.0 driver_if header
    
    To align with 78.37.0 and later PMFWs.
    
    Signed-off-by: Evan Quan <evan.quan at amd.com>
    Reviewed-by: Hawking Zhang <Hawking.Zhang at amd.com>
    Signed-off-by: Alex Deucher <alexander.deucher at amd.com>

diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h
index ecc6411dfc8d..c1f76236da26 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h
@@ -671,8 +671,8 @@ typedef struct {
   uint16_t               reserved[2];
 
   //Frequency changes
-  uint16_t               GfxclkFmin;           // MHz
-  uint16_t               GfxclkFmax;           // MHz
+  int16_t                GfxclkFmin;           // MHz
+  int16_t                GfxclkFmax;           // MHz
   uint16_t               UclkFmin;             // MHz
   uint16_t               UclkFmax;             // MHz
 
@@ -683,15 +683,14 @@ typedef struct {
   //Fan control
   uint8_t                FanLinearPwmPoints[NUM_OD_FAN_MAX_POINTS];
   uint8_t                FanLinearTempPoints[NUM_OD_FAN_MAX_POINTS];
-  uint16_t               FanMaximumRpm;
   uint16_t               FanMinimumPwm;
-  uint16_t               FanAcousticLimitRpm;
+  uint16_t               AcousticTargetRpmThreshold;
+  uint16_t               AcousticLimitRpmThreshold;
   uint16_t               FanTargetTemperature; // Degree Celcius
   uint8_t                FanZeroRpmEnable;
   uint8_t                FanZeroRpmStopTemp;
   uint8_t                FanMode;
-  uint8_t                Padding[1];
-
+  uint8_t                MaxOpTemp;
 
   uint32_t               Spare[13];
   uint32_t               MmHubPadding[8]; // SMU internal use. Adding here instead of external as a workaround
@@ -719,15 +718,14 @@ typedef struct {
 
   uint8_t                FanLinearPwmPoints;
   uint8_t                FanLinearTempPoints;
-  uint16_t               FanMaximumRpm;
   uint16_t               FanMinimumPwm;
-  uint16_t               FanAcousticLimitRpm;
+  uint16_t               AcousticTargetRpmThreshold;
+  uint16_t               AcousticLimitRpmThreshold;
   uint16_t               FanTargetTemperature; // Degree Celcius
   uint8_t                FanZeroRpmEnable;
   uint8_t                FanZeroRpmStopTemp;
   uint8_t                FanMode;
-  uint8_t                Padding[1];
-
+  uint8_t                MaxOpTemp;
 
   uint32_t               Spare[13];
 
@@ -997,7 +995,8 @@ typedef struct {
   uint16_t SocketPowerLimitAcTau[PPT_THROTTLER_COUNT]; // Time constant of LPF in ms
   uint16_t SocketPowerLimitDcTau[PPT_THROTTLER_COUNT]; // Time constant of LPF in ms
 
-  uint32_t       SpareVmin[12];
+  QuadraticInt_t Vmin_droop;
+  uint32_t       SpareVmin[9];
 
 
   //SECTION: DPM Configuration 1
@@ -1286,7 +1285,6 @@ typedef struct {
   uint32_t    PostVoltageSetBacoDelay; // in microseconds. Amount of time FW will wait after power good is established or PSI0 command is issued
   uint32_t    BacoEntryDelay; // in milliseconds. Amount of time FW will wait to trigger BACO entry after receiving entry notification from OS
 
-
   // SECTION: Board Reserved
   uint32_t     BoardSpare[64];
 
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
index 2b44d41a5157..afa1991e26f9 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
@@ -30,7 +30,7 @@
 #define SMU13_DRIVER_IF_VERSION_ALDE 0x08
 #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_4 0x04
 #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_5 0x04
-#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_0 0x27
+#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_0 0x28
 #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_7 0x28
 
 #define SMU13_MODE1_RESET_WAIT_TIME_IN_MS 500  //500ms
commit 704d6bf605faf65555438c2fa0282c02dca1a7b6
Author: Evan Quan <evan.quan at amd.com>
Date:   Fri May 6 17:36:06 2022 +0800

    drm/amd/pm: skip dpm disablement on suspend for SMU 13.0.0
    
    Since PMFW will handle this properly. Driver involvement is
    unnecessary.
    
    Signed-off-by: Evan Quan <evan.quan at amd.com>
    Reviewed-by: Hawking Zhang <Hawking.Zhang at amd.com>
    Signed-off-by: Alex Deucher <alexander.deucher at amd.com>

diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
index 6016b325b6b5..a601024ba4de 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
@@ -1436,6 +1436,7 @@ static int smu_disable_dpms(struct smu_context *smu)
 		case IP_VERSION(11, 0, 0):
 		case IP_VERSION(11, 0, 5):
 		case IP_VERSION(11, 0, 9):
+		case IP_VERSION(13, 0, 0):
 			return 0;
 		default:
 			break;
commit 72063c71c39162d897c7c6f47fdc26425cfba03b
Author: Evan Quan <evan.quan at amd.com>
Date:   Thu May 5 11:49:14 2022 +0800

    drm/amd/pm: enable more dpm features for SMU 13.0.0
    
    Enable MP0CLK DPM and FW Dstate since they are already supported
    by latest 78.36.0 PMFW.
    
    Signed-off-by: Evan Quan <evan.quan at amd.com>
    Reviewed-by: Hawking Zhang <Hawking.Zhang at amd.com>
    Signed-off-by: Alex Deucher <alexander.deucher at amd.com>

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
index 197a0e2ff063..7bfceca246ae 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
@@ -296,6 +296,9 @@ smu_v13_0_0_get_allowed_feature_mask(struct smu_context *smu,
 
 	*(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_BACO_BIT);
 
+	*(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_DPM_MP0CLK_BIT);
+	*(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_FW_DSTATE_BIT);
+
 	return 0;
 }
 
commit d2f4460a3d9502513419f06cc376c7ade49d5753
Author: Gong Yuanjun <ruc_gongyuanjun at 163.com>
Date:   Tue May 17 17:57:46 2022 +0800

    drm/amd/pm: fix a potential gpu_metrics_table memory leak
    
    gpu_metrics_table is allocated in yellow_carp_init_smc_tables() but
    not freed in yellow_carp_fini_smc_tables().
    
    Signed-off-by: Gong Yuanjun <ruc_gongyuanjun at 163.com>
    Signed-off-by: Alex Deucher <alexander.deucher at amd.com>

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c
index 87257b1b028f..feff4f8c927c 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c
@@ -190,6 +190,9 @@ static int yellow_carp_fini_smc_tables(struct smu_context *smu)
 	kfree(smu_table->watermarks_table);
 	smu_table->watermarks_table = NULL;
 
+	kfree(smu_table->gpu_metrics_table);
+	smu_table->gpu_metrics_table = NULL;
+
 	return 0;
 }
 
commit a2b28708b645c5632dc93669ab06e97874c8244f
Author: Gong Yuanjun <ruc_gongyuanjun at 163.com>
Date:   Tue May 17 17:57:00 2022 +0800

    drm/radeon: fix a possible null pointer dereference
    
    In radeon_fp_native_mode(), the return value of drm_mode_duplicate()
    is assigned to mode, which will lead to a NULL pointer dereference
    on failure of drm_mode_duplicate(). Add a check to avoid npd.
    
    The failure status of drm_cvt_mode() on the other path is checked too.
    
    Signed-off-by: Gong Yuanjun <ruc_gongyuanjun at 163.com>
    Signed-off-by: Alex Deucher <alexander.deucher at amd.com>

diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c
index a16892c16f60..58db79921cd3 100644
--- a/drivers/gpu/drm/radeon/radeon_connectors.c
+++ b/drivers/gpu/drm/radeon/radeon_connectors.c
@@ -473,6 +473,8 @@ static struct drm_display_mode *radeon_fp_native_mode(struct drm_encoder *encode
 	    native_mode->vdisplay != 0 &&
 	    native_mode->clock != 0) {
 		mode = drm_mode_duplicate(dev, native_mode);
+		if (!mode)
+			return NULL;
 		mode->type = DRM_MODE_TYPE_PREFERRED | DRM_MODE_TYPE_DRIVER;
 		drm_mode_set_name(mode);
 
@@ -487,6 +489,8 @@ static struct drm_display_mode *radeon_fp_native_mode(struct drm_encoder *encode
 		 * simpler.
 		 */
 		mode = drm_cvt_mode(dev, native_mode->hdisplay, native_mode->vdisplay, 60, true, false, false);
+		if (!mode)
+			return NULL;
 		mode->type = DRM_MODE_TYPE_PREFERRED | DRM_MODE_TYPE_DRIVER;
 		DRM_DEBUG_KMS("Adding cvt approximation of native panel mode %s\n", mode->name);
 	}
commit 10784fec9cbddad2ca6031b28c1ca1d041444dc5
Author: Haohui Mai <ricetons at gmail.com>
Date:   Tue May 17 04:24:38 2022 -0700

    drm/amdgpu/gfx10: rework KIQ programming
    
    Make sure the queue is not longer active before programming
    the kiq EOP registers.
    
    Signed-off-by: Haohui Mai <ricetons at gmail.com>
    Signed-off-by: Alex Deucher <alexander.deucher at amd.com>

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index dada7ab5d43f..02754ee86c81 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -6956,20 +6956,6 @@ static int gfx_v10_0_kiq_init_register(struct amdgpu_ring *ring)
 	/* disable wptr polling */
 	WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
 
-	/* write the EOP addr */
-	WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
-	       mqd->cp_hqd_eop_base_addr_lo);
-	WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
-	       mqd->cp_hqd_eop_base_addr_hi);
-
-	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
-	WREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL,
-	       mqd->cp_hqd_eop_control);
-
-	/* enable doorbell? */
-	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
-	       mqd->cp_hqd_pq_doorbell_control);
-
 	/* disable the queue if it's active */
 	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
 		WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
@@ -6988,6 +6974,19 @@ static int gfx_v10_0_kiq_init_register(struct amdgpu_ring *ring)
 		       mqd->cp_hqd_pq_wptr_hi);
 	}
 
+	/* disable doorbells */
+	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
+
+	/* write the EOP addr */
+	WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
+	       mqd->cp_hqd_eop_base_addr_lo);
+	WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
+	       mqd->cp_hqd_eop_base_addr_hi);
+
+	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
+	WREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL,
+	       mqd->cp_hqd_eop_control);
+
 	/* set the pointer to the MQD */
 	WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR,
 	       mqd->cp_mqd_base_addr_lo);
commit 842035543c0bfa35b1471e74094a107673815b01
Author: Haohui Mai <ricetons at gmail.com>
Date:   Mon May 16 23:06:35 2022 -0700

    drm/amdgpu: Set CP_HQD_PQ_CONTROL.RPTR_BLOCK_SIZE correctly
    
    Remove the accidental shifts on the values of RPTR_BLOCK_SIZE
    in gfx_v8-v11. The bug essentially always programs the
    corresponding fields to zero instead of the correct value.
    The hardware clamps the min value to 5 so this resulted in a
    value of 5 being programmed.
    
    Signed-off-by: Haohui Mai <ricetons at gmail.com>
    Signed-off-by: Alex Deucher <alexander.deucher at amd.com>

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 63fbe03283bf..dada7ab5d43f 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -6898,7 +6898,7 @@ static int gfx_v10_0_compute_mqd_init(struct amdgpu_device *adev, void *m,
 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
 			    (order_base_2(prop->queue_size / 4) - 1));
 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
-			    ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
+			    (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1));
 #ifdef __BIG_ENDIAN
 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
index 8773cbd1f03b..8c0a3fc7aaa6 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -4082,7 +4082,7 @@ static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m,
 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
 			    (order_base_2(prop->queue_size / 4) - 1));
 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
-			    ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
+			    (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1));
 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0);
 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 90f64219d291..fb9302910742 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -4490,7 +4490,7 @@ static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
 			    (order_base_2(ring->ring_size / 4) - 1));
 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
-			((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
+			(order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1));
 #ifdef __BIG_ENDIAN
 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index f49a2dd89ee7..f12ae6e2359a 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -3514,7 +3514,7 @@ static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
 			    (order_base_2(ring->ring_size / 4) - 1));
 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
-			((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
+			(order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1));
 #ifdef __BIG_ENDIAN
 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
 #endif
commit 69493c034d2455204dfcd370de8c4dc204374a94
Author: Christian König <christian.koenig at amd.com>
Date:   Fri May 13 13:54:02 2022 +0200

    drm/amdgpu: cleanup ctx implementation
    
    Let each context have a pointer to the ctx manager and properly
    initialize the adev pointer inside the context manager.
    
    Reduce the BUG_ON() in amdgpu_ctx_add_fence() into a WARN_ON() and
    directly return the sequence number instead of writing into a parmeter.
    
    Signed-off-by: Christian König <christian.koenig at amd.com>
    Reviewed-by: Shashank Sharma <shashank.sharma at amd.com>
    Signed-off-by: Alex Deucher <alexander.deucher at amd.com>

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index e552a2004868..84caab5e4d22 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -1252,7 +1252,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 
 	p->fence = dma_fence_get(&job->base.s_fence->finished);
 
-	amdgpu_ctx_add_fence(p->ctx, entity, p->fence, &seq);
+	seq = amdgpu_ctx_add_fence(p->ctx, entity, p->fence);
 	amdgpu_cs_post_dependencies(p);
 
 	if ((job->preamble_status & AMDGPU_PREAMBLE_IB_PRESENT) &&
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index c317078d1afd..a61e4c83a545 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -135,9 +135,9 @@ static enum amdgpu_ring_priority_level amdgpu_ctx_sched_prio_to_ring_prio(int32_
 
 static unsigned int amdgpu_ctx_get_hw_prio(struct amdgpu_ctx *ctx, u32 hw_ip)
 {
-	struct amdgpu_device *adev = ctx->adev;
-	int32_t ctx_prio;
+	struct amdgpu_device *adev = ctx->mgr->adev;
 	unsigned int hw_prio;
+	int32_t ctx_prio;
 
 	ctx_prio = (ctx->override_priority == AMDGPU_CTX_PRIORITY_UNSET) ?
 			ctx->init_priority : ctx->override_priority;
@@ -166,7 +166,7 @@ static unsigned int amdgpu_ctx_get_hw_prio(struct amdgpu_ctx *ctx, u32 hw_ip)
 static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, u32 hw_ip,
 				  const u32 ring)
 {
-	struct amdgpu_device *adev = ctx->adev;
+	struct amdgpu_device *adev = ctx->mgr->adev;
 	struct amdgpu_ctx_entity *entity;
 	struct drm_gpu_scheduler **scheds = NULL, *sched = NULL;
 	unsigned num_scheds = 0;
@@ -220,10 +220,8 @@ error_free_entity:
 	return r;
 }
 
-static int amdgpu_ctx_init(struct amdgpu_device *adev,
-			   int32_t priority,
-			   struct drm_file *filp,
-			   struct amdgpu_ctx *ctx)
+static int amdgpu_ctx_init(struct amdgpu_ctx_mgr *mgr, int32_t priority,
+			   struct drm_file *filp, struct amdgpu_ctx *ctx)
 {
 	int r;
 
@@ -233,15 +231,14 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
 
 	memset(ctx, 0, sizeof(*ctx));
 
-	ctx->adev = adev;
-
 	kref_init(&ctx->refcount);
+	ctx->mgr = mgr;
 	spin_lock_init(&ctx->ring_lock);
 	mutex_init(&ctx->lock);
 
-	ctx->reset_counter = atomic_read(&adev->gpu_reset_counter);
+	ctx->reset_counter = atomic_read(&mgr->adev->gpu_reset_counter);
 	ctx->reset_counter_query = ctx->reset_counter;
-	ctx->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
+	ctx->vram_lost_counter = atomic_read(&mgr->adev->vram_lost_counter);
 	ctx->init_priority = priority;
 	ctx->override_priority = AMDGPU_CTX_PRIORITY_UNSET;
 	ctx->stable_pstate = AMDGPU_CTX_STABLE_PSTATE_NONE;
@@ -266,7 +263,7 @@ static void amdgpu_ctx_fini_entity(struct amdgpu_ctx_entity *entity)
 static int amdgpu_ctx_get_stable_pstate(struct amdgpu_ctx *ctx,
 					u32 *stable_pstate)
 {
-	struct amdgpu_device *adev = ctx->adev;
+	struct amdgpu_device *adev = ctx->mgr->adev;
 	enum amd_dpm_forced_level current_level;
 
 	current_level = amdgpu_dpm_get_performance_level(adev);
@@ -294,7 +291,7 @@ static int amdgpu_ctx_get_stable_pstate(struct amdgpu_ctx *ctx,
 static int amdgpu_ctx_set_stable_pstate(struct amdgpu_ctx *ctx,
 					u32 stable_pstate)
 {
-	struct amdgpu_device *adev = ctx->adev;
+	struct amdgpu_device *adev = ctx->mgr->adev;
 	enum amd_dpm_forced_level level;
 	u32 current_stable_pstate;
 	int r;
@@ -345,7 +342,8 @@ done:
 static void amdgpu_ctx_fini(struct kref *ref)
 {
 	struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount);
-	struct amdgpu_device *adev = ctx->adev;
+	struct amdgpu_ctx_mgr *mgr = ctx->mgr;
+	struct amdgpu_device *adev = mgr->adev;
 	unsigned i, j, idx;
 
 	if (!adev)
@@ -421,7 +419,7 @@ static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
 	}
 
 	*id = (uint32_t)r;
-	r = amdgpu_ctx_init(adev, priority, filp, ctx);
+	r = amdgpu_ctx_init(mgr, priority, filp, ctx);
 	if (r) {
 		idr_remove(&mgr->ctx_handles, *id);
 		*id = 0;
@@ -671,9 +669,9 @@ int amdgpu_ctx_put(struct amdgpu_ctx *ctx)
 	return 0;
 }
 
-void amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx,
-			  struct drm_sched_entity *entity,
-			  struct dma_fence *fence, uint64_t *handle)
+uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx,
+			      struct drm_sched_entity *entity,
+			      struct dma_fence *fence)
 {
 	struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
 	uint64_t seq = centity->sequence;
@@ -682,8 +680,7 @@ void amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx,
 
 	idx = seq & (amdgpu_sched_jobs - 1);
 	other = centity->fences[idx];
-	if (other)
-		BUG_ON(!dma_fence_is_signaled(other));
+	WARN_ON(other && !dma_fence_is_signaled(other));
 
 	dma_fence_get(fence);
 
@@ -693,8 +690,7 @@ void amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx,
 	spin_unlock(&ctx->ring_lock);
 
 	dma_fence_put(other);
-	if (handle)
-		*handle = seq;
+	return seq;
 }
 
 struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
@@ -731,7 +727,7 @@ static void amdgpu_ctx_set_entity_priority(struct amdgpu_ctx *ctx,
 					   int hw_ip,
 					   int32_t priority)
 {
-	struct amdgpu_device *adev = ctx->adev;
+	struct amdgpu_device *adev = ctx->mgr->adev;
 	unsigned int hw_prio;
 	struct drm_gpu_scheduler **scheds = NULL;
 	unsigned num_scheds;
@@ -796,8 +792,10 @@ int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx,
 	return r;
 }
 
-void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr)
+void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr,
+			 struct amdgpu_device *adev)
 {
+	mgr->adev = adev;
 	mutex_init(&mgr->lock);
 	idr_init(&mgr->ctx_handles);
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
index 142f2f87d44c..681050bc828c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
@@ -40,7 +40,7 @@ struct amdgpu_ctx_entity {
 
 struct amdgpu_ctx {
 	struct kref			refcount;
-	struct amdgpu_device		*adev;
+	struct amdgpu_ctx_mgr		*mgr;
 	unsigned			reset_counter;
 	unsigned			reset_counter_query;
 	uint32_t			vram_lost_counter;
@@ -70,9 +70,9 @@ int amdgpu_ctx_put(struct amdgpu_ctx *ctx);
 
 int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
 			  u32 ring, struct drm_sched_entity **entity);
-void amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx,
-			  struct drm_sched_entity *entity,
-			  struct dma_fence *fence, uint64_t *seq);
+uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx,
+			      struct drm_sched_entity *entity,
+			      struct dma_fence *fence);
 struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
 				       struct drm_sched_entity *entity,
 				       uint64_t seq);
@@ -85,7 +85,8 @@ int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
 int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx,
 			       struct drm_sched_entity *entity);
 
-void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr);
+void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr,
+			 struct amdgpu_device *adev);
 void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr);
 long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout);
 void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 497478f8a5d3..801f6fa692e9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -1152,7 +1152,7 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
 	mutex_init(&fpriv->bo_list_lock);
 	idr_init(&fpriv->bo_list_handles);
 
-	amdgpu_ctx_mgr_init(&fpriv->ctx_mgr);
+	amdgpu_ctx_mgr_init(&fpriv->ctx_mgr, adev);
 
 	file_priv->driver_priv = fpriv;
 	goto out_suspend;
commit a0af5dbdc914eae667fba8322cb02afc4ce3967b
Author: Jonathan Kim <jonathan.kim at amd.com>
Date:   Thu May 12 20:38:18 2022 -0400

    drm/amdkfd: simplify cpu hive assignment
    
    CPU hive assignment currently assumes when a GPU hive is connected_to_cpu,
    there is only one hive in the system.
    
    Only assign CPUs to the hive if they are explicitly directly connected to
    the GPU hive to get rid of the need for this assumption.
    
    It's more efficient to do this when querying IO links since other non-CRAT
    info has to be filled in anyways.  Also, stop re-assigning the
    same CPU to the same GPU hive if it has already been done before.
    
    Signed-off-by: Jonathan Kim <jonathan.kim at amd.com>
    Reviewed-by: Felix Kuehling <felix.kuehling at amd.com>
    Signed-off-by: Alex Deucher <alexander.deucher at amd.com>

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index 2e20f54bb147..8d50d207cf66 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -1271,6 +1271,12 @@ static void kfd_fill_iolink_non_crat_info(struct kfd_topology_device *dev)
 		if (!peer_dev)
 			continue;
 
+		/* Include the CPU peer in GPU hive if connected over xGMI. */
+		if (!peer_dev->gpu && !peer_dev->node_props.hive_id &&
+				dev->node_props.hive_id &&
+				dev->gpu->adev->gmc.xgmi.connected_to_cpu)
+			peer_dev->node_props.hive_id = dev->node_props.hive_id;
+
 		list_for_each_entry(inbound_link, &peer_dev->io_link_props,
 									list) {
 			if (inbound_link->node_to != link->node_from)
@@ -1302,22 +1308,6 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
 
 	pr_debug("Adding new GPU (ID: 0x%x) to topology\n", gpu_id);
 
-	/* Include the CPU in xGMI hive if xGMI connected by assigning it the hive ID. */
-	if (gpu->hive_id && gpu->adev->gmc.xgmi.connected_to_cpu) {
-		struct kfd_topology_device *top_dev;
-
-		down_read(&topology_lock);
-
-		list_for_each_entry(top_dev, &topology_device_list, list) {
-			if (top_dev->gpu)
-				break;
-
-			top_dev->node_props.hive_id = gpu->hive_id;
-		}
-
-		up_read(&topology_lock);
-	}
-
 	/* Check to see if this gpu device exists in the topology_device_list.
 	 * If so, assign the gpu to that device,
 	 * else create a Virtual CRAT for this gpu device and then parse that
commit 2c2dd0555fd6bcea6d43ab3224c6af718b910e22
Author: Haohui Mai <ricetons at gmail.com>
Date:   Mon May 16 05:00:53 2022 -0700

    drm/amdgpu: Clean up of initializing doorbells for gfx_v9 and gfx_v10
    
    Clean up redundant, copy-paste code blocks during the initialization of
    the doorbells in mqd_init().
    
    Signed-off-by: Haohui Mai <ricetons at gmail.com>
    Signed-off-by: Alex Deucher <alexander.deucher at amd.com>

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 65a4126135b0..63fbe03283bf 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -6919,23 +6919,6 @@ static int gfx_v10_0_compute_mqd_init(struct amdgpu_device *adev, void *m,
 	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
 	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
 
-	tmp = 0;
-	/* enable the doorbell if requested */
-	if (prop->use_doorbell) {
-		tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
-		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
-				DOORBELL_OFFSET, prop->doorbell_index);
-
-		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
-				    DOORBELL_EN, 1);
-		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
-				    DOORBELL_SOURCE, 0);
-		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
-				    DOORBELL_HIT, 0);
-	}
-
-	mqd->cp_hqd_pq_doorbell_control = tmp;
-
 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
 	mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 83639b5ea6a9..f49a2dd89ee7 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -3535,23 +3535,6 @@ static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
 	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
 	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
 
-	tmp = 0;
-	/* enable the doorbell if requested */
-	if (ring->use_doorbell) {
-		tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
-		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
-				DOORBELL_OFFSET, ring->doorbell_index);
-
-		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
-					 DOORBELL_EN, 1);
-		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
-					 DOORBELL_SOURCE, 0);
-		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
-					 DOORBELL_HIT, 0);
-	}
-
-	mqd->cp_hqd_pq_doorbell_control = tmp;
-
 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
 	ring->wptr = 0;
 	mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
commit c51bdd1a9c34936c1a6b2f6eb79703c730cc0e6e
Author: Aric Cyr <aric.cyr at amd.com>
Date:   Sun May 8 23:31:34 2022 -0400

    drm/amd/display: 3.2.186
    
    This version brings along the following:
    - Improvements in link training fallback
    - Adding individual edp hotplug support
    - Fixes in DPIA HPD status, display clock change hang, etc.
    - FPU isolation work for DCN30
    
    Acked-by: Qingqing Zhuo <qingqing.zhuo at amd.com>
    Signed-off-by: Aric Cyr <aric.cyr at amd.com>
    Tested-by: Daniel Wheeler <daniel.wheeler at amd.com>
    Signed-off-by: Alex Deucher <alexander.deucher at amd.com>

diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h
index 7cfc04a8ef15..a31ea3644ec2 100644
--- a/drivers/gpu/drm/amd/display/dc/dc.h
+++ b/drivers/gpu/drm/amd/display/dc/dc.h
@@ -47,7 +47,7 @@ struct aux_payload;
 struct set_config_cmd_payload;
 struct dmub_notification;
 
-#define DC_VER "3.2.185"
+#define DC_VER "3.2.186"
 
 #define MAX_SURFACES 3
 #define MAX_PLANES 6
commit 4d1d699f479dc8e01fbb6b7b5a8f2116de3a3883
Author: Michael Strauss <michael.strauss at amd.com>
Date:   Fri May 6 17:17:55 2022 -0400

    Revert "drm/amd/display: Refactor LTTPR cap retrieval"
    
    This reverts commit 3b90318d44f87a3582f876802253a7748d270385.
    
    [WHY]
    Regressions unintentionally caused by change,
    reverting until this can be resolved.
    
    Reviewed-by: Aric Cyr <Aric.Cyr at amd.com>
    Acked-by: Qingqing Zhuo <qingqing.zhuo at amd.com>
    Signed-off-by: Michael Strauss <michael.strauss at amd.com>
    Tested-by: Daniel Wheeler <daniel.wheeler at amd.com>
    Signed-off-by: Alex Deucher <alexander.deucher at amd.com>

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
index d8de8dbf3676..3c9523218c19 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
@@ -5126,13 +5126,16 @@ static bool dpcd_read_sink_ext_caps(struct dc_link *link)
 	return true;
 }
 
-void dp_retrieve_lttpr_cap(struct dc_link *link)
+bool dp_retrieve_lttpr_cap(struct dc_link *link)
 {
+	uint8_t lttpr_dpcd_data[8];
 	bool allow_lttpr_non_transparent_mode = 0;
+	bool vbios_lttpr_enable = link->dc->caps.vbios_lttpr_enable;
 	bool vbios_lttpr_interop = link->dc->caps.vbios_lttpr_aware;
 	enum dc_status status = DC_ERROR_UNEXPECTED;
+	bool is_lttpr_present = false;
 
-	memset(link->lttpr_dpcd_data, '\0', sizeof(link->lttpr_dpcd_data));
+	memset(lttpr_dpcd_data, '\0', sizeof(lttpr_dpcd_data));
 
 	if ((link->dc->config.allow_lttpr_non_transparent_mode.bits.DP2_0 &&
 			link->dpcd_caps.channel_coding_cap.bits.DP_128b_132b_SUPPORTED)) {
@@ -5142,116 +5145,82 @@ void dp_retrieve_lttpr_cap(struct dc_link *link)
 		allow_lttpr_non_transparent_mode = 1;
 	}
 
-	link->lttpr_mode = LTTPR_MODE_NON_LTTPR;
-	link->lttpr_support = LTTPR_UNSUPPORTED;
-
 	/*
-	 * Logic to determine LTTPR support
+	 * Logic to determine LTTPR mode
 	 */
-	if (vbios_lttpr_interop)
-		link->lttpr_support = LTTPR_SUPPORTED;
-	else if (link->dc->config.allow_lttpr_non_transparent_mode.raw == 0
-			|| !link->dc->caps.extended_aux_timeout_support)
-			link->lttpr_support = LTTPR_UNSUPPORTED;
-	else
-		link->lttpr_support = LTTPR_CHECK_EXT_SUPPORT;
+	link->lttpr_mode = LTTPR_MODE_NON_LTTPR;
+	if (vbios_lttpr_enable && vbios_lttpr_interop)
+		link->lttpr_mode = LTTPR_MODE_NON_TRANSPARENT;
+	else if (!vbios_lttpr_enable && vbios_lttpr_interop) {
+		if (allow_lttpr_non_transparent_mode)
+			link->lttpr_mode = LTTPR_MODE_NON_TRANSPARENT;
+		else
+			link->lttpr_mode = LTTPR_MODE_TRANSPARENT;
+	} else if (!vbios_lttpr_enable && !vbios_lttpr_interop) {
+		if (!allow_lttpr_non_transparent_mode || !link->dc->caps.extended_aux_timeout_support)
+			link->lttpr_mode = LTTPR_MODE_NON_LTTPR;
+		else
+			link->lttpr_mode = LTTPR_MODE_NON_TRANSPARENT;
+	}
 
+#if defined(CONFIG_DRM_AMD_DC_DCN)
 	/* Check DP tunnel LTTPR mode debug option. */
 	if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA &&
 	    link->dc->debug.dpia_debug.bits.force_non_lttpr)
-		link->lttpr_support = LTTPR_UNSUPPORTED;
+		link->lttpr_mode = LTTPR_MODE_NON_LTTPR;
+#endif
 
-	if (link->lttpr_support > LTTPR_UNSUPPORTED) {
+	if (link->lttpr_mode == LTTPR_MODE_NON_TRANSPARENT || link->lttpr_mode == LTTPR_MODE_TRANSPARENT) {
 		/* By reading LTTPR capability, RX assumes that we will enable
 		 * LTTPR extended aux timeout if LTTPR is present.
 		 */
 		status = core_link_read_dpcd(
 				link,
 				DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV,
-				link->lttpr_dpcd_data,
-				sizeof(link->lttpr_dpcd_data));
-	}
-}
-
-bool dp_parse_lttpr_mode(struct dc_link *link)
-{
-	bool dpcd_allow_lttpr_non_transparent_mode = false;
-	bool is_lttpr_present = false;
-
-	bool vbios_lttpr_enable = link->dc->caps.vbios_lttpr_enable;
-
-	if ((link->dc->config.allow_lttpr_non_transparent_mode.bits.DP2_0 &&
-			link->dpcd_caps.channel_coding_cap.bits.DP_128b_132b_SUPPORTED)) {
-		dpcd_allow_lttpr_non_transparent_mode = true;
-	} else if (link->dc->config.allow_lttpr_non_transparent_mode.bits.DP1_4A &&
-			!link->dpcd_caps.channel_coding_cap.bits.DP_128b_132b_SUPPORTED) {
-		dpcd_allow_lttpr_non_transparent_mode = true;
+				lttpr_dpcd_data,
+				sizeof(lttpr_dpcd_data));
+
+		link->dpcd_caps.lttpr_caps.revision.raw =
+				lttpr_dpcd_data[DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV -
+								DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV];
+
+		link->dpcd_caps.lttpr_caps.max_link_rate =
+				lttpr_dpcd_data[DP_MAX_LINK_RATE_PHY_REPEATER -
+								DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV];
+
+		link->dpcd_caps.lttpr_caps.phy_repeater_cnt =
+				lttpr_dpcd_data[DP_PHY_REPEATER_CNT -
+								DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV];
+
+		link->dpcd_caps.lttpr_caps.max_lane_count =
+				lttpr_dpcd_data[DP_MAX_LANE_COUNT_PHY_REPEATER -
+								DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV];
+
+		link->dpcd_caps.lttpr_caps.mode =
+				lttpr_dpcd_data[DP_PHY_REPEATER_MODE -
+								DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV];
+
+		link->dpcd_caps.lttpr_caps.max_ext_timeout =
+				lttpr_dpcd_data[DP_PHY_REPEATER_EXTENDED_WAIT_TIMEOUT -
+								DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV];
+		link->dpcd_caps.lttpr_caps.main_link_channel_coding.raw =
+				lttpr_dpcd_data[DP_MAIN_LINK_CHANNEL_CODING_PHY_REPEATER -
+								DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV];
+
+		link->dpcd_caps.lttpr_caps.supported_128b_132b_rates.raw =
+				lttpr_dpcd_data[DP_PHY_REPEATER_128B132B_RATES -
+								DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV];
+
+		/* Attempt to train in LTTPR transparent mode if repeater count exceeds 8. */
+		is_lttpr_present = (link->dpcd_caps.lttpr_caps.max_lane_count > 0 &&
+				link->dpcd_caps.lttpr_caps.max_lane_count <= 4 &&
+				link->dpcd_caps.lttpr_caps.revision.raw >= 0x14);
+		if (is_lttpr_present) {
+			CONN_DATA_DETECT(link, lttpr_dpcd_data, sizeof(lttpr_dpcd_data), "LTTPR Caps: ");
+			configure_lttpr_mode_transparent(link);
+		} else
+			link->lttpr_mode = LTTPR_MODE_NON_LTTPR;
 	}
-
-	/*
-	 * Logic to determine LTTPR mode
-	 */
-	if (link->lttpr_support == LTTPR_SUPPORTED)
-		if (vbios_lttpr_enable)
-			link->lttpr_mode = LTTPR_MODE_NON_TRANSPARENT;
-		else if (dpcd_allow_lttpr_non_transparent_mode)
-			link->lttpr_mode = LTTPR_MODE_NON_TRANSPARENT;
-		else
-			link->lttpr_mode = LTTPR_MODE_TRANSPARENT;
-	else	// lttpr_support == LTTPR_CHECK_EXT_SUPPORT
-		if (dpcd_allow_lttpr_non_transparent_mode) {
-			link->lttpr_support = LTTPR_SUPPORTED;
-			link->lttpr_mode = LTTPR_MODE_NON_TRANSPARENT;
-		} else {
-			link->lttpr_support = LTTPR_UNSUPPORTED;
-		}
-
-	if (link->lttpr_support == LTTPR_UNSUPPORTED)
-		return false;
-
-	link->dpcd_caps.lttpr_caps.revision.raw =
-			link->lttpr_dpcd_data[DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV -
-							DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV];
-
-	link->dpcd_caps.lttpr_caps.max_link_rate =
-			link->lttpr_dpcd_data[DP_MAX_LINK_RATE_PHY_REPEATER -
-							DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV];
-
-	link->dpcd_caps.lttpr_caps.phy_repeater_cnt =
-			link->lttpr_dpcd_data[DP_PHY_REPEATER_CNT -
-							DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV];
-
-	link->dpcd_caps.lttpr_caps.max_lane_count =
-			link->lttpr_dpcd_data[DP_MAX_LANE_COUNT_PHY_REPEATER -
-							DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV];
-
-	link->dpcd_caps.lttpr_caps.mode =
-			link->lttpr_dpcd_data[DP_PHY_REPEATER_MODE -
-							DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV];
-
-	link->dpcd_caps.lttpr_caps.max_ext_timeout =
-			link->lttpr_dpcd_data[DP_PHY_REPEATER_EXTENDED_WAIT_TIMEOUT -
-							DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV];
-
-	link->dpcd_caps.lttpr_caps.main_link_channel_coding.raw =
-			link->lttpr_dpcd_data[DP_MAIN_LINK_CHANNEL_CODING_PHY_REPEATER -
-							DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV];
-
-	link->dpcd_caps.lttpr_caps.supported_128b_132b_rates.raw =
-			link->lttpr_dpcd_data[DP_PHY_REPEATER_128B132B_RATES -
-							DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV];
-
-
-	/* Attempt to train in LTTPR transparent mode if repeater count exceeds 8. */
-	is_lttpr_present = (link->dpcd_caps.lttpr_caps.max_lane_count > 0 &&
-			link->dpcd_caps.lttpr_caps.max_lane_count <= 4 &&
-			link->dpcd_caps.lttpr_caps.revision.raw >= 0x14);
-	if (is_lttpr_present) {
-		CONN_DATA_DETECT(link, link->lttpr_dpcd_data, sizeof(link->lttpr_dpcd_data), "LTTPR Caps: ");
-		configure_lttpr_mode_transparent(link);
-	} else
-		link->lttpr_mode = LTTPR_MODE_NON_LTTPR;
-
 	return is_lttpr_present;
 }
 
@@ -5403,8 +5372,7 @@ static bool retrieve_link_cap(struct dc_link *link)
 		status = wa_try_to_wake_dprx(link, timeout_ms);
 	}
 
-	dp_retrieve_lttpr_cap(link);
-
+	is_lttpr_present = dp_retrieve_lttpr_cap(link);
 	/* Read DP tunneling information. */
 	status = dpcd_get_tunneling_device_data(link);
 
@@ -5440,9 +5408,6 @@ static bool retrieve_link_cap(struct dc_link *link)
 		return false;
 	}
 
-	if (link->lttpr_support > LTTPR_UNSUPPORTED)
-		is_lttpr_present = dp_parse_lttpr_mode(link);
-
 	if (!is_lttpr_present)
 		dc_link_aux_try_to_configure_timeout(link->ddc, LINK_AUX_DEFAULT_TIMEOUT_PERIOD);
 
diff --git a/drivers/gpu/drm/amd/display/dc/dc_link.h b/drivers/gpu/drm/amd/display/dc/dc_link.h
index 251f2bbc96b9..a3c37ee3f849 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_link.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_link.h
@@ -129,8 +129,6 @@ struct dc_link {
 	bool link_state_valid;
 	bool aux_access_disabled;
 	bool sync_lt_in_progress;
-	uint8_t lttpr_dpcd_data[8];
-	enum lttpr_support lttpr_support;
 	enum lttpr_mode lttpr_mode;
 	bool is_internal_display;
 
diff --git a/drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h b/drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h
index 78f09893c118..44f167d2584f 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h
@@ -217,8 +217,7 @@ void disable_dp_hpo_output(struct dc_link *link,
 void setup_dp_hpo_stream(struct pipe_ctx *pipe_ctx, bool enable);
 bool is_dp_128b_132b_signal(struct pipe_ctx *pipe_ctx);
 
-void dp_retrieve_lttpr_cap(struct dc_link *link);
-bool dp_apply_lttpr_mode(struct dc_link *link);
+bool dp_retrieve_lttpr_cap(struct dc_link *link);
 void edp_panel_backlight_power_on(struct dc_link *link);
 void dp_receiver_power_ctrl(struct dc_link *link, bool on);
 void dp_source_sequence_trace(struct dc_link *link, uint8_t dp_test_mode);
diff --git a/drivers/gpu/drm/amd/display/include/link_service_types.h b/drivers/gpu/drm/amd/display/include/link_service_types.h
index 9f465b4d626e..447a56286dd0 100644
--- a/drivers/gpu/drm/amd/display/include/link_service_types.h
+++ b/drivers/gpu/drm/amd/display/include/link_service_types.h
@@ -80,12 +80,6 @@ enum link_training_result {
 	DP_128b_132b_CDS_DONE_TIMEOUT,
 };
 
-enum lttpr_support {
-	LTTPR_UNSUPPORTED,
-	LTTPR_CHECK_EXT_SUPPORT,
-	LTTPR_SUPPORTED,
-};
-
 enum lttpr_mode {
 	LTTPR_MODE_NON_LTTPR,
 	LTTPR_MODE_TRANSPARENT,
commit a32cc8177eabcd3497721836241f3d456342be62
Author: Bhawanpreet Lakha <Bhawanpreet.Lakha at amd.com>
Date:   Thu May 5 16:16:47 2022 -0400

    drm/amd/display: Fic incorrect pipe being used for clk update
    
    [Why]
    we save the prev_dppclk value using "dpp_inst" but
    when reading this value we use the index "i". In
    a case where a pipe is fused off we can end up reading
    the incorrect instance because i != dpp_inst in this
    case.
    
    [How]
    read the prev_dppclk using dpp_inst instead of i
    
    Reviewed-by: Dmytro Laktyushkin <Dmytro.Laktyushkin at amd.com>
    Acked-by: Qingqing Zhuo <qingqing.zhuo at amd.com>
    Signed-off-by: Bhawanpreet Lakha <Bhawanpreet.Lakha at amd.com>
    Tested-by: Daniel Wheeler <daniel.wheeler at amd.com>
    Signed-off-by: Alex Deucher <alexander.deucher at amd.com>

diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c
index 02943ca65807..cf1b5f354ae9 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c
@@ -122,7 +122,7 @@ static void rn_update_clocks_update_dpp_dto(struct clk_mgr_internal *clk_mgr,
 		dpp_inst = clk_mgr->base.ctx->dc->res_pool->dpps[i]->inst;
 		dppclk_khz = context->res_ctx.pipe_ctx[i].plane_res.bw.dppclk_khz;
 
-		prev_dppclk_khz = clk_mgr->dccg->pipe_dppclk_khz[i];
+		prev_dppclk_khz = clk_mgr->dccg->pipe_dppclk_khz[dpp_inst];
 
 		if (safe_to_lower || prev_dppclk_khz < dppclk_khz)
 			clk_mgr->dccg->funcs->update_dpp_dto(
commit e4b0eac3e6242abf5f5ebcffdeb852e7ffa1c3d0
Author: Jasdeep Dhillon <jdhillon at amd.com>
Date:   Fri May 6 13:03:45 2022 -0400

    drm/amd/display: Move FPU associated DCN30 code to DML folder
    
    [why & how]
    As part of the FPU isolation work documented in
    https://patchwork.freedesktop.org/series/93042/, isolate
    code that uses FPU in DCN30 to DML, where all FPU code
    should locate.
    
    Reviewed-by: Rodrigo Siqueira <Rodrigo.Siqueira at amd.com>
    Acked-by: Qingqing Zhuo <qingqing.zhuo at amd.com>
    Signed-off-by: Jasdeep Dhillon <jdhillon at amd.com>
    Tested-by: Daniel Wheeler <daniel.wheeler at amd.com>
    Signed-off-by: Alex Deucher <alexander.deucher at amd.com>

diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.c
index f5e8916601d3..b604fb26f288 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.c
@@ -28,6 +28,8 @@
 #include "dc.h"
 #include "dcn_calc_math.h"
 
+#include "dml/dcn30/dcn30_fpu.h"
+
 #define REG(reg)\
 	optc1->tg_regs->reg
 
@@ -184,6 +186,14 @@ void optc3_set_dsc_config(struct timing_generator *optc,
 
 }
 
+void optc3_set_vrr_m_const(struct timing_generator *optc,
+		double vtotal_avg)
+{
+	DC_FP_START();
+	optc3_fpu_set_vrr_m_const(optc, vtotal_avg);
+	DC_FP_END();
+}
+
 void optc3_set_odm_bypass(struct timing_generator *optc,
 		const struct dc_crtc_timing *dc_crtc_timing)
 {
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c
index 336b2ce6a636..1c1a67c4cec1 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c
@@ -84,6 +84,7 @@
 #include "dce/dce_aux.h"
 #include "dce/dce_i2c.h"
 
+#include "dml/dcn30/dcn30_fpu.h"
 #include "dml/dcn30/display_mode_vba_30.h"
 #include "vm_helper.h"
 #include "dcn20/dcn20_vmid.h"
@@ -91,137 +92,6 @@
 
 #define DC_LOGGER_INIT(logger)
 
-struct _vcs_dpi_ip_params_st dcn3_0_ip = {
-	.use_min_dcfclk = 0,
-	.clamp_min_dcfclk = 0,
-	.odm_capable = 1,
-	.gpuvm_enable = 0,
-	.hostvm_enable = 0,
-	.gpuvm_max_page_table_levels = 4,
-	.hostvm_max_page_table_levels = 4,
-	.hostvm_cached_page_table_levels = 0,
-	.pte_group_size_bytes = 2048,
-	.num_dsc = 6,
-	.rob_buffer_size_kbytes = 184,
-	.det_buffer_size_kbytes = 184,
-	.dpte_buffer_size_in_pte_reqs_luma = 84,
-	.pde_proc_buffer_size_64k_reqs = 48,
-	.dpp_output_buffer_pixels = 2560,
-	.opp_output_buffer_lines = 1,
-	.pixel_chunk_size_kbytes = 8,
-	.pte_enable = 1,
-	.max_page_table_levels = 2,
-	.pte_chunk_size_kbytes = 2,  // ?
-	.meta_chunk_size_kbytes = 2,
-	.writeback_chunk_size_kbytes = 8,
-	.line_buffer_size_bits = 789504,
-	.is_line_buffer_bpp_fixed = 0,  // ?
-	.line_buffer_fixed_bpp = 0,     // ?
-	.dcc_supported = true,
-	.writeback_interface_buffer_size_kbytes = 90,
-	.writeback_line_buffer_buffer_size = 0,
-	.max_line_buffer_lines = 12,
-	.writeback_luma_buffer_size_kbytes = 12,  // writeback_line_buffer_buffer_size = 656640
-	.writeback_chroma_buffer_size_kbytes = 8,
-	.writeback_chroma_line_buffer_width_pixels = 4,
-	.writeback_max_hscl_ratio = 1,
-	.writeback_max_vscl_ratio = 1,
-	.writeback_min_hscl_ratio = 1,
-	.writeback_min_vscl_ratio = 1,
-	.writeback_max_hscl_taps = 1,
-	.writeback_max_vscl_taps = 1,
-	.writeback_line_buffer_luma_buffer_size = 0,
-	.writeback_line_buffer_chroma_buffer_size = 14643,
-	.cursor_buffer_size = 8,
-	.cursor_chunk_size = 2,
-	.max_num_otg = 6,
-	.max_num_dpp = 6,
-	.max_num_wb = 1,
-	.max_dchub_pscl_bw_pix_per_clk = 4,
-	.max_pscl_lb_bw_pix_per_clk = 2,
-	.max_lb_vscl_bw_pix_per_clk = 4,
-	.max_vscl_hscl_bw_pix_per_clk = 4,
-	.max_hscl_ratio = 6,
-	.max_vscl_ratio = 6,
-	.hscl_mults = 4,
-	.vscl_mults = 4,
-	.max_hscl_taps = 8,
-	.max_vscl_taps = 8,
-	.dispclk_ramp_margin_percent = 1,
-	.underscan_factor = 1.11,
-	.min_vblank_lines = 32,
-	.dppclk_delay_subtotal = 46,
-	.dynamic_metadata_vm_enabled = true,
-	.dppclk_delay_scl_lb_only = 16,
-	.dppclk_delay_scl = 50,
-	.dppclk_delay_cnvc_formatter = 27,
-	.dppclk_delay_cnvc_cursor = 6,
-	.dispclk_delay_subtotal = 119,
-	.dcfclk_cstate_latency = 5.2, // SRExitTime
-	.max_inter_dcn_tile_repeaters = 8,
-	.odm_combine_4to1_supported = true,
-
-	.xfc_supported = false,
-	.xfc_fill_bw_overhead_percent = 10.0,
-	.xfc_fill_constant_bytes = 0,
-	.gfx7_compat_tiling_supported = 0,
-	.number_of_cursors = 1,
-};
-
-struct _vcs_dpi_soc_bounding_box_st dcn3_0_soc = {
-	.clock_limits = {
-			{
-				.state = 0,
-				.dispclk_mhz = 562.0,
-				.dppclk_mhz = 300.0,
-				.phyclk_mhz = 300.0,
-				.phyclk_d18_mhz = 667.0,
-				.dscclk_mhz = 405.6,
-			},
-		},
-	.min_dcfclk = 500.0, /* TODO: set this to actual min DCFCLK */
-	.num_states = 1,
-	.sr_exit_time_us = 15.5,
-	.sr_enter_plus_exit_time_us = 20,
-	.urgent_latency_us = 4.0,
-	.urgent_latency_pixel_data_only_us = 4.0,
-	.urgent_latency_pixel_mixed_with_vm_data_us = 4.0,
-	.urgent_latency_vm_data_only_us = 4.0,
-	.urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096,
-	.urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096,
-	.urgent_out_of_order_return_per_channel_vm_only_bytes = 4096,
-	.pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 80.0,
-	.pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0,
-	.pct_ideal_dram_sdp_bw_after_urgent_vm_only = 40.0,
-	.max_avg_sdp_bw_use_normal_percent = 60.0,
-	.max_avg_dram_bw_use_normal_percent = 40.0,
-	.writeback_latency_us = 12.0,
-	.max_request_size_bytes = 256,
-	.fabric_datapath_to_dcn_data_return_bytes = 64,
-	.dcn_downspread_percent = 0.5,
-	.downspread_percent = 0.38,
-	.dram_page_open_time_ns = 50.0,
-	.dram_rw_turnaround_time_ns = 17.5,
-	.dram_return_buffer_per_channel_bytes = 8192,
-	.round_trip_ping_latency_dcfclk_cycles = 191,
-	.urgent_out_of_order_return_per_channel_bytes = 4096,
-	.channel_interleave_bytes = 256,
-	.num_banks = 8,
-	.gpuvm_min_page_size_bytes = 4096,
-	.hostvm_min_page_size_bytes = 4096,
-	.dram_clock_change_latency_us = 404,
-	.dummy_pstate_latency_us = 5,
-	.writeback_dram_clock_change_latency_us = 23.0,
-	.return_bus_width_bytes = 64,
-	.dispclk_dppclk_vco_speed_mhz = 3650,
-	.xfc_bus_transport_time_us = 20,      // ?
-	.xfc_xbuf_latency_tolerance_us = 4,  // ?
-	.use_urgent_burst_bw = 1,            // ?
-	.do_urgent_latency_adjustment = true,
-	.urgent_latency_adjustment_fabric_clock_component_us = 1.0,
-	.urgent_latency_adjustment_fabric_clock_reference_mhz = 1000,
-};
-
 enum dcn30_clk_src_array_id {
 	DCN30_CLK_SRC_PLL0,
 	DCN30_CLK_SRC_PLL1,
@@ -1480,90 +1350,9 @@ int dcn30_populate_dml_pipes_from_context(
 void dcn30_populate_dml_writeback_from_context(
 	struct dc *dc, struct resource_context *res_ctx, display_e2e_pipe_params_st *pipes)
 {
-	int pipe_cnt, i, j;
-	double max_calc_writeback_dispclk;
-	double writeback_dispclk;
-	struct writeback_st dout_wb;
-
-	for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) {
-		struct dc_stream_state *stream = res_ctx->pipe_ctx[i].stream;
-
-		if (!stream)
-			continue;
-		max_calc_writeback_dispclk = 0;
-
-		/* Set writeback information */
-		pipes[pipe_cnt].dout.wb_enable = 0;
-		pipes[pipe_cnt].dout.num_active_wb = 0;
-		for (j = 0; j < stream->num_wb_info; j++) {
-			struct dc_writeback_info *wb_info = &stream->writeback_info[j];
-
-			if (wb_info->wb_enabled && wb_info->writeback_source_plane &&
-					(wb_info->writeback_source_plane == res_ctx->pipe_ctx[i].plane_state)) {
-				pipes[pipe_cnt].dout.wb_enable = 1;
-				pipes[pipe_cnt].dout.num_active_wb++;
-				dout_wb.wb_src_height = wb_info->dwb_params.cnv_params.crop_en ?
-					wb_info->dwb_params.cnv_params.crop_height :
-					wb_info->dwb_params.cnv_params.src_height;
-				dout_wb.wb_src_width = wb_info->dwb_params.cnv_params.crop_en ?
-					wb_info->dwb_params.cnv_params.crop_width :
-					wb_info->dwb_params.cnv_params.src_width;
-				dout_wb.wb_dst_width = wb_info->dwb_params.dest_width;
-				dout_wb.wb_dst_height = wb_info->dwb_params.dest_height;
-
-				/* For IP that doesn't support WB scaling, set h/v taps to 1 to avoid DML validation failure */
-				if (dc->dml.ip.writeback_max_hscl_taps > 1) {
-					dout_wb.wb_htaps_luma = wb_info->dwb_params.scaler_taps.h_taps;
-					dout_wb.wb_vtaps_luma = wb_info->dwb_params.scaler_taps.v_taps;
-				} else {
-					dout_wb.wb_htaps_luma = 1;
-					dout_wb.wb_vtaps_luma = 1;
-				}
-				dout_wb.wb_htaps_chroma = 0;
-				dout_wb.wb_vtaps_chroma = 0;
-				dout_wb.wb_hratio = wb_info->dwb_params.cnv_params.crop_en ?
-					(double)wb_info->dwb_params.cnv_params.crop_width /
-						(double)wb_info->dwb_params.dest_width :
-					(double)wb_info->dwb_params.cnv_params.src_width /
-						(double)wb_info->dwb_params.dest_width;
-				dout_wb.wb_vratio = wb_info->dwb_params.cnv_params.crop_en ?
-					(double)wb_info->dwb_params.cnv_params.crop_height /
-						(double)wb_info->dwb_params.dest_height :
-					(double)wb_info->dwb_params.cnv_params.src_height /
-						(double)wb_info->dwb_params.dest_height;
-				if (wb_info->dwb_params.cnv_params.fc_out_format == DWB_OUT_FORMAT_64BPP_ARGB ||
-					wb_info->dwb_params.cnv_params.fc_out_format == DWB_OUT_FORMAT_64BPP_RGBA)
-					dout_wb.wb_pixel_format = dm_444_64;
-				else
-					dout_wb.wb_pixel_format = dm_444_32;
-
-				/* Workaround for cases where multiple writebacks are connected to same plane
-				 * In which case, need to compute worst case and set the associated writeback parameters
-				 * This workaround is necessary due to DML computation assuming only 1 set of writeback
-				 * parameters per pipe
-				 */
-				writeback_dispclk = dml30_CalculateWriteBackDISPCLK(
-						dout_wb.wb_pixel_format,
-						pipes[pipe_cnt].pipe.dest.pixel_rate_mhz,
-						dout_wb.wb_hratio,
-						dout_wb.wb_vratio,
-						dout_wb.wb_htaps_luma,
-						dout_wb.wb_vtaps_luma,
-						dout_wb.wb_src_width,
-						dout_wb.wb_dst_width,
-						pipes[pipe_cnt].pipe.dest.htotal,
-						dc->current_state->bw_ctx.dml.ip.writeback_line_buffer_buffer_size);
-
-				if (writeback_dispclk > max_calc_writeback_dispclk) {
-					max_calc_writeback_dispclk = writeback_dispclk;
-					pipes[pipe_cnt].dout.wb = dout_wb;
-				}
-			}
-		}
-
-		pipe_cnt++;
-	}
-
+	DC_FP_START();
+	dcn30_fpu_populate_dml_writeback_from_context(dc, res_ctx, pipes);
+	DC_FP_END();
 }
 
 unsigned int dcn30_calc_max_scaled_time(
@@ -1598,7 +1387,7 @@ void dcn30_set_mcif_arb_params(
 	enum mmhubbub_wbif_mode wbif_mode;
 	struct display_mode_lib *dml = &context->bw_ctx.dml;
 	struct mcif_arb_params *wb_arb_params;
-	int i, j, k, dwb_pipe;
+	int i, j, dwb_pipe;
 
 	/* Writeback MCIF_WB arbitration parameters */
 	dwb_pipe = 0;
@@ -1622,17 +1411,15 @@ void dcn30_set_mcif_arb_params(
 			else
 				wbif_mode = PACKED_444;
 
-			for (k = 0; k < sizeof(wb_arb_params->cli_watermark)/sizeof(wb_arb_params->cli_watermark[0]); k++) {
-				wb_arb_params->cli_watermark[k] = get_wm_writeback_urgent(dml, pipes, pipe_cnt) * 1000;
-				wb_arb_params->pstate_watermark[k] = get_wm_writeback_dram_clock_change(dml, pipes, pipe_cnt) * 1000;
-			}
+			DC_FP_START();
+			dcn30_fpu_set_mcif_arb_params(wb_arb_params, dml, pipes, pipe_cnt, j);
+			DC_FP_END();
 			wb_arb_params->time_per_pixel = (1000000 << 6) / context->res_ctx.pipe_ctx[i].stream->phy_pix_clk; /* time_per_pixel should be in u6.6 format */
 			wb_arb_params->slice_lines = 32;
 			wb_arb_params->arbitration_slice = 2; /* irrelevant since there is no YUV output */
 			wb_arb_params->max_scaled_time = dcn30_calc_max_scaled_time(wb_arb_params->time_per_pixel,
 					wbif_mode,
 					wb_arb_params->cli_watermark[0]); /* assume 4 watermark sets have the same value */
-			wb_arb_params->dram_speed_change_duration = dml->vba.WritebackAllowDRAMClockChangeEndPosition[j] * pipes[0].clks_cfg.refclk_mhz; /* num_clock_cycles = us * MHz */
 
 			dwb_pipe++;
 
@@ -2111,178 +1898,11 @@ validate_out:
 	return out;
 }
 
-/*
- * This must be noinline to ensure anything that deals with FP registers
- * is contained within this call; previously our compiling with hard-float
- * would result in fp instructions being emitted outside of the boundaries
- * of the DC_FP_START/END macros, which makes sense as the compiler has no
- * idea about what is wrapped and what is not
- *
- * This is largely just a workaround to avoid breakage introduced with 5.6,
- * ideally all fp-using code should be moved into its own file, only that
- * should be compiled with hard-float, and all code exported from there
- * should be strictly wrapped with DC_FP_START/END
- */
-static noinline void dcn30_calculate_wm_and_dlg_fp(
-		struct dc *dc, struct dc_state *context,
-		display_e2e_pipe_params_st *pipes,
-		int pipe_cnt,
-		int vlevel)
+void dcn30_update_soc_for_wm_a(struct dc *dc, struct dc_state *context)
 {
-	int maxMpcComb = context->bw_ctx.dml.vba.maxMpcComb;
-	int i, pipe_idx;
-	double dcfclk = context->bw_ctx.dml.vba.DCFCLKState[vlevel][maxMpcComb];
-	bool pstate_en = context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][maxMpcComb] != dm_dram_clock_change_unsupported;
-
-	if (context->bw_ctx.dml.soc.min_dcfclk > dcfclk)
-		dcfclk = context->bw_ctx.dml.soc.min_dcfclk;
-
-	pipes[0].clks_cfg.voltage = vlevel;
-	pipes[0].clks_cfg.dcfclk_mhz = dcfclk;
-	pipes[0].clks_cfg.socclk_mhz = context->bw_ctx.dml.soc.clock_limits[vlevel].socclk_mhz;
-
-	/* Set B:
-	 * DCFCLK: 1GHz or min required above 1GHz
-	 * FCLK/UCLK: Max
-	 */
-	if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].valid) {
-		if (vlevel == 0) {
-			pipes[0].clks_cfg.voltage = 1;
-			pipes[0].clks_cfg.dcfclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dcfclk_mhz;
-		}
-		context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.pstate_latency_us;
-		context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.sr_enter_plus_exit_time_us;
-		context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.sr_exit_time_us;
-	}
-	context->bw_ctx.bw.dcn.watermarks.b.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
-	context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
-	context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
-	context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
-	context->bw_ctx.bw.dcn.watermarks.b.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
-	context->bw_ctx.bw.dcn.watermarks.b.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
-	context->bw_ctx.bw.dcn.watermarks.b.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
-	context->bw_ctx.bw.dcn.watermarks.b.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
-
-	pipes[0].clks_cfg.voltage = vlevel;
-	pipes[0].clks_cfg.dcfclk_mhz = dcfclk;
-
-	/* Set D:
-	 * DCFCLK: Min Required
-	 * FCLK(proportional to UCLK): 1GHz or Max
-	 * MALL stutter, sr_enter_exit = 4, sr_exit = 2us
-	 */
-	/*
-	if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].valid) {
-		context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.pstate_latency_us;
-		context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.sr_enter_plus_exit_time_us;
-		context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.sr_exit_time_us;
-	}
-	context->bw_ctx.bw.dcn.watermarks.d.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
-	context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
-	context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
-	context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
-	context->bw_ctx.bw.dcn.watermarks.d.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
-	context->bw_ctx.bw.dcn.watermarks.d.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
-	context->bw_ctx.bw.dcn.watermarks.d.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
-	context->bw_ctx.bw.dcn.watermarks.d.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
-	*/
-
-	/* Set C:
-	 * DCFCLK: Min Required
-	 * FCLK(proportional to UCLK): 1GHz or Max
-	 * pstate latency overridden to 5us
-	 */
-	if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].valid) {
-		unsigned int min_dram_speed_mts = context->bw_ctx.dml.vba.DRAMSpeed;
-		unsigned int min_dram_speed_mts_margin = 160;
-
-		if (context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] == dm_dram_clock_change_unsupported)
-			min_dram_speed_mts = dc->clk_mgr->bw_params->clk_table.entries[dc->clk_mgr->bw_params->clk_table.num_entries - 1].memclk_mhz * 16;
-
-		/* find largest table entry that is lower than dram speed, but lower than DPM0 still uses DPM0 */
-		for (i = 3; i > 0; i--)
-			if (min_dram_speed_mts + min_dram_speed_mts_margin > dc->clk_mgr->bw_params->dummy_pstate_table[i].dram_speed_mts)
-				break;
-
-		context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->dummy_pstate_table[i].dummy_pstate_latency_us;
-		context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.sr_enter_plus_exit_time_us;
-		context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.sr_exit_time_us;
-	}
-
-	context->bw_ctx.bw.dcn.watermarks.c.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
-	context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
-	context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
-	context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
-	context->bw_ctx.bw.dcn.watermarks.c.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
-	context->bw_ctx.bw.dcn.watermarks.c.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
-	context->bw_ctx.bw.dcn.watermarks.c.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
-	context->bw_ctx.bw.dcn.watermarks.c.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
-
-	if (!pstate_en) {
-		/* The only difference between A and C is p-state latency, if p-state is not supported we want to
-		 * calculate DLG based on dummy p-state latency, and max out the set A p-state watermark
-		 */
-		context->bw_ctx.bw.dcn.watermarks.a = context->bw_ctx.bw.dcn.watermarks.c;
-		context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.pstate_change_ns = 0;
-	} else {
-		/* Set A:
-		 * DCFCLK: Min Required
-		 * FCLK(proportional to UCLK): 1GHz or Max
-		 *
-		 * Set A calculated last so that following calculations are based on Set A
-		 */
-		dc->res_pool->funcs->update_soc_for_wm_a(dc, context);
-		context->bw_ctx.bw.dcn.watermarks.a.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
-		context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
-		context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
-		context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
-		context->bw_ctx.bw.dcn.watermarks.a.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
-		context->bw_ctx.bw.dcn.watermarks.a.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
-		context->bw_ctx.bw.dcn.watermarks.a.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
-		context->bw_ctx.bw.dcn.watermarks.a.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
-	}
-
-	context->perf_params.stutter_period_us = context->bw_ctx.dml.vba.StutterPeriod;
-
-	/* Make set D = set A until set D is enabled */
-	context->bw_ctx.bw.dcn.watermarks.d = context->bw_ctx.bw.dcn.watermarks.a;
-
-	for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) {
-		if (!context->res_ctx.pipe_ctx[i].stream)
-			continue;
-
-		pipes[pipe_idx].clks_cfg.dispclk_mhz = get_dispclk_calculated(&context->bw_ctx.dml, pipes, pipe_cnt);
-		pipes[pipe_idx].clks_cfg.dppclk_mhz = get_dppclk_calculated(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx);
-
-		if (dc->config.forced_clocks) {
-			pipes[pipe_idx].clks_cfg.dispclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dispclk_mhz;
-			pipes[pipe_idx].clks_cfg.dppclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dppclk_mhz;
-		}
-		if (dc->debug.min_disp_clk_khz > pipes[pipe_idx].clks_cfg.dispclk_mhz * 1000)
-			pipes[pipe_idx].clks_cfg.dispclk_mhz = dc->debug.min_disp_clk_khz / 1000.0;
-		if (dc->debug.min_dpp_clk_khz > pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000)
-			pipes[pipe_idx].clks_cfg.dppclk_mhz = dc->debug.min_dpp_clk_khz / 1000.0;
-
-		pipe_idx++;
-	}
-
 	DC_FP_START();
-	dcn20_calculate_dlg_params(dc, context, pipes, pipe_cnt, vlevel);
+	dcn30_fpu_update_soc_for_wm_a(dc, context);
 	DC_FP_END();
-
-	if (!pstate_en)
-		/* Restore full p-state latency */
-		context->bw_ctx.dml.soc.dram_clock_change_latency_us =
-				dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us;
-}
-
-void dcn30_update_soc_for_wm_a(struct dc *dc, struct dc_state *context)
-{
-	if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].valid) {
-		context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us;
-		context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.sr_enter_plus_exit_time_us;
-		context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.sr_exit_time_us;
-	}
 }
 
 void dcn30_calculate_wm_and_dlg(
@@ -2292,7 +1912,7 @@ void dcn30_calculate_wm_and_dlg(
 		int vlevel)
 {
 	DC_FP_START();
-	dcn30_calculate_wm_and_dlg_fp(dc, context, pipes, pipe_cnt, vlevel);
+	dcn30_fpu_calculate_wm_and_dlg(dc, context, pipes, pipe_cnt, vlevel);
 	DC_FP_END();
 }
 
@@ -2351,40 +1971,6 @@ validate_out:
 	return out;
 }
 
-/*
- * This must be noinline to ensure anything that deals with FP registers
- * is contained within this call; previously our compiling with hard-float
- * would result in fp instructions being emitted outside of the boundaries
- * of the DC_FP_START/END macros, which makes sense as the compiler has no
- * idea about what is wrapped and what is not
- *
- * This is largely just a workaround to avoid breakage introduced with 5.6,
- * ideally all fp-using code should be moved into its own file, only that
- * should be compiled with hard-float, and all code exported from there
- * should be strictly wrapped with DC_FP_START/END
- */
-static noinline void dcn30_get_optimal_dcfclk_fclk_for_uclk(unsigned int uclk_mts,
-		unsigned int *optimal_dcfclk,
-		unsigned int *optimal_fclk)
-{
-       double bw_from_dram, bw_from_dram1, bw_from_dram2;
-
-       bw_from_dram1 = uclk_mts * dcn3_0_soc.num_chans *
-		dcn3_0_soc.dram_channel_width_bytes * (dcn3_0_soc.max_avg_dram_bw_use_normal_percent / 100);
-       bw_from_dram2 = uclk_mts * dcn3_0_soc.num_chans *
-		dcn3_0_soc.dram_channel_width_bytes * (dcn3_0_soc.max_avg_sdp_bw_use_normal_percent / 100);
-
-       bw_from_dram = (bw_from_dram1 < bw_from_dram2) ? bw_from_dram1 : bw_from_dram2;
-
-       if (optimal_fclk)
-               *optimal_fclk = bw_from_dram /
-               (dcn3_0_soc.fabric_datapath_to_dcn_data_return_bytes * (dcn3_0_soc.max_avg_sdp_bw_use_normal_percent / 100));
-
-       if (optimal_dcfclk)
-               *optimal_dcfclk =  bw_from_dram /
-               (dcn3_0_soc.return_bus_width_bytes * (dcn3_0_soc.max_avg_sdp_bw_use_normal_percent / 100));
-}
-
 void dcn30_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params)
 {
 	unsigned int i, j;
@@ -2399,47 +1985,43 @@ void dcn30_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params
 	unsigned int num_dcfclk_sta_targets = 4;
 	unsigned int num_uclk_states;
 
+	struct dc_bounding_box_max_clk dcn30_bb_max_clk;
+
+	memset(&dcn30_bb_max_clk, 0, sizeof(dcn30_bb_max_clk));
+
 	if (dc->ctx->dc_bios->vram_info.num_chans)
 		dcn3_0_soc.num_chans = dc->ctx->dc_bios->vram_info.num_chans;
 
-	if (dc->ctx->dc_bios->vram_info.dram_channel_width_bytes)
-		dcn3_0_soc.dram_channel_width_bytes = dc->ctx->dc_bios->vram_info.dram_channel_width_bytes;
-
-	dcn3_0_soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0;
-	dc->dml.soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0;
+	DC_FP_START();
+	dcn30_fpu_update_dram_channel_width_bytes(dc);
+	DC_FP_END();
 
 	if (bw_params->clk_table.entries[0].memclk_mhz) {
-		int max_dcfclk_mhz = 0, max_dispclk_mhz = 0, max_dppclk_mhz = 0, max_phyclk_mhz = 0;
 
 		for (i = 0; i < MAX_NUM_DPM_LVL; i++) {
-			if (bw_params->clk_table.entries[i].dcfclk_mhz > max_dcfclk_mhz)
-				max_dcfclk_mhz = bw_params->clk_table.entries[i].dcfclk_mhz;
-			if (bw_params->clk_table.entries[i].dispclk_mhz > max_dispclk_mhz)
-				max_dispclk_mhz = bw_params->clk_table.entries[i].dispclk_mhz;
-			if (bw_params->clk_table.entries[i].dppclk_mhz > max_dppclk_mhz)
-				max_dppclk_mhz = bw_params->clk_table.entries[i].dppclk_mhz;
-			if (bw_params->clk_table.entries[i].phyclk_mhz > max_phyclk_mhz)
-				max_phyclk_mhz = bw_params->clk_table.entries[i].phyclk_mhz;
+			if (bw_params->clk_table.entries[i].dcfclk_mhz > dcn30_bb_max_clk.max_dcfclk_mhz)
+				dcn30_bb_max_clk.max_dcfclk_mhz = bw_params->clk_table.entries[i].dcfclk_mhz;
+			if (bw_params->clk_table.entries[i].dispclk_mhz > dcn30_bb_max_clk.max_dispclk_mhz)
+				dcn30_bb_max_clk.max_dispclk_mhz = bw_params->clk_table.entries[i].dispclk_mhz;
+			if (bw_params->clk_table.entries[i].dppclk_mhz > dcn30_bb_max_clk.max_dppclk_mhz)
+				dcn30_bb_max_clk.max_dppclk_mhz = bw_params->clk_table.entries[i].dppclk_mhz;
+			if (bw_params->clk_table.entries[i].phyclk_mhz > dcn30_bb_max_clk.max_phyclk_mhz)
+				dcn30_bb_max_clk.max_phyclk_mhz = bw_params->clk_table.entries[i].phyclk_mhz;
 		}
 
-		if (!max_dcfclk_mhz)
-			max_dcfclk_mhz = dcn3_0_soc.clock_limits[0].dcfclk_mhz;
-		if (!max_dispclk_mhz)
-			max_dispclk_mhz = dcn3_0_soc.clock_limits[0].dispclk_mhz;
-		if (!max_dppclk_mhz)
-			max_dppclk_mhz = dcn3_0_soc.clock_limits[0].dppclk_mhz;
-		if (!max_phyclk_mhz)
-			max_phyclk_mhz = dcn3_0_soc.clock_limits[0].phyclk_mhz;
+		DC_FP_START();
+		dcn30_fpu_update_max_clk(&dcn30_bb_max_clk);
+		DC_FP_END();
 
-		if (max_dcfclk_mhz > dcfclk_sta_targets[num_dcfclk_sta_targets-1]) {
+		if (dcn30_bb_max_clk.max_dcfclk_mhz > dcfclk_sta_targets[num_dcfclk_sta_targets-1]) {
 			// If max DCFCLK is greater than the max DCFCLK STA target, insert into the DCFCLK STA target array
-			dcfclk_sta_targets[num_dcfclk_sta_targets] = max_dcfclk_mhz;
+			dcfclk_sta_targets[num_dcfclk_sta_targets] = dcn30_bb_max_clk.max_dcfclk_mhz;
 			num_dcfclk_sta_targets++;
-		} else if (max_dcfclk_mhz < dcfclk_sta_targets[num_dcfclk_sta_targets-1]) {
+		} else if (dcn30_bb_max_clk.max_dcfclk_mhz < dcfclk_sta_targets[num_dcfclk_sta_targets-1]) {
 			// If max DCFCLK is less than the max DCFCLK STA target, cap values and remove duplicates
 			for (i = 0; i < num_dcfclk_sta_targets; i++) {
-				if (dcfclk_sta_targets[i] > max_dcfclk_mhz) {
-					dcfclk_sta_targets[i] = max_dcfclk_mhz;
+				if (dcfclk_sta_targets[i] > dcn30_bb_max_clk.max_dcfclk_mhz) {
+					dcfclk_sta_targets[i] = dcn30_bb_max_clk.max_dcfclk_mhz;
 					break;
 				}
 			}
@@ -2452,7 +2034,7 @@ void dcn30_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params
 		// Calculate optimal dcfclk for each uclk
 		for (i = 0; i < num_uclk_states; i++) {
 			DC_FP_START();
-			dcn30_get_optimal_dcfclk_fclk_for_uclk(bw_params->clk_table.entries[i].memclk_mhz * 16,
+			dcn30_fpu_get_optimal_dcfclk_fclk_for_uclk(bw_params->clk_table.entries[i].memclk_mhz * 16,
 					&optimal_dcfclk_for_uclk[i], NULL);
 			DC_FP_END();
 			if (optimal_dcfclk_for_uclk[i] < bw_params->clk_table.entries[0].dcfclk_mhz) {
@@ -2479,7 +2061,7 @@ void dcn30_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params
 				dcfclk_mhz[num_states] = dcfclk_sta_targets[i];
 				dram_speed_mts[num_states++] = optimal_uclk_for_dcfclk_sta_targets[i++];
 			} else {
-				if (j < num_uclk_states && optimal_dcfclk_for_uclk[j] <= max_dcfclk_mhz) {
+				if (j < num_uclk_states && optimal_dcfclk_for_uclk[j] <= dcn30_bb_max_clk.max_dcfclk_mhz) {
 					dcfclk_mhz[num_states] = optimal_dcfclk_for_uclk[j];
 					dram_speed_mts[num_states++] = bw_params->clk_table.entries[j++].memclk_mhz * 16;
 				} else {
@@ -2494,33 +2076,15 @@ void dcn30_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params
 		}
 
 		while (j < num_uclk_states && num_states < DC__VOLTAGE_STATES &&
-				optimal_dcfclk_for_uclk[j] <= max_dcfclk_mhz) {
+				optimal_dcfclk_for_uclk[j] <= dcn30_bb_max_clk.max_dcfclk_mhz) {
 			dcfclk_mhz[num_states] = optimal_dcfclk_for_uclk[j];
 			dram_speed_mts[num_states++] = bw_params->clk_table.entries[j++].memclk_mhz * 16;
 		}
 
 		dcn3_0_soc.num_states = num_states;
-		for (i = 0; i < dcn3_0_soc.num_states; i++) {
-			dcn3_0_soc.clock_limits[i].state = i;
-			dcn3_0_soc.clock_limits[i].dcfclk_mhz = dcfclk_mhz[i];
-			dcn3_0_soc.clock_limits[i].fabricclk_mhz = dcfclk_mhz[i];
-			dcn3_0_soc.clock_limits[i].dram_speed_mts = dram_speed_mts[i];
-
-			/* Fill all states with max values of all other clocks */
-			dcn3_0_soc.clock_limits[i].dispclk_mhz = max_dispclk_mhz;
-			dcn3_0_soc.clock_limits[i].dppclk_mhz  = max_dppclk_mhz;
-			dcn3_0_soc.clock_limits[i].phyclk_mhz  = max_phyclk_mhz;
-			dcn3_0_soc.clock_limits[i].dtbclk_mhz = dcn3_0_soc.clock_limits[0].dtbclk_mhz;
-			/* These clocks cannot come from bw_params, always fill from dcn3_0_soc[1] */
-			/* FCLK, PHYCLK_D18, SOCCLK, DSCCLK */
-			dcn3_0_soc.clock_limits[i].phyclk_d18_mhz = dcn3_0_soc.clock_limits[0].phyclk_d18_mhz;
-			dcn3_0_soc.clock_limits[i].socclk_mhz = dcn3_0_soc.clock_limits[0].socclk_mhz;
-			dcn3_0_soc.clock_limits[i].dscclk_mhz = dcn3_0_soc.clock_limits[0].dscclk_mhz;
-		}
-		/* re-init DML with updated bb */
-		dml_init_instance(&dc->dml, &dcn3_0_soc, &dcn3_0_ip, DML_PROJECT_DCN30);
-		if (dc->current_state)
-			dml_init_instance(&dc->current_state->bw_ctx.dml, &dcn3_0_soc, &dcn3_0_ip, DML_PROJECT_DCN30);
+		DC_FP_START();
+		dcn30_fpu_update_bw_bounding_box(dc, bw_params, &dcn30_bb_max_clk, dcfclk_mhz, dram_speed_mts);
+		DC_FP_END();
 	}
 }
 
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.h b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.h
index b92e4cc0232f..3330a1026fa5 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.h
@@ -35,6 +35,9 @@ struct dc;
 struct resource_pool;
 struct _vcs_dpi_display_pipe_params_st;
 
+extern struct _vcs_dpi_ip_params_st dcn3_0_ip;
+extern struct _vcs_dpi_soc_bounding_box_st dcn3_0_soc;
+
 struct dcn30_resource_pool {
 	struct resource_pool base;
 };
@@ -96,4 +99,6 @@ enum dc_status dcn30_add_stream_to_ctx(
 
 void dcn30_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params);
 
+void dcn30_setup_mclk_switch_using_fw_based_vblank_stretch(struct dc *dc, struct dc_state *context);
+
 #endif /* _DCN30_RESOURCE_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c
index 4daf8931aa7c..a5df74110284 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c
@@ -81,6 +81,8 @@
 #include "dce/dce_aux.h"
 #include "dce/dce_i2c.h"
 
+#include "dml/dcn30/dcn30_fpu.h"
+
 #include "dml/dcn30/display_mode_vba_30.h"
 #include "dml/dcn301/dcn301_fpu.h"
 #include "vm_helper.h"
diff --git a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c b/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c
index f0938653bb88..f537888f4fa6 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c
@@ -43,6 +43,8 @@
 #include "dcn20/dcn20_dsc.h"
 #include "dcn20/dcn20_resource.h"
 
+#include "dml/dcn30/dcn30_fpu.h"
+
 #include "dcn10/dcn10_resource.h"
 
 #include "dce/dce_abm.h"
diff --git a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c b/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c
index 4fcbc0502808..76f863eb86ef 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c
@@ -25,6 +25,8 @@
 #include "dcn20/dcn20_dsc.h"
 #include "dcn20/dcn20_resource.h"
 
+#include "dml/dcn30/dcn30_fpu.h"
+
 #include "dcn10/dcn10_resource.h"
 
 #include "dc_link_ddc.h"
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c
index ccf1b71a8269..3d9f07d4770b 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c
@@ -36,6 +36,8 @@
 #include "dcn20/dcn20_resource.h"
 #include "dcn30/dcn30_resource.h"
 
+#include "dml/dcn30/dcn30_fpu.h"
+
 #include "dcn10/dcn10_ipp.h"
 #include "dcn30/dcn30_hubbub.h"
 #include "dcn31/dcn31_hubbub.h"
diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile
index ee911452c048..a64b88ca01a9 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile
@@ -71,6 +71,7 @@ CFLAGS_$(AMDDALPATH)/dc/dml/dcn30/display_mode_vba_30.o := $(dml_ccflags) $(fram
 CFLAGS_$(AMDDALPATH)/dc/dml/dcn30/display_rq_dlg_calc_30.o := $(dml_ccflags)
 CFLAGS_$(AMDDALPATH)/dc/dml/dcn31/display_mode_vba_31.o := $(dml_ccflags) $(frame_warn_flag)
 CFLAGS_$(AMDDALPATH)/dc/dml/dcn31/display_rq_dlg_calc_31.o := $(dml_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml/dcn30/dcn30_fpu.o := $(dml_ccflags)
 CFLAGS_$(AMDDALPATH)/dc/dml/dcn31/dcn31_fpu.o := $(dml_ccflags)
 CFLAGS_$(AMDDALPATH)/dc/dml/dcn301/dcn301_fpu.o := $(dml_ccflags)
 CFLAGS_$(AMDDALPATH)/dc/dml/dcn302/dcn302_fpu.o := $(dml_ccflags)
@@ -113,7 +114,7 @@ DML += dcn20/dcn20_fpu.o
 DML += display_mode_vba.o dcn20/display_rq_dlg_calc_20.o dcn20/display_mode_vba_20.o
 DML += dcn20/display_rq_dlg_calc_20v2.o dcn20/display_mode_vba_20v2.o
 DML += dcn21/display_rq_dlg_calc_21.o dcn21/display_mode_vba_21.o
-DML += dcn30/display_mode_vba_30.o dcn30/display_rq_dlg_calc_30.o
+DML += dcn30/dcn30_fpu.o dcn30/display_mode_vba_30.o dcn30/display_rq_dlg_calc_30.o
 DML += dcn31/display_mode_vba_31.o dcn31/display_rq_dlg_calc_31.o
 DML += dcn31/dcn31_fpu.o
 DML += dcn301/dcn301_fpu.o
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.c
new file mode 100644
index 000000000000..574676a0711a
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.c
@@ -0,0 +1,617 @@
+/*
+ * Copyright 2020-2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+#include "resource.h"
+#include "clk_mgr.h"
+#include "reg_helper.h"
+#include "dcn_calc_math.h"
+#include "dcn20/dcn20_resource.h"
+#include "dcn30/dcn30_resource.h"
+
+
+#include "display_mode_vba_30.h"
+#include "dcn30_fpu.h"
+
+#define REG(reg)\
+	optc1->tg_regs->reg
+
+#define CTX \
+	optc1->base.ctx
+
+#undef FN
+#define FN(reg_name, field_name) \
+	optc1->tg_shift->field_name, optc1->tg_mask->field_name
+
+
+struct _vcs_dpi_ip_params_st dcn3_0_ip = {
+	.use_min_dcfclk = 0,
+	.clamp_min_dcfclk = 0,
+	.odm_capable = 1,
+	.gpuvm_enable = 0,
+	.hostvm_enable = 0,
+	.gpuvm_max_page_table_levels = 4,
+	.hostvm_max_page_table_levels = 4,
+	.hostvm_cached_page_table_levels = 0,
+	.pte_group_size_bytes = 2048,
+	.num_dsc = 6,
+	.rob_buffer_size_kbytes = 184,
+	.det_buffer_size_kbytes = 184,
+	.dpte_buffer_size_in_pte_reqs_luma = 84,
+	.pde_proc_buffer_size_64k_reqs = 48,
+	.dpp_output_buffer_pixels = 2560,
+	.opp_output_buffer_lines = 1,
+	.pixel_chunk_size_kbytes = 8,
+	.pte_enable = 1,
+	.max_page_table_levels = 2,
+	.pte_chunk_size_kbytes = 2,  // ?
+	.meta_chunk_size_kbytes = 2,
+	.writeback_chunk_size_kbytes = 8,
+	.line_buffer_size_bits = 789504,
+	.is_line_buffer_bpp_fixed = 0,  // ?
+	.line_buffer_fixed_bpp = 0,     // ?
+	.dcc_supported = true,
+	.writeback_interface_buffer_size_kbytes = 90,
+	.writeback_line_buffer_buffer_size = 0,
+	.max_line_buffer_lines = 12,
+	.writeback_luma_buffer_size_kbytes = 12,  // writeback_line_buffer_buffer_size = 656640
+	.writeback_chroma_buffer_size_kbytes = 8,
+	.writeback_chroma_line_buffer_width_pixels = 4,
+	.writeback_max_hscl_ratio = 1,
+	.writeback_max_vscl_ratio = 1,
+	.writeback_min_hscl_ratio = 1,
+	.writeback_min_vscl_ratio = 1,
+	.writeback_max_hscl_taps = 1,
+	.writeback_max_vscl_taps = 1,
+	.writeback_line_buffer_luma_buffer_size = 0,
+	.writeback_line_buffer_chroma_buffer_size = 14643,
+	.cursor_buffer_size = 8,
+	.cursor_chunk_size = 2,
+	.max_num_otg = 6,
+	.max_num_dpp = 6,
+	.max_num_wb = 1,
+	.max_dchub_pscl_bw_pix_per_clk = 4,
+	.max_pscl_lb_bw_pix_per_clk = 2,
+	.max_lb_vscl_bw_pix_per_clk = 4,
+	.max_vscl_hscl_bw_pix_per_clk = 4,
+	.max_hscl_ratio = 6,
+	.max_vscl_ratio = 6,
+	.hscl_mults = 4,
+	.vscl_mults = 4,
+	.max_hscl_taps = 8,
+	.max_vscl_taps = 8,
+	.dispclk_ramp_margin_percent = 1,
+	.underscan_factor = 1.11,
+	.min_vblank_lines = 32,
+	.dppclk_delay_subtotal = 46,
+	.dynamic_metadata_vm_enabled = true,
+	.dppclk_delay_scl_lb_only = 16,
+	.dppclk_delay_scl = 50,
+	.dppclk_delay_cnvc_formatter = 27,
+	.dppclk_delay_cnvc_cursor = 6,
+	.dispclk_delay_subtotal = 119,
+	.dcfclk_cstate_latency = 5.2, // SRExitTime
+	.max_inter_dcn_tile_repeaters = 8,
+	.max_num_hdmi_frl_outputs = 1,
+	.odm_combine_4to1_supported = true,
+
+	.xfc_supported = false,
+	.xfc_fill_bw_overhead_percent = 10.0,
+	.xfc_fill_constant_bytes = 0,
+	.gfx7_compat_tiling_supported = 0,
+	.number_of_cursors = 1,
+};
+
+struct _vcs_dpi_soc_bounding_box_st dcn3_0_soc = {
+	.clock_limits = {
+			{
+				.state = 0,
+				.dispclk_mhz = 562.0,
+				.dppclk_mhz = 300.0,
+				.phyclk_mhz = 300.0,
+				.phyclk_d18_mhz = 667.0,
+				.dscclk_mhz = 405.6,
+			},
+		},
+
+	.min_dcfclk = 500.0, /* TODO: set this to actual min DCFCLK */
+	.num_states = 1,
+	.sr_exit_time_us = 15.5,
+	.sr_enter_plus_exit_time_us = 20,
+	.urgent_latency_us = 4.0,
+	.urgent_latency_pixel_data_only_us = 4.0,
+	.urgent_latency_pixel_mixed_with_vm_data_us = 4.0,
+	.urgent_latency_vm_data_only_us = 4.0,
+	.urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096,
+	.urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096,
+	.urgent_out_of_order_return_per_channel_vm_only_bytes = 4096,
+	.pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 80.0,
+	.pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0,
+	.pct_ideal_dram_sdp_bw_after_urgent_vm_only = 40.0,
+	.max_avg_sdp_bw_use_normal_percent = 60.0,
+	.max_avg_dram_bw_use_normal_percent = 40.0,
+	.writeback_latency_us = 12.0,
+	.max_request_size_bytes = 256,
+	.fabric_datapath_to_dcn_data_return_bytes = 64,
+	.dcn_downspread_percent = 0.5,
+	.downspread_percent = 0.38,
+	.dram_page_open_time_ns = 50.0,
+	.dram_rw_turnaround_time_ns = 17.5,
+	.dram_return_buffer_per_channel_bytes = 8192,
+	.round_trip_ping_latency_dcfclk_cycles = 191,
+	.urgent_out_of_order_return_per_channel_bytes = 4096,
+	.channel_interleave_bytes = 256,
+	.num_banks = 8,
+	.gpuvm_min_page_size_bytes = 4096,
+	.hostvm_min_page_size_bytes = 4096,
+	.dram_clock_change_latency_us = 404,
+	.dummy_pstate_latency_us = 5,
+	.writeback_dram_clock_change_latency_us = 23.0,
+	.return_bus_width_bytes = 64,
+	.dispclk_dppclk_vco_speed_mhz = 3650,
+	.xfc_bus_transport_time_us = 20,      // ?
+	.xfc_xbuf_latency_tolerance_us = 4,  // ?
+	.use_urgent_burst_bw = 1,            // ?
+	.do_urgent_latency_adjustment = true,
+	.urgent_latency_adjustment_fabric_clock_component_us = 1.0,
+	.urgent_latency_adjustment_fabric_clock_reference_mhz = 1000,
+};
+
+
+void optc3_fpu_set_vrr_m_const(struct timing_generator *optc,
+		double vtotal_avg)
+{
+struct optc *optc1 = DCN10TG_FROM_TG(optc);
+	double vtotal_min, vtotal_max;
+	double ratio, modulo, phase;
+	uint32_t vblank_start;
+	uint32_t v_total_mask_value = 0;
+
+	dc_assert_fp_enabled();
+
+	/* Compute VTOTAL_MIN and VTOTAL_MAX, so that
+	 * VOTAL_MAX - VTOTAL_MIN = 1
+	 */
+	v_total_mask_value = 16;
+	vtotal_min = dcn_bw_floor(vtotal_avg);
+	vtotal_max = dcn_bw_ceil(vtotal_avg);
+
+	/* Check that bottom VBLANK is at least 2 lines tall when running with
+	 * VTOTAL_MIN. Note that VTOTAL registers are defined as 'total number
+	 * of lines in a frame - 1'.
+	 */
+	REG_GET(OTG_V_BLANK_START_END, OTG_V_BLANK_START,
+		&vblank_start);
+	ASSERT(vtotal_min >= vblank_start + 1);
+
+	/* Special case where the average frame rate can be achieved
+	 * without using the DTO
+	 */
+	if (vtotal_min == vtotal_max) {
+		REG_SET(OTG_V_TOTAL, 0, OTG_V_TOTAL, (uint32_t)vtotal_min);
+
+		optc->funcs->set_vtotal_min_max(optc, 0, 0);
+		REG_SET(OTG_M_CONST_DTO0, 0, OTG_M_CONST_DTO_PHASE, 0);
+		REG_SET(OTG_M_CONST_DTO1, 0, OTG_M_CONST_DTO_MODULO, 0);
+		REG_UPDATE_3(OTG_V_TOTAL_CONTROL,
+			OTG_V_TOTAL_MIN_SEL, 0,
+			OTG_V_TOTAL_MAX_SEL, 0,
+			OTG_SET_V_TOTAL_MIN_MASK_EN, 0);
+		return;
+	}
+
+	ratio = vtotal_max - vtotal_avg;
+	modulo = 65536.0 * 65536.0 - 1.0; /* 2^32 - 1 */
+	phase = ratio * modulo;
+
+	/* Special cases where the DTO phase gets rounded to 0 or
+	 * to DTO modulo
+	 */
+	if (phase <= 0 || phase >= modulo) {
+		REG_SET(OTG_V_TOTAL, 0, OTG_V_TOTAL,
+			phase <= 0 ?
+				(uint32_t)vtotal_max : (uint32_t)vtotal_min);
+		REG_SET(OTG_V_TOTAL_MIN, 0, OTG_V_TOTAL_MIN, 0);
+		REG_SET(OTG_V_TOTAL_MAX, 0, OTG_V_TOTAL_MAX, 0);
+		REG_SET(OTG_M_CONST_DTO0, 0, OTG_M_CONST_DTO_PHASE, 0);
+		REG_SET(OTG_M_CONST_DTO1, 0, OTG_M_CONST_DTO_MODULO, 0);
+		REG_UPDATE_3(OTG_V_TOTAL_CONTROL,
+			OTG_V_TOTAL_MIN_SEL, 0,
+			OTG_V_TOTAL_MAX_SEL, 0,
+			OTG_SET_V_TOTAL_MIN_MASK_EN, 0);
+		return;
+	}
+	REG_UPDATE_6(OTG_V_TOTAL_CONTROL,
+		OTG_V_TOTAL_MIN_SEL, 1,
+		OTG_V_TOTAL_MAX_SEL, 1,
+		OTG_SET_V_TOTAL_MIN_MASK_EN, 1,
+		OTG_SET_V_TOTAL_MIN_MASK, v_total_mask_value,
+		OTG_VTOTAL_MID_REPLACING_MIN_EN, 0,
+		OTG_VTOTAL_MID_REPLACING_MAX_EN, 0);
+	REG_SET(OTG_V_TOTAL, 0, OTG_V_TOTAL, (uint32_t)vtotal_min);
+	optc->funcs->set_vtotal_min_max(optc, vtotal_min, vtotal_max);
+	REG_SET(OTG_M_CONST_DTO0, 0, OTG_M_CONST_DTO_PHASE, (uint32_t)phase);
+	REG_SET(OTG_M_CONST_DTO1, 0, OTG_M_CONST_DTO_MODULO, (uint32_t)modulo);
+}
+
+void dcn30_fpu_populate_dml_writeback_from_context(
+		struct dc *dc, struct resource_context *res_ctx, display_e2e_pipe_params_st *pipes)
+{
+	int pipe_cnt, i, j;
+	double max_calc_writeback_dispclk;
+	double writeback_dispclk;
+	struct writeback_st dout_wb;
+
+	dc_assert_fp_enabled();
+
+	for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) {
+		struct dc_stream_state *stream = res_ctx->pipe_ctx[i].stream;
+
+		if (!stream)
+			continue;
+		max_calc_writeback_dispclk = 0;
+
+		/* Set writeback information */
+		pipes[pipe_cnt].dout.wb_enable = 0;
+		pipes[pipe_cnt].dout.num_active_wb = 0;
+		for (j = 0; j < stream->num_wb_info; j++) {
+			struct dc_writeback_info *wb_info = &stream->writeback_info[j];
+
+			if (wb_info->wb_enabled && wb_info->writeback_source_plane &&
+					(wb_info->writeback_source_plane == res_ctx->pipe_ctx[i].plane_state)) {
+				pipes[pipe_cnt].dout.wb_enable = 1;
+				pipes[pipe_cnt].dout.num_active_wb++;
+				dout_wb.wb_src_height = wb_info->dwb_params.cnv_params.crop_en ?
+					wb_info->dwb_params.cnv_params.crop_height :
+					wb_info->dwb_params.cnv_params.src_height;
+				dout_wb.wb_src_width = wb_info->dwb_params.cnv_params.crop_en ?
+					wb_info->dwb_params.cnv_params.crop_width :
+					wb_info->dwb_params.cnv_params.src_width;
+				dout_wb.wb_dst_width = wb_info->dwb_params.dest_width;
+				dout_wb.wb_dst_height = wb_info->dwb_params.dest_height;
+
+				/* For IP that doesn't support WB scaling, set h/v taps to 1 to avoid DML validation failure */
+				if (dc->dml.ip.writeback_max_hscl_taps > 1) {
+					dout_wb.wb_htaps_luma = wb_info->dwb_params.scaler_taps.h_taps;
+					dout_wb.wb_vtaps_luma = wb_info->dwb_params.scaler_taps.v_taps;
+				} else {
+					dout_wb.wb_htaps_luma = 1;
+					dout_wb.wb_vtaps_luma = 1;
+				}
+				dout_wb.wb_htaps_chroma = 0;
+				dout_wb.wb_vtaps_chroma = 0;
+				dout_wb.wb_hratio = wb_info->dwb_params.cnv_params.crop_en ?
+					(double)wb_info->dwb_params.cnv_params.crop_width /
+						(double)wb_info->dwb_params.dest_width :
+					(double)wb_info->dwb_params.cnv_params.src_width /
+						(double)wb_info->dwb_params.dest_width;
+				dout_wb.wb_vratio = wb_info->dwb_params.cnv_params.crop_en ?
+					(double)wb_info->dwb_params.cnv_params.crop_height /
+						(double)wb_info->dwb_params.dest_height :
+					(double)wb_info->dwb_params.cnv_params.src_height /
+						(double)wb_info->dwb_params.dest_height;
+				if (wb_info->dwb_params.cnv_params.fc_out_format == DWB_OUT_FORMAT_64BPP_ARGB ||
+					wb_info->dwb_params.cnv_params.fc_out_format == DWB_OUT_FORMAT_64BPP_RGBA)
+					dout_wb.wb_pixel_format = dm_444_64;
+				else
+					dout_wb.wb_pixel_format = dm_444_32;
+
+				/* Workaround for cases where multiple writebacks are connected to same plane
+				 * In which case, need to compute worst case and set the associated writeback parameters
+				 * This workaround is necessary due to DML computation assuming only 1 set of writeback
+				 * parameters per pipe
+				 */
+				writeback_dispclk = dml30_CalculateWriteBackDISPCLK(
+						dout_wb.wb_pixel_format,
+						pipes[pipe_cnt].pipe.dest.pixel_rate_mhz,
+						dout_wb.wb_hratio,
+						dout_wb.wb_vratio,
+						dout_wb.wb_htaps_luma,
+						dout_wb.wb_vtaps_luma,
+						dout_wb.wb_src_width,
+						dout_wb.wb_dst_width,
+						pipes[pipe_cnt].pipe.dest.htotal,
+						dc->current_state->bw_ctx.dml.ip.writeback_line_buffer_buffer_size);
+
+				if (writeback_dispclk > max_calc_writeback_dispclk) {
+					max_calc_writeback_dispclk = writeback_dispclk;
+					pipes[pipe_cnt].dout.wb = dout_wb;
+				}
+			}
+		}
+
+		pipe_cnt++;
+	}
+}
+
+void dcn30_fpu_set_mcif_arb_params(struct mcif_arb_params *wb_arb_params,
+	struct display_mode_lib *dml,
+	display_e2e_pipe_params_st *pipes,
+	int pipe_cnt,
+	int cur_pipe)
+{
+    int i;
+
+	dc_assert_fp_enabled();
+
+    for (i = 0; i < sizeof(wb_arb_params->cli_watermark)/sizeof(wb_arb_params->cli_watermark[0]); i++) {
+		wb_arb_params->cli_watermark[i] = get_wm_writeback_urgent(dml, pipes, pipe_cnt) * 1000;
+		wb_arb_params->pstate_watermark[i] = get_wm_writeback_dram_clock_change(dml, pipes, pipe_cnt) * 1000;
+    }
+
+    wb_arb_params->dram_speed_change_duration = dml->vba.WritebackAllowDRAMClockChangeEndPosition[cur_pipe] * pipes[0].clks_cfg.refclk_mhz; /* num_clock_cycles = us * MHz */
+}
+
+void dcn30_fpu_update_soc_for_wm_a(struct dc *dc, struct dc_state *context)
+{
+
+dc_assert_fp_enabled();
+
+if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].valid) {
+		context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us;
+		context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.sr_enter_plus_exit_time_us;
+		context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.sr_exit_time_us;
+	}
+}
+
+void dcn30_fpu_calculate_wm_and_dlg(
+		struct dc *dc, struct dc_state *context,
+		display_e2e_pipe_params_st *pipes,
+		int pipe_cnt,
+		int vlevel)
+{
+int maxMpcComb = context->bw_ctx.dml.vba.maxMpcComb;
+	int i, pipe_idx;
+	double dcfclk = context->bw_ctx.dml.vba.DCFCLKState[vlevel][maxMpcComb];
+	bool pstate_en = context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][maxMpcComb] != dm_dram_clock_change_unsupported;
+
+dc_assert_fp_enabled();
+
+	if (context->bw_ctx.dml.soc.min_dcfclk > dcfclk)
+		dcfclk = context->bw_ctx.dml.soc.min_dcfclk;
+
+	pipes[0].clks_cfg.voltage = vlevel;
+	pipes[0].clks_cfg.dcfclk_mhz = dcfclk;
+	pipes[0].clks_cfg.socclk_mhz = context->bw_ctx.dml.soc.clock_limits[vlevel].socclk_mhz;
+
+	/* Set B:
+	 * DCFCLK: 1GHz or min required above 1GHz
+	 * FCLK/UCLK: Max
+	 */
+	if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].valid) {
+		if (vlevel == 0) {
+			pipes[0].clks_cfg.voltage = 1;
+			pipes[0].clks_cfg.dcfclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dcfclk_mhz;
+		}
+		context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.pstate_latency_us;
+		context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.sr_enter_plus_exit_time_us;
+		context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.sr_exit_time_us;
+	}
+	context->bw_ctx.bw.dcn.watermarks.b.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+	context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+	context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+	context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+	context->bw_ctx.bw.dcn.watermarks.b.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+	context->bw_ctx.bw.dcn.watermarks.b.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+	context->bw_ctx.bw.dcn.watermarks.b.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+	context->bw_ctx.bw.dcn.watermarks.b.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+
+	pipes[0].clks_cfg.voltage = vlevel;
+	pipes[0].clks_cfg.dcfclk_mhz = dcfclk;
+
+	/* Set D:
+	 * DCFCLK: Min Required
+	 * FCLK(proportional to UCLK): 1GHz or Max
+	 * MALL stutter, sr_enter_exit = 4, sr_exit = 2us
+	 */
+	/*
+	if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].valid) {
+		context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.pstate_latency_us;
+		context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.sr_enter_plus_exit_time_us;
+		context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.sr_exit_time_us;
+	}
+	context->bw_ctx.bw.dcn.watermarks.d.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+	context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+	context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+	context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+	context->bw_ctx.bw.dcn.watermarks.d.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+	context->bw_ctx.bw.dcn.watermarks.d.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+	context->bw_ctx.bw.dcn.watermarks.d.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+	context->bw_ctx.bw.dcn.watermarks.d.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+	*/
+
+	/* Set C:
+	 * DCFCLK: Min Required
+	 * FCLK(proportional to UCLK): 1GHz or Max
+	 * pstate latency overridden to 5us
+	 */
+	if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].valid) {
+		unsigned int min_dram_speed_mts = context->bw_ctx.dml.vba.DRAMSpeed;
+		unsigned int min_dram_speed_mts_margin = 160;
+
+		if (context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] == dm_dram_clock_change_unsupported)
+			min_dram_speed_mts = dc->clk_mgr->bw_params->clk_table.entries[dc->clk_mgr->bw_params->clk_table.num_entries - 1].memclk_mhz * 16;
+
+		/* find largest table entry that is lower than dram speed, but lower than DPM0 still uses DPM0 */
+		for (i = 3; i > 0; i--)
+			if (min_dram_speed_mts + min_dram_speed_mts_margin > dc->clk_mgr->bw_params->dummy_pstate_table[i].dram_speed_mts)
+				break;
+
+		context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->dummy_pstate_table[i].dummy_pstate_latency_us;
+
+		context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.sr_enter_plus_exit_time_us;
+		context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.sr_exit_time_us;
+	}
+
+	context->bw_ctx.bw.dcn.watermarks.c.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+	context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+	context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+	context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+	context->bw_ctx.bw.dcn.watermarks.c.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+	context->bw_ctx.bw.dcn.watermarks.c.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+	context->bw_ctx.bw.dcn.watermarks.c.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+	context->bw_ctx.bw.dcn.watermarks.c.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+
+	if (!pstate_en) {
+		/* The only difference between A and C is p-state latency, if p-state is not supported we want to
+		 * calculate DLG based on dummy p-state latency, and max out the set A p-state watermark
+		 */
+		context->bw_ctx.bw.dcn.watermarks.a = context->bw_ctx.bw.dcn.watermarks.c;
+		context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.pstate_change_ns = 0;
+	} else {
+		/* Set A:
+		 * DCFCLK: Min Required
+		 * FCLK(proportional to UCLK): 1GHz or Max
+		 *
+		 * Set A calculated last so that following calculations are based on Set A
+		 */
+		dc->res_pool->funcs->update_soc_for_wm_a(dc, context);
+		context->bw_ctx.bw.dcn.watermarks.a.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+		context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+		context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+		context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+		context->bw_ctx.bw.dcn.watermarks.a.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+		context->bw_ctx.bw.dcn.watermarks.a.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+		context->bw_ctx.bw.dcn.watermarks.a.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+		context->bw_ctx.bw.dcn.watermarks.a.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+	}
+
+	context->perf_params.stutter_period_us = context->bw_ctx.dml.vba.StutterPeriod;
+
+	/* Make set D = set A until set D is enabled */
+	context->bw_ctx.bw.dcn.watermarks.d = context->bw_ctx.bw.dcn.watermarks.a;
+
+	for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) {
+		if (!context->res_ctx.pipe_ctx[i].stream)
+			continue;
+
+		pipes[pipe_idx].clks_cfg.dispclk_mhz = get_dispclk_calculated(&context->bw_ctx.dml, pipes, pipe_cnt);
+		pipes[pipe_idx].clks_cfg.dppclk_mhz = get_dppclk_calculated(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx);
+
+		if (dc->config.forced_clocks) {
+			pipes[pipe_idx].clks_cfg.dispclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dispclk_mhz;
+			pipes[pipe_idx].clks_cfg.dppclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dppclk_mhz;
+		}
+		if (dc->debug.min_disp_clk_khz > pipes[pipe_idx].clks_cfg.dispclk_mhz * 1000)
+			pipes[pipe_idx].clks_cfg.dispclk_mhz = dc->debug.min_disp_clk_khz / 1000.0;
+		if (dc->debug.min_dpp_clk_khz > pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000)
+			pipes[pipe_idx].clks_cfg.dppclk_mhz = dc->debug.min_dpp_clk_khz / 1000.0;
+
+		pipe_idx++;
+	}
+
+	DC_FP_START();
+	dcn20_calculate_dlg_params(dc, context, pipes, pipe_cnt, vlevel);
+	DC_FP_END();
+
+	if (!pstate_en)
+		/* Restore full p-state latency */
+		context->bw_ctx.dml.soc.dram_clock_change_latency_us =
+				dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us;
+
+}
+
+void dcn30_fpu_update_dram_channel_width_bytes(struct dc *dc)
+{
+	dc_assert_fp_enabled();
+
+	if (dc->ctx->dc_bios->vram_info.dram_channel_width_bytes)
+		dcn3_0_soc.dram_channel_width_bytes = dc->ctx->dc_bios->vram_info.dram_channel_width_bytes;
+}
+
+void dcn30_fpu_update_max_clk(struct dc_bounding_box_max_clk *dcn30_bb_max_clk)
+{
+		dc_assert_fp_enabled();
+
+		if (!dcn30_bb_max_clk->max_dcfclk_mhz)
+			dcn30_bb_max_clk->max_dcfclk_mhz = dcn3_0_soc.clock_limits[0].dcfclk_mhz;
+		if (!dcn30_bb_max_clk->max_dispclk_mhz)
+			dcn30_bb_max_clk->max_dispclk_mhz = dcn3_0_soc.clock_limits[0].dispclk_mhz;
+		if (!dcn30_bb_max_clk->max_dppclk_mhz)
+			dcn30_bb_max_clk->max_dppclk_mhz = dcn3_0_soc.clock_limits[0].dppclk_mhz;
+		if (!dcn30_bb_max_clk->max_phyclk_mhz)
+			dcn30_bb_max_clk->max_phyclk_mhz = dcn3_0_soc.clock_limits[0].phyclk_mhz;
+}
+
+void dcn30_fpu_get_optimal_dcfclk_fclk_for_uclk(unsigned int uclk_mts,
+		unsigned int *optimal_dcfclk,
+		unsigned int *optimal_fclk)
+{
+	double bw_from_dram, bw_from_dram1, bw_from_dram2;
+
+	dc_assert_fp_enabled();
+
+	bw_from_dram1 = uclk_mts * dcn3_0_soc.num_chans *
+		dcn3_0_soc.dram_channel_width_bytes * (dcn3_0_soc.max_avg_dram_bw_use_normal_percent / 100);
+	bw_from_dram2 = uclk_mts * dcn3_0_soc.num_chans *
+		dcn3_0_soc.dram_channel_width_bytes * (dcn3_0_soc.max_avg_sdp_bw_use_normal_percent / 100);
+
+	bw_from_dram = (bw_from_dram1 < bw_from_dram2) ? bw_from_dram1 : bw_from_dram2;
+
+	if (optimal_fclk)
+		*optimal_fclk = bw_from_dram /
+		(dcn3_0_soc.fabric_datapath_to_dcn_data_return_bytes * (dcn3_0_soc.max_avg_sdp_bw_use_normal_percent / 100));
+
+	if (optimal_dcfclk)
+		*optimal_dcfclk =  bw_from_dram /
+		(dcn3_0_soc.return_bus_width_bytes * (dcn3_0_soc.max_avg_sdp_bw_use_normal_percent / 100));
+}
+
+void dcn30_fpu_update_bw_bounding_box(struct dc *dc,
+	struct clk_bw_params *bw_params,
+	struct dc_bounding_box_max_clk *dcn30_bb_max_clk,
+	unsigned int *dcfclk_mhz,
+	unsigned int *dram_speed_mts)
+{
+	unsigned int i;
+
+	dc_assert_fp_enabled();
+
+	dcn3_0_soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0;
+	dc->dml.soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0;
+
+	for (i = 0; i < dcn3_0_soc.num_states; i++) {
+		dcn3_0_soc.clock_limits[i].state = i;
+		dcn3_0_soc.clock_limits[i].dcfclk_mhz = dcfclk_mhz[i];
+		dcn3_0_soc.clock_limits[i].fabricclk_mhz = dcfclk_mhz[i];
+		dcn3_0_soc.clock_limits[i].dram_speed_mts = dram_speed_mts[i];
+
+		/* Fill all states with max values of all other clocks */
+		dcn3_0_soc.clock_limits[i].dispclk_mhz = dcn30_bb_max_clk->max_dispclk_mhz;
+		dcn3_0_soc.clock_limits[i].dppclk_mhz  = dcn30_bb_max_clk->max_dppclk_mhz;
+		dcn3_0_soc.clock_limits[i].phyclk_mhz  = dcn30_bb_max_clk->max_phyclk_mhz;
+		dcn3_0_soc.clock_limits[i].dtbclk_mhz = dcn3_0_soc.clock_limits[0].dtbclk_mhz;
+		/* These clocks cannot come from bw_params, always fill from dcn3_0_soc[1] */
+		/* FCLK, PHYCLK_D18, SOCCLK, DSCCLK */
+		dcn3_0_soc.clock_limits[i].phyclk_d18_mhz = dcn3_0_soc.clock_limits[0].phyclk_d18_mhz;
+		dcn3_0_soc.clock_limits[i].socclk_mhz = dcn3_0_soc.clock_limits[0].socclk_mhz;
+		dcn3_0_soc.clock_limits[i].dscclk_mhz = dcn3_0_soc.clock_limits[0].dscclk_mhz;
+	}
+	/* re-init DML with updated bb */
+	dml_init_instance(&dc->dml, &dcn3_0_soc, &dcn3_0_ip, DML_PROJECT_DCN30);
+	if (dc->current_state)
+		dml_init_instance(&dc->current_state->bw_ctx.dml, &dcn3_0_soc, &dcn3_0_ip, DML_PROJECT_DCN30);
+
+}
+
+
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.h
new file mode 100644
index 000000000000..dedfe7b5f173
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright 2020-2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __DCN30_FPU_H__
+#define __DCN30_FPU_H__
+
+#include "core_types.h"
+#include "dcn20/dcn20_optc.h"
+
+void optc3_fpu_set_vrr_m_const(struct timing_generator *optc,
+		double vtotal_avg);
+
+void dcn30_fpu_populate_dml_writeback_from_context(
+		struct dc *dc, struct resource_context *res_ctx, display_e2e_pipe_params_st *pipes);
+
+void dcn30_fpu_set_mcif_arb_params(struct mcif_arb_params *wb_arb_params,
+	struct display_mode_lib *dml,
+	display_e2e_pipe_params_st *pipes,
+	int pipe_cnt,
+	int cur_pipe);
+
+void dcn30_fpu_update_soc_for_wm_a(struct dc *dc, struct dc_state *context);
+
+void dcn30_fpu_calculate_wm_and_dlg(
+		struct dc *dc, struct dc_state *context,
+		display_e2e_pipe_params_st *pipes,
+		int pipe_cnt,
+		int vlevel);
+
+void dcn30_fpu_update_dram_channel_width_bytes(struct dc *dc);
+
+void dcn30_fpu_update_max_clk(struct dc_bounding_box_max_clk *dcn30_bb_max_clk);
+
+void dcn30_fpu_get_optimal_dcfclk_fclk_for_uclk(unsigned int uclk_mts,
+		unsigned int *optimal_dcfclk,
+		unsigned int *optimal_fclk);
+
+void dcn30_fpu_update_bw_bounding_box(struct dc *dc,
+	struct clk_bw_params *bw_params,
+	struct dc_bounding_box_max_clk *dcn30_bb_max_clk,
+	unsigned int *dcfclk_mhz,
+	unsigned int *dram_speed_mts);
+
+
+#endif /* __DCN30_FPU_H__*/
diff --git a/drivers/gpu/drm/amd/display/dc/inc/core_types.h b/drivers/gpu/drm/amd/display/dc/inc/core_types.h
index 26f3a55c35d7..555d4d9e1454 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/core_types.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/core_types.h
@@ -486,4 +486,11 @@ struct dc_state {
 	} perf_params;
 };
 
+struct dc_bounding_box_max_clk {
+	int max_dcfclk_mhz;
+	int max_dispclk_mhz;
+	int max_dppclk_mhz;
+	int max_phyclk_mhz;
+};
+
 #endif /* _CORE_TYPES_H_ */
commit 66a197203794339b028eedfa880bff9367fce783
Author: Nicholas Kazlauskas <nicholas.kazlauskas at amd.com>
Date:   Thu May 5 16:50:42 2022 -0400

    drm/amd/display: Check zero planes for OTG disable W/A on clock change
    
    [Why]
    A display clock change hang can occur when switching between DIO and HPO
    enabled modes during the optimize_bandwidth in dc_commit_state_no_check
    call.
    
    This happens when going from 4k120 8bpc 420 to 4k144 10bpc 444.
    
    Display clock in the DIO case is 1200MHz, but pixel rate is 600MHz
    because the pixel format is 420.
    
    Display clock in the HPO case is less (800MHz?) because of ODM combine
    which results in a smaller divider.
    
    The DIO is still active in prepare but not active in the optimize which
    results in the hang occuring.
    
    During this change there are no planes on the stream so it's safe to
    apply the workaround, but dpms_off = false and signal type is not
    virtual.
    
    [How]
    Check for plane_count == 0, no planes on the stream.
    
    It's easiest to check pipe->plane_state == NULL as an equivalent check
    rather than trying to search for the stream status in the context
    associated with the stream, so let's do that.
    
    The primary, non MPO pipe should not have a NULL plane state.
    
    Reviewed-by: Dmytro Laktyushkin <Dmytro.Laktyushkin at amd.com>
    Acked-by: Qingqing Zhuo <qingqing.zhuo at amd.com>
    Signed-off-by: Nicholas Kazlauskas <nicholas.kazlauskas at amd.com>
    Tested-by: Daniel Wheeler <daniel.wheeler at amd.com>
    Signed-off-by: Alex Deucher <alexander.deucher at amd.com>

diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c
index 27501b735a9c..a2ade6e93f5e 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c
@@ -91,7 +91,8 @@ static void dcn315_disable_otg_wa(struct clk_mgr *clk_mgr_base, bool disable)
 
 		if (pipe->top_pipe || pipe->prev_odm_pipe)
 			continue;
-		if (pipe->stream && (pipe->stream->dpms_off || dc_is_virtual_signal(pipe->stream->signal))) {
+		if (pipe->stream && (pipe->stream->dpms_off || pipe->plane_state == NULL ||
+				     dc_is_virtual_signal(pipe->stream->signal))) {
 			if (disable)
 				pipe->stream_res.tg->funcs->immediate_disable_crtc(pipe->stream_res.tg);
 			else
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c
index 3121dd2d2a91..fc3af81ed6c6 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c
@@ -122,7 +122,8 @@ static void dcn316_disable_otg_wa(struct clk_mgr *clk_mgr_base, bool disable)
 
 		if (pipe->top_pipe || pipe->prev_odm_pipe)
 			continue;
-		if (pipe->stream && (pipe->stream->dpms_off || dc_is_virtual_signal(pipe->stream->signal))) {
+		if (pipe->stream && (pipe->stream->dpms_off || pipe->plane_state == NULL ||
+				     dc_is_virtual_signal(pipe->stream->signal))) {
 			if (disable)
 				pipe->stream_res.tg->funcs->immediate_disable_crtc(pipe->stream_res.tg);
 			else
commit ab144f0b4ad615b86934ce9cbdd27b23f65ba3a4
Author: Derek Lai <Derek.Lai at amd.com>
Date:   Thu May 5 17:59:49 2022 +0800

    drm/amd/display: Allow individual control of eDP hotplug support
    
    [Why]
    Second eDP can send display off notification through HPD
    but DC isn't hooked up to handle. Some primary eDP panels
    will toggle on/off incorrectly if it's enabled generically.
    
    [How]
    Extend the debug option to allow individually enabling hotplug
    either the first eDP or the second eDP in a dual eDP system.
    
    Reviewed-by: Nicholas Kazlauskas <Nicholas.Kazlauskas at amd.com>
    Acked-by: Qingqing Zhuo <qingqing.zhuo at amd.com>
    Signed-off-by: Derek Lai <Derek.Lai at amd.com>
    Tested-by: Daniel Wheeler <daniel.wheeler at amd.com>
    Signed-off-by: Alex Deucher <alexander.deucher at amd.com>

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c
index b40abd2bf7f6..a789ea8af27f 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c
@@ -1605,8 +1605,25 @@ static bool dc_link_construct_legacy(struct dc_link *link,
 		if (link->hpd_gpio) {
 			if (!link->dc->config.allow_edp_hotplug_detection)
 				link->irq_source_hpd = DC_IRQ_SOURCE_INVALID;
-			link->irq_source_hpd_rx =
-					dal_irq_get_rx_source(link->hpd_gpio);
+
+			switch (link->dc->config.allow_edp_hotplug_detection) {
+			case 1: // only the 1st eDP handles hotplug
+				if (link->link_index == 0)
+					link->irq_source_hpd_rx =
+						dal_irq_get_rx_source(link->hpd_gpio);
+				else
+					link->irq_source_hpd = DC_IRQ_SOURCE_INVALID;
+				break;
+			case 2: // only the 2nd eDP handles hotplug
+				if (link->link_index == 1)
+					link->irq_source_hpd_rx =
+						dal_irq_get_rx_source(link->hpd_gpio);
+				else
+					link->irq_source_hpd = DC_IRQ_SOURCE_INVALID;
+				break;
+			default:
+				break;
+			}
 		}
 
 		break;
diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h
index 26c24db8f1da..7cfc04a8ef15 100644
--- a/drivers/gpu/drm/amd/display/dc/dc.h
+++ b/drivers/gpu/drm/amd/display/dc/dc.h
@@ -329,7 +329,7 @@ struct dc_config {
 	bool disable_dmcu;
 	bool enable_4to1MPC;
 	bool enable_windowed_mpo_odm;
-	bool allow_edp_hotplug_detection;
+	uint32_t allow_edp_hotplug_detection;
 	bool clamp_min_dcfclk;
 	uint64_t vblank_alignment_dto_params;
 	uint8_t  vblank_alignment_max_frame_time_diff;
commit 49947b906a6bd9668eaf4f9cf691973c25c26955
Author: David Galiffi <David.Galiffi at amd.com>
Date:   Tue May 3 18:30:25 2022 -0400

    drm/amd/display: Check if modulo is 0 before dividing.
    
    [How & Why]
    If a value of 0 is read, then this will cause a divide-by-0 panic.
    
    Reviewed-by: Martin Leung <Martin.Leung at amd.com>
    Acked-by: Qingqing Zhuo <qingqing.zhuo at amd.com>
    Signed-off-by: David Galiffi <David.Galiffi at amd.com>
    Tested-by: Daniel Wheeler <daniel.wheeler at amd.com>
    Signed-off-by: Alex Deucher <alexander.deucher at amd.com>

diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c
index 5e6fea85a7b5..845aa8a1027d 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c
@@ -1101,9 +1101,12 @@ static bool get_pixel_clk_frequency_100hz(
 			 * not be programmed equal to DPREFCLK
 			 */
 			modulo_hz = REG_READ(MODULO[inst]);
-			*pixel_clk_khz = div_u64((uint64_t)clock_hz*
-				clock_source->ctx->dc->clk_mgr->dprefclk_khz*10,
-				modulo_hz);
+			if (modulo_hz)
+				*pixel_clk_khz = div_u64((uint64_t)clock_hz*
+					clock_source->ctx->dc->clk_mgr->dprefclk_khz*10,
+					modulo_hz);
+			else
+				*pixel_clk_khz = 0;
 		} else {
 			/* NOTE: There is agreement with VBIOS here that MODULO is
 			 * programmed equal to DPREFCLK, in which case PHASE will be
commit 3f69ee66f507a9e1180fd3a67b43807fae9b0e37
Author: Paul Hsieh <paul.hsieh at amd.com>
Date:   Tue May 3 14:26:41 2022 +0800

    drm/amd/display: clear request when release aux engine
    
    [Why]
    when driver and dmub request aux engine at the same time,
    dmub grant the aux engine but driver fail. Then driver
    release aux engine but doesn't clear the request bit.
    Then aux engine will be occupied by driver forever.
    
    [How]
    When driver release aux engine, clear request bit as well.
    
    Reviewed-by: Anthony Koo <Anthony.Koo at amd.com>
    Acked-by: Qingqing Zhuo <qingqing.zhuo at amd.com>
    Signed-off-by: Paul Hsieh <paul.hsieh at amd.com>
    Tested-by: Daniel Wheeler <daniel.wheeler at amd.com>
    Signed-off-by: Alex Deucher <alexander.deucher at amd.com>

diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c b/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c
index 29e20d92b0bb..9e39cd7b203e 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c
@@ -87,7 +87,8 @@ static void release_engine(
 
 	engine->ddc = NULL;
 
-	REG_UPDATE(AUX_ARB_CONTROL, AUX_SW_DONE_USING_AUX_REG, 1);
+	REG_UPDATE_2(AUX_ARB_CONTROL, AUX_SW_DONE_USING_AUX_REG, 1,
+		AUX_SW_USE_AUX_REG_REQ, 0);
 }
 
 #define SW_CAN_ACCESS_AUX 1
commit 903940b0b7c7f48e9743c65ae7cd65267083539f
Author: Alvin Lee <Alvin.Lee2 at amd.com>
Date:   Mon May 2 15:04:31 2022 -0400

    drm/amd/display: Clean up code in dc
    
    [Why & How]
    Code clean up in dc.
    
    Reviewed-by: Jun Lei <Jun.Lei at amd.com>
    Acked-by: Qingqing Zhuo <qingqing.zhuo at amd.com>
    Signed-off-by: Alvin Lee <Alvin.Lee2 at amd.com>
    Tested-by: Daniel Wheeler <daniel.wheeler at amd.com>
    Signed-off-by: Alex Deucher <alexander.deucher at amd.com>

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c
index e41a48f596a3..f14449401188 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
@@ -2901,14 +2901,15 @@ static void commit_planes_for_stream(struct dc *dc,
 						top_pipe_to_program->stream_res.tg);
 		}
 
-	if (should_lock_all_pipes && dc->hwss.interdependent_update_lock)
+	if (should_lock_all_pipes && dc->hwss.interdependent_update_lock) {
 		dc->hwss.interdependent_update_lock(dc, context, true);
-	else
+	} else {
 		/* Lock the top pipe while updating plane addrs, since freesync requires
 		 *  plane addr update event triggers to be synchronized.
 		 *  top_pipe_to_program is expected to never be NULL
 		 */
 		dc->hwss.pipe_control_lock(dc, top_pipe_to_program, true);
+	}
 
 	// Stream updates
 	if (stream_update)
@@ -2924,10 +2925,11 @@ static void commit_planes_for_stream(struct dc *dc,
 		if (dc->hwss.program_front_end_for_ctx)
 			dc->hwss.program_front_end_for_ctx(dc, context);
 
-		if (should_lock_all_pipes && dc->hwss.interdependent_update_lock)
+		if (should_lock_all_pipes && dc->hwss.interdependent_update_lock) {
 			dc->hwss.interdependent_update_lock(dc, context, false);
-		else
+		} else {
 			dc->hwss.pipe_control_lock(dc, top_pipe_to_program, false);
+		}
 		dc->hwss.post_unlock_program_front_end(dc, context);
 		return;
 	}
@@ -3052,10 +3054,11 @@ static void commit_planes_for_stream(struct dc *dc,
 
 	}
 
-	if (should_lock_all_pipes && dc->hwss.interdependent_update_lock)
+	if (should_lock_all_pipes && dc->hwss.interdependent_update_lock) {
 		dc->hwss.interdependent_update_lock(dc, context, false);
-	else
+	} else {
 		dc->hwss.pipe_control_lock(dc, top_pipe_to_program, false);
+	}
 
 	if ((update_type != UPDATE_TYPE_FAST) && stream->update_flags.bits.dsc_changed)
 		if (top_pipe_to_program->stream_res.tg->funcs->lock_doublebuffer_enable) {
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c
index e1f87bd72e4a..0da024912dbe 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c
@@ -1773,7 +1773,6 @@ void dcn20_post_unlock_program_front_end(
 	 */
 	for (i = 0; i < dc->res_pool->pipe_count; i++) {
 		struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
-
 		if (pipe->plane_state && !pipe->top_pipe && pipe->update_flags.bits.enable) {
 			struct hubp *hubp = pipe->plane_res.hubp;
 			int j = 0;
commit fc0b067df7ed973addbba8e136d9a729df86ccdc
Author: Jimmy Kizito <Jimmy.Kizito at amd.com>
Date:   Fri Apr 1 15:24:47 2022 -0400

    drm/amd/display: Query DPIA HPD status.
    
    [Why]
    Driver needs up to date DPIA HPD status.
    
    [How]
    Use HPD query command to get DPIA HPD status.
    
    Reviewed-by: Meenakshikumar Somasundaram <Meenakshikumar.Somasundaram at amd.com>
    Reviewed-by: Jun Lei <Jun.Lei at amd.com>
    Acked-by: Qingqing Zhuo <qingqing.zhuo at amd.com>
    Signed-off-by: Jimmy Kizito <Jimmy.Kizito at amd.com>
    Tested-by: Daniel Wheeler <daniel.wheeler at amd.com>
    Signed-off-by: Alex Deucher <alexander.deucher at amd.com>

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c
index 67ef357e5798..b40abd2bf7f6 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c
@@ -33,6 +33,7 @@
 #include "gpio_service_interface.h"
 #include "core_status.h"
 #include "dc_link_dp.h"
+#include "dc_link_dpia.h"
 #include "dc_link_ddc.h"
 #include "link_hwss.h"
 #include "opp.h"
@@ -240,7 +241,7 @@ bool dc_link_detect_sink(struct dc_link *link, enum dc_connection_type *type)
 
 	/* Link may not have physical HPD pin. */
 	if (link->ep_type != DISPLAY_ENDPOINT_PHY) {
-		if (link->is_hpd_pending || !link->hpd_status)
+		if (link->is_hpd_pending || !dc_link_dpia_query_hpd_status(link))
 			*type = dc_connection_none;
 		else
 			*type = dc_connection_single;
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dpia.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dpia.c
index a5765f36d86f..1b7a8774b0c9 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dpia.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dpia.c
@@ -34,6 +34,7 @@
 #include "dm_helpers.h"
 #include "dmub/inc/dmub_cmd.h"
 #include "inc/link_dpcd.h"
+#include "dc_dmub_srv.h"
 
 #define DC_LOGGER \
 	link->ctx->logger
@@ -69,6 +70,24 @@ enum dc_status dpcd_get_tunneling_device_data(struct dc_link *link)
 	return status;
 }
 
+bool dc_link_dpia_query_hpd_status(struct dc_link *link)
+{
+	union dmub_rb_cmd cmd = {0};
+	struct dc_dmub_srv *dmub_srv = link->ctx->dmub_srv;
+	bool is_hpd_high = false;
+
+	/* prepare QUERY_HPD command */
+	cmd.query_hpd.header.type = DMUB_CMD__QUERY_HPD_STATE;
+	cmd.query_hpd.data.instance = link->link_id.enum_id - ENUM_ID_1;
+	cmd.query_hpd.data.ch_type = AUX_CHANNEL_DPIA;
+
+	/* Return HPD status reported by DMUB if query successfully executed. */
+	if (dc_dmub_srv_cmd_with_reply_data(dmub_srv, &cmd) && cmd.query_hpd.data.status == AUX_RET_SUCCESS)
+		is_hpd_high = cmd.query_hpd.data.result;
+
+	return is_hpd_high;
+}
+
 /* Configure link as prescribed in link_setting; set LTTPR mode; and
  * Initialize link training settings.
  * Abort link training if sink unplug detected.
diff --git a/drivers/gpu/drm/amd/display/dc/inc/dc_link_dpia.h b/drivers/gpu/drm/amd/display/dc/inc/dc_link_dpia.h
index 74dafd0f9d3d..39c1d1d07357 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/dc_link_dpia.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/dc_link_dpia.h
@@ -87,6 +87,11 @@ union dpia_set_config_data {
  */
 enum dc_status dpcd_get_tunneling_device_data(struct dc_link *link);
 
+/* Query hot plug status of USB4 DP tunnel.
+ * Returns true if HPD high.
+ */
+bool dc_link_dpia_query_hpd_status(struct dc_link *link);
+
 /* Train DP tunneling link for USB4 DPIA display endpoint.
  * DPIA equivalent of dc_link_dp_perfrorm_link_training.
  * Aborts link training upon detection of sink unplug.
commit d84c4d194ebad0f5d327da72404c37c7de2c1714
Author: Jimmy Kizito <Jimmy.Kizito at amd.com>
Date:   Thu Apr 14 09:49:37 2022 -0400

    drm/amd/display: Update link training fallback behaviour.
    
    [Why]
    Some displays may need several link training attempts before
    link training succeeds.
    
    [How]
    If training succeeds after falling back to lower link bandwidth,
    retry at original link bandwidth instead of abandoning link training
    whenever link bandwidth is less than stream bandwidth.
    
    Reviewed-by: Jun Lei <Jun.Lei at amd.com>
    Acked-by: Qingqing Zhuo <qingqing.zhuo at amd.com>
    Signed-off-by: Jimmy Kizito <Jimmy.Kizito at amd.com>
    Tested-by: Daniel Wheeler <daniel.wheeler at amd.com>
    Signed-off-by: Alex Deucher <alexander.deucher at amd.com>

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
index 975d631534b5..d8de8dbf3676 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
@@ -2783,31 +2783,37 @@ bool perform_link_training_with_retries(
 	struct dc_link *link = stream->link;
 	enum dp_panel_mode panel_mode = dp_get_panel_mode(link);
 	enum link_training_result status = LINK_TRAINING_CR_FAIL_LANE0;
-	struct dc_link_settings current_setting = *link_setting;
+	struct dc_link_settings cur_link_settings = *link_setting;
 	const struct link_hwss *link_hwss = get_link_hwss(link, &pipe_ctx->link_res);
 	int fail_count = 0;
+	bool is_link_bw_low = false; /* link bandwidth < stream bandwidth */
+	bool is_link_bw_min = /* RBR x 1 */
+		(cur_link_settings.link_rate <= LINK_RATE_LOW) &&
+		(cur_link_settings.lane_count <= LANE_COUNT_ONE);
 
 	dp_trace_commit_lt_init(link);
 
 
-	if (dp_get_link_encoding_format(&current_setting) == DP_8b_10b_ENCODING)
+	if (dp_get_link_encoding_format(&cur_link_settings) == DP_8b_10b_ENCODING)
 		/* We need to do this before the link training to ensure the idle
 		 * pattern in SST mode will be sent right after the link training
 		 */
 		link_hwss->setup_stream_encoder(pipe_ctx);
 
 	dp_trace_set_lt_start_timestamp(link, false);
-	for (j = 0; j < attempts; ++j) {
+	j = 0;
+	while (j < attempts && fail_count < (attempts * 10)) {
 
-		DC_LOG_HW_LINK_TRAINING("%s: Beginning link training attempt %u of %d\n",
-			__func__, (unsigned int)j + 1, attempts);
+		DC_LOG_HW_LINK_TRAINING("%s: Beginning link training attempt %u of %d @ rate(%d) x lane(%d)\n",
+			__func__, (unsigned int)j + 1, attempts, cur_link_settings.link_rate,
+			cur_link_settings.lane_count);
 
 		dp_enable_link_phy(
 			link,
 			&pipe_ctx->link_res,
 			signal,
 			pipe_ctx->clock_source->id,
-			&current_setting);
+			&cur_link_settings);
 
 		if (stream->sink_patches.dppowerup_delay > 0) {
 			int delay_dp_power_up_in_ms = stream->sink_patches.dppowerup_delay;
@@ -2832,30 +2838,30 @@ bool perform_link_training_with_retries(
 		dp_set_panel_mode(link, panel_mode);
 
 		if (link->aux_access_disabled) {
-			dc_link_dp_perform_link_training_skip_aux(link, &pipe_ctx->link_res, &current_setting);
+			dc_link_dp_perform_link_training_skip_aux(link, &pipe_ctx->link_res, &cur_link_settings);
 			return true;
 		} else {
 			/** @todo Consolidate USB4 DP and DPx.x training. */
 			if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) {
 				status = dc_link_dpia_perform_link_training(link,
 						&pipe_ctx->link_res,
-						&current_setting,
+						&cur_link_settings,
 						skip_video_pattern);
 
 				/* Transmit idle pattern once training successful. */
-				if (status == LINK_TRAINING_SUCCESS)
+				if (status == LINK_TRAINING_SUCCESS && !is_link_bw_low)
 					dp_set_hw_test_pattern(link, &pipe_ctx->link_res, DP_TEST_PATTERN_VIDEO_MODE, NULL, 0);
 			} else {
 				status = dc_link_dp_perform_link_training(link,
 						&pipe_ctx->link_res,
-						&current_setting,
+						&cur_link_settings,
 						skip_video_pattern);
 			}
 
 			dp_trace_lt_total_count_increment(link, false);
 			dp_trace_lt_result_update(link, status, false);
 			dp_trace_set_lt_end_timestamp(link, false);
-			if (status == LINK_TRAINING_SUCCESS)
+			if (status == LINK_TRAINING_SUCCESS && !is_link_bw_low)
 				return true;
 		}
 
@@ -2866,8 +2872,9 @@ bool perform_link_training_with_retries(
 		if (j == (attempts - 1) && link->ep_type == DISPLAY_ENDPOINT_PHY)
 			break;
 
-		DC_LOG_WARNING("%s: Link training attempt %u of %d failed\n",
-			__func__, (unsigned int)j + 1, attempts);
+		DC_LOG_WARNING("%s: Link training attempt %u of %d failed @ rate(%d) x lane(%d)\n",
+			__func__, (unsigned int)j + 1, attempts, cur_link_settings.link_rate,
+			cur_link_settings.lane_count);
 
 		dp_disable_link_phy(link, &pipe_ctx->link_res, signal);
 
@@ -2876,27 +2883,49 @@ bool perform_link_training_with_retries(
 			enum dc_connection_type type = dc_connection_none;
 
 			dc_link_detect_sink(link, &type);
-			if (type == dc_connection_none)
+			if (type == dc_connection_none) {
+				DC_LOG_HW_LINK_TRAINING("%s: Aborting training because sink unplugged\n", __func__);
 				break;
-		} else if (do_fallback) {
+			}
+		}
+
+		/* Try to train again at original settings if:
+		 * - not falling back between training attempts;
+		 * - aborted previous attempt due to reasons other than sink unplug;
+		 * - successfully trained but at a link rate lower than that required by stream;
+		 * - reached minimum link bandwidth.
+		 */
+		if (!do_fallback || (status == LINK_TRAINING_ABORT) ||
+				(status == LINK_TRAINING_SUCCESS && is_link_bw_low) ||
+				is_link_bw_min) {
+			j++;
+			cur_link_settings = *link_setting;
+			delay_between_attempts += LINK_TRAINING_RETRY_DELAY;
+			is_link_bw_low = false;
+			is_link_bw_min = (cur_link_settings.link_rate <= LINK_RATE_LOW) &&
+				(cur_link_settings.lane_count <= LANE_COUNT_ONE);
+
+		} else if (do_fallback) { /* Try training at lower link bandwidth if doing fallback. */
 			uint32_t req_bw;
 			uint32_t link_bw;
 
-			decide_fallback_link_setting(link, *link_setting, &current_setting, status);
-			/* Fail link training if reduced link bandwidth no longer meets
-			 * stream requirements.
+			decide_fallback_link_setting(link, *link_setting, &cur_link_settings, status);
+			/* Flag if reduced link bandwidth no longer meets stream requirements or fallen back to
+			 * minimum link bandwidth.
 			 */
 			req_bw = dc_bandwidth_in_kbps_from_timing(&stream->timing);
-			link_bw = dc_link_bandwidth_kbps(link, &current_setting);
-			if (req_bw > link_bw)
-				break;
+			link_bw = dc_link_bandwidth_kbps(link, &cur_link_settings);
+			is_link_bw_low = (req_bw > link_bw);
+			is_link_bw_min = ((cur_link_settings.link_rate <= LINK_RATE_LOW) &&
+				(cur_link_settings.lane_count <= LANE_COUNT_ONE));
+
+			if (is_link_bw_low)
+				DC_LOG_WARNING("%s: Link bandwidth too low after fallback req_bw(%d) > link_bw(%d)\n",
+					__func__, req_bw, link_bw);
 		}
 
 		msleep(delay_between_attempts);
-
-		delay_between_attempts += LINK_TRAINING_RETRY_DELAY;
 	}
-
 	return false;
 }
 
commit 1a5409facf9ceba401f83925c6d15bae2121d29c
Merge: 947a844bb3eb 64b22a0da12a
Author: Rob Clark <robdclark at chromium.org>
Date:   Thu May 19 11:19:30 2022 -0700

    Merge tag 'msm-next-5.19-fixes' of https://gitlab.freedesktop.org/abhinavk/msm into msm-fixes-staging
    
    5.19 fixes for msm-next
    
    - Limiting WB modes to max sspp linewidth
    - Fixing the supported rotations to add 180 back for IGT
    - Fix to handle pm_runtime_get_sync() errors to avoid unclocked access
      in the bind() path for dpu driver
    - Fix the irq_free() without request issue which was a big-time
      hitter in the CI-runs.
    
    Signed-off-by: Abhinav Kumar <quic_abhinavk at quicinc.com>
    Signed-off-by: Rob Clark <robdclark at chromium.org>



More information about the openchrome-devel mailing list