<div dir="ltr"><div>Reviewed-by: Marek Olšák &lt;<a href="mailto:marek.olsak@amd.com">marek.olsak@amd.com</a>&gt;</div><div><br></div><div>Marek<br></div></div><br><div class="gmail_quote"><div dir="ltr" class="gmail_attr">On Fri, Jul 12, 2019 at 9:47 AM Haehnle, Nicolai &lt;<a href="mailto:Nicolai.Haehnle@amd.com">Nicolai.Haehnle@amd.com</a>&gt; wrote:<br></div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">Prefetch mode 0 is not supported and can lead to hangs with certain very<br>
specific code patterns. Set a sound prefetch mode for all VMIDs rather<br>
than forcing all shaders to set the prefetch mode at the beginning.<br>
<br>
Reduce code duplication a bit while we're at it. Note that the 64-bit<br>
address mode enum and the retry all enum are both 0, so the only<br>
functional change is in the INITIAL_INST_PREFETCH field.<br>
<br>
Signed-off-by: Nicolai Hähnle &lt;<a href="mailto:nicolai.haehnle@amd.com" target="_blank">nicolai.haehnle@amd.com</a>&gt;<br>
--<br>
I haven't been able to properly test this yet, but it is the right thing<br>
to be doing in principle.<br>
---<br>
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 27 ++++++++++----------------<br>
 1 file changed, 10 insertions(+), 17 deletions(-)<br>
<br>
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c<br>
index 0d94c812df1b..b8498c359191 100644<br>
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c<br>
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c<br>
@@ -157,20 +157,27 @@ static const struct soc15_reg_golden golden_settings_gc_10_1_1[] =<br>
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000),<br>
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CNTL, 0x60000010, 0x479c0010),<br>
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0x00800000, 0x00800000),<br>
 };<br>
<br>
 static const struct soc15_reg_golden golden_settings_gc_10_1_nv14[] =<br>
 {<br>
        /* Pending on emulation bring up */<br>
 };<br>
<br>
+#define DEFAULT_SH_MEM_CONFIG \<br>
+       ((SH_MEM_ADDRESS_MODE_64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT) | \<br>
+        (SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \<br>
+        (SH_MEM_RETRY_MODE_ALL << SH_MEM_CONFIG__RETRY_MODE__SHIFT) | \<br>
+        (3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT))<br>
+<br>
+<br>
 static void gfx_v10_0_set_ring_funcs(struct amdgpu_device *adev);<br>
 static void gfx_v10_0_set_irq_funcs(struct amdgpu_device *adev);<br>
 static void gfx_v10_0_set_gds_init(struct amdgpu_device *adev);<br>
 static void gfx_v10_0_set_rlc_funcs(struct amdgpu_device *adev);<br>
 static int gfx_v10_0_get_cu_info(struct amdgpu_device *adev,<br>
                                  struct amdgpu_cu_info *cu_info);<br>
 static uint64_t gfx_v10_0_get_gpu_clock_counter(struct amdgpu_device *adev);<br>
 static void gfx_v10_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,<br>
                                   u32 sh_num, u32 instance);<br>
 static u32 gfx_v10_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev);<br>
@@ -1476,40 +1483,35 @@ static u32 gfx_v10_0_init_pa_sc_tile_steering_override(struct amdgpu_device *ade<br>
        return pa_sc_tile_steering_override;<br>
 }<br>
<br>
 #define DEFAULT_SH_MEM_BASES   (0x6000)<br>
 #define FIRST_COMPUTE_VMID     (8)<br>
 #define LAST_COMPUTE_VMID      (16)<br>
<br>
 static void gfx_v10_0_init_compute_vmid(struct amdgpu_device *adev)<br>
 {<br>
        int i;<br>
-       uint32_t sh_mem_config;<br>
        uint32_t sh_mem_bases;<br>
<br>
        /*<br>
         * Configure apertures:<br>
         * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)<br>
         * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)<br>
         * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)<br>
         */<br>
        sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);<br>
<br>
-       sh_mem_config = SH_MEM_ADDRESS_MODE_64 |<br>
-                       SH_MEM_ALIGNMENT_MODE_UNALIGNED <<<br>
-                       SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;<br>
-<br>
        mutex_lock(&adev->srbm_mutex);<br>
        for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {<br>
                nv_grbm_select(adev, 0, 0, 0, i);<br>
                /* CP and shaders */<br>
-               WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);<br>
+               WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG);<br>
                WREG32_SOC15(GC, 0, mmSH_MEM_BASES, sh_mem_bases);<br>
        }<br>
        nv_grbm_select(adev, 0, 0, 0, 0);<br>
        mutex_unlock(&adev->srbm_mutex);<br>
 }<br>
<br>
 static void gfx_v10_0_tcp_harvest(struct amdgpu_device *adev)<br>
 {<br>
        int i, j, k;<br>
        int max_wgp_per_sh = adev->gfx.config.max_cu_per_sh >> 1;<br>
@@ -1590,31 +1592,22 @@ static void gfx_v10_0_constants_init(struct amdgpu_device *adev)<br>
        gfx_v10_0_get_cu_info(adev, &adev->gfx.cu_info);<br>
        adev->gfx.config.pa_sc_tile_steering_override =<br>
                gfx_v10_0_init_pa_sc_tile_steering_override(adev);<br>
<br>
        /* XXX SH_MEM regs */<br>
        /* where to put LDS, scratch, GPUVM in FSA64 space */<br>
        mutex_lock(&adev->srbm_mutex);<br>
        for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB].num_ids; i++) {<br>
                nv_grbm_select(adev, 0, 0, 0, i);<br>
                /* CP and shaders */<br>
-               if (i == 0) {<br>
-                       tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,<br>
-                                           SH_MEM_ALIGNMENT_MODE_UNALIGNED);<br>
-                       tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_MODE, 0);<br>
-                       WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, tmp);<br>
-                       WREG32_SOC15(GC, 0, mmSH_MEM_BASES, 0);<br>
-               } else {<br>
-                       tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,<br>
-                                           SH_MEM_ALIGNMENT_MODE_UNALIGNED);<br>
-                       tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_MODE, 0);<br>
-                       WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, tmp);<br>
+               WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG);<br>
+               if (i != 0) {<br>
                        tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,<br>
                                (adev->gmc.private_aperture_start >> 48));<br>
                        tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,<br>
                                (adev->gmc.shared_aperture_start >> 48));<br>
                        WREG32_SOC15(GC, 0, mmSH_MEM_BASES, tmp);<br>
                }<br>
        }<br>
        nv_grbm_select(adev, 0, 0, 0, 0);<br>
<br>
        mutex_unlock(&adev->srbm_mutex);<br>
-- <br>
2.20.1<br>
<br>
_______________________________________________<br>
amd-gfx mailing list<br>
<a href="mailto:amd-gfx@lists.freedesktop.org" target="_blank">amd-gfx@lists.freedesktop.org</a><br>
<a href="https://lists.freedesktop.org/mailman/listinfo/amd-gfx" rel="noreferrer" target="_blank">https://lists.freedesktop.org/mailman/listinfo/amd-gfx</a></blockquote></div>