[Nouveau] Nouveau dmem NULL Pointer deref (SVM)

Tobias Klausmann tobias.johannes.klausmann at mni.thm.de
Thu Mar 21 15:59:14 UTC 2019


Hi,

just for your information, and maybe for some help: with 5.1-rc1 and SVM 
enabled, I see the following backtrace [1] when the nouveau card (reverse 
prime) goes to sleep. For now I have papered over it with [2], which leaves 
me with userspace hangs. Any pointers on where to look for the actual culprit?

PS: Card is: nouveau 0000:01:00.0: NVIDIA GP106 (136000a1)

Greetings,

Tobias


[1]:

BUG: unable to handle kernel NULL pointer dereference at 0000000000000028
#PF error: [normal kernel read fault]
PGD 0 P4D 0
Oops: 0000 [#1] PREEMPT SMP PTI
CPU: 3 PID: 435 Comm: kworker/3:4 Not tainted 5.1.0-rc1-desktop-debug+ #80
Hardware name: Acer Aspire VN7-593G/Pluto_KLS, BIOS V1.11 08/01/2018
Workqueue: pm pm_runtime_work
RIP: 0010:nouveau_bo_unpin (linux/./include/linux/compiler.h:193 
linux/./arch/x86/include/asm/atomic.h:31 
linux/./include/asm-generic/atomic-instrumented.h:27 
linux/./include/linux/refcount.h:43 linux/./include/linux/kref.h:38 
linux/./include/drm/ttm/ttm_bo_driver.h:721 
linux/drivers/gpu/drm/nouveau/nouveau_bo.c:454) nouveau
Code: 89 d9 48 c7 c6 50 04 e5 c0 c4 42 79 f7 c0 bd f0 ff ff ff e8 42 d5 
7a c6 ff 83 00 04 00 00 e9 17 ff ff ff 41 54 55 53 48 89 fb <8b> 47 28 
85 c0 0f 84 cf 00 00 00 48 8b bb c0 01 00 00 31 f6 4c 8b
All code
========
    0:    89 d9                    mov    %ebx,%ecx
    2:    48 c7 c6 50 04 e5 c0     mov    $0xffffffffc0e50450,%rsi
    9:    c4 42 79 f7 c0           shlx   %eax,%r8d,%r8d
    e:    bd f0 ff ff ff           mov    $0xfffffff0,%ebp
   13:    e8 42 d5 7a c6           callq  0xffffffffc67ad55a
   18:    ff 83 00 04 00 00        incl   0x400(%rbx)
   1e:    e9 17 ff ff ff           jmpq   0xffffffffffffff3a
   23:    41 54                    push   %r12
   25:    55                       push   %rbp
   26:    53                       push   %rbx
   27:    48 89 fb                 mov    %rdi,%rbx
   2a:*    8b 47 28                 mov    0x28(%rdi),%eax <-- trapping 
instruction
   2d:    85 c0                    test   %eax,%eax
   2f:    0f 84 cf 00 00 00        je     0x104
   35:    48 8b bb c0 01 00 00     mov    0x1c0(%rbx),%rdi
   3c:    31 f6                    xor    %esi,%esi
   3e:    4c                       rex.WR
   3f:    8b                       .byte 0x8b

Code starting with the faulting instruction
===========================================
    0:    8b 47 28                 mov    0x28(%rdi),%eax
    3:    85 c0                    test   %eax,%eax
    5:    0f 84 cf 00 00 00        je     0xda
    b:    48 8b bb c0 01 00 00     mov    0x1c0(%rbx),%rdi
   12:    31 f6                    xor    %esi,%esi
   14:    4c                       rex.WR
   15:    8b                       .byte 0x8b
RSP: 0018:ffffbf0b41237d20 EFLAGS: 00010216
RAX: ffff9dfe0ba2ec00 RBX: 0000000000000000 RCX: ffffffffc0ceb630
RDX: ffff9dfe0ba2ec38 RSI: 000000007fffffff RDI: 0000000000000000
RBP: ffff9dfe0a07e000 R08: 0000000000000000 R09: ffffffffc0d4a9a0
R10: 8080808080808080 R11: 0000000000001800 R12: 0000000000000001
R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000008
FS:  0000000000000000(0000) GS:ffff9dfe3ecc0000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000000000000028 CR3: 00000001a500e002 CR4: 00000000003606e0
Call Trace:
nouveau_dmem_suspend (linux/drivers/gpu/drm/nouveau/nouveau_dmem.c:482 
(discriminator 9)) nouveau
nouveau_do_suspend (linux/drivers/gpu/drm/nouveau/nouveau_drm.c:748) nouveau
nouveau_pmops_runtime_suspend 
(linux/drivers/gpu/drm/nouveau/nouveau_drm.c:915) nouveau
pci_pm_runtime_suspend (linux/drivers/pci/pci-driver.c:1262)
? __switch_to_asm (linux/arch/x86/entry/entry_64.S:312)
? pci_has_legacy_pm_support (linux/drivers/pci/pci-driver.c:1238)
__rpm_callback (linux/drivers/base/power/runtime.c:357)
? pci_has_legacy_pm_support (linux/drivers/pci/pci-driver.c:1238)
rpm_callback (linux/drivers/base/power/runtime.c:490)
? pci_has_legacy_pm_support (linux/drivers/pci/pci-driver.c:1238)
rpm_suspend (linux/drivers/base/power/runtime.c:629)
? __switch_to_asm (linux/arch/x86/entry/entry_64.S:312)
? __switch_to_asm (linux/arch/x86/entry/entry_64.S:312)
? __switch_to_asm (linux/arch/x86/entry/entry_64.S:312)
? __switch_to_asm (linux/arch/x86/entry/entry_64.S:312)
? __switch_to_asm (linux/arch/x86/entry/entry_64.S:312)
pm_runtime_work (linux/drivers/base/power/runtime.c:922)
process_one_work (linux/./arch/x86/include/asm/preempt.h:26 
linux/kernel/workqueue.c:2278)
worker_thread (linux/./include/linux/compiler.h:193 
linux/./include/linux/list.h:237 linux/kernel/workqueue.c:2416)
? process_one_work (linux/kernel/workqueue.c:2358)
kthread (linux/kernel/kthread.c:253)
? kthread_create_worker_on_cpu (linux/kernel/kthread.c:213)
ret_from_fork (linux/arch/x86/entry/entry_64.S:358)
Modules linked in: rfcomm af_packet snd_hda_codec_hdmi bnep uvcvideo 
videobuf2_vmalloc rtsx_usb_sdmmc videobuf2_memops btusb rtsx_usb_ms 
videobuf2_v4l2 btrtl mmc_core memstick btbcm videodev btintel 
videobuf2_common rtsx_usb bluetooth usbhid ecdh_generic 
snd_hda_codec_realtek snd_hda_codec_generic ledtrig_audio nouveau arc4 
i915 nls_iso8859_1 nls_cp437 vfat fat intel_rapl x86_pkg_temp_thermal 
intel_powerclamp coretemp kvm_intel ath10k_pci kvm ath10k_core msr 
snd_hda_intel mxm_wmi ath snd_hda_codec irqbypass joydev ttm 
snd_hda_core mac80211 snd_hwdep crct10dif_pclmul drm_kms_helper 
crc32_pclmul snd_pcm crc32c_intel hid_multitouch drm snd_timer 
ghash_clmulni_intel hid_generic mei_hdcp iTCO_wdt aesni_intel snd 
iTCO_vendor_support cfg80211 aes_x86_64 crypto_simd fb_sys_fops cryptd 
acerfan r8169 syscopyarea glue_helper sysfillrect idma64 sysimgblt 
realtek acer_wmi i2c_algo_bit soundcore mei_me libphy 
intel_wmi_thunderbolt sparse_keymap pcspkr intel_pch_thermal
wmi_bmof rfkill intel_lpss_pci mei i2c_i801 intel_lpss thermal battery 
ac tpm_crb tpm_tis tpm_tis_core pinctrl_sunrisepoint pinctrl_intel tpm 
pcc_cpufreq acpi_pad button xhci_pci serio_raw xhci_hcd usbcore i2c_hid 
wmi video sg dm_multipath dm_mod scsi_dh_rdac scsi_dh_emc scsi_dh_alua 
efivarfs autofs4
CR2: 0000000000000028
---[ end trace abc30d2b924ee9b1 ]---
RIP: 0010:nouveau_bo_unpin (linux/./include/linux/compiler.h:193 
linux/./arch/x86/include/asm/atomic.h:31 
linux/./include/asm-generic/atomic-instrumented.h:27 
linux/./include/linux/refcount.h:43 linux/./include/linux/kref.h:38 
linux/./include/drm/ttm/ttm_bo_driver.h:721 
linux/drivers/gpu/drm/nouveau/nouveau_bo.c:454) nouveau
Code: 89 d9 48 c7 c6 50 04 e5 c0 c4 42 79 f7 c0 bd f0 ff ff ff e8 42 d5 
7a c6 ff 83 00 04 00 00 e9 17 ff ff ff 41 54 55 53 48 89 fb <8b> 47 28 
85 c0 0f 84 cf 00 00 00 48 8b bb c0 01 00 00 31 f6 4c 8b
All code
========
    0:    89 d9                    mov    %ebx,%ecx
    2:    48 c7 c6 50 04 e5 c0     mov    $0xffffffffc0e50450,%rsi
    9:    c4 42 79 f7 c0           shlx   %eax,%r8d,%r8d
    e:    bd f0 ff ff ff           mov    $0xfffffff0,%ebp
   13:    e8 42 d5 7a c6           callq  0xffffffffc67ad55a
   18:    ff 83 00 04 00 00        incl   0x400(%rbx)
   1e:    e9 17 ff ff ff           jmpq   0xffffffffffffff3a
   23:    41 54                    push   %r12
   25:    55                       push   %rbp
   26:    53                       push   %rbx
   27:    48 89 fb                 mov    %rdi,%rbx
   2a:*    8b 47 28                 mov    0x28(%rdi),%eax <-- trapping 
instruction
   2d:    85 c0                    test   %eax,%eax
   2f:    0f 84 cf 00 00 00        je     0x104
   35:    48 8b bb c0 01 00 00     mov    0x1c0(%rbx),%rdi
   3c:    31 f6                    xor    %esi,%esi
   3e:    4c                       rex.WR
   3f:    8b                       .byte 0x8b

Code starting with the faulting instruction
===========================================
    0:    8b 47 28                 mov    0x28(%rdi),%eax
    3:    85 c0                    test   %eax,%eax
    5:    0f 84 cf 00 00 00        je     0xda
    b:    48 8b bb c0 01 00 00     mov    0x1c0(%rbx),%rdi
   12:    31 f6                    xor    %esi,%esi
   14:    4c                       rex.WR
   15:    8b                       .byte 0x8b
RSP: 0018:ffffbf0b41237d20 EFLAGS: 00010216
RAX: ffff9dfe0ba2ec00 RBX: 0000000000000000 RCX: ffffffffc0ceb630
RDX: ffff9dfe0ba2ec38 RSI: 000000007fffffff RDI: 0000000000000000
RBP: ffff9dfe0a07e000 R08: 0000000000000000 R09: ffffffffc0d4a9a0
R10: 8080808080808080 R11: 0000000000001800 R12: 0000000000000001
R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000008
FS:  0000000000000000(0000) GS:ffff9dfe3ecc0000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000000000000028 CR3: 00000001a500e002 CR4: 00000000003606e0


[2]:

diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c 
b/drivers/gpu/drm/nouveau/nouveau_bo.c
index 34a998012bf6..aee10a120896 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -377,11 +377,21 @@ nouveau_bo_placement_set(struct nouveau_bo *nvbo, 
uint32_t type, uint32_t busy)
  int
  nouveau_bo_pin(struct nouveau_bo *nvbo, uint32_t memtype, bool contig)
  {
+    WARN_ON(!virt_addr_valid(nvbo));
+
+    if (!virt_addr_valid(nvbo))
+        return 0;
+
      struct nouveau_drm *drm = nouveau_bdev(nvbo->bo.bdev);
      struct ttm_buffer_object *bo = &nvbo->bo;
      bool force = false, evict = false;
      int ret;

+    WARN_ON(!virt_addr_valid(bo));
+
+    if (!virt_addr_valid(bo))
+        return 0;
+
      ret = ttm_bo_reserve(bo, false, false, NULL);
      if (ret)
          return ret;
@@ -447,10 +457,20 @@ nouveau_bo_pin(struct nouveau_bo *nvbo, uint32_t 
memtype, bool contig)
  int
  nouveau_bo_unpin(struct nouveau_bo *nvbo)
  {
+    WARN_ON(!virt_addr_valid(nvbo));
+
+    if (!virt_addr_valid(nvbo))
+        return 0;
+
      struct nouveau_drm *drm = nouveau_bdev(nvbo->bo.bdev);
      struct ttm_buffer_object *bo = &nvbo->bo;
      int ret, ref;

+    WARN_ON(!virt_addr_valid(bo));
+
+    if (!virt_addr_valid(bo))
+        return 0;
+
      ret = ttm_bo_reserve(bo, false, false, NULL);
      if (ret)
          return ret;



More information about the Nouveau mailing list