[Nouveau] Nouveau dmem NULL Pointer deref (SVM)
Tobias Klausmann
tobias.johannes.klausmann at mni.thm.de
Thu Mar 21 15:59:14 UTC 2019
Hi,
just for your information, and maybe to get some help: with 5.1-rc1 and SVM
enabled, I see the following backtrace [1] when the nouveau card (reverse
prime) goes to sleep. For now I have papered over it with [2], which leaves
me with userspace hangs. Any pointers on where to look for the actual culprit?
PS: Card is: nouveau 0000:01:00.0: NVIDIA GP106 (136000a1)
Greetings,
Tobias
[1]:
BUG: unable to handle kernel NULL pointer dereference at 0000000000000028
#PF error: [normal kernel read fault]
PGD 0 P4D 0
Oops: 0000 [#1] PREEMPT SMP PTI
CPU: 3 PID: 435 Comm: kworker/3:4 Not tainted 5.1.0-rc1-desktop-debug+ #80
Hardware name: Acer Aspire VN7-593G/Pluto_KLS, BIOS V1.11 08/01/2018
Workqueue: pm pm_runtime_work
RIP: 0010:nouveau_bo_unpin (linux/./include/linux/compiler.h:193
linux/./arch/x86/include/asm/atomic.h:31
linux/./include/asm-generic/atomic-instrumented.h:27
linux/./include/linux/refcount.h:43 linux/./include/linux/kref.h:38
linux/./include/drm/ttm/ttm_bo_driver.h:721
linux/drivers/gpu/drm/nouveau/nouveau_bo.c:454) nouveau
Code: 89 d9 48 c7 c6 50 04 e5 c0 c4 42 79 f7 c0 bd f0 ff ff ff e8 42 d5
7a c6 ff 83 00 04 00 00 e9 17 ff ff ff 41 54 55 53 48 89 fb <8b> 47 28
85 c0 0f 84 cf 00 00 00 48 8b bb c0 01 00 00 31 f6 4c 8b
All code
========
0: 89 d9 mov %ebx,%ecx
2: 48 c7 c6 50 04 e5 c0 mov $0xffffffffc0e50450,%rsi
9: c4 42 79 f7 c0 shlx %eax,%r8d,%r8d
e: bd f0 ff ff ff mov $0xfffffff0,%ebp
13: e8 42 d5 7a c6 callq 0xffffffffc67ad55a
18: ff 83 00 04 00 00 incl 0x400(%rbx)
1e: e9 17 ff ff ff jmpq 0xffffffffffffff3a
23: 41 54 push %r12
25: 55 push %rbp
26: 53 push %rbx
27: 48 89 fb mov %rdi,%rbx
2a:* 8b 47 28 mov 0x28(%rdi),%eax <-- trapping
instruction
2d: 85 c0 test %eax,%eax
2f: 0f 84 cf 00 00 00 je 0x104
35: 48 8b bb c0 01 00 00 mov 0x1c0(%rbx),%rdi
3c: 31 f6 xor %esi,%esi
3e: 4c rex.WR
3f: 8b .byte 0x8b
Code starting with the faulting instruction
===========================================
0: 8b 47 28 mov 0x28(%rdi),%eax
3: 85 c0 test %eax,%eax
5: 0f 84 cf 00 00 00 je 0xda
b: 48 8b bb c0 01 00 00 mov 0x1c0(%rbx),%rdi
12: 31 f6 xor %esi,%esi
14: 4c rex.WR
15: 8b .byte 0x8b
RSP: 0018:ffffbf0b41237d20 EFLAGS: 00010216
RAX: ffff9dfe0ba2ec00 RBX: 0000000000000000 RCX: ffffffffc0ceb630
RDX: ffff9dfe0ba2ec38 RSI: 000000007fffffff RDI: 0000000000000000
RBP: ffff9dfe0a07e000 R08: 0000000000000000 R09: ffffffffc0d4a9a0
R10: 8080808080808080 R11: 0000000000001800 R12: 0000000000000001
R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000008
FS: 0000000000000000(0000) GS:ffff9dfe3ecc0000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000000000000028 CR3: 00000001a500e002 CR4: 00000000003606e0
Call Trace:
nouveau_dmem_suspend (linux/drivers/gpu/drm/nouveau/nouveau_dmem.c:482
(discriminator 9)) nouveau
nouveau_do_suspend (linux/drivers/gpu/drm/nouveau/nouveau_drm.c:748) nouveau
nouveau_pmops_runtime_suspend
(linux/drivers/gpu/drm/nouveau/nouveau_drm.c:915) nouveau
pci_pm_runtime_suspend (linux/drivers/pci/pci-driver.c:1262)
? __switch_to_asm (linux/arch/x86/entry/entry_64.S:312)
? pci_has_legacy_pm_support (linux/drivers/pci/pci-driver.c:1238)
__rpm_callback (linux/drivers/base/power/runtime.c:357)
? pci_has_legacy_pm_support (linux/drivers/pci/pci-driver.c:1238)
rpm_callback (linux/drivers/base/power/runtime.c:490)
? pci_has_legacy_pm_support (linux/drivers/pci/pci-driver.c:1238)
rpm_suspend (linux/drivers/base/power/runtime.c:629)
? __switch_to_asm (linux/arch/x86/entry/entry_64.S:312)
? __switch_to_asm (linux/arch/x86/entry/entry_64.S:312)
? __switch_to_asm (linux/arch/x86/entry/entry_64.S:312)
? __switch_to_asm (linux/arch/x86/entry/entry_64.S:312)
? __switch_to_asm (linux/arch/x86/entry/entry_64.S:312)
pm_runtime_work (linux/drivers/base/power/runtime.c:922)
process_one_work (linux/./arch/x86/include/asm/preempt.h:26
linux/kernel/workqueue.c:2278)
worker_thread (linux/./include/linux/compiler.h:193
linux/./include/linux/list.h:237 linux/kernel/workqueue.c:2416)
? process_one_work (linux/kernel/workqueue.c:2358)
kthread (linux/kernel/kthread.c:253)
? kthread_create_worker_on_cpu (linux/kernel/kthread.c:213)
ret_from_fork (linux/arch/x86/entry/entry_64.S:358)
Modules linked in: rfcomm af_packet snd_hda_codec_hdmi bnep uvcvideo
videobuf2_vmalloc rtsx_usb_sdmmc videobuf2_memops btusb rtsx_usb_ms
videobuf2_v4l2 btrtl mmc_core memstick btbcm videodev btintel
videobuf2_common rtsx_usb bluetooth usbhid ecdh_generic
snd_hda_codec_realtek snd_hda_codec_generic ledtrig_audio nouveau arc4
i915 nls_iso8859_1 nls_cp437 vfat fat intel_rapl x86_pkg_temp_thermal
intel_powerclamp coretemp kvm_intel ath10k_pci kvm ath10k_core msr
snd_hda_intel mxm_wmi ath snd_hda_codec irqbypass joydev ttm
snd_hda_core mac80211 snd_hwdep crct10dif_pclmul drm_kms_helper
crc32_pclmul snd_pcm crc32c_intel hid_multitouch drm snd_timer
ghash_clmulni_intel hid_generic mei_hdcp iTCO_wdt aesni_intel snd
iTCO_vendor_support cfg80211 aes_x86_64 crypto_simd fb_sys_fops cryptd
acerfan r8169 syscopyarea glue_helper sysfillrect idma64 sysimgblt
realtek acer_wmi i2c_algo_bit soundcore mei_me libphy
intel_wmi_thunderbolt sparse_keymap pcspkr intel_pch_thermal
wmi_bmof rfkill intel_lpss_pci mei i2c_i801 intel_lpss thermal battery
ac tpm_crb tpm_tis tpm_tis_core pinctrl_sunrisepoint pinctrl_intel tpm
pcc_cpufreq acpi_pad button xhci_pci serio_raw xhci_hcd usbcore i2c_hid
wmi video sg dm_multipath dm_mod scsi_dh_rdac scsi_dh_emc scsi_dh_alua
efivarfs autofs4
CR2: 0000000000000028
---[ end trace abc30d2b924ee9b1 ]---
RIP: 0010:nouveau_bo_unpin (linux/./include/linux/compiler.h:193
linux/./arch/x86/include/asm/atomic.h:31
linux/./include/asm-generic/atomic-instrumented.h:27
linux/./include/linux/refcount.h:43 linux/./include/linux/kref.h:38
linux/./include/drm/ttm/ttm_bo_driver.h:721
linux/drivers/gpu/drm/nouveau/nouveau_bo.c:454) nouveau
Code: 89 d9 48 c7 c6 50 04 e5 c0 c4 42 79 f7 c0 bd f0 ff ff ff e8 42 d5
7a c6 ff 83 00 04 00 00 e9 17 ff ff ff 41 54 55 53 48 89 fb <8b> 47 28
85 c0 0f 84 cf 00 00 00 48 8b bb c0 01 00 00 31 f6 4c 8b
All code
========
0: 89 d9 mov %ebx,%ecx
2: 48 c7 c6 50 04 e5 c0 mov $0xffffffffc0e50450,%rsi
9: c4 42 79 f7 c0 shlx %eax,%r8d,%r8d
e: bd f0 ff ff ff mov $0xfffffff0,%ebp
13: e8 42 d5 7a c6 callq 0xffffffffc67ad55a
18: ff 83 00 04 00 00 incl 0x400(%rbx)
1e: e9 17 ff ff ff jmpq 0xffffffffffffff3a
23: 41 54 push %r12
25: 55 push %rbp
26: 53 push %rbx
27: 48 89 fb mov %rdi,%rbx
2a:* 8b 47 28 mov 0x28(%rdi),%eax <-- trapping
instruction
2d: 85 c0 test %eax,%eax
2f: 0f 84 cf 00 00 00 je 0x104
35: 48 8b bb c0 01 00 00 mov 0x1c0(%rbx),%rdi
3c: 31 f6 xor %esi,%esi
3e: 4c rex.WR
3f: 8b .byte 0x8b
Code starting with the faulting instruction
===========================================
0: 8b 47 28 mov 0x28(%rdi),%eax
3: 85 c0 test %eax,%eax
5: 0f 84 cf 00 00 00 je 0xda
b: 48 8b bb c0 01 00 00 mov 0x1c0(%rbx),%rdi
12: 31 f6 xor %esi,%esi
14: 4c rex.WR
15: 8b .byte 0x8b
RSP: 0018:ffffbf0b41237d20 EFLAGS: 00010216
RAX: ffff9dfe0ba2ec00 RBX: 0000000000000000 RCX: ffffffffc0ceb630
RDX: ffff9dfe0ba2ec38 RSI: 000000007fffffff RDI: 0000000000000000
RBP: ffff9dfe0a07e000 R08: 0000000000000000 R09: ffffffffc0d4a9a0
R10: 8080808080808080 R11: 0000000000001800 R12: 0000000000000001
R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000008
FS: 0000000000000000(0000) GS:ffff9dfe3ecc0000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000000000000028 CR3: 00000001a500e002 CR4: 00000000003606e0
[2]:
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c
b/drivers/gpu/drm/nouveau/nouveau_bo.c
index 34a998012bf6..aee10a120896 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -377,11 +377,21 @@ nouveau_bo_placement_set(struct nouveau_bo *nvbo,
uint32_t type, uint32_t busy)
int
nouveau_bo_pin(struct nouveau_bo *nvbo, uint32_t memtype, bool contig)
{
+ WARN_ON(!virt_addr_valid(nvbo));
+
+ if (!virt_addr_valid(nvbo))
+ return 0;
+
struct nouveau_drm *drm = nouveau_bdev(nvbo->bo.bdev);
struct ttm_buffer_object *bo = &nvbo->bo;
bool force = false, evict = false;
int ret;
+ WARN_ON(!virt_addr_valid(bo));
+
+ if (!virt_addr_valid(bo))
+ return 0;
+
ret = ttm_bo_reserve(bo, false, false, NULL);
if (ret)
return ret;
@@ -447,10 +457,20 @@ nouveau_bo_pin(struct nouveau_bo *nvbo, uint32_t
memtype, bool contig)
int
nouveau_bo_unpin(struct nouveau_bo *nvbo)
{
+ WARN_ON(!virt_addr_valid(nvbo));
+
+ if (!virt_addr_valid(nvbo))
+ return 0;
+
struct nouveau_drm *drm = nouveau_bdev(nvbo->bo.bdev);
struct ttm_buffer_object *bo = &nvbo->bo;
int ret, ref;
+ WARN_ON(!virt_addr_valid(bo));
+
+ if (!virt_addr_valid(bo))
+ return 0;
+
ret = ttm_bo_reserve(bo, false, false, NULL);
if (ret)
return ret;
More information about the Nouveau
mailing list