This seems to happen with many different driver versions, but it happens more often with newer ones (e.g. 530 compared to 525).
Then nvidia-modeset goes to 100% CPU usage.
There are many reports of this appearing since driver 470, and I can confirm I've seen it on various machines.
https://forums.developer.nvidia.com/t/not-coming-back-from-suspend/176446
https://forums.developer.nvidia.com/t/systemds-suspend-then-hibernate-not-working-in-nvidia-optimus-laptop/213690
https://forums.developer.nvidia.com/t/black-screen-when-resuming-systemctl-suspend-using-nvidia-driver-470-57-02-with-kernel-5-8-0-63-generic-on-gtx-970-xubuntu-20-04-lts/184644
This report proposes a fix that works for some people:
https://forums.developer.nvidia.com/t/fixed-suspend-resume-issues-with-the-driver-version-470/187150
Basically, the idea is to disable all NVIDIA-related systemd services:
systemctl stop nvidia-suspend
systemctl stop nvidia-hibernate
systemctl stop nvidia-resume
systemctl disable nvidia-suspend
systemctl disable nvidia-hibernate
systemctl disable nvidia-resume
# Move the system-sleep hook out of the way so it no longer runs; it is kept in ~ as a backup, just in case
mv /lib/systemd/system-sleep/nvidia ~
Here is the dmesg output when this happens; the system wakes up but the GPU hangs:
[245728.273127] WARNING: CPU: 12 PID: 25855 at /var/lib/dkms/nvidia/525.105.17/build/nvidia/nv.c:3913 nv_restore_user_channels+0x12f/0x1e0 [nvidia]
[245728.273128] Modules linked in: bluetooth ecdh_generic ecc uas usb_storage ufs qnx4 hfsplus hfs minix ntfs msdos jfs xfs cpuid snd_seq_dummy xt_conntrack nf_conntrack_netlink nfnetlink xfrm_user xfrm_algo xt_addrtype br_netfilter aufs xt_CHECKSUM iptable_mangle xt_MASQUERADE iptable_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 xt_tcpudp bridge stp llc iptable_filter bpfilter overlay nls_iso8859_1 binfmt_misc nvidia_uvm(OE) nvidia_drm(POE) nvidia_modeset(POE) nvidia(POE) intel_rapl_msr intel_rapl_common input_leds sb_edac x86_pkg_temp_thermal intel_powerclamp kvm_intel snd_hda_codec_hdmi kvm snd_hda_codec_realtek snd_hda_codec_generic crct10dif_pclmul crc32_pclmul ledtrig_audio ghash_clmulni_intel aesni_intel snd_hda_intel snd_intel_dspcfg crypto_simd drm_kms_helper snd_hda_codec cryptd snd_hda_core drm glue_helper snd_hwdep fb_sys_fops snd_pcm syscopyarea snd_seq_midi snd_seq_midi_event sysfillrect snd_rawmidi rapl sysimgblt snd_seq intel_cstate mei_me dell_smbios snd_seq_device
[245728.273154] dcdbas snd_timer dell_smm_hwmon snd wmi_bmof intel_wmi_thunderbolt dell_wmi_descriptor mei soundcore lpc_ich mac_hid mxm_wmi sch_fq_codel hwmon_vid coretemp parport_pc ppdev lp parport ip_tables x_tables autofs4 hid_generic usbhid hid btrfs zstd_compress raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx xor raid6_pq libcrc32c raid0 multipath linear dm_mirror dm_region_hash dm_log raid1 igb e1000e dca i2c_algo_bit ahci libahci wmi
[245728.273172] CPU: 12 PID: 25855 Comm: nvidia-sleep.sh Tainted: P OE 5.4.0-149-generic #166~18.04.1-Ubuntu
[245728.273172] Hardware name: rtt
[245728.273293] RIP: 0010:nv_restore_user_channels+0x12f/0x1e0 [nvidia]
[245728.273294] Code: 44 89 f8 5b 41 5c 41 5d 41 5e 41 5f 5d c3 4c 89 ef 45 31 ff e8 42 8d 89 d5 44 89 f8 5b 41 5c 41 5d 41 5e 41 5f 5d c3 45 31 f6 <0f> 0b 4c 89 e7 e8 d7 8c 89 d5 be 01 00 00 00 48 89 df e8 0a a3 00
[245728.273295] RSP: 0018:ffffac3d849d7dc8 EFLAGS: 00010206
[245728.273296] RAX: 0000000000000003 RBX: ffff91f0c026d000 RCX: ffffac3d849d7d48
[245728.273296] RDX: ffffac3d80b93e50 RSI: 0000000000000246 RDI: ffffac3d849d7cf8
[245728.273297] RBP: ffffac3d849d7df0 R08: 0000000000000000 R09: 0000000000000001
[245728.273297] R10: ffffac3d849d7aa0 R11: 00000000000084ca R12: ffff91f0c026d678
[245728.273298] R13: ffff91f0c026d550 R14: ffff91eefdd13000 R15: 0000000000000003
[245728.273299] FS: 00007f6323bf9740(0000) GS:ffff91f0cf900000(0000) knlGS:0000000000000000
[245728.273299] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[245728.273300] CR2: 0000557b4901f008 CR3: 000000041d754004 CR4: 00000000003606e0
[245728.273300] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[245728.273301] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[245728.273301] Call Trace:
[245728.273378] nv_set_system_power_state+0x2d7/0x480 [nvidia]
[245728.273455] nv_procfs_write_suspend+0xe3/0x160 [nvidia]
[245728.273458] proc_reg_write+0x3e/0x60
[245728.273460] __vfs_write+0x1b/0x40
[245728.273461] vfs_write+0xb1/0x1a0
[245728.273462] ksys_write+0xa7/0xe0
[245728.273463] __x64_sys_write+0x1a/0x20
[245728.273465] do_syscall_64+0x57/0x190
[245728.273468] entry_SYSCALL_64_after_hwframe+0x5c/0xc1
[245728.273469] RIP: 0033:0x7f63232f7104
[245728.273470] Code: 89 02 48 c7 c0 ff ff ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 8d 05 e1 08 2e 00 8b 00 85 c0 75 13 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 54 f3 c3 66 90 41 54 55 49 89 d4 53 48 89 f5
[245728.273471] RSP: 002b:00007ffc609a56c8 EFLAGS: 00000246 ORIG_RAX: 0000000000000001
[245728.273472] RAX: ffffffffffffffda RBX: 0000000000000007 RCX: 00007f63232f7104
[245728.273472] RDX: 0000000000000007 RSI: 0000557b4901f700 RDI: 0000000000000001
[245728.273473] RBP: 0000557b4901f700 R08: 000000000000000a R09: 0000000000000006
[245728.273473] R10: 000000000000000a R11: 0000000000000246 R12: 00007f63235d3760
[245728.273474] R13: 0000000000000007 R14: 00007f63235cf2a0 R15: 00007f63235ce760
[245728.273475] ---[ end trace 2ff2aa81ae8de839 ]---
[245728.273484] ------------[ cut here ]------------
[245728.273559] WARNING: CPU: 12 PID: 25855 at /var/lib/dkms/nvidia/525.105.17/build/nvidia/nv.c:4143 nv_set_system_power_state+0x31f/0x480 [nvidia]
[245728.273560] Modules linked in: bluetooth ecdh_generic ecc uas usb_storage ufs qnx4 hfsplus hfs minix ntfs msdos jfs xfs cpuid snd_seq_dummy xt_conntrack nf_conntrack_netlink nfnetlink xfrm_user xfrm_algo xt_addrtype br_netfilter aufs xt_CHECKSUM iptable_mangle xt_MASQUERADE iptable_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 xt_tcpudp bridge stp llc iptable_filter bpfilter overlay nls_iso8859_1 binfmt_misc nvidia_uvm(OE) nvidia_drm(POE) nvidia_modeset(POE) nvidia(POE) intel_rapl_msr intel_rapl_common input_leds sb_edac x86_pkg_temp_thermal intel_powerclamp kvm_intel snd_hda_codec_hdmi kvm snd_hda_codec_realtek snd_hda_codec_generic crct10dif_pclmul crc32_pclmul ledtrig_audio ghash_clmulni_intel aesni_intel snd_hda_intel snd_intel_dspcfg crypto_simd drm_kms_helper snd_hda_codec cryptd snd_hda_core drm glue_helper snd_hwdep fb_sys_fops snd_pcm syscopyarea snd_seq_midi snd_seq_midi_event sysfillrect snd_rawmidi rapl sysimgblt snd_seq intel_cstate mei_me dell_smbios snd_seq_device
[245728.273574] dcdbas snd_timer dell_smm_hwmon snd wmi_bmof intel_wmi_thunderbolt dell_wmi_descriptor mei soundcore lpc_ich mac_hid mxm_wmi sch_fq_codel hwmon_vid coretemp parport_pc ppdev lp parport ip_tables x_tables autofs4 hid_generic usbhid hid btrfs zstd_compress raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx xor raid6_pq libcrc32c raid0 multipath linear dm_mirror dm_region_hash dm_log raid1 igb e1000e dca i2c_algo_bit ahci libahci wmi
[245728.273583] CPU: 12 PID: 25855 Comm: nvidia-sleep.sh Tainted: P W OE 5.4.0-149-generic #166~18.04.1-Ubuntu
[245728.273583] Hardware name: rtt
[245728.273664] RIP: 0010:nv_set_system_power_state+0x31f/0x480 [nvidia]
[245728.273665] Code: ff e8 f5 e8 00 00 48 c7 c7 20 73 df c3 e8 e9 5e 89 d5 89 1d e7 9c 38 03 e9 4a fd ff ff 4c 89 f7 e8 96 59 89 d5 e9 49 fe ff ff <0f> 0b eb b8 45 31 ff 48 c7 c7 d0 72 df c3 45 31 e4 e8 7b 59 89 d5
[245728.273666] RSP: 0018:ffffac3d849d7e00 EFLAGS: 00010206
[245728.273667] RAX: 0000000000000003 RBX: 0000000000000002 RCX: 000000000001d4c0
[245728.273667] RDX: 000000000001d4bf RSI: 9380d93cdab66952 RDI: 00003a4cb021fac0
[245728.273668] RBP: ffffac3d849d7e30 R08: 0000000000000000 R09: 0000000000000001
[245728.273668] R10: ffffac3d849d7aa0 R11: 00000000000084ca R12: ffff91f0c026d000
[245728.273669] R13: 0000000000000000 R14: 0000557b4901f700 R15: ffff91ede71f6800
[245728.273669] FS: 00007f6323bf9740(0000) GS:ffff91f0cf900000(0000) knlGS:0000000000000000
[245728.273670] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[245728.273670] CR2: 0000557b4901f008 CR3: 000000041d754004 CR4: 00000000003606e0
[245728.273671] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[245728.273671] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[245728.273672] Call Trace:
[245728.273748] nv_procfs_write_suspend+0xe3/0x160 [nvidia]
[245728.273750] proc_reg_write+0x3e/0x60
[245728.273752] __vfs_write+0x1b/0x40
[245728.273753] vfs_write+0xb1/0x1a0
[245728.273754] ksys_write+0xa7/0xe0
[245728.273755] __x64_sys_write+0x1a/0x20
[245728.273756] do_syscall_64+0x57/0x190
[245728.273758] entry_SYSCALL_64_after_hwframe+0x5c/0xc1
[245728.273758] RIP: 0033:0x7f63232f7104
[245728.273759] Code: 89 02 48 c7 c0 ff ff ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 8d 05 e1 08 2e 00 8b 00 85 c0 75 13 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 54 f3 c3 66 90 41 54 55 49 89 d4 53 48 89 f5
[245728.273760] RSP: 002b:00007ffc609a56c8 EFLAGS: 00000246 ORIG_RAX: 0000000000000001
[245728.273760] RAX: ffffffffffffffda RBX: 0000000000000007 RCX: 00007f63232f7104
[245728.273761] RDX: 0000000000000007 RSI: 0000557b4901f700 RDI: 0000000000000001
[245728.273761] RBP: 0000557b4901f700 R08: 000000000000000a R09: 0000000000000006
[245728.273762] R10: 000000000000000a R11: 0000000000000246 R12: 00007f63235d3760
[245728.273762] R13: 0000000000000007 R14: 00007f63235cf2a0 R15: 00007f63235ce760
[245728.273763] ---[ end trace 2ff2aa81ae8de83a ]---
[245731.275215] nvidia-modeset: WARNING: GPU:0: Lost display notification (0:0x00000000); continuing.
[245733.531067] nvidia-modeset: ERROR: GPU:0: Idling display engine timed out: 0x0000947d:0:0:407
nvidia, resume, comm, sh, tainted, linux, ubuntu, mint, debianthis, drivers, newer, versions, vs, modeset, appearing, ve, various, https, forums, developer, resuming, systemctl, suspend, kernel, generic, gtx, xubuntu, lts, systemds, hibernate, optimus, laptop, proposes, disable, systemd, mv, lib, dmesg, output, gpu, hangs, cpu, pid, var, dkms, nv, nv_restore_user_channels, modules, linked, bluetooth, ecdh_generic, ecc, uas, usb_storage, ufs, qnx, hfsplus, hfs, minix, ntfs, msdos, jfs, xfs, cpuid, snd_seq_dummy, xt_conntrack, nf_conntrack_netlink, nfnetlink, xfrm_user, xfrm_algo, xt_addrtype, br_netfilter, aufs, xt_checksum, iptable_mangle, xt_masquerade, iptable_nat, nf_nat, nf_conntrack, nf_defrag_ipv, xt_tcpudp, stp, llc, iptable_filter, bpfilter, overlay, nls_iso, _, binfmt_misc, nvidia_uvm, oe, nvidia_drm, poe, nvidia_modeset, intel_rapl_msr, intel_rapl_common, input_leds, sb_edac, _pkg_temp_thermal, intel_powerclamp, kvm_intel, snd_hda_codec_hdmi, kvm, snd_hda_codec_realtek, snd_hda_codec_generic, crct, dif_pclmul, crc, _pclmul, ledtrig_audio, ghash_clmulni_intel, aesni_intel, snd_hda_intel, snd_intel_dspcfg, crypto_simd, drm_kms_helper, snd_hda_codec, cryptd, snd_hda_core, drm, glue_helper, snd_hwdep, fb_sys_fops, snd_pcm, syscopyarea, snd_seq_midi, snd_seq_midi_event, sysfillrect, snd_rawmidi, rapl, sysimgblt, snd_seq, intel_cstate, mei_me, dell_smbios, snd_seq_device, dcdbas, snd_timer, dell_smm_hwmon, snd, wmi_bmof, intel_wmi_thunderbolt, dell_wmi_descriptor, mei, soundcore, lpc_ich, mac_hid, mxm_wmi, sch_fq_codel, hwmon_vid, coretemp, parport_pc, ppdev, lp, parport, ip_tables, x_tables, autofs, hid_generic, usbhid, btrfs, zstd_compress, raid, async_raid, _recov, async_memcpy, async_pq, async_xor, async_tx, xor, _pq, libcrc, multipath, linear, dm_mirror, dm_region_hash, dm_log, igb, dca, c_algo_bit, ahci, libahci, wmi, hardware, rtt, ef, ff, df, rsp, ffffac, dc, eflags, rax, rbx, ffff, rcx, rdx, rsi, rdi, cf, rbp, aa, ca, eefdd, fs, bf, gs, knlgs, cs, ds, 
es, cr, fffe, nv_set_system_power_state, nv_procfs_write_suspend, xe, proc_reg_write, __vfs_write, vfs_write, xb, ksys_write, xa, __x, _sys_write, do_syscall_, entry_syscall_, _after_hwframe, xc, ffc, orig_rax, ffffffffffffffda, ce, ae, fd, fe, eb, cdab, cb, fac, ede, display, notification, continuing, idling, timed,