Description of problem: During code inspection (dzickus & myself) it was noticed that the 4.8 kernel does not disable all cpus' watchdog when nmi_watchdog == NMI_LOCAL_APIC: int __init check_nmi_watchdog (void) { int counts[NR_CPUS]; int cpu; if (!atomic_read(&nmi_watchdog_active)) return 0; printk(KERN_INFO "testing NMI watchdog ... "); for (cpu = 0; cpu < NR_CPUS; cpu++) counts[cpu] = cpu_pda[cpu].__nmi_count; local_irq_enable(); mdelay((10*1000)/nmi_hz); // wait 10 ticks for (cpu = 0; cpu < NR_CPUS; cpu++) { if (!cpu_online(cpu)) continue; if (!per_cpu(wd_enabled, cpu)) continue; if (cpu_pda[cpu].__nmi_count - counts[cpu] <= 5) { printk("CPU#%d: NMI appears to be stuck (%d)!\n", cpu, cpu_pda[cpu].__nmi_count); if (atomic_dec_and_test(&nmi_watchdog_active)) nmi_active = 0; per_cpu(wd_enabled, cpu) = 0; <<< only disables _this_ cpu's watchdog, not all of them. goto error; } } if (!atomic_read(&nmi_watchdog_active)) { atomic_set(&nmi_watchdog_active, -1); nmi_active = -1; goto error; }
This is only seen in the error path, and with RHEL-4 reaching the end of its life soon, I don't think it is worth fixing.