Hide Forgot
Description of problem: This should not be possible: $ ip route get 10.42.38.163 10.42.38.163 dev brvlbasic src 10.42.38.164 Floating point exception Version-Release number of selected component (if applicable): iproute-2.6.32-31.el6.x86_64 : Advanced IP routing and network device : configuration tools Repo : installed Matched from: Other : Provides-match: /sbin/ip How reproducible: For a few minutes, every time with those arguments. I tried no other arguments. However, it now works properly: $ ip route get 10.42.38.163 10.42.38.163 dev brvlbasic src 10.42.38.164 cache mtu 1500 advmss 1460 hoplimit 64 Additional info:
Strace: # strace -f ip route get 10.42.38.163 execve("/sbin/ip", ["ip", "route", "get", "10.42.38.163"], [/* 22 vars */]) = 0 brk(0) = 0x2369000 mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f8116a3b000 access("/etc/ld.so.preload", R_OK) = -1 ENOENT (No such file or directory) open("/etc/ld.so.cache", O_RDONLY) = 3 fstat(3, {st_mode=S_IFREG|0644, st_size=35471, ...}) = 0 mmap(NULL, 35471, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7f8116a32000 close(3) = 0 open("/lib64/libresolv.so.2", O_RDONLY) = 3 read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\00009\0\0\0\0\0\0"..., 832) = 832 fstat(3, {st_mode=S_IFREG|0755, st_size=110960, ...}) = 0 mmap(NULL, 2202248, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f8116603000 mprotect(0x7f8116619000, 2097152, PROT_NONE) = 0 mmap(0x7f8116819000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x16000) = 0x7f8116819000 mmap(0x7f811681b000, 6792, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x7f811681b000 close(3) = 0 open("/lib64/libdl.so.2", O_RDONLY) = 3 read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\340\r\0\0\0\0\0\0"..., 832) = 832 fstat(3, {st_mode=S_IFREG|0755, st_size=19536, ...}) = 0 mmap(NULL, 2109696, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f81163ff000 mprotect(0x7f8116401000, 2097152, PROT_NONE) = 0 mmap(0x7f8116601000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x2000) = 0x7f8116601000 close(3) = 0 open("/lib64/libc.so.6", O_RDONLY) = 3 read(3, "\177ELF\2\1\1\3\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0000\356\1\0\0\0\0\0"..., 832) = 832 fstat(3, {st_mode=S_IFREG|0755, st_size=1921216, ...}) = 0 mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f8116a31000 mmap(NULL, 3750152, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f811606b000 mprotect(0x7f81161f6000, 2093056, PROT_NONE) = 0 mmap(0x7f81163f5000, 20480, PROT_READ|PROT_WRITE, 
MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x18a000) = 0x7f81163f5000 mmap(0x7f81163fa000, 18696, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x7f81163fa000 close(3) = 0 mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f8116a30000 mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f8116a2f000 arch_prctl(ARCH_SET_FS, 0x7f8116a30700) = 0 mprotect(0x7f81163f5000, 16384, PROT_READ) = 0 mprotect(0x7f8116601000, 4096, PROT_READ) = 0 mprotect(0x7f8116819000, 4096, PROT_READ) = 0 mprotect(0x7f8116a3c000, 4096, PROT_READ) = 0 munmap(0x7f8116a32000, 35471) = 0 socket(PF_NETLINK, SOCK_RAW, 0) = 3 setsockopt(3, SOL_SOCKET, SO_SNDBUF, [32768], 4) = 0 setsockopt(3, SOL_SOCKET, SO_RCVBUF, [1048576], 4) = 0 bind(3, {sa_family=AF_NETLINK, pid=0, groups=00000000}, 12) = 0 getsockname(3, {sa_family=AF_NETLINK, pid=1019, groups=00000000}, [12]) = 0 sendto(3, "\34\0\0\0\22\0\1\3ob\263R\0\0\0\0\0\0\0\0\10\0\35\0\1\0\0\0", 28, 0, NULL, 0) = 28 recvmsg(3, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, msg_iov(1)=[{"\350\3\0\0\20\0\2\0ob\263R\373\3\0\0\0\0\4\3\1\0\0\0I\0\1\0\0\0\0\0"..., 16384}], msg_controllen=0, msg_flags=0}, 0) = 3040 brk(0) = 0x2369000 brk(0x238a000) = 0x238a000 recvmsg(3, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, msg_iov(1)=[{"\354\3\0\0\20\0\2\0ob\263R\373\3\0\0\0\0\1\0\4\0\0\0C\24\1\0\0\0\0\0"..., 16384}], msg_controllen=0, msg_flags=0}, 0) = 3072 recvmsg(3, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, msg_iov(1)=[{"\360\3\0\0\20\0\2\0ob\263R\373\3\0\0\0\0\1\0\7\0\0\0C\20\1\0\0\0\0\0"..., 16384}], msg_controllen=0, msg_flags=0}, 0) = 3076 recvmsg(3, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, msg_iov(1)=[{" \4\0\0\20\0\2\0ob\263R\373\3\0\0\0\0\1\0\n\0\0\0C\24\1\0\0\0\0\0"..., 16384}], msg_controllen=0, msg_flags=0}, 0) = 2064 recvmsg(3, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, 
msg_iov(1)=[{"\24\0\0\0\3\0\2\0ob\263R\373\3\0\0\0\0\0\0\n\0\0\0C\24\1\0\0\0\0\0"..., 16384}], msg_controllen=0, msg_flags=0}, 0) = 20 sendmsg(3, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, msg_iov(1)=[{"$\0\0\0\32\0\1\0pb\263R\0\0\0\0\2 \0\0\0\0\0\0\0\0\0\0\10\0\1\0"..., 36}], msg_controllen=0, msg_flags=0}, 0) = 36 recvmsg(3, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, msg_iov(1)=[{"|\0\0\0\30\0\0\0pb\263R\373\3\0\0\2 \0\0\376\0\0\1\0\2\0\0\10\0\17\0"..., 16384}], msg_controllen=0, msg_flags=0}, 0) = 124 fstat(1, {st_mode=S_IFCHR|0620, st_rdev=makedev(136, 2), ...}) = 0 mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f8116a3a000 write(1, "10.42.38.163 dev brvlbasic src "..., 4610.42.38.163 dev brvlbasic src 10.42.38.164 ) = 46 --- SIGFPE (Floating point exception) @ 0 (0) --- +++ killed by SIGFPE +++ Floating point exception
(gdb) run route get 10.42.38.163 Starting program: /sbin/ip route get 10.42.38.163 10.42.38.163 dev brvlbasic src 10.42.38.164 Program received signal SIGFPE, Arithmetic exception. 0x000000000040c5ae in print_route (who=<value optimized out>, n=0x7fffffffe150, arg=0x7ffff79b9780) at /usr/include/bits/stdio2.h:98 98 return __fprintf_chk (__stream, __USE_FORTIFY_LEVEL - 1, __fmt, (gdb) bt #0 0x000000000040c5ae in print_route (who=<value optimized out>, n=0x7fffffffe150, arg=0x7ffff79b9780) at /usr/include/bits/stdio2.h:98 #1 0x000000000040cb2d in iproute_get (argc=0, argv=<value optimized out>) at iproute.c:1434 #2 0x0000000000404c4b in main (argc=4, argv=0x7fffffffe6f8) at ip.c:248
I discovered how to reproduce it, at least in my environment. "ip route get" to that address works fine--until I try to ping that address. Once I try to ping that address, I get: $ ping 10.42.38.163 PING 10.42.38.163 (10.42.38.163) 56(84) bytes of data. From 10.42.38.164 icmp_seq=2 Destination Host Unreachable From 10.42.38.164 icmp_seq=3 Destination Host Unreachable From 10.42.38.164 icmp_seq=4 Destination Host Unreachable ^C --- 10.42.38.163 ping statistics --- 5 packets transmitted, 0 received, +3 errors, 100% packet loss, time 4004ms pipe 3 After this, I get the floating point exception error again.
Occurs to me every time when I use: # ip -s route show cache Some outputs (vm): [root@linux ~]# ip -s route show cache 192.168.135.2 from 192.168.135.128 dev eth0 Floating point exception [root@linux ~]# uname -a Linux linux.frenche.cp 2.6.32-431.3.1.el6.i686 #1 SMP Fri Jan 3 18:53:30 UTC 2014 i686 i686 i386 GNU/Linux [root@linux ~]# rpm -q iproute iproute-2.6.32-31.el6.i686 [root@linux ~]# ip route show cache 192.168.135.2 from 192.168.135.128 dev eth0 cache mtu 1500 advmss 1460 hoplimit 64 local 192.168.135.128 from 192.168.135.2 dev lo src 192.168.135.128 cache <local,src-direct> iif eth0 local 127.0.0.1 from 127.0.0.1 dev lo cache <local> mtu 16436 advmss 16396 hoplimit 64 local 192.168.135.128 from 192.168.135.1 dev lo src 192.168.135.128 cache <local,src-direct> iif eth0 192.168.135.2 dev eth0 src 192.168.135.128 cache mtu 1500 advmss 1460 hoplimit 64 broadcast 192.168.135.255 from 192.168.135.1 dev lo src 192.168.135.128 cache <local,brd,src-direct> iif eth0 local 127.0.0.1 dev lo src 127.0.0.1 cache <local> mtu 16436 advmss 16396 hoplimit 64 192.168.135.1 from 192.168.135.128 tos lowdelay dev eth0 cache mtu 1500 advmss 1460 hoplimit 64 [root@linux ~]# strace -f ip -s route show cache execve("/sbin/ip", ["ip", "-s", "route", "show", "cache"], [/* 24 vars */]) = 0 brk(0) = 0x9f06000 mmap2(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0xb77f9000 access("/etc/ld.so.preload", R_OK) = -1 ENOENT (No such file or directory) open("/etc/ld.so.cache", O_RDONLY) = 3 fstat64(3, {st_mode=S_IFREG|0644, st_size=28148, ...}) = 0 mmap2(NULL, 28148, PROT_READ, MAP_PRIVATE, 3, 0) = 0xb77f2000 close(3) = 0 open("/lib/libresolv.so.2", O_RDONLY) = 3 read(3, "\177ELF\1\1\1\0\0\0\0\0\0\0\0\0\3\0\3\0\1\0\0\0\240&\0\0004\0\0\0"..., 512) = 512 fstat64(3, {st_mode=S_IFREG|0755, st_size=103384, ...}) = 0 mmap2(NULL, 104520, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0xe87000 mprotect(0xe9c000, 4096, PROT_NONE) = 0 mmap2(0xe9d000, 8192, 
PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x15) = 0xe9d000 mmap2(0xe9f000, 6216, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0xe9f000 close(3) = 0 open("/lib/libdl.so.2", O_RDONLY) = 3 read(3, "\177ELF\1\1\1\0\0\0\0\0\0\0\0\0\3\0\3\0\1\0\0\0`\n\0\0004\0\0\0"..., 512) = 512 fstat64(3, {st_mode=S_IFREG|0755, st_size=17892, ...}) = 0 mmap2(NULL, 16500, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0xad9000 mmap2(0xadc000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x2) = 0xadc000 close(3) = 0 open("/lib/libc.so.6", O_RDONLY) = 3 read(3, "\177ELF\1\1\1\3\0\0\0\0\0\0\0\0\3\0\3\0\1\0\0\0\200n\1\0004\0\0\0"..., 512) = 512 fstat64(3, {st_mode=S_IFREG|0755, st_size=1907156, ...}) = 0 mmap2(NULL, 1665452, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x6dd000 mmap2(0x86e000, 12288, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x191) = 0x86e000 mmap2(0x871000, 10668, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x871000 close(3) = 0 mmap2(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0xb77f1000 mmap2(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0xb77f0000 set_thread_area({entry_number:-1 -> 6, base_addr:0xb77f1b30, limit:1048575, seg_32bit:1, contents:0, read_exec_only:0, limit_in_pages:1, seg_not_present:0, useable:1}) = 0 mprotect(0x86e000, 8192, PROT_READ) = 0 mprotect(0xadc000, 4096, PROT_READ) = 0 mprotect(0xe9d000, 4096, PROT_READ) = 0 mprotect(0xb55000, 4096, PROT_READ) = 0 munmap(0xb77f2000, 28148) = 0 socket(PF_NETLINK, SOCK_RAW, 0) = 3 setsockopt(3, SOL_SOCKET, SO_SNDBUF, [32768], 4) = 0 setsockopt(3, SOL_SOCKET, SO_RCVBUF, [1048576], 4) = 0 bind(3, {sa_family=AF_NETLINK, pid=0, groups=00000000}, 12) = 0 getsockname(3, {sa_family=AF_NETLINK, pid=18622, groups=00000000}, [12]) = 0 time(NULL) = 1389705223 send(3, "\34\0\0\0\22\0\1\3\0108\325R\0\0\0\0\0\0\0\0\10\0\35\0\1\0\0\0", 28, 0) = 28 
recvmsg(3, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, msg_iov(1)=[{"\344\3\0\0\20\0\2\0\0108\325R\276H\0\0\0\0\4\3\1\0\0\0I\0\1\0\0\0\0\0"..., 16384}], msg_controllen=0, msg_flags=0}, 0) = 2008 brk(0) = 0x9f06000 brk(0x9f27000) = 0x9f27000 access("/etc/sysconfig/32bit_ssse3_memcpy_via_32bit_ssse3_memmove", F_OK) = -1 ENOENT (No such file or directory) recvmsg(3, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, msg_iov(1)=[{"\24\0\0\0\3\0\2\0\0108\325R\276H\0\0\0\0\0\0\1\0\0\0I\0\1\0\0\0\0\0"..., 16384}], msg_controllen=0, msg_flags=0}, 0) = 20 sendto(3, "\34\0\0\0\32\0\1\1\t8\325R\0\0\0\0\2\0\0\0\0\0\0\0\0\2\0\0", 28, 0, {sa_family=AF_NETLINK, pid=0, groups=00000000}, 12) = 28 recvmsg(3, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, msg_iov(1)=[{"|\0\0\0\30\0\2\0\t8\325R\276H\0\0\2 \0\376\0\0\1\0\2\0\0\10\0\17\0"..., 16384}], msg_controllen=0, msg_flags=0}, 0) = 956 fstat64(1, {st_mode=S_IFCHR|0620, st_rdev=makedev(136, 0), ...}) = 0 mmap2(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0xb77f8000 write(1, "192.168.135.2 from 192.168.135.1"..., 45192.168.135.2 from 192.168.135.128 dev eth0 ) = 45 --- SIGFPE (Floating point exception) @ 0 (0) --- +++ killed by SIGFPE +++ Floating point exception
Created attachment 854504 [details] stops the crashing hz is not initialized before use when reporting contents of the route cache. The patch fixes the crash, but the same "static int hz" is used for caching the return value of get_hz() and get_user_hz() at the same time. This does not look right. Also, get_hz() and get_user_hz() do internal caching anyway.
I'm also seeing this issue on x86_64, with 2.6.32-31. It's sporadic, and occurs briefly after you ping a machine that doesn't respond.
I can confirm this with 2.6.32-431.el6.x86_64 [ ~]# ip a #...snip... 14: bond0: <BROADCAST,MULTICAST,MASTER,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP link/ether ac:16:2d:a4:d0:8a brd ff:ff:ff:ff:ff:ff inet 10.144.98.151/27 brd 10.144.98.159 scope global bond0 inet 10.144.98.153/27 brd 10.144.98.159 scope global secondary bond0 [ ~]# ping 10.144.98.154 PING 10.144.98.154 (10.144.98.154) 56(84) bytes of data. From 10.144.98.151 icmp_seq=2 Destination Host Unreachable From 10.144.98.151 icmp_seq=3 Destination Host Unreachable [ ~]# ip route get 10.144.98.154 10.144.98.154 dev bond0 src 10.144.98.151 Floating point exception (core dumped) [ ~]# echo $? 136 You have mail in /var/spool/mail/root
Same here with the above behavior. In addition, the log is showing ip[9312] trap divide error in ip:40c5ae sp:7fffb4968c40 error:0 in ip[400000+37000] each time I trigger the exception. Kernel 2.6.32-431.el6.x86_64, if that helps. Restarting the networking service clears the exception temporarily.