Bug 1263142 - `/bin/plymouth --details` stall
`/bin/plymouth --details` stall
Status: CLOSED WORKSFORME
Product: Red Hat Enterprise Linux 6
Classification: Red Hat
Component: plymouth (Show other bugs)
6.7
Unspecified Linux
medium Severity medium
: rc
: ---
Assigned To: Ray Strode [halfline]
Desktop QE
:
Depends On:
Blocks:
  Show dependency treegraph
 
Reported: 2015-09-15 04:19 EDT by Martin Cermak
Modified: 2015-09-21 13:29 EDT (History)
0 users

See Also:
Fixed In Version:
Doc Type: Bug Fix
Doc Text:
Story Points: ---
Clone Of:
Environment:
Last Closed: 2015-09-21 13:29:48 EDT
Type: Bug
Regression: ---
Mount Type: ---
Documentation: ---
CRM:
Verified Versions:
Category: ---
oVirt Team: ---
RHEL 7.3 requirements from Atomic Host:
Cloudforms Team: ---


Attachments (Terms of Use)

  None (edit)
Description Martin Cermak 2015-09-15 04:19:08 EDT
On my rhel-6.7 test box I was attempting to do a `service avahi-daemon restart` and noticed it "stalls". Strace shows that the stall is caused by `/bin/plymouth --details`. Versions used: plymouth-0.8.3-27.el6_5.1.x86_64, glibc-2.12-1.166.el6.x86_64.

Strace log:

=======
strace -f /bin/plymouth --details

... stuff deleted ...

open("/lib64/libpthread.so.0", O_RDONLY) = 3
read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\340]\340&7\0\0\0"..., 832) = 832
fstat(3, {st_mode=S_IFREG|0755, st_size=145896, ...}) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f913f9a4000
mmap(0x3726e00000, 2212848, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x3726e00000
mprotect(0x3726e17000, 2097152, PROT_NONE) = 0
mmap(0x3727017000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x17000) = 0x3727017000
mmap(0x3727019000, 13296, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x3727019000
close(3)                                = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f913f9a3000
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f913f9a2000
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f913f9a1000
arch_prctl(ARCH_SET_FS, 0x7f913f9a2700) = 0
mprotect(0x3727882000, 4096, PROT_READ) = 0
mprotect(0x3727406000, 4096, PROT_READ) = 0
mprotect(0x3726802000, 4096, PROT_READ) = 0
mprotect(0x3726d8a000, 16384, PROT_READ) = 0
mprotect(0x3727017000, 4096, PROT_READ) = 0
mprotect(0x372641f000, 4096, PROT_READ) = 0
munmap(0x7f913f9a6000, 63724)           = 0
set_tid_address(0x7f913f9a29d0)         = 14880
set_robust_list(0x7f913f9a29e0, 24)     = 0
futex(0x7fffe4fe5c8c, FUTEX_WAKE_PRIVATE, 1) = 0
futex(0x7fffe4fe5c8c, FUTEX_WAIT_BITSET_PRIVATE|FUTEX_CLOCK_REALTIME, 1, NULL, 7f913f9a2700) = -1 EAGAIN (Resource temporarily unavailable)
rt_sigaction(SIGRTMIN, {0x3726e05c60, [], SA_RESTORER|SA_SIGINFO, 0x3726e0f790}, NULL, 8) = 0
rt_sigaction(SIGRT_1, {0x3726e05cf0, [], SA_RESTORER|SA_RESTART|SA_SIGINFO, 0x3726e0f790}, NULL, 8) = 0
rt_sigprocmask(SIG_UNBLOCK, [RTMIN RT_1], NULL, 8) = 0
getrlimit(RLIMIT_STACK, {rlim_cur=10240*1024, rlim_max=RLIM64_INFINITY}) = 0
rt_sigaction(SIGPIPE, {SIG_IGN, [PIPE], SA_RESTORER|SA_RESTART, 0x3726a326a0}, {SIG_DFL, [], 0}, 8) = 0
brk(0)                                  = 0x1192000
brk(0x11b3000)                          = 0x11b3000
epoll_create(64)                        = 3
pipe([4, 5])                            = 0
fcntl(4, F_SETFD, FD_CLOEXEC|0x800)     = 0
fcntl(5, F_SETFD, FD_CLOEXEC|0x800)     = 0
epoll_ctl(3, EPOLL_CTL_ADD, 4, {EPOLLERR|EPOLLHUP, {u32=18424048, u64=18424048}}) = 0
epoll_ctl(3, EPOLL_CTL_MOD, 4, {EPOLLIN|EPOLLERR|EPOLLHUP, {u32=18424048, u64=18424048}}) = 0
open("proc/cmdline", O_RDONLY)          = -1 ENOENT (No such file or directory)
socket(PF_LOCAL, SOCK_STREAM, 0)        = 6
fcntl(6, F_SETFD, FD_CLOEXEC|0x800)     = 0
setsockopt(6, SOL_SOCKET, SO_PASSCRED, [1], 4) = 0
connect(6, {sa_family=AF_LOCAL, sun_path=@"/ply-boot-protocol"}, 110) = 0
epoll_ctl(3, EPOLL_CTL_ADD, 6, {EPOLLERR|EPOLLHUP, {u32=18436912, u64=18436912}}) = 0
epoll_ctl(3, EPOLL_CTL_MOD, 6, {EPOLLERR|EPOLLHUP, {u32=18436912, u64=18436912}}) = 0
epoll_ctl(3, EPOLL_CTL_MOD, 6, {EPOLLOUT|EPOLLERR|EPOLLHUP, {u32=18436912, u64=18436912}}) = 0
epoll_wait(3, {{EPOLLOUT, {u32=18436912, u64=18436912}}}, 8, -1) = 1
write(6, "!\0", 2)                      = 2
epoll_ctl(3, EPOLL_CTL_MOD, 6, {EPOLLIN|EPOLLOUT|EPOLLERR|EPOLLHUP, {u32=18436912, u64=18436912}}) = 0
epoll_ctl(3, EPOLL_CTL_MOD, 6, {EPOLLIN|EPOLLERR|EPOLLHUP, {u32=18436912, u64=18436912}}) = 0
epoll_wait(3,
=======

Traceback:

=======
 6.7 S x86_64 # gdb -q -args /bin/plymouth --details
Reading symbols from /bin/plymouth...Reading symbols from /usr/lib/debug/bin/plymouth.debug...done.
done.
(gdb) r
Starting program: /bin/plymouth --details
[Thread debugging using libthread_db enabled]
^C
Program received signal SIGINT, Interrupt.
0x0000003726ae8f43 in __epoll_wait_nocancel () at ../sysdeps/unix/syscall-template.S:82
82      T_PSEUDO (SYSCALL_SYMBOL, SYSCALL_NAME, SYSCALL_NARGS)
(gdb) bt
#0  0x0000003726ae8f43 in __epoll_wait_nocancel () at ../sysdeps/unix/syscall-template.S:82
#1  0x0000003727a0534b in ply_event_loop_process_pending_events (loop=0x608010) at ply-event-loop.c:1276
#2  0x0000003727a05da0 in ply_event_loop_run (loop=0x608010) at ply-event-loop.c:1364
#3  0x000000000040369d in main (argc=<value optimized out>, argv=<value optimized out>) at ./plymouth.c:1133
(gdb)
=======
Comment 2 Martin Cermak 2015-09-15 06:14:07 EDT
I think the hang happens here in epoll_wait():

=======
(gdb) up
#1  0x0000003727a0534b in ply_event_loop_process_pending_events (loop=0x608010) at ply-event-loop.c:1276
1276         number_of_received_events = epoll_wait (loop->epoll_fd, events,
(gdb) l
1271           {
1272             timeout = (int) ((loop->wakeup_time - ply_get_timestamp ()) * 1000);
1273             timeout = MAX (timeout, 0);
1274           }
1275
1276         number_of_received_events = epoll_wait (loop->epoll_fd, events,
1277                                                 sizeof (events), timeout);
=======

Here, timeout is 0 and loop->epoll_fd points to: 

 6.7 S x86_64 # ls -l /proc/20862/fd/7
lrwx------ 1 root root 64 Sep 15 06:08 /proc/20862/fd/7 -> [eventpoll]
Comment 3 Ray Strode [halfline] 2015-09-15 09:31:44 EDT
is it looping ?  a timeout of 0 to epoll_wait() should return immediately.

plymouth --details is a command that is run when there is a failure.  I wonder if this stall is the cause of your problem or just a side effect.  Do you see anything strange in dmesg ?
Comment 4 Martin Cermak 2015-09-18 02:47:39 EDT
Unfortunately I don't have access to the reproducer box any more. Feel free to close this out if you consider it appropriate. I can possibly re-open.
Comment 5 Martin Cermak 2015-09-18 03:53:49 EDT
Got another box, but cannot reproduce.
Comment 6 Ray Strode [halfline] 2015-09-21 13:29:48 EDT
okay let's close this one out since we don't have a customer complaining and it's not clear we'll be able to debug the problem without a reproduction environment.

Note You need to log in before you can comment on or make changes to this bug.