Description of problem: I think I've seen it happening in CI several times. Here's an example from https://build.gluster.org/job/centos7-regression/8932/console : exe = '/build/install/sbin/glusterfs -s localhost --volfile-id gluster/nfs -p /var/run' ++ gdb -ex 'core-file /glfs_epoll001-7625.core' -ex 'set pagination off' -ex 'info proc exe' -ex q ++ tail -1 ++ cut -d ''\''' -f2 ++ cut -d ' ' -f1 + executable_name=/build/install/sbin/glusterfs ++ which /build/install/sbin/glusterfs + executable_path=/build/install/sbin/glusterfs + set +x ========================================================= Start printing backtrace program name : /build/install/sbin/glusterfs corefile : /glfs_epoll001-7625.core ========================================================= warning: core file may not match specified executable file. [New LWP 7634] [New LWP 7628] [New LWP 7632] [New LWP 7631] [New LWP 7630] [New LWP 7633] [New LWP 7659] [New LWP 7629] [New LWP 7627] [New LWP 7625] [Thread debugging using libthread_db enabled] Using host libthread_db library "/lib64/libthread_db.so.1". Core was generated by `/build/install/sbin/glusterfs -s localhost --volfile-id gluster/nfs -p /var/run'. Program terminated with signal 6, Aborted. #0 0x00007f7bd9494337 in raise () from /lib64/libc.so.6 Thread 10 (Thread 0x7f7bdb38e4c0 (LWP 7625)): #0 0x00007f7bd9c97fd7 in pthread_join () from /lib64/libpthread.so.0 No symbol table info available. #1 0x00007f7bdaee3018 in event_dispatch_epoll (event_pool=0xda4d50) at /home/jenkins/root/workspace/centos7-regression/libglusterfs/src/event-epoll.c:848 i = 2 t_id = 140169698473728 pollercount = 2 ret = 0 ev_data = 0xdf6670 __FUNCTION__ = "event_dispatch_epoll" #2 0x00007f7bdae9f05c in gf_event_dispatch (event_pool=0xda4d50) at /home/jenkins/root/workspace/centos7-regression/libglusterfs/src/event.c:115 ret = -1 __FUNCTION__ = "gf_event_dispatch" #3 0x000000000040c1bb in ?? () No symbol table info available. #4 0x0000000000000000 in ?? 
() No symbol table info available. Thread 9 (Thread 0x7f7bd2433700 (LWP 7627)): #0 0x00007f7bd9c9ada2 in pthread_cond_timedwait@@GLIBC_2.3.2 () from /lib64/libpthread.so.0 No symbol table info available. #1 0x00007f7bdae7b20f in gf_timer_proc (data=0xdacd30) at /home/jenkins/root/workspace/centos7-regression/libglusterfs/src/timer.c:141 now = {tv_sec = 167190, tv_nsec = 234545323} reg = 0xdacd30 event = 0xdf6350 tmp = 0x0 old_THIS = 0x0 #2 0x00007f7bd9c96e65 in start_thread () from /lib64/libpthread.so.0 No symbol table info available. #3 0x00007f7bd955c88d in clone () from /lib64/libc.so.6 No symbol table info available. Thread 8 (Thread 0x7f7bd1431700 (LWP 7629)): #0 0x00007f7bd952380d in nanosleep () from /lib64/libc.so.6 No symbol table info available. #1 0x00007f7bd95236a4 in sleep () from /lib64/libc.so.6 No symbol table info available. #2 0x00007f7bdaea0665 in pool_sweeper (arg=0x0) at /home/jenkins/root/workspace/centos7-regression/libglusterfs/src/mem-pool.c:455 state = {death_row = {next = 0x0, prev = 0x0}, cold_lists = {0x0 <repeats 1024 times>}, n_cold_lists = 0} pool_list = 0x0 next_pl = 0x0 pt_pool = 0x0 i = 0 poisoned = false #3 0x00007f7bd9c96e65 in start_thread () from /lib64/libpthread.so.0 No symbol table info available. #4 0x00007f7bd955c88d in clone () from /lib64/libc.so.6 No symbol table info available. Thread 7 (Thread 0x7f7bc7247700 (LWP 7659)): #0 0x00007f7bd955c851 in clone () from /lib64/libc.so.6 No symbol table info available. #1 0x00007f7bd9c96da0 in ?? () from /lib64/libpthread.so.0 No symbol table info available. #2 0x00007f7bc7247700 in ?? () No symbol table info available. #3 0x0000000000000000 in ?? () No symbol table info available. Thread 6 (Thread 0x7f7bcd960700 (LWP 7633)): #0 0x00007f7bd955ce63 in epoll_wait () from /lib64/libc.so.6 No symbol table info available. 
#1 0x00007f7bdaee2d51 in event_dispatch_epoll_worker (data=0xdf62e0) at /home/jenkins/root/workspace/centos7-regression/libglusterfs/src/event-epoll.c:753 event = {events = 1, data = {ptr = 0x100000007, fd = 7, u32 = 7, u64 = 4294967303}} ret = 0 ev_data = 0xdf62e0 event_pool = 0xda4d50 myindex = 1 timetodie = 0 gen = 0 poller_death_notify = {next = 0x0, prev = 0x0} slot = 0x0 tmp = 0x0 __FUNCTION__ = "event_dispatch_epoll_worker" #2 0x00007f7bd9c96e65 in start_thread () from /lib64/libpthread.so.0 No symbol table info available. #3 0x00007f7bd955c88d in clone () from /lib64/libc.so.6 No symbol table info available. Thread 5 (Thread 0x7f7bd0c30700 (LWP 7630)): #0 0x00007f7bd9c9ada2 in pthread_cond_timedwait@@GLIBC_2.3.2 () from /lib64/libpthread.so.0 No symbol table info available. #1 0x00007f7bdaeb90fd in syncenv_task (proc=0xdad7d0) at /home/jenkins/root/workspace/centos7-regression/libglusterfs/src/syncop.c:517 env = 0xdad7d0 task = 0x0 sleep_till = {tv_sec = 1577798841, tv_nsec = 0} ret = 0 #2 0x00007f7bdaeb92f2 in syncenv_processor (thdata=0xdad7d0) at /home/jenkins/root/workspace/centos7-regression/libglusterfs/src/syncop.c:584 env = 0xdad7d0 proc = 0xdad7d0 task = 0x7f7bc0002b10 #3 0x00007f7bd9c96e65 in start_thread () from /lib64/libpthread.so.0 No symbol table info available. #4 0x00007f7bd955c88d in clone () from /lib64/libc.so.6 No symbol table info available. Thread 4 (Thread 0x7f7bd042f700 (LWP 7631)): #0 0x00007f7bd9c9ada2 in pthread_cond_timedwait@@GLIBC_2.3.2 () from /lib64/libpthread.so.0 No symbol table info available. 
#1 0x00007f7bdaeb90fd in syncenv_task (proc=0xdadb90) at /home/jenkins/root/workspace/centos7-regression/libglusterfs/src/syncop.c:517 env = 0xdad7d0 task = 0x0 sleep_till = {tv_sec = 1577798841, tv_nsec = 0} ret = 0 #2 0x00007f7bdaeb92f2 in syncenv_processor (thdata=0xdadb90) at /home/jenkins/root/workspace/centos7-regression/libglusterfs/src/syncop.c:584 env = 0xdad7d0 proc = 0xdadb90 task = 0x0 #3 0x00007f7bd9c96e65 in start_thread () from /lib64/libpthread.so.0 No symbol table info available. #4 0x00007f7bd955c88d in clone () from /lib64/libc.so.6 No symbol table info available. Thread 3 (Thread 0x7f7bcfc2e700 (LWP 7632)): #0 0x00007f7bd9553953 in select () from /lib64/libc.so.6 No symbol table info available. #1 0x00007f7bdaf01721 in runner (arg=0xdb19d0) at /home/jenkins/root/workspace/centos7-regression/contrib/timer-wheel/timer-wheel.c:186 tv = {tv_sec = 0, tv_usec = 902902} base = 0xdb19d0 #2 0x00007f7bd9c96e65 in start_thread () from /lib64/libpthread.so.0 No symbol table info available. #3 0x00007f7bd955c88d in clone () from /lib64/libc.so.6 No symbol table info available. Thread 2 (Thread 0x7f7bd1c32700 (LWP 7628)): #0 0x00007f7bd9c9e381 in sigwait () from /lib64/libpthread.so.0 No symbol table info available. #1 0x000000000040b206 in ?? () No symbol table info available. #2 0x0000000000000000 in ?? () No symbol table info available. Thread 1 (Thread 0x7f7bcd15f700 (LWP 7634)): #0 0x00007f7bd9494337 in raise () from /lib64/libc.so.6 No symbol table info available. #1 0x00007f7bd9495a28 in abort () from /lib64/libc.so.6 No symbol table info available. #2 0x00007f7bd948d156 in __assert_fail_base () from /lib64/libc.so.6 No symbol table info available. #3 0x00007f7bd948d202 in __assert_fail () from /lib64/libc.so.6 No symbol table info available. 
#4 0x00007f7bdac0e606 in rpcsvc_drc_client_unref (drc=0x7f7bc802d830, client=0x7f7bc80428c0) at /home/jenkins/root/workspace/centos7-regression/rpc/rpc-lib/src/rpc-drc.c:265 __PRETTY_FUNCTION__ = "rpcsvc_drc_client_unref" #5 0x00007f7bdac0f57e in rpcsvc_drc_notify (svc=0x7f7bc8026460, xl=0x7f7bc8012aa0, event=RPCSVC_EVENT_DISCONNECT, data=0x7f7bc0000da0) at /home/jenkins/root/workspace/centos7-regression/rpc/rpc-lib/src/rpc-drc.c:647 ret = 0 trans = 0x7f7bc0000da0 client = 0x7f7bc80428c0 drc = 0x7f7bc802d830 __PRETTY_FUNCTION__ = "rpcsvc_drc_notify" #6 0x00007f7bdac007e5 in rpcsvc_handle_disconnect (svc=0x7f7bc8026460, trans=0x7f7bc0000da0) at /home/jenkins/root/workspace/centos7-regression/rpc/rpc-lib/src/rpcsvc.c:953 event = RPCSVC_EVENT_DISCONNECT wrappers = 0x7f7bc8042740 wrapper = 0x7f7bc80264e0 ret = -1 i = 0 wrapper_count = 1 listener = 0x0 #7 0x00007f7bdac00962 in rpcsvc_notify (trans=0x7f7bc0000da0, mydata=0x7f7bc8026460, event=RPC_TRANSPORT_DISCONNECT, data=0x7f7bc0000da0) at /home/jenkins/root/workspace/centos7-regression/rpc/rpc-lib/src/rpcsvc.c:989 ret = -1 msg = 0x0 new_trans = 0x0 svc = 0x7f7bc8026460 listener = 0x0 __FUNCTION__ = "rpcsvc_notify" #8 0x00007f7bdac06a50 in rpc_transport_notify (this=0x7f7bc0000da0, event=RPC_TRANSPORT_DISCONNECT, data=0x7f7bc0000da0) at /home/jenkins/root/workspace/centos7-regression/rpc/rpc-lib/src/rpc-transport.c:545 ret = -1 __FUNCTION__ = "rpc_transport_notify" #9 0x00007f7bcf211147 in socket_event_poll_err (this=0x7f7bc0000da0, gen=1, idx=7) at /home/jenkins/root/workspace/centos7-regression/rpc/rpc-transport/socket/src/socket.c:1417 priv = 0x7f7bc0001380 socket_closed = true __FUNCTION__ = "socket_event_poll_err" #10 0x00007f7bcf2172b5 in socket_event_handler (fd=16, idx=7, gen=1, data=0x7f7bc0000da0, poll_in=1, poll_out=0, poll_err=0, event_thread_died=0 '\000') at /home/jenkins/root/workspace/centos7-regression/rpc/rpc-transport/socket/src/socket.c:3064 sa = 0x7f7bc0000e60 this = 0x7f7bc0000da0 priv = 
0x7f7bc0001380 ret = -1 ctx = 0xd6e010 socket_closed = false notify_handled = true __FUNCTION__ = "socket_event_handler" #11 0x00007f7bdaee286d in event_dispatch_epoll_handler (event_pool=0xda4d50, event=0x7f7bcd15e140) at /home/jenkins/root/workspace/centos7-regression/libglusterfs/src/event-epoll.c:650 ev_data = 0x7f7bcd15e144 slot = 0xdd7d10 handler = 0x7f7bcf216c9d <socket_event_handler> data = 0x7f7bc0000da0 idx = 7 gen = 1 ret = 0 fd = 16 handled_error_previously = false __FUNCTION__ = "event_dispatch_epoll_handler" #12 0x00007f7bdaee2d86 in event_dispatch_epoll_worker (data=0xdf6670) at /home/jenkins/root/workspace/centos7-regression/libglusterfs/src/event-epoll.c:763 event = {events = 1, data = {ptr = 0x100000007, fd = 7, u32 = 7, u64 = 4294967303}} ret = 1 ev_data = 0xdf6670 event_pool = 0xda4d50 myindex = 2 timetodie = 0 gen = 0 poller_death_notify = {next = 0x0, prev = 0x0} slot = 0x0 tmp = 0x0 __FUNCTION__ = "event_dispatch_epoll_worker" #13 0x00007f7bd9c96e65 in start_thread () from /lib64/libpthread.so.0 No symbol table info available. #14 0x00007f7bd955c88d in clone () from /lib64/libc.so.6 No symbol table info available. ========================================================= Finish backtrace program name : /build/install/sbin/glusterfs corefile : /glfs_epoll001-7625.core =========================================================
Can you take a look at this crash?
Posted a patch to resolve this issue: https://review.gluster.org/#/c/glusterfs/+/23968/
REVIEW: https://review.gluster.org/23968 (test: tests/bugs/rpc/bug-847624.t is crashed) posted (#4) for review on master by MOHIT AGRAWAL
This bug has been moved to https://github.com/gluster/glusterfs/issues/1038 and will be tracked there from now on. Visit the GitHub issue URL for further details.
The needinfo request[s] on this closed bug have been removed as they have been unresolved for 1000 days