Hide Forgot
glusterfs server crashed in rpc_clnt_connection_cleanup. Add-brick and remove-brick were being run in a loop on a distribute volume, while xlator enable and disable (io-cache & write-behind) were running in parallel. dbench was running on the client. (The same operations as in bugs 763452 and 1721 caused this.) This is the backtrace of the core generated. Core was generated by `/usr/local/sbin/glusterfs --xlator-option vol-server.listen-port=7006 -s localh'. Program terminated with signal 6, Aborted. #0 0x00007f888bdbea75 in raise () from /lib/libc.so.6 (gdb) bt #0 0x00007f888bdbea75 in raise () from /lib/libc.so.6 #1 0x00007f888bdc25c0 in abort () from /lib/libc.so.6 #2 0x00007f888bdb7941 in __assert_fail () from /lib/libc.so.6 #3 0x00007f888c9a7dd5 in __gf_free (free_ptr=0x17f36d8) at ../../../libglusterfs/src/mem-pool.c:280 #4 0x00007f888c9830d9 in _gf_log (domain=0x7f888c760707 "rpc-clnt", file=0x7f888c7606e0 "../../../../rpc/rpc-lib/src/rpc-clnt.c", function=0x7f888c760ee0 "saved_frames_unwind", line=335, level=GF_LOG_ERROR, fmt=0x7f888c7607b8 "forced unwinding frame type(%s) op(%s(%d)) called at %s") at ../../../libglusterfs/src/logging.c:475 #5 0x00007f888c75b4de in saved_frames_unwind (saved_frames=0x17f3788) at ../../../../rpc/rpc-lib/src/rpc-clnt.c:328 #6 0x00007f888c75b62c in saved_frames_destroy (frames=0x17f3788) at ../../../../rpc/rpc-lib/src/rpc-clnt.c:356 #7 0x00007f888c75bb91 in rpc_clnt_connection_cleanup (conn=0x17f2998) at ../../../../rpc/rpc-lib/src/rpc-clnt.c:504 #8 0x00007f888c75def3 in rpc_clnt_destroy (rpc=0x17f2968) at ../../../../rpc/rpc-lib/src/rpc-clnt.c:1451 #9 0x0000000000404911 in cleanup_and_exit (signum=15) at ../../../glusterfsd/src/glusterfsd.c:660 #10 0x0000000000406184 in glusterfs_sigwaiter (arg=0x7fff6733ae20) at ../../../glusterfsd/src/glusterfsd.c:1180 #11 0x00007f888c1149ca in start_thread () from /lib/libpthread.so.0 #12 0x00007f888be716fd in clone () from /lib/libc.so.6 #13 0x0000000000000000 in ?? 
() (gdb) (gdb) info thr 5 Thread 14090 0x00007f888c11c464 in __lll_lock_wait () from /lib/libpthread.so.0 4 Thread 14095 0x00007f888be3538d in nanosleep () from /lib/libc.so.6 3 Thread 14110 0x00007f888c119bc9 in pthread_cond_timedwait@@GLIBC_2.3.2 () from /lib/libpthread.so.0 2 Thread 14111 0x00007f888c119bc9 in pthread_cond_timedwait@@GLIBC_2.3.2 () from /lib/libpthread.so.0 * 1 Thread 14092 0x00007f888bdbea75 in raise () from /lib/libc.so.6 (gdb) (gdb) t 5 [Switching to thread 5 (Thread 14090)]#0 0x00007f888c11c464 in __lll_lock_wait () from /lib/libpthread.so.0 (gdb) bt #0 0x00007f888c11c464 in __lll_lock_wait () from /lib/libpthread.so.0 #1 0x00007f888c1175d9 in _L_lock_953 () from /lib/libpthread.so.0 #2 0x00007f888c1173fb in pthread_mutex_lock () from /lib/libpthread.so.0 #3 0x00007f888a3890b9 in socket_event_handler (fd=6, idx=0, data=0x17f2b88, poll_in=1, poll_out=0, poll_err=0) at ../../../../../rpc/rpc-transport/socket/src/socket.c:1718 #4 0x00007f888c9a6eab in event_dispatch_epoll_handler (event_pool=0x17f0d78, events=0x17f4998, i=0) at ../../../libglusterfs/src/event.c:812 #5 0x00007f888c9a70bb in event_dispatch_epoll (event_pool=0x17f0d78) at ../../../libglusterfs/src/event.c:876 #6 0x00007f888c9a7423 in event_dispatch (event_pool=0x17f0d78) at ../../../libglusterfs/src/event.c:984 #7 0x0000000000406793 in main (argc=15, argv=0x7fff6733b018) at ../../../glusterfsd/src/glusterfsd.c:1408 (gdb) (gdb) p priv->lock.__data.__list.__prev $3 = (struct __pthread_internal_list *) 0x62696c0062696c2f (gdb) p *priv->lock.__data.__list.__prev Cannot access memory at address 0x62696c0062696c2f (gdb) (gdb) p priv->lock.__data.__list.__next $4 = (struct __pthread_internal_list *) 0x6f732e735f636367 (gdb) p *priv->lock.__data.__list.__next Cannot access memory at address 0x6f732e735f636367 (gdb)
Fixed by patch http://patches.gluster.com/patch/5052/