In a 2x2 distributed-replicate volume with 2 FUSE clients and 1 NFS client, one of the glusterfs servers crashed. The NFS client was running an fs-perf test while the 2 FUSE clients were trying to remove the same directory in parallel. This is the backtrace of the core. Core was generated by `/opt/glusterfs/3.2.5qa6/sbin/glusterfsd --xlator-option mirror-server.listen-po'. Program terminated with signal 11, Segmentation fault. #0 0x00007f9de1a4bc9a in grant_blocked_inode_locks (this=0x1692f10, pl_inode=<value optimized out>, dom=0x7f9dd4000f20) at inodelk.c:361 361 STACK_UNWIND_STRICT (inodelk, lock->frame, 0, 0); Missing separate debuginfos, use: debuginfo-install glibc-2.12-1.25.el6_1.3.x86_64 libgcc-4.4.5-6.el6.x86_64 (gdb) bt #0 0x00007f9de1a4bc9a in grant_blocked_inode_locks (this=0x1692f10, pl_inode=<value optimized out>, dom=0x7f9dd4000f20) at inodelk.c:361 #1 0x00007f9de1a4c8a3 in pl_inode_setlk (frame=0x7f9de40114c4, this=0x1692f10, volume=0x1694750 "mirror-marker", inode=0x7f9de03f1024, cmd=7, flock=0x7f9dd35f4bb0, loc=0x7f9db4680f10, fd=0x0) at inodelk.c:507 #2 pl_common_inodelk (frame=0x7f9de40114c4, this=0x1692f10, volume=0x1694750 "mirror-marker", inode=0x7f9de03f1024, cmd=7, flock=0x7f9dd35f4bb0, loc=0x7f9db4680f10, fd=0x0) at inodelk.c:619 #3 0x00007f9de1a4cc9d in pl_inodelk (frame=0x1692f10, this=0x7f9de4e81990, volume=0x0, loc=<value optimized out>, cmd=0, flock=0x7f9db46827a0) at inodelk.c:662 #4 0x00007f9de4e519e5 in default_inodelk (frame=<value optimized out>, this=0x1693de0, volume=0x1694750 "mirror-marker", loc=0x7f9db4680f10, cmd=7, lock=0x7f9dd35f4bb0) at defaults.c:1060 #5 0x00007f9de1621b42 in mq_release_parent_lock (frame=<value optimized out>, cookie=<value optimized out>, this=0x1694f30, op_ret=<value optimized out>, op_errno=<value optimized out>) at marker-quota.c:1386 #6 0x00007f9de18311c1 in iot_setxattr_cbk (frame=0x7f9de412f608, cookie=<value optimized out>, this=<value optimized out>, op_ret=0, op_errno=0) at io-threads.c:1511 #7 
0x00007f9de4e59391 in default_setxattr_cbk (frame=0x7f9de3f18ef8, cookie=<value optimized out>, this=<value optimized out>, op_ret=0, op_errno=0) at defaults.c:273 #8 0x00007f9de1c54c41 in posix_acl_setxattr_cbk () from /opt/glusterfs/3.2.5qa6/lib64/glusterfs/3.2.5qa6/xlator/features/access-control.so #9 0x00007f9de1e65ef5 in posix_setxattr (frame=0x7f9de40edf40, this=<value optimized out>, loc=<value optimized out>, dict=<value optimized out>, flags=<value optimized out>) at posix.c:3273 #10 0x00007f9de1c573c4 in posix_acl_setxattr () from /opt/glusterfs/3.2.5qa6/lib64/glusterfs/3.2.5qa6/xlator/features/access-control.so #11 0x00007f9de4e5410d in default_setxattr (frame=<value optimized out>, this=0x1692f10, loc=0x7f9de3c9c3b0, dict=0x7f9dbc5ab7b0, flags=0) at defaults.c:828 #12 0x00007f9de1833d8d in iot_setxattr_wrapper (frame=<value optimized out>, this=0x1693de0, loc=0x7f9de3c9c3b0, dict=0x7f9dbc5ab7b0, flags=0) at io-threads.c:1520 #13 0x00007f9de4e6269e in call_resume_wind (stub=0x7f9de3c9c378) at call-stub.c:2259 #14 call_resume (stub=0x7f9de3c9c378) at call-stub.c:3859 #15 0x00007f9de183a009 in iot_worker (data=0x169aad0) at io-threads.c:129 #16 0x0000003a73a077e1 in start_thread () from /lib64/libpthread.so.0 #17 0x0000003a732e577d in clone () from /lib64/libc.so.6 (gdb) f 0 #0 0x00007f9de1a4bc9a in grant_blocked_inode_locks (this=0x1692f10, pl_inode=<value optimized out>, dom=0x7f9dd4000f20) at inodelk.c:361 361 STACK_UNWIND_STRICT (inodelk, lock->frame, 0, 0); (gdb) l 356 lock->user_flock.l_len); 357 358 pl_trace_out (this, lock->frame, NULL, NULL, F_SETLKW, 359 &lock->user_flock, 0, 0, lock->volume); 360 361 STACK_UNWIND_STRICT (inodelk, lock->frame, 0, 0); 362 } 363 364 } 365 (gdb) p *lock->frame $1 = {root = 0x7f9db46827d8, parent = 0x7f9db46827d8, next = 0x0, prev = 0x0, local = 0xa5, this = 0x0, ret = 0, ref_count = 0, lock = 0, cookie = 0x0, complete = _gf_false, op = GF_FOP_NULL, begin = {tv_sec = 140315313231072, tv_usec = 140315313231072}, end = 
{ tv_sec = 140315313317944, tv_usec = 140315313317944}, wind_from = 0x7f9db4682848 "H(h\264\235\177", wind_to = 0x7f9db4682848 "H(h\264\235\177", unwind_from = 0x7f9db4682858 "X(h\264\235\177", unwind_to = 0x7f9db4682858 "X(h\264\235\177"} (gdb) p *lock $2 = {list = {next = 0x0, prev = 0x0}, blocked_locks = {next = 0x0, prev = 0x0}, fl_type = 0, fl_start = 140315313104960, fl_end = 140315313104960, volume = 0x7f9db4682798 "\230'h\264\235\177", user_flock = {l_type = 10136, l_whence = -19352, l_start = 140315313317800, l_len = 140315313317800, l_pid = -1268242504, l_owner = 140315313317816}, this = 0x7f9db46827c8, fd = 0x7f9db46827c8, frame = 0x7f9db46827d8, transport = 0x7f9db46827d8, client_pid = 0, owner = 0} (gdb) 0x7f9db46827c8, frame = 0x7f9db46827d8, transport = 0x7f9db46827d8, client_pid = 0, owner = 0} (gdb) info thr 13 Thread 0x7f9dd39f9700 (LWP 29091) 0x0000003a73a0c112 in ?? () from /lib64/libpthread.so.0 12 Thread 0x7f9dd34f4700 (LWP 29709) 0x0000003a73a0c112 in ?? () from /lib64/libpthread.so.0 11 Thread 0x7f9dd36f6700 (LWP 29707) 0x0000003a73a0c112 in ?? () from /lib64/libpthread.so.0 10 Thread 0x7f9dd32f2700 (LWP 29711) 0x0000003a73a0c112 in ?? () from /lib64/libpthread.so.0 9 Thread 0x7f9dd28f0700 (LWP 30391) 0x0000003a73a0c110 in ?? () from /lib64/libpthread.so.0 8 Thread 0x7f9dd3cfc700 (LWP 26578) marker_lookup (frame=0x7f9db8600070, this=0x1694f30, loc=0x1833f78, xattr_req=0x0) at marker.c:2169 7 Thread 0x7f9dd29f1700 (LWP 30390) 0x0000003a73a0c112 in ?? () from /lib64/libpthread.so.0 6 Thread 0x7f9dd33f3700 (LWP 29710) 0x0000003a73a0c112 in ?? 
() from /lib64/libpthread.so.0 5 Thread 0x7f9de0e19700 (LWP 26570) 0x0000003a73a0b74b in pthread_cond_timedwait@@GLIBC_2.3.2 () from /lib64/libpthread.so.0 4 Thread 0x7f9de3c05700 (LWP 26563) 0x0000003a73a0f235 in sigwait () from /lib64/libpthread.so.0 3 Thread 0x7f9de2a78700 (LWP 26564) 0x0000003a732aabed in nanosleep () from /lib64/libc.so.6 2 Thread 0x7f9de4a05700 (LWP 26562) 0x0000003a732e5d73 in epoll_wait () from /lib64/libc.so.6 * 1 Thread 0x7f9dd35f5700 (LWP 29708) 0x00007f9de1a4bc9a in grant_blocked_inode_locks (this=0x1692f10, pl_inode=<value optimized out>, dom=0x7f9dd4000f20) at inodelk.c:361 (gdb) f 1 #1 0x00007f9de4e71547 in mem_get (mem_pool=0x1687950) at mem-pool.c:388 388 LOCK (&mem_pool->lock); (gdb) p *mem_pool $3 = {list = {next = 0x1687950, prev = 0x1687950}, hot_count = 16384, cold_count = 0, lock = 1, padded_sizeof_type = 164, pool = 0x7f9de3f12010, pool_end = 0x7f9de41a2010, real_sizeof_type = 144} (gdb) t 8 [Switching to thread 8 (Thread 0x7f9dd3cfc700 (LWP 26578))]#0 marker_lookup (frame=0x7f9db8600070, this=0x1694f30, loc=0x1833f78, xattr_req=0x0) at marker.c:2169 2169 { (gdb) bt #0 marker_lookup (frame=0x7f9db8600070, this=0x1694f30, loc=0x1833f78, xattr_req=0x0) at marker.c:2169 #1 0x00007f9de1401c38 in io_stats_lookup (frame=<value optimized out>, this=0x1696340, loc=0x1833f78, xattr_req=0x0) at io-stats.c:1745 #2 0x00007f9de11d2147 in resolve_deep_cbk (frame=0x1833db8, cookie=<value optimized out>, this=<value optimized out>, op_ret=<value optimized out>, op_errno=<value optimized out>, inode=<value optimized out>, buf=0x7f9dd3cfbc80, xattr=0x7f9db8036890, postparent=0x7f9dd3cfbc10) at server-resolve.c:232 #3 0x00007f9de13feb44 in io_stats_lookup_cbk (frame=0x7f9da873dab0, cookie=<value optimized out>, this=<value optimized out>, op_ret=0, op_errno=22, inode=0x7f9da873d970, buf=0x7f9dd3cfbc80, xattr=0x7f9db8036890, postparent=0x7f9dd3cfbc10) at io-stats.c:1391 #4 0x00007f9de161590a in marker_lookup_cbk (frame=0x7f9da873db50, 
cookie=<value optimized out>, this=0x1694f30, op_ret=0, op_errno=22, inode=0x7f9da873d970, buf=0x7f9dd3cfbc80, dict=0x7f9db8036890, postparent=0x7f9dd3cfbc10) at marker.c:2148 #5 0x00007f9de1832e1a in iot_lookup_cbk (frame=0x7f9da873dce0, cookie=<value optimized out>, this=<value optimized out>, op_ret=0, op_errno=22, inode=0x7f9da873d970, buf=0x7f9dd3cfbc80, xattr=0x7f9db8036890, postparent=0x7f9dd3cfbc10) at io-threads.c:199 #6 0x00007f9de1a46367 in pl_lookup_cbk (frame=0x7f9db85ffcb0, cookie=<value optimized out>, this=<value optimized out>, op_ret=0, op_errno=22, inode=0x7f9da873d970, buf=0x7f9dd3cfbc80, dict=0x7f9db8036890, postparent=0x7f9dd3cfbc10) at posix.c:1452 #7 0x00007f9de1c569fa in posix_acl_lookup_cbk () from /opt/glusterfs/3.2.5qa6/lib64/glusterfs/3.2.5qa6/xlator/features/access-control.so #8 0x00007f9de1e6d1fd in posix_lookup (frame=0x7f9db85ffdf0, this=<value optimized out>, loc=0x7f9de3c2ecd0, xattr_req=<value optimized out>) at posix.c:695 #9 0x00007f9de1c59693 in posix_acl_lookup () from /opt/glusterfs/3.2.5qa6/lib64/glusterfs/3.2.5qa6/xlator/features/access-control.so #10 0x00007f9de1a45324 in pl_lookup (frame=0x7f9db85ffcb0, this=0x1692f10, loc=0x7f9de3c2ecd0, xattr_req=0x0) at posix.c:1491 #11 0x00007f9de183648e in iot_lookup_wrapper (frame=<value optimized out>, this=0x1693de0, loc=0x7f9de3c2ecd0, xattr_req=0x0) at io-threads.c:209 #12 0x00007f9de4e6296a in call_resume_wind (stub=0x7f9de3c2ec98) at call-stub.c:2408 #13 call_resume (stub=0x7f9de3c2ec98) at call-stub.c:3859 #14 0x00007f9de183a009 in iot_worker (data=0x169aad0) at io-threads.c:129 #15 0x0000003a73a077e1 in start_thread () from /lib64/libpthread.so.0 #16 0x0000003a732e577d in clone () from /lib64/libc.so.6 (gdb)
Can we have a new round of tests for this? Some patches to the locks and core components in the master branch are expected to fix it.
Not seen in a long time; please re-open if it is seen again. (Tried with 3.4.0qa.)